src/gromacs/nbnxm/kernels_reference/kernel_ref_outer.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #define UNROLLI    4
  37 #define UNROLLJ    4
  38
  39 static_assert(UNROLLI == c_nbnxnCpuIClusterSize, "UNROLLI should match the i-cluster size");
  40
  41 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
  42 #define X_STRIDE   3
  43 #define F_STRIDE   3
  44 /* Local i-atom buffer strides */
  45 #define XI_STRIDE  3
  46 #define FI_STRIDE  3
  47
  48
  49 /* All functionality defines are set here, except for:
  50  * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
  51  * CHECK_EXCLS, which is set just before including the inner loop contents.
  52  */
  53
  54 /* We always calculate shift forces, because it's cheap anyhow */
  55 #define CALC_SHIFTFORCES
  56
  57 #ifdef CALC_COUL_RF
  58 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecRF ## ljt ## feg ## _ref
  59 #endif
  60 #ifdef CALC_COUL_TAB
  61 #ifndef VDW_CUTOFF_CHECK
  62 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTab ## ljt ## feg ## _ref
  63 #else
  64 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTabTwinCut ## ljt ## feg ## _ref
  65 #endif
  66 #endif
  67
  68 #if defined LJ_CUT && !defined LJ_EWALD
  69 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJ, feg)
  70 #elif defined LJ_FORCE_SWITCH
  71 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJFsw, feg)
  72 #elif defined LJ_POT_SWITCH
  73 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJPsw, feg)
  74 #elif defined LJ_EWALD
  75 #ifdef LJ_EWALD_COMB_GEOM
  76 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombGeom, feg)
  77 #else
  78 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombLB, feg)
  79 #endif
  80 #else
  81 #error "No VdW type defined"
  82 #endif
  83
  84 void
  85 #ifndef CALC_ENERGIES
  86 NBK_FUNC_NAME(_F)     // NOLINT(misc-definitions-in-headers)
  87 #else
  88 #ifndef ENERGY_GROUPS
  89 NBK_FUNC_NAME(_VF)    // NOLINT(misc-definitions-in-headers)
  90 #else
  91 NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers)
  92 #endif
  93 #endif
  94 #undef NBK_FUNC_NAME
  95 #undef NBK_FUNC_NAME2
  96 (const NbnxnPairlistCpu     *nbl,
  97  const nbnxn_atomdata_t     *nbat,
  98  const interaction_const_t  *ic,
  99  const rvec                 *shift_vec,
 100  nbnxn_atomdata_output_t    *out)
 101 {
 102     /* Unpack pointers for output */
 103     real               *f      = out->f.data();
 104 #ifdef CALC_SHIFTFORCES
 105     real               *fshift = out->fshift.data();
 106 #endif
 107 #ifdef CALC_ENERGIES
 108     real               *Vvdw   = out->Vvdw.data();
 109     real               *Vc     = out->Vc.data();
 110 #endif
 111
 112     const nbnxn_cj_t   *l_cj;
 113     real                rcut2;
 114 #ifdef VDW_CUTOFF_CHECK
 115     real                rvdw2;
 116 #endif
 117     int                 ntype2;
 118     real                facel;
 119     int                 ci, ci_sh;
 120     int                 ish, ishf;
 121     gmx_bool            do_LJ, half_LJ, do_coul;
 122     int                 cjind0, cjind1, cjind;
 123
 124     real                xi[UNROLLI*XI_STRIDE];
 125     real                fi[UNROLLI*FI_STRIDE];
 126     real                qi[UNROLLI];
 127
 128 #ifdef CALC_ENERGIES
 129 #ifndef ENERGY_GROUPS
 130
 131     real       Vvdw_ci, Vc_ci;
 132 #else
 133     int        egp_mask;
 134     int        egp_sh_i[UNROLLI];
 135 #endif
 136 #endif
 137 #ifdef LJ_POT_SWITCH
 138     real       swV3, swV4, swV5;
 139     real       swF2, swF3, swF4;
 140 #endif
 141 #ifdef LJ_EWALD
 142     real        lje_coeff2, lje_coeff6_6;
 143 #ifdef CALC_ENERGIES
 144     real        lje_vc;
 145 #endif
 146 #endif
 147
 148 #ifdef CALC_COUL_RF
 149     real       k_rf2;
 150 #ifdef CALC_ENERGIES
 151     real       k_rf, c_rf;
 152 #endif
 153 #endif
 154 #ifdef CALC_COUL_TAB
 155 #ifdef CALC_ENERGIES
 156     real       halfsp;
 157 #endif
 158 #if !GMX_DOUBLE
 159     const real            *tab_coul_FDV0;
 160 #else
 161     const real            *tab_coul_F;
 162     const real gmx_unused *tab_coul_V;
 163 #endif
 164 #endif
 165
 166 #ifdef COUNT_PAIRS
 167     int npair = 0;
 168 #endif
 169
 170 #ifdef LJ_POT_SWITCH
 171     swV3 = ic->vdw_switch.c3;
 172     swV4 = ic->vdw_switch.c4;
 173     swV5 = ic->vdw_switch.c5;
 174     swF2 = 3*ic->vdw_switch.c3;
 175     swF3 = 4*ic->vdw_switch.c4;
 176     swF4 = 5*ic->vdw_switch.c5;
 177 #endif
 178
 179     const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
 180
 181 #ifdef LJ_EWALD
 182     lje_coeff2   = ic->ewaldcoeff_lj*ic->ewaldcoeff_lj;
 183     lje_coeff6_6 = lje_coeff2*lje_coeff2*lje_coeff2/6.0;
 184 #ifdef CALC_ENERGIES
 185     lje_vc       = ic->sh_lj_ewald;
 186 #endif
 187
 188     const real *ljc = nbatParams.nbfp_comb.data();
 189 #endif
 190
 191 #ifdef CALC_COUL_RF
 192     k_rf2 = 2*ic->k_rf;
 193 #ifdef CALC_ENERGIES
 194     k_rf = ic->k_rf;
 195     c_rf = ic->c_rf;
 196 #endif
 197 #endif
 198 #ifdef CALC_COUL_TAB
 199     const real tab_coul_scale = ic->coulombEwaldTables->scale;
 200 #ifdef CALC_ENERGIES
 201     halfsp = 0.5/tab_coul_scale;
 202 #endif
 203
 204 #if !GMX_DOUBLE
 205     tab_coul_FDV0 = ic->coulombEwaldTables->tableFDV0.data();
 206 #else
 207     tab_coul_F    = ic->coulombEwaldTables->tableF.data();
 208     tab_coul_V    = ic->coulombEwaldTables->tableV.data();
 209 #endif
 210 #endif
 211
 212 #ifdef ENERGY_GROUPS
 213     egp_mask = (1 << nbatParams.neg_2log) - 1;
 214 #endif
 215
 216
 217     rcut2                = ic->rcoulomb*ic->rcoulomb;
 218 #ifdef VDW_CUTOFF_CHECK
 219     rvdw2                = ic->rvdw*ic->rvdw;
 220 #endif
 221
 222     ntype2               = nbatParams.numTypes*2;
 223     const real *nbfp     = nbatParams.nbfp.data();
 224     const real *q        = nbatParams.q.data();
 225     const int  *type     = nbatParams.type.data();
 226     facel                = ic->epsfac;
 227     const real *shiftvec = shift_vec[0];
 228     const real *x        = nbat->x().data();
 229
 230     l_cj = nbl->cj.data();
 231
 232     for (const nbnxn_ci_t &ciEntry : nbl->ci)
 233     {
 234         int i, d;
 235
 236         ish              = (ciEntry.shift & NBNXN_CI_SHIFT);
 237         /* x, f and fshift are assumed to be stored with stride 3 */
 238         ishf             = ish*DIM;
 239         cjind0           = ciEntry.cj_ind_start;
 240         cjind1           = ciEntry.cj_ind_end;
 241         /* Currently only works super-cells equal to sub-cells */
 242         ci               = ciEntry.ci;
 243         ci_sh            = (ish == CENTRAL ? ci : -1);
 244
 245         /* We have 5 LJ/C combinations, but use only three inner loops,
 246          * as the other combinations are unlikely and/or not much faster:
 247          * inner half-LJ + C for half-LJ + C / no-LJ + C
 248          * inner LJ + C      for full-LJ + C
 249          * inner LJ          for full-LJ + no-C / half-LJ + no-C
 250          */
 251         do_LJ   = ((ciEntry.shift & NBNXN_CI_DO_LJ(0)) != 0);
 252         do_coul = ((ciEntry.shift & NBNXN_CI_DO_COUL(0)) != 0);
 253         half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
 254 #ifdef CALC_ENERGIES
 255
 256 #ifdef LJ_EWALD
 257         gmx_bool do_self = TRUE;
 258 #else
 259         gmx_bool do_self = do_coul;
 260 #endif
 261
 262 #ifndef ENERGY_GROUPS
 263         Vvdw_ci = 0;
 264         Vc_ci   = 0;
 265 #else
 266         for (i = 0; i < UNROLLI; i++)
 267         {
 268             egp_sh_i[i] = ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask)*nbatParams.nenergrp;
 269         }
 270 #endif
 271 #endif
 272
 273         for (i = 0; i < UNROLLI; i++)
 274         {
 275             for (d = 0; d < DIM; d++)
 276             {
 277                 xi[i*XI_STRIDE+d] = x[(ci*UNROLLI+i)*X_STRIDE+d] + shiftvec[ishf+d];
 278                 fi[i*FI_STRIDE+d] = 0;
 279             }
 280
 281             qi[i] = facel*q[ci*UNROLLI+i];
 282         }
 283
 284 #ifdef CALC_ENERGIES
 285         if (do_self)
 286         {
 287             real Vc_sub_self;
 288
 289 #ifdef CALC_COUL_RF
 290             Vc_sub_self = 0.5*c_rf;
 291 #endif
 292 #ifdef CALC_COUL_TAB
 293 #if GMX_DOUBLE
 294             Vc_sub_self = 0.5*tab_coul_V[0];
 295 #else
 296             Vc_sub_self = 0.5*tab_coul_FDV0[2];
 297 #endif
 298 #endif
 299
 300             if (l_cj[ciEntry.cj_ind_start].cj == ci_sh)
 301             {
 302                 for (i = 0; i < UNROLLI; i++)
 303                 {
 304                     int egp_ind;
 305 #ifdef ENERGY_GROUPS
 306                     egp_ind = egp_sh_i[i] + ((nbatParams.energrp[ci] >> (i*nbatParams.neg_2log)) & egp_mask);
 307 #else
 308                     egp_ind = 0;
 309 #endif
 310                     /* Coulomb self interaction */
 311                     Vc[egp_ind]   -= qi[i]*q[ci*UNROLLI+i]*Vc_sub_self;
 312
 313 #ifdef LJ_EWALD
 314                     /* LJ Ewald self interaction */
 315                     Vvdw[egp_ind] += 0.5*nbatParams.nbfp[nbatParams.type[ci*UNROLLI+i]*(nbatParams.numTypes + 1)*2]/6*lje_coeff6_6;
 316 #endif
 317                 }
 318             }
 319         }
 320 #endif  /* CALC_ENERGIES */
 321
 322         cjind = cjind0;
 323         while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
 324         {
 325 #define CHECK_EXCLS
 326             if (half_LJ)
 327             {
 328 #define CALC_COULOMB
 329 #define HALF_LJ
 330 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
 331 #undef HALF_LJ
 332 #undef CALC_COULOMB
 333             }
 334             else if (do_coul)
 335             {
 336 #define CALC_COULOMB
 337 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
 338 #undef CALC_COULOMB
 339             }
 340             else
 341             {
 342 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
 343             }
 344 #undef CHECK_EXCLS
 345             cjind++;
 346         }
 347
 348         for (; (cjind < cjind1); cjind++)
 349         {
 350             if (half_LJ)
 351             {
 352 #define CALC_COULOMB
 353 #define HALF_LJ
 354 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
 355 #undef HALF_LJ
 356 #undef CALC_COULOMB
 357             }
 358             else if (do_coul)
 359             {
 360 #define CALC_COULOMB
 361 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
 362 #undef CALC_COULOMB
 363             }
 364             else
 365             {
 366 #include "gromacs/nbnxm/kernels_reference/kernel_ref_inner.h"
 367             }
 368         }
 369
 370         /* Add accumulated i-forces to the force array */
 371         for (i = 0; i < UNROLLI; i++)
 372         {
 373             for (d = 0; d < DIM; d++)
 374             {
 375                 f[(ci*UNROLLI+i)*F_STRIDE+d] += fi[i*FI_STRIDE+d];
 376             }
 377         }
 378 #ifdef CALC_SHIFTFORCES
 379         if (fshift != nullptr)
 380         {
 381             /* Add i forces to shifted force list */
 382             for (i = 0; i < UNROLLI; i++)
 383             {
 384                 for (d = 0; d < DIM; d++)
 385                 {
 386                     fshift[ishf+d] += fi[i*FI_STRIDE+d];
 387                 }
 388             }
 389         }
 390 #endif
 391
 392 #ifdef CALC_ENERGIES
 393 #ifndef ENERGY_GROUPS
 394         *Vvdw += Vvdw_ci;
 395         *Vc   += Vc_ci;
 396 #endif
 397 #endif
 398     }
 399
 400 #ifdef COUNT_PAIRS
 401     printf("atom pairs %d\n", npair);
 402 #endif
 403 }
 404
 405 #undef CALC_SHIFTFORCES
 406
 407 #undef X_STRIDE
 408 #undef F_STRIDE
 409 #undef XI_STRIDE
 410 #undef FI_STRIDE
 411
 412 #undef UNROLLI
 413 #undef UNROLLJ