src/gromacs/nbnxm/kernels_reference/kernel_ref_outer.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
   5  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 #define UNROLLI 4
  38 #define UNROLLJ 4
  39
  40 static_assert(UNROLLI == c_nbnxnCpuIClusterSize, "UNROLLI should match the i-cluster size");
  41
  42 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
  43 #define X_STRIDE 3
  44 #define F_STRIDE 3
  45 /* Local i-atom buffer strides */
  46 #define XI_STRIDE 3
  47 #define FI_STRIDE 3
  48
  49
  50 /* All functionality defines are set here, except for:
  51  * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
  52  * CHECK_EXCLS, which is set just before including the inner loop contents.
  53  */
  54
  55 /* We always calculate shift forces, because it's cheap anyhow */
  56 #define CALC_SHIFTFORCES
  57
  58 #ifdef CALC_COUL_RF
  59 #    define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel##_ElecRF##ljt##feg##_ref
  60 #endif
  61 #ifdef CALC_COUL_TAB
  62 #    ifndef VDW_CUTOFF_CHECK
  63 #        define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel##_ElecQSTab##ljt##feg##_ref
  64 #    else
  65 #        define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel##_ElecQSTabTwinCut##ljt##feg##_ref
  66 #    endif
  67 #endif
  68
  69 #if defined LJ_CUT && !defined LJ_EWALD
  70 #    define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJ, feg)
  71 #elif defined LJ_FORCE_SWITCH
  72 #    define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJFsw, feg)
  73 #elif defined LJ_POT_SWITCH
  74 #    define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJPsw, feg)
  75 #elif defined LJ_EWALD
  76 #    ifdef LJ_EWALD_COMB_GEOM
  77 #        define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombGeom, feg)
  78 #    else
  79 #        define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombLB, feg)
  80 #    endif
  81 #else
  82 #    error "No VdW type defined"
  83 #endif
  84
  85 void
  86 #ifndef CALC_ENERGIES
  87         NBK_FUNC_NAME(_F) // NOLINT(misc-definitions-in-headers)
  88 #else
  89 #    ifndef ENERGY_GROUPS
  90         NBK_FUNC_NAME(_VF) // NOLINT(misc-definitions-in-headers)
  91 #    else
  92         NBK_FUNC_NAME(_VgrpF) // NOLINT(misc-definitions-in-headers)
  93 #    endif
  94 #endif
  95 #undef NBK_FUNC_NAME
  96 #undef NBK_FUNC_NAME2
  97         (const NbnxnPairlistCpu*    nbl,
  98          const nbnxn_atomdata_t*    nbat,
  99          const interaction_const_t* ic,
 100          const rvec*                shift_vec,
 101          nbnxn_atomdata_output_t*   out)
 102 {
 103     /* Unpack pointers for output */
 104     real* f = out->f.data();
 105 #ifdef CALC_SHIFTFORCES
 106     real* fshift = out->fshift.data();
 107 #endif
 108 #ifdef CALC_ENERGIES
 109     real* Vvdw = out->Vvdw.data();
 110     real* Vc   = out->Vc.data();
 111 #endif
 112
 113     real xi[UNROLLI * XI_STRIDE];
 114     real fi[UNROLLI * FI_STRIDE];
 115     real qi[UNROLLI];
 116
 117 #ifdef COUNT_PAIRS
 118     int npair = 0;
 119 #endif
 120
 121 #ifdef LJ_POT_SWITCH
 122     const real swV3 = ic->vdw_switch.c3;
 123     const real swV4 = ic->vdw_switch.c4;
 124     const real swV5 = ic->vdw_switch.c5;
 125     const real swF2 = 3 * ic->vdw_switch.c3;
 126     const real swF3 = 4 * ic->vdw_switch.c4;
 127     const real swF4 = 5 * ic->vdw_switch.c5;
 128 #endif
 129
 130     const nbnxn_atomdata_t::Params& nbatParams = nbat->params();
 131
 132 #ifdef LJ_EWALD
 133     const real lje_coeff2   = ic->ewaldcoeff_lj * ic->ewaldcoeff_lj;
 134     const real lje_coeff6_6 = lje_coeff2 * lje_coeff2 * lje_coeff2 / 6.0;
 135 #    ifdef CALC_ENERGIES
 136     const real lje_vc = ic->sh_lj_ewald;
 137 #    endif
 138
 139     const real* ljc = nbatParams.nbfp_comb.data();
 140 #endif
 141
 142 #ifdef CALC_COUL_RF
 143     const real k_rf2 = 2 * ic->k_rf;
 144 #    ifdef CALC_ENERGIES
 145     const real k_rf = ic->k_rf;
 146     const real c_rf = ic->c_rf;
 147 #    endif
 148 #endif
 149 #ifdef CALC_COUL_TAB
 150     const real tab_coul_scale = ic->coulombEwaldTables->scale;
 151 #    ifdef CALC_ENERGIES
 152     const real halfsp = 0.5 / tab_coul_scale;
 153 #    endif
 154
 155 #    if !GMX_DOUBLE
 156     const real* tab_coul_FDV0 = ic->coulombEwaldTables->tableFDV0.data();
 157 #    else
 158     const real* tab_coul_F = ic->coulombEwaldTables->tableF.data();
 159 #        ifdef CALC_ENERGIES
 160     const real* tab_coul_V = ic->coulombEwaldTables->tableV.data();
 161 #        endif
 162 #    endif
 163 #endif
 164
 165 #ifdef ENERGY_GROUPS
 166     const int egp_mask = (1 << nbatParams.neg_2log) - 1;
 167 #endif
 168
 169
 170     const real rcut2 = ic->rcoulomb * ic->rcoulomb;
 171 #ifdef VDW_CUTOFF_CHECK
 172     const real rvdw2 = ic->rvdw * ic->rvdw;
 173 #endif
 174
 175     const int   ntype2   = nbatParams.numTypes * 2;
 176     const real* nbfp     = nbatParams.nbfp.data();
 177     const real* q        = nbatParams.q.data();
 178     const int*  type     = nbatParams.type.data();
 179     const real  facel    = ic->epsfac;
 180     const real* shiftvec = shift_vec[0];
 181     const real* x        = nbat->x().data();
 182
 183     const nbnxn_cj_t* l_cj = nbl->cj.data();
 184
 185     for (const nbnxn_ci_t& ciEntry : nbl->ci)
 186     {
 187         const int ish = (ciEntry.shift & NBNXN_CI_SHIFT);
 188         /* x, f and fshift are assumed to be stored with stride 3 */
 189         const int ishf   = ish * DIM;
 190         const int cjind0 = ciEntry.cj_ind_start;
 191         const int cjind1 = ciEntry.cj_ind_end;
 192         /* Currently only works super-cells equal to sub-cells */
 193         const int ci    = ciEntry.ci;
 194         const int ci_sh = (ish == CENTRAL ? ci : -1);
 195
 196         /* We have 5 LJ/C combinations, but use only three inner loops,
 197          * as the other combinations are unlikely and/or not much faster:
 198          * inner half-LJ + C for half-LJ + C / no-LJ + C
 199          * inner LJ + C      for full-LJ + C
 200          * inner LJ          for full-LJ + no-C / half-LJ + no-C
 201          */
 202         const bool do_LJ   = ((ciEntry.shift & NBNXN_CI_DO_LJ(0)) != 0);
 203         const bool do_coul = ((ciEntry.shift & NBNXN_CI_DO_COUL(0)) != 0);
 204         const bool half_LJ = (((ciEntry.shift & NBNXN_CI_HALF_LJ(0)) != 0) || !do_LJ) && do_coul;
 205 #ifdef CALC_ENERGIES
 206
 207 #    ifdef LJ_EWALD
 208         const bool do_self = true;
 209 #    else
 210         const bool do_self = do_coul;
 211 #    endif
 212
 213 #    ifndef ENERGY_GROUPS
 214         real Vvdw_ci = 0;
 215         real Vc_ci   = 0;
 216 #    else
 217         int        egp_sh_i[UNROLLI];
 218         for (int i = 0; i < UNROLLI; i++)
 219         {
 220             egp_sh_i[i] = ((nbatParams.energrp[ci] >> (i * nbatParams.neg_2log)) & egp_mask)
 221                           * nbatParams.nenergrp;
 222         }
 223 #    endif
 224 #endif
 225
 226         for (int i = 0; i < UNROLLI; i++)
 227         {
 228             for (int d = 0; d < DIM; d++)
 229             {
 230                 xi[i * XI_STRIDE + d] = x[(ci * UNROLLI + i) * X_STRIDE + d] + shiftvec[ishf + d];
 231                 fi[i * FI_STRIDE + d] = 0;
 232             }
 233
 234             qi[i] = facel * q[ci * UNROLLI + i];
 235         }
 236
 237 #ifdef CALC_ENERGIES
 238         if (do_self)
 239         {
 240 #    ifdef CALC_COUL_RF
 241             const real Vc_sub_self = 0.5 * c_rf;
 242 #    endif
 243 #    ifdef CALC_COUL_TAB
 244 #        if GMX_DOUBLE
 245             const real Vc_sub_self = 0.5 * tab_coul_V[0];
 246 #        else
 247             const real Vc_sub_self = 0.5 * tab_coul_FDV0[2];
 248 #        endif
 249 #    endif
 250
 251             if (l_cj[ciEntry.cj_ind_start].cj == ci_sh)
 252             {
 253                 for (int i = 0; i < UNROLLI; i++)
 254                 {
 255 #    ifdef ENERGY_GROUPS
 256                     const int egp_ind =
 257                             egp_sh_i[i] + ((nbatParams.energrp[ci] >> (i * nbatParams.neg_2log)) & egp_mask);
 258 #    else
 259                     const int egp_ind = 0;
 260 #    endif
 261                     /* Coulomb self interaction */
 262                     Vc[egp_ind] -= qi[i] * q[ci * UNROLLI + i] * Vc_sub_self;
 263
 264 #    ifdef LJ_EWALD
 265                     /* LJ Ewald self interaction */
 266                     Vvdw[egp_ind] +=
 267                             0.5
 268                             * nbatParams.nbfp[nbatParams.type[ci * UNROLLI + i] * (nbatParams.numTypes + 1) * 2]
 269                             / 6 * lje_coeff6_6;
 270 #    endif
 271                 }
 272             }
 273         }
 274 #endif /* CALC_ENERGIES */
 275
 276         int cjind = cjind0;
 277         while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
 278         {
 279 #define CHECK_EXCLS
 280             if (half_LJ)
 281             {
 282 #define CALC_COULOMB
 283 #define HALF_LJ
 284 #include "kernel_ref_inner.h"
 285 #undef HALF_LJ
 286 #undef CALC_COULOMB
 287             }
 288             else if (do_coul)
 289             {
 290 #define CALC_COULOMB
 291 #include "kernel_ref_inner.h"
 292 #undef CALC_COULOMB
 293             }
 294             else
 295             {
 296 #include "kernel_ref_inner.h"
 297             }
 298 #undef CHECK_EXCLS
 299             cjind++;
 300         }
 301
 302         for (; (cjind < cjind1); cjind++)
 303         {
 304             if (half_LJ)
 305             {
 306 #define CALC_COULOMB
 307 #define HALF_LJ
 308 #include "kernel_ref_inner.h"
 309 #undef HALF_LJ
 310 #undef CALC_COULOMB
 311             }
 312             else if (do_coul)
 313             {
 314 #define CALC_COULOMB
 315 #include "kernel_ref_inner.h"
 316 #undef CALC_COULOMB
 317             }
 318             else
 319             {
 320 #include "kernel_ref_inner.h"
 321             }
 322         }
 323
 324         /* Add accumulated i-forces to the force array */
 325         for (int i = 0; i < UNROLLI; i++)
 326         {
 327             for (int d = 0; d < DIM; d++)
 328             {
 329                 f[(ci * UNROLLI + i) * F_STRIDE + d] += fi[i * FI_STRIDE + d];
 330             }
 331         }
 332 #ifdef CALC_SHIFTFORCES
 333         if (fshift != nullptr)
 334         {
 335             /* Add i forces to shifted force list */
 336             for (int i = 0; i < UNROLLI; i++)
 337             {
 338                 for (int d = 0; d < DIM; d++)
 339                 {
 340                     fshift[ishf + d] += fi[i * FI_STRIDE + d];
 341                 }
 342             }
 343         }
 344 #endif
 345
 346 #ifdef CALC_ENERGIES
 347 #    ifndef ENERGY_GROUPS
 348         *Vvdw += Vvdw_ci;
 349         *Vc += Vc_ci;
 350 #    endif
 351 #endif
 352     }
 353
 354 #ifdef COUNT_PAIRS
 355     printf("atom pairs %d\n", npair);
 356 #endif
 357 }
 358
 359 #undef CALC_SHIFTFORCES
 360
 361 #undef X_STRIDE
 362 #undef F_STRIDE
 363 #undef XI_STRIDE
 364 #undef FI_STRIDE
 365
 366 #undef UNROLLI
 367 #undef UNROLLJ