src/gromacs/nbnxm/kernels_reference/kernel_ref_inner.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
   5  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 /* When calculating RF or Ewald interactions we calculate the electrostatic
  38  * forces and energies on excluded atom pairs here in the non-bonded loops.
  39  */
  40 #if defined CHECK_EXCLS && (defined CALC_COULOMB || defined LJ_EWALD)
  41 #    define EXCL_FORCES
  42 #endif
  43
  44 {
  45     const int cj = l_cj[cjind].cj;
  46
  47 #ifdef ENERGY_GROUPS
  48     const int egp_cj = nbatParams.energrp[cj];
  49 #endif
  50     for (int i = 0; i < UNROLLI; i++)
  51     {
  52         const int ai = ci * UNROLLI + i;
  53
  54         const int type_i_off = type[ai] * ntype2;
  55
  56         for (int j = 0; j < UNROLLJ; j++)
  57         {
  58             real FrLJ6 = 0, FrLJ12 = 0, frLJ = 0;
  59
  60             /* A multiply mask used to zero an interaction
  61              * when either the distance cutoff is exceeded, or
  62              * (if appropriate) the i and j indices are
  63              * unsuitable for this kind of inner loop. */
  64 #ifdef CHECK_EXCLS
  65             /* A multiply mask used to zero an interaction
  66              * when that interaction should be excluded
  67              * (e.g. because of bonding). */
  68             const real interact = static_cast<real>((l_cj[cjind].excl >> (i * UNROLLI + j)) & 1);
  69 #    ifndef EXCL_FORCES
  70             real skipmask = interact;
  71 #    else
  72             real skipmask = (cj == ci_sh && j <= i) ? 0.0 : 1.0;
  73 #    endif
  74 #else
  75             constexpr real interact = 1.0;
  76             real           skipmask = interact;
  77 #endif
  78
  79             real gmx_unused VLJ = 0;
  80
  81             const int aj = cj * UNROLLJ + j;
  82
  83             const real dx = xi[i * XI_STRIDE + XX] - x[aj * X_STRIDE + XX];
  84             const real dy = xi[i * XI_STRIDE + YY] - x[aj * X_STRIDE + YY];
  85             const real dz = xi[i * XI_STRIDE + ZZ] - x[aj * X_STRIDE + ZZ];
  86
  87             real rsq = dx * dx + dy * dy + dz * dz;
  88
  89             /* Prepare to enforce the cut-off. */
  90             skipmask = (rsq >= rcut2) ? 0 : skipmask;
  91             /* 9 flops for r^2 + cut-off check */
  92
  93             // Ensure the distances do not fall below the limit where r^-12 overflows.
  94             // This should never happen for normal interactions.
  95             rsq = std::max(rsq, c_nbnxnMinDistanceSquared);
  96
  97 #ifdef COUNT_PAIRS
  98             npair++;
  99 #endif
 100
 101             real rinv = gmx::invsqrt(rsq);
 102             /* 5 flops for invsqrt */
 103
 104             /* Partially enforce the cut-off (and perhaps
 105              * exclusions) to avoid possible overflow of
 106              * rinvsix when computing LJ, and/or overflowing
 107              * the Coulomb table during lookup. */
 108             rinv = rinv * skipmask;
 109
 110             const real rinvsq = rinv * rinv;
 111
 112 #ifdef HALF_LJ
 113             if (i < UNROLLI / 2)
 114 #endif
 115             {
 116                 const real c6  = nbfp[type_i_off + type[aj] * 2];
 117                 const real c12 = nbfp[type_i_off + type[aj] * 2 + 1];
 118
 119 #if defined LJ_CUT || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
 120                 real rinvsix = interact * rinvsq * rinvsq * rinvsq;
 121                 FrLJ6        = c6 * rinvsix;
 122                 FrLJ12       = c12 * rinvsix * rinvsix;
 123                 frLJ         = FrLJ12 - FrLJ6;
 124                 /* 7 flops for r^-2 + LJ force */
 125 #    if defined CALC_ENERGIES || defined LJ_POT_SWITCH
 126                 VLJ = (FrLJ12 + c12 * ic->repulsion_shift.cpot) / 12
 127                       - (FrLJ6 + c6 * ic->dispersion_shift.cpot) / 6;
 128                 /* 7 flops for LJ energy */
 129 #    endif
 130 #endif
 131
 132 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
 133                 /* Force or potential switching from ic->rvdw_switch */
 134                 real r   = rsq * rinv;
 135                 real rsw = r - ic->rvdw_switch;
 136                 rsw      = (rsw >= 0.0 ? rsw : 0.0);
 137 #endif
 138 #ifdef LJ_FORCE_SWITCH
 139                 frLJ += -c6 * (ic->dispersion_shift.c2 + ic->dispersion_shift.c3 * rsw) * rsw * rsw * r
 140                         + c12 * (ic->repulsion_shift.c2 + ic->repulsion_shift.c3 * rsw) * rsw * rsw * r;
 141 #    if defined CALC_ENERGIES
 142                 VLJ += -c6 * (-ic->dispersion_shift.c2 / 3 - ic->dispersion_shift.c3 / 4 * rsw)
 143                                * rsw * rsw * rsw
 144                        + c12 * (-ic->repulsion_shift.c2 / 3 - ic->repulsion_shift.c3 / 4 * rsw)
 145                                  * rsw * rsw * rsw;
 146 #    endif
 147 #endif
 148
 149 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
 150                 /* Masking should be done after force switching,
 151                  * but before potential switching.
 152                  */
 153                 /* Need to zero the interaction if there should be exclusion. */
 154                 VLJ = VLJ * interact;
 155 #endif
 156
 157 #ifdef LJ_POT_SWITCH
 158                 {
 159                     const real sw  = 1.0 + (swV3 + (swV4 + swV5 * rsw) * rsw) * rsw * rsw * rsw;
 160                     const real dsw = (swF2 + (swF3 + swF4 * rsw) * rsw) * rsw * rsw;
 161
 162                     frLJ = frLJ * sw - r * VLJ * dsw;
 163                     VLJ *= sw;
 164                 }
 165 #endif
 166
 167 #ifdef LJ_EWALD
 168                 {
 169 #    ifdef LJ_EWALD_COMB_GEOM
 170                     const real c6grid = ljc[type[ai] * 2] * ljc[type[aj] * 2];
 171 #    elif defined LJ_EWALD_COMB_LB
 172                     real c6grid = NAN;
 173                     {
 174                         /* These sigma and epsilon are scaled to give 6*C6 */
 175                         const real sigma   = ljc[type[ai] * 2] + ljc[type[aj] * 2];
 176                         const real epsilon = ljc[type[ai] * 2 + 1] * ljc[type[aj] * 2 + 1];
 177
 178                         const real sigma2 = sigma * sigma;
 179                         c6grid            = epsilon * sigma2 * sigma2 * sigma2;
 180                     }
 181 #    else
 182 #        error "No LJ Ewald combination rule defined"
 183 #    endif
 184
 185 #    ifdef CHECK_EXCLS
 186                     /* Recalculate rinvsix without exclusion mask */
 187                     const real rinvsix_nm = rinvsq * rinvsq * rinvsq;
 188 #    else
 189                     const real rinvsix_nm = rinvsix;
 190 #    endif
 191                     const real cr2 = lje_coeff2 * rsq;
 192 #    if GMX_DOUBLE
 193                     const real expmcr2 = exp(-cr2);
 194 #    else
 195                     const real expmcr2    = expf(-cr2);
 196 #    endif
 197                     const real poly = 1 + cr2 + 0.5 * cr2 * cr2;
 198
 199                     /* Subtract the grid force from the total LJ force */
 200                     frLJ += c6grid * (rinvsix_nm - expmcr2 * (rinvsix_nm * poly + lje_coeff6_6));
 201 #    ifdef CALC_ENERGIES
 202                     /* Shift should only be applied to real LJ pairs */
 203                     const real sh_mask = lje_vc * interact;
 204
 205                     VLJ += c6grid / 6 * (rinvsix_nm * (1 - expmcr2 * poly) + sh_mask);
 206 #    endif
 207                 }
 208 #endif /* LJ_EWALD */
 209
 210 #ifdef VDW_CUTOFF_CHECK
 211                 /* Mask for VdW cut-off shorter than Coulomb cut-off */
 212                 {
 213                     real skipmask_rvdw = (rsq < rvdw2) ? 1.0 : 0.0;
 214                     frLJ *= skipmask_rvdw;
 215 #    ifdef CALC_ENERGIES
 216                     VLJ *= skipmask_rvdw;
 217 #    endif
 218                 }
 219 #else
 220 #    if defined CALC_ENERGIES
 221                 /* Need to zero the interaction if r >= rcut */
 222                 VLJ = VLJ * skipmask;
 223                 /* 1 more flop for LJ energy */
 224 #    endif
 225 #endif /* VDW_CUTOFF_CHECK */
 226
 227
 228 #ifdef CALC_ENERGIES
 229 #    ifdef ENERGY_GROUPS
 230                 Vvdw[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log * j)) & egp_mask)] += VLJ;
 231 #    else
 232                 Vvdw_ci += VLJ;
 233                 /* 1 flop for LJ energy addition */
 234 #    endif
 235 #endif
 236             }
 237
 238 #ifdef CALC_COULOMB
 239             /* Enforce the cut-off and perhaps exclusions. In
 240              * those cases, rinv is zero because of skipmask,
 241              * but fcoul and vcoul will later be non-zero (in
 242              * both RF and table cases) because of the
 243              * contributions that do not depend on rinv. These
 244              * contributions cannot be allowed to accumulate
 245              * to the force and potential, and the easiest way
 246              * to do this is to zero the charges in
 247              * advance. */
 248             const real qq = skipmask * qi[i] * q[aj];
 249
 250 #    ifdef CALC_COUL_RF
 251             real fcoul = qq * (interact * rinv * rinvsq - k_rf2);
 252             /* 4 flops for RF force */
 253 #        ifdef CALC_ENERGIES
 254             real vcoul = qq * (interact * rinv + reactionFieldCoefficient * rsq - reactionFieldShift);
 255             /* 4 flops for RF energy */
 256 #        endif
 257 #    endif
 258
 259 #    ifdef CALC_COUL_TAB
 260             const real rs   = rsq * rinv * tab_coul_scale;
 261             const int  ri   = int(rs);
 262             const real frac = rs - static_cast<real>(ri);
 263 #        if !GMX_DOUBLE
 264             /* fexcl = F_i + frac * (F_(i+1)-F_i) */
 265             const real fexcl = tab_coul_FDV0[ri * 4] + frac * tab_coul_FDV0[ri * 4 + 1];
 266 #        else
 267             /* fexcl = (1-frac) * F_i + frac * F_(i+1) */
 268             const real fexcl = (1 - frac) * tab_coul_F[ri] + frac * tab_coul_F[ri + 1];
 269 #        endif
 270             real fcoul = interact * rinvsq - fexcl;
 271             /* 7 flops for float 1/r-table force */
 272 #        ifdef CALC_ENERGIES
 273 #            if !GMX_DOUBLE
 274             real vcoul =
 275                     qq
 276                     * (interact * (rinv - ic->sh_ewald)
 277                        - (tab_coul_FDV0[ri * 4 + 2] - halfsp * frac * (tab_coul_FDV0[ri * 4] + fexcl)));
 278             /* 7 flops for float 1/r-table energy (8 with excls) */
 279 #            else
 280             real vcoul = qq
 281                          * (interact * (rinv - ic->sh_ewald)
 282                             - (tab_coul_V[ri] - halfsp * frac * (tab_coul_F[ri] + fexcl)));
 283 #            endif
 284 #        endif
 285             fcoul *= qq * rinv;
 286 #    endif
 287
 288 #    ifdef CALC_ENERGIES
 289 #        ifdef ENERGY_GROUPS
 290             Vc[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log * j)) & egp_mask)] += vcoul;
 291 #        else
 292             Vc_ci += vcoul;
 293             /* 1 flop for Coulomb energy addition */
 294 #        endif
 295 #    endif
 296 #endif
 297
 298 #ifdef CALC_COULOMB
 299             static constexpr bool sc_halfLJ =
 300 #    ifdef HALF_LJ
 301                     true;
 302 #    else
 303                     false;
 304 #    endif
 305             /* 2 flops for scalar LJ+Coulomb force if !HALF_LJ || (i < UNROLLI / 2) */
 306             const real fscal = (!sc_halfLJ || (i < UNROLLI / 2)) ? frLJ * rinvsq + fcoul : fcoul;
 307 #else
 308             const real fscal = frLJ * rinvsq;
 309 #endif
 310             const real fx = fscal * dx;
 311             const real fy = fscal * dy;
 312             const real fz = fscal * dz;
 313
 314             /* Increment i-atom force */
 315             fi[i * FI_STRIDE + XX] += fx;
 316             fi[i * FI_STRIDE + YY] += fy;
 317             fi[i * FI_STRIDE + ZZ] += fz;
 318             /* Decrement j-atom force */
 319             f[aj * F_STRIDE + XX] -= fx;
 320             f[aj * F_STRIDE + YY] -= fy;
 321             f[aj * F_STRIDE + ZZ] -= fz;
 322             /* 9 flops for force addition */
 323         }
 324     }
 325 }
 326
 327 #undef interact
 328 #undef EXCL_FORCES