src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #define UNROLLI    NBNXN_CPU_CLUSTER_I_SIZE
  37 #define UNROLLJ    NBNXN_CPU_CLUSTER_I_SIZE
  38
  39 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
  40 #define X_STRIDE   3
  41 #define F_STRIDE   3
  42 /* Local i-atom buffer strides */
  43 #define XI_STRIDE  3
  44 #define FI_STRIDE  3
  45
  46
  47 /* All functionality defines are set here, except for:
  48  * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
  49  * CHECK_EXCLS, which is set just before including the inner loop contents.
  50  */
  51
  52 /* We always calculate shift forces, because it's cheap anyhow */
  53 #define CALC_SHIFTFORCES
  54
  55 #ifdef CALC_COUL_RF
  56 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecRF ## ljt ## feg ## _ref
  57 #endif
  58 #ifdef CALC_COUL_TAB
  59 #ifndef VDW_CUTOFF_CHECK
  60 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTab ## ljt ## feg ## _ref
  61 #else
  62 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTabTwinCut ## ljt ## feg ## _ref
  63 #endif
  64 #endif
  65
  66 #if defined LJ_CUT && !defined LJ_EWALD
  67 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJ, feg)
  68 #elif defined LJ_FORCE_SWITCH
  69 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJFsw, feg)
  70 #elif defined LJ_POT_SWITCH
  71 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJPsw, feg)
  72 #elif defined LJ_EWALD
  73 #ifdef LJ_EWALD_COMB_GEOM
  74 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombGeom, feg)
  75 #else
  76 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJEwCombLB, feg)
  77 #endif
  78 #else
  79 #error "No VdW type defined"
  80 #endif
  81
  82 void
  83 #ifndef CALC_ENERGIES
  84 NBK_FUNC_NAME(_F)
  85 #else
  86 #ifndef ENERGY_GROUPS
  87 NBK_FUNC_NAME(_VF)
  88 #else
  89 NBK_FUNC_NAME(_VgrpF)
  90 #endif
  91 #endif
  92 #undef NBK_FUNC_NAME
  93 #undef NBK_FUNC_NAME2
  94 (const nbnxn_pairlist_t     *nbl,
  95  const nbnxn_atomdata_t     *nbat,
  96  const interaction_const_t  *ic,
  97  rvec                       *shift_vec,
  98  real                       *f,
  99  real gmx_unused            *fshift
 100 #ifdef CALC_ENERGIES
 101  ,
 102  real                       *Vvdw,
 103  real                       *Vc
 104 #endif
 105 )
 106 {
 107     const nbnxn_ci_t   *nbln;
 108     const nbnxn_cj_t   *l_cj;
 109     const int          *type;
 110     const real         *q;
 111     const real         *shiftvec;
 112     const real         *x;
 113     const real         *nbfp;
 114     real                rcut2;
 115 #ifdef VDW_CUTOFF_CHECK
 116     real                rvdw2;
 117 #endif
 118     int                 ntype2;
 119     real                facel;
 120     int                 n, ci, ci_sh;
 121     int                 ish, ishf;
 122     gmx_bool            do_LJ, half_LJ, do_coul;
 123     int                 cjind0, cjind1, cjind;
 124
 125     real                xi[UNROLLI*XI_STRIDE];
 126     real                fi[UNROLLI*FI_STRIDE];
 127     real                qi[UNROLLI];
 128
 129 #ifdef CALC_ENERGIES
 130 #ifndef ENERGY_GROUPS
 131
 132     real       Vvdw_ci, Vc_ci;
 133 #else
 134     int        egp_mask;
 135     int        egp_sh_i[UNROLLI];
 136 #endif
 137 #endif
 138 #ifdef LJ_POT_SWITCH
 139     real       swV3, swV4, swV5;
 140     real       swF2, swF3, swF4;
 141 #endif
 142 #ifdef LJ_EWALD
 143     real        lje_coeff2, lje_coeff6_6;
 144 #ifdef CALC_ENERGIES
 145     real        lje_vc;
 146 #endif
 147     const real *ljc;
 148 #endif
 149
 150 #ifdef CALC_COUL_RF
 151     real       k_rf2;
 152 #ifdef CALC_ENERGIES
 153     real       k_rf, c_rf;
 154 #endif
 155 #endif
 156 #ifdef CALC_COUL_TAB
 157 #ifdef CALC_ENERGIES
 158     real       halfsp;
 159 #endif
 160 #if !GMX_DOUBLE
 161     const real            *tab_coul_FDV0;
 162 #else
 163     const real            *tab_coul_F;
 164     const real gmx_unused *tab_coul_V;
 165 #endif
 166 #endif
 167
 168 #ifdef COUNT_PAIRS
 169     int npair = 0;
 170 #endif
 171
 172 #ifdef LJ_POT_SWITCH
 173     swV3 = ic->vdw_switch.c3;
 174     swV4 = ic->vdw_switch.c4;
 175     swV5 = ic->vdw_switch.c5;
 176     swF2 = 3*ic->vdw_switch.c3;
 177     swF3 = 4*ic->vdw_switch.c4;
 178     swF4 = 5*ic->vdw_switch.c5;
 179 #endif
 180
 181 #ifdef LJ_EWALD
 182     lje_coeff2   = ic->ewaldcoeff_lj*ic->ewaldcoeff_lj;
 183     lje_coeff6_6 = lje_coeff2*lje_coeff2*lje_coeff2/6.0;
 184 #ifdef CALC_ENERGIES
 185     lje_vc       = ic->sh_lj_ewald;
 186 #endif
 187
 188     ljc          = nbat->nbfp_comb;
 189 #endif
 190
 191 #ifdef CALC_COUL_RF
 192     k_rf2 = 2*ic->k_rf;
 193 #ifdef CALC_ENERGIES
 194     k_rf = ic->k_rf;
 195     c_rf = ic->c_rf;
 196 #endif
 197 #endif
 198 #ifdef CALC_COUL_TAB
 199 #ifdef CALC_ENERGIES
 200     halfsp = 0.5/ic->tabq_scale;
 201 #endif
 202
 203 #if !GMX_DOUBLE
 204     tab_coul_FDV0 = ic->tabq_coul_FDV0;
 205 #else
 206     tab_coul_F    = ic->tabq_coul_F;
 207     tab_coul_V    = ic->tabq_coul_V;
 208 #endif
 209 #endif
 210
 211 #ifdef ENERGY_GROUPS
 212     egp_mask = (1<<nbat->neg_2log) - 1;
 213 #endif
 214
 215
 216     rcut2               = ic->rcoulomb*ic->rcoulomb;
 217 #ifdef VDW_CUTOFF_CHECK
 218     rvdw2               = ic->rvdw*ic->rvdw;
 219 #endif
 220
 221     ntype2              = nbat->ntype*2;
 222     nbfp                = nbat->nbfp;
 223     q                   = nbat->q;
 224     type                = nbat->type;
 225     facel               = ic->epsfac;
 226     shiftvec            = shift_vec[0];
 227     x                   = nbat->x;
 228
 229     l_cj = nbl->cj;
 230
 231     for (n = 0; n < nbl->nci; n++)
 232     {
 233         int i, d;
 234
 235         nbln = &nbl->ci[n];
 236
 237         ish              = (nbln->shift & NBNXN_CI_SHIFT);
 238         /* x, f and fshift are assumed to be stored with stride 3 */
 239         ishf             = ish*DIM;
 240         cjind0           = nbln->cj_ind_start;
 241         cjind1           = nbln->cj_ind_end;
 242         /* Currently only works super-cells equal to sub-cells */
 243         ci               = nbln->ci;
 244         ci_sh            = (ish == CENTRAL ? ci : -1);
 245
 246         /* We have 5 LJ/C combinations, but use only three inner loops,
 247          * as the other combinations are unlikely and/or not much faster:
 248          * inner half-LJ + C for half-LJ + C / no-LJ + C
 249          * inner LJ + C      for full-LJ + C
 250          * inner LJ          for full-LJ + no-C / half-LJ + no-C
 251          */
 252         do_LJ   = (nbln->shift & NBNXN_CI_DO_LJ(0));
 253         do_coul = (nbln->shift & NBNXN_CI_DO_COUL(0));
 254         half_LJ = ((nbln->shift & NBNXN_CI_HALF_LJ(0)) || !do_LJ) && do_coul;
 255 #ifdef CALC_ENERGIES
 256
 257 #ifdef LJ_EWALD
 258         gmx_bool do_self = TRUE;
 259 #else
 260         gmx_bool do_self = do_coul;
 261 #endif
 262
 263 #ifndef ENERGY_GROUPS
 264         Vvdw_ci = 0;
 265         Vc_ci   = 0;
 266 #else
 267         for (i = 0; i < UNROLLI; i++)
 268         {
 269             egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp;
 270         }
 271 #endif
 272 #endif
 273
 274         for (i = 0; i < UNROLLI; i++)
 275         {
 276             for (d = 0; d < DIM; d++)
 277             {
 278                 xi[i*XI_STRIDE+d] = x[(ci*UNROLLI+i)*X_STRIDE+d] + shiftvec[ishf+d];
 279                 fi[i*FI_STRIDE+d] = 0;
 280             }
 281
 282             qi[i] = facel*q[ci*UNROLLI+i];
 283         }
 284
 285 #ifdef CALC_ENERGIES
 286         if (do_self)
 287         {
 288             real Vc_sub_self;
 289
 290 #ifdef CALC_COUL_RF
 291             Vc_sub_self = 0.5*c_rf;
 292 #endif
 293 #ifdef CALC_COUL_TAB
 294 #if GMX_DOUBLE
 295             Vc_sub_self = 0.5*tab_coul_V[0];
 296 #else
 297             Vc_sub_self = 0.5*tab_coul_FDV0[2];
 298 #endif
 299 #endif
 300
 301             if (l_cj[nbln->cj_ind_start].cj == ci_sh)
 302             {
 303                 for (i = 0; i < UNROLLI; i++)
 304                 {
 305                     int egp_ind;
 306 #ifdef ENERGY_GROUPS
 307                     egp_ind = egp_sh_i[i] + ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask);
 308 #else
 309                     egp_ind = 0;
 310 #endif
 311                     /* Coulomb self interaction */
 312                     Vc[egp_ind]   -= qi[i]*q[ci*UNROLLI+i]*Vc_sub_self;
 313
 314 #ifdef LJ_EWALD
 315                     /* LJ Ewald self interaction */
 316                     Vvdw[egp_ind] += 0.5*nbat->nbfp[nbat->type[ci*UNROLLI+i]*(nbat->ntype + 1)*2]/6*lje_coeff6_6;
 317 #endif
 318                 }
 319             }
 320         }
 321 #endif  /* CALC_ENERGIES */
 322
 323         cjind = cjind0;
 324         while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
 325         {
 326 #define CHECK_EXCLS
 327             if (half_LJ)
 328             {
 329 #define CALC_COULOMB
 330 #define HALF_LJ
 331 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h"
 332 #undef HALF_LJ
 333 #undef CALC_COULOMB
 334             }
 335             else if (do_coul)
 336             {
 337 #define CALC_COULOMB
 338 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h"
 339 #undef CALC_COULOMB
 340             }
 341             else
 342             {
 343 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h"
 344             }
 345 #undef CHECK_EXCLS
 346             cjind++;
 347         }
 348
 349         for (; (cjind < cjind1); cjind++)
 350         {
 351             if (half_LJ)
 352             {
 353 #define CALC_COULOMB
 354 #define HALF_LJ
 355 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h"
 356 #undef HALF_LJ
 357 #undef CALC_COULOMB
 358             }
 359             else if (do_coul)
 360             {
 361 #define CALC_COULOMB
 362 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h"
 363 #undef CALC_COULOMB
 364             }
 365             else
 366             {
 367 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_inner.h"
 368             }
 369         }
 370
 371         /* Add accumulated i-forces to the force array */
 372         for (i = 0; i < UNROLLI; i++)
 373         {
 374             for (d = 0; d < DIM; d++)
 375             {
 376                 f[(ci*UNROLLI+i)*F_STRIDE+d] += fi[i*FI_STRIDE+d];
 377             }
 378         }
 379 #ifdef CALC_SHIFTFORCES
 380         if (fshift != nullptr)
 381         {
 382             /* Add i forces to shifted force list */
 383             for (i = 0; i < UNROLLI; i++)
 384             {
 385                 for (d = 0; d < DIM; d++)
 386                 {
 387                     fshift[ishf+d] += fi[i*FI_STRIDE+d];
 388                 }
 389             }
 390         }
 391 #endif
 392
 393 #ifdef CALC_ENERGIES
 394 #ifndef ENERGY_GROUPS
 395         *Vvdw += Vvdw_ci;
 396         *Vc   += Vc_ci;
 397 #endif
 398 #endif
 399     }
 400
 401 #ifdef COUNT_PAIRS
 402     printf("atom pairs %d\n", npair);
 403 #endif
 404 }
 405
 406 #undef CALC_SHIFTFORCES
 407
 408 #undef X_STRIDE
 409 #undef F_STRIDE
 410 #undef XI_STRIDE
 411 #undef FI_STRIDE
 412
 413 #undef UNROLLI
 414 #undef UNROLLJ