src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #define UNROLLI    NBNXN_CPU_CLUSTER_I_SIZE
  37 #define UNROLLJ    NBNXN_CPU_CLUSTER_I_SIZE
  38
  39 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
  40 #define X_STRIDE   3
  41 #define F_STRIDE   3
  42 /* Local i-atom buffer strides */
  43 #define XI_STRIDE  3
  44 #define FI_STRIDE  3
  45
  46
  47 /* All functionality defines are set here, except for:
  48  * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
  49  * CHECK_EXCLS, which is set just before including the inner loop contents.
  50  */
  51
  52 /* We always calculate shift forces, because it's cheap anyhow */
  53 #define CALC_SHIFTFORCES
  54
  55 #ifdef CALC_COUL_RF
  56 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecRF ## ljt ## feg ## _ref
  57 #endif
  58 #ifdef CALC_COUL_TAB
  59 #ifndef VDW_CUTOFF_CHECK
  60 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTab ## ljt ## feg ## _ref
  61 #else
  62 #define NBK_FUNC_NAME2(ljt, feg) nbnxn_kernel ## _ElecQSTabTwinCut ## ljt ## feg ## _ref
  63 #endif
  64 #endif
  65
  66 #ifdef VDW_FORCE_SWITCH
  67 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJFsw, feg)
  68 #else
  69 #ifdef VDW_POT_SWITCH
  70 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJPsw, feg)
  71 #else
  72 #define NBK_FUNC_NAME(feg) NBK_FUNC_NAME2(_VdwLJ, feg)
  73 #endif
  74 #endif
  75
  76 static void
  77 #ifndef CALC_ENERGIES
  78 NBK_FUNC_NAME(_F)
  79 #else
  80 #ifndef ENERGY_GROUPS
  81 NBK_FUNC_NAME(_VF)
  82 #else
  83 NBK_FUNC_NAME(_VgrpF)
  84 #endif
  85 #endif
  86 #undef NBK_FUNC_NAME
  87 #undef NBK_FUNC_NAME2
  88 (const nbnxn_pairlist_t     *nbl,
  89  const nbnxn_atomdata_t     *nbat,
  90  const interaction_const_t  *ic,
  91  rvec                       *shift_vec,
  92  real                       *f
  93 #ifdef CALC_SHIFTFORCES
  94  ,
  95  real                       *fshift
  96 #endif
  97 #ifdef CALC_ENERGIES
  98  ,
  99  real                       *Vvdw,
 100  real                       *Vc
 101 #endif
 102 )
 103 {
 104     const nbnxn_ci_t   *nbln;
 105     const nbnxn_cj_t   *l_cj;
 106     const int          *type;
 107     const real         *q;
 108     const real         *shiftvec;
 109     const real         *x;
 110     const real         *nbfp;
 111     real                rcut2;
 112 #ifdef VDW_CUTOFF_CHECK
 113     real                rvdw2;
 114 #endif
 115     int                 ntype2;
 116     real                facel;
 117     real               *nbfp_i;
 118     int                 n, ci, ci_sh;
 119     int                 ish, ishf;
 120     gmx_bool            do_LJ, half_LJ, do_coul;
 121     int                 cjind0, cjind1, cjind;
 122     int                 ip, jp;
 123
 124     real                xi[UNROLLI*XI_STRIDE];
 125     real                fi[UNROLLI*FI_STRIDE];
 126     real                qi[UNROLLI];
 127
 128 #ifdef CALC_ENERGIES
 129 #ifndef ENERGY_GROUPS
 130
 131     real       Vvdw_ci, Vc_ci;
 132 #else
 133     int        egp_mask;
 134     int        egp_sh_i[UNROLLI];
 135 #endif
 136 #endif
 137 #ifdef VDW_POT_SWITCH
 138     real       swV3, swV4, swV5;
 139     real       swF2, swF3, swF4;
 140 #endif
 141
 142 #ifdef CALC_COUL_RF
 143     real       k_rf2;
 144 #ifdef CALC_ENERGIES
 145     real       k_rf, c_rf;
 146 #endif
 147 #endif
 148 #ifdef CALC_COUL_TAB
 149     real       tabscale;
 150 #ifdef CALC_ENERGIES
 151     real       halfsp;
 152 #endif
 153 #ifndef GMX_DOUBLE
 154     const real *tab_coul_FDV0;
 155 #else
 156     const real *tab_coul_F;
 157     const real *tab_coul_V;
 158 #endif
 159 #endif
 160
 161     int ninner;
 162
 163 #ifdef COUNT_PAIRS
 164     int npair = 0;
 165 #endif
 166
 167 #ifdef VDW_POT_SWITCH
 168     swV3 = ic->vdw_switch.c3;
 169     swV4 = ic->vdw_switch.c4;
 170     swV5 = ic->vdw_switch.c5;
 171     swF2 = 3*ic->vdw_switch.c3;
 172     swF3 = 4*ic->vdw_switch.c4;
 173     swF4 = 5*ic->vdw_switch.c5;
 174 #endif
 175
 176 #ifdef CALC_COUL_RF
 177     k_rf2 = 2*ic->k_rf;
 178 #ifdef CALC_ENERGIES
 179     k_rf = ic->k_rf;
 180     c_rf = ic->c_rf;
 181 #endif
 182 #endif
 183 #ifdef CALC_COUL_TAB
 184     tabscale = ic->tabq_scale;
 185 #ifdef CALC_ENERGIES
 186     halfsp = 0.5/ic->tabq_scale;
 187 #endif
 188
 189 #ifndef GMX_DOUBLE
 190     tab_coul_FDV0 = ic->tabq_coul_FDV0;
 191 #else
 192     tab_coul_F    = ic->tabq_coul_F;
 193     tab_coul_V    = ic->tabq_coul_V;
 194 #endif
 195 #endif
 196
 197 #ifdef ENERGY_GROUPS
 198     egp_mask = (1<<nbat->neg_2log) - 1;
 199 #endif
 200
 201
 202     rcut2               = ic->rcoulomb*ic->rcoulomb;
 203 #ifdef VDW_CUTOFF_CHECK
 204     rvdw2               = ic->rvdw*ic->rvdw;
 205 #endif
 206
 207     ntype2              = nbat->ntype*2;
 208     nbfp                = nbat->nbfp;
 209     q                   = nbat->q;
 210     type                = nbat->type;
 211     facel               = ic->epsfac;
 212     shiftvec            = shift_vec[0];
 213     x                   = nbat->x;
 214
 215     l_cj = nbl->cj;
 216
 217     ninner = 0;
 218     for (n = 0; n < nbl->nci; n++)
 219     {
 220         int i, d;
 221
 222         nbln = &nbl->ci[n];
 223
 224         ish              = (nbln->shift & NBNXN_CI_SHIFT);
 225         /* x, f and fshift are assumed to be stored with stride 3 */
 226         ishf             = ish*DIM;
 227         cjind0           = nbln->cj_ind_start;
 228         cjind1           = nbln->cj_ind_end;
 229         /* Currently only works super-cells equal to sub-cells */
 230         ci               = nbln->ci;
 231         ci_sh            = (ish == CENTRAL ? ci : -1);
 232
 233         /* We have 5 LJ/C combinations, but use only three inner loops,
 234          * as the other combinations are unlikely and/or not much faster:
 235          * inner half-LJ + C for half-LJ + C / no-LJ + C
 236          * inner LJ + C      for full-LJ + C
 237          * inner LJ          for full-LJ + no-C / half-LJ + no-C
 238          */
 239         do_LJ   = (nbln->shift & NBNXN_CI_DO_LJ(0));
 240         do_coul = (nbln->shift & NBNXN_CI_DO_COUL(0));
 241         half_LJ = ((nbln->shift & NBNXN_CI_HALF_LJ(0)) || !do_LJ) && do_coul;
 242
 243 #ifdef CALC_ENERGIES
 244 #ifndef ENERGY_GROUPS
 245         Vvdw_ci = 0;
 246         Vc_ci   = 0;
 247 #else
 248         for (i = 0; i < UNROLLI; i++)
 249         {
 250             egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp;
 251         }
 252 #endif
 253 #endif
 254
 255         for (i = 0; i < UNROLLI; i++)
 256         {
 257             for (d = 0; d < DIM; d++)
 258             {
 259                 xi[i*XI_STRIDE+d] = x[(ci*UNROLLI+i)*X_STRIDE+d] + shiftvec[ishf+d];
 260                 fi[i*FI_STRIDE+d] = 0;
 261             }
 262         }
 263
 264         if (do_coul)
 265         {
 266 #ifdef CALC_ENERGIES
 267             real Vc_sub_self;
 268
 269 #ifdef CALC_COUL_RF
 270             Vc_sub_self = 0.5*c_rf;
 271 #endif
 272 #ifdef CALC_COUL_TAB
 273 #ifdef GMX_DOUBLE
 274             Vc_sub_self = 0.5*tab_coul_V[0];
 275 #else
 276             Vc_sub_self = 0.5*tab_coul_FDV0[2];
 277 #endif
 278 #endif
 279 #endif
 280
 281             for (i = 0; i < UNROLLI; i++)
 282             {
 283                 qi[i] = facel*q[ci*UNROLLI+i];
 284
 285 #ifdef CALC_ENERGIES
 286                 if (l_cj[nbln->cj_ind_start].cj == ci_sh)
 287                 {
 288 #ifdef ENERGY_GROUPS
 289                     Vc[egp_sh_i[i]+((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)]
 290 #else
 291                     Vc[0]
 292 #endif
 293                         -= qi[i]*q[ci*UNROLLI+i]*Vc_sub_self;
 294                 }
 295 #endif
 296             }
 297         }
 298
 299         cjind = cjind0;
 300         while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
 301         {
 302 #define CHECK_EXCLS
 303             if (half_LJ)
 304             {
 305 #define CALC_COULOMB
 306 #define HALF_LJ
 307 #include "nbnxn_kernel_ref_inner.h"
 308 #undef HALF_LJ
 309 #undef CALC_COULOMB
 310             }
 311             /* cppcheck-suppress duplicateBranch */
 312             else if (do_coul)
 313             {
 314 #define CALC_COULOMB
 315 #include "nbnxn_kernel_ref_inner.h"
 316 #undef CALC_COULOMB
 317             }
 318             else
 319             {
 320 #include "nbnxn_kernel_ref_inner.h"
 321             }
 322 #undef CHECK_EXCLS
 323             cjind++;
 324         }
 325
 326         for (; (cjind < cjind1); cjind++)
 327         {
 328             if (half_LJ)
 329             {
 330 #define CALC_COULOMB
 331 #define HALF_LJ
 332 #include "nbnxn_kernel_ref_inner.h"
 333 #undef HALF_LJ
 334 #undef CALC_COULOMB
 335             }
 336             /* cppcheck-suppress duplicateBranch */
 337             else if (do_coul)
 338             {
 339 #define CALC_COULOMB
 340 #include "nbnxn_kernel_ref_inner.h"
 341 #undef CALC_COULOMB
 342             }
 343             else
 344             {
 345 #include "nbnxn_kernel_ref_inner.h"
 346             }
 347         }
 348         ninner += cjind1 - cjind0;
 349
 350         /* Add accumulated i-forces to the force array */
 351         for (i = 0; i < UNROLLI; i++)
 352         {
 353             for (d = 0; d < DIM; d++)
 354             {
 355                 f[(ci*UNROLLI+i)*F_STRIDE+d] += fi[i*FI_STRIDE+d];
 356             }
 357         }
 358 #ifdef CALC_SHIFTFORCES
 359         if (fshift != NULL)
 360         {
 361             /* Add i forces to shifted force list */
 362             for (i = 0; i < UNROLLI; i++)
 363             {
 364                 for (d = 0; d < DIM; d++)
 365                 {
 366                     fshift[ishf+d] += fi[i*FI_STRIDE+d];
 367                 }
 368             }
 369         }
 370 #endif
 371
 372 #ifdef CALC_ENERGIES
 373 #ifndef ENERGY_GROUPS
 374         *Vvdw += Vvdw_ci;
 375         *Vc   += Vc_ci;
 376 #endif
 377 #endif
 378     }
 379
 380 #ifdef COUNT_PAIRS
 381     printf("atom pairs %d\n", npair);
 382 #endif
 383 }
 384
 385 #undef CALC_SHIFTFORCES
 386
 387 #undef X_STRIDE
 388 #undef F_STRIDE
 389 #undef XI_STRIDE
 390 #undef FI_STRIDE
 391
 392 #undef UNROLLI
 393 #undef UNROLLJ