src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_outer.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #define UNROLLI    NBNXN_CPU_CLUSTER_I_SIZE
  37 #define UNROLLJ    NBNXN_CPU_CLUSTER_I_SIZE
  38
  39 /* We could use nbat->xstride and nbat->fstride, but macros might be faster */
  40 #define X_STRIDE   3
  41 #define F_STRIDE   3
  42 /* Local i-atom buffer strides */
  43 #define XI_STRIDE  3
  44 #define FI_STRIDE  3
  45
  46
  47 /* All functionality defines are set here, except for:
  48  * CALC_ENERGIES, ENERGY_GROUPS which are defined before.
  49  * CHECK_EXCLS, which is set just before including the inner loop contents.
  50  */
  51
  52 /* We always calculate shift forces, because it's cheap anyhow */
  53 #define CALC_SHIFTFORCES
  54
  55 #ifdef CALC_COUL_RF
  56 #define NBK_FUNC_NAME(base, ene) base ## _rf_ ## ene
  57 #endif
  58 #ifdef CALC_COUL_TAB
  59 #ifndef VDW_CUTOFF_CHECK
  60 #define NBK_FUNC_NAME(base, ene) base ## _tab_ ## ene
  61 #else
  62 #define NBK_FUNC_NAME(base, ene) base ## _tab_twin_ ## ene
  63 #endif
  64 #endif
  65
  66 static void
  67 #ifndef CALC_ENERGIES
  68 NBK_FUNC_NAME(nbnxn_kernel_ref, noener)
  69 #else
  70 #ifndef ENERGY_GROUPS
  71 NBK_FUNC_NAME(nbnxn_kernel_ref, ener)
  72 #else
  73 NBK_FUNC_NAME(nbnxn_kernel_ref, energrp)
  74 #endif
  75 #endif
  76 #undef NBK_FUNC_NAME
  77 (const nbnxn_pairlist_t     *nbl,
  78  const nbnxn_atomdata_t     *nbat,
  79  const interaction_const_t  *ic,
  80  rvec                       *shift_vec,
  81  real                       *f
  82 #ifdef CALC_SHIFTFORCES
  83  ,
  84  real                       *fshift
  85 #endif
  86 #ifdef CALC_ENERGIES
  87  ,
  88  real                       *Vvdw,
  89  real                       *Vc
  90 #endif
  91 )
  92 {
  93     const nbnxn_ci_t   *nbln;
  94     const nbnxn_cj_t   *l_cj;
  95     const int          *type;
  96     const real         *q;
  97     const real         *shiftvec;
  98     const real         *x;
  99     const real         *nbfp;
 100     real                rcut2;
 101 #ifdef VDW_CUTOFF_CHECK
 102     real                rvdw2;
 103 #endif
 104     int                 ntype2;
 105     real                facel;
 106     real               *nbfp_i;
 107     int                 n, ci, ci_sh;
 108     int                 ish, ishf;
 109     gmx_bool            do_LJ, half_LJ, do_coul;
 110     int                 cjind0, cjind1, cjind;
 111     int                 ip, jp;
 112
 113     real                xi[UNROLLI*XI_STRIDE];
 114     real                fi[UNROLLI*FI_STRIDE];
 115     real                qi[UNROLLI];
 116
 117 #ifdef CALC_ENERGIES
 118 #ifndef ENERGY_GROUPS
 119
 120     real       Vvdw_ci, Vc_ci;
 121 #else
 122     int        egp_mask;
 123     int        egp_sh_i[UNROLLI];
 124 #endif
 125     real       sh_invrc6;
 126 #endif
 127
 128 #ifdef CALC_COUL_RF
 129     real       k_rf2;
 130 #ifdef CALC_ENERGIES
 131     real       k_rf, c_rf;
 132 #endif
 133 #endif
 134 #ifdef CALC_COUL_TAB
 135     real       tabscale;
 136 #ifdef CALC_ENERGIES
 137     real       halfsp;
 138 #endif
 139 #ifndef GMX_DOUBLE
 140     const real *tab_coul_FDV0;
 141 #else
 142     const real *tab_coul_F;
 143     const real *tab_coul_V;
 144 #endif
 145 #endif
 146
 147     int ninner;
 148
 149 #ifdef COUNT_PAIRS
 150     int npair = 0;
 151 #endif
 152
 153 #ifdef CALC_ENERGIES
 154     sh_invrc6 = ic->sh_invrc6;
 155 #endif
 156
 157 #ifdef CALC_COUL_RF
 158     k_rf2 = 2*ic->k_rf;
 159 #ifdef CALC_ENERGIES
 160     k_rf = ic->k_rf;
 161     c_rf = ic->c_rf;
 162 #endif
 163 #endif
 164 #ifdef CALC_COUL_TAB
 165     tabscale = ic->tabq_scale;
 166 #ifdef CALC_ENERGIES
 167     halfsp = 0.5/ic->tabq_scale;
 168 #endif
 169
 170 #ifndef GMX_DOUBLE
 171     tab_coul_FDV0 = ic->tabq_coul_FDV0;
 172 #else
 173     tab_coul_F    = ic->tabq_coul_F;
 174     tab_coul_V    = ic->tabq_coul_V;
 175 #endif
 176 #endif
 177
 178 #ifdef ENERGY_GROUPS
 179     egp_mask = (1<<nbat->neg_2log) - 1;
 180 #endif
 181
 182
 183     rcut2               = ic->rcoulomb*ic->rcoulomb;
 184 #ifdef VDW_CUTOFF_CHECK
 185     rvdw2               = ic->rvdw*ic->rvdw;
 186 #endif
 187
 188     ntype2              = nbat->ntype*2;
 189     nbfp                = nbat->nbfp;
 190     q                   = nbat->q;
 191     type                = nbat->type;
 192     facel               = ic->epsfac;
 193     shiftvec            = shift_vec[0];
 194     x                   = nbat->x;
 195
 196     l_cj = nbl->cj;
 197
 198     ninner = 0;
 199     for (n = 0; n < nbl->nci; n++)
 200     {
 201         int i, d;
 202
 203         nbln = &nbl->ci[n];
 204
 205         ish              = (nbln->shift & NBNXN_CI_SHIFT);
 206         /* x, f and fshift are assumed to be stored with stride 3 */
 207         ishf             = ish*DIM;
 208         cjind0           = nbln->cj_ind_start;
 209         cjind1           = nbln->cj_ind_end;
 210         /* Currently only works super-cells equal to sub-cells */
 211         ci               = nbln->ci;
 212         ci_sh            = (ish == CENTRAL ? ci : -1);
 213
 214         /* We have 5 LJ/C combinations, but use only three inner loops,
 215          * as the other combinations are unlikely and/or not much faster:
 216          * inner half-LJ + C for half-LJ + C / no-LJ + C
 217          * inner LJ + C      for full-LJ + C
 218          * inner LJ          for full-LJ + no-C / half-LJ + no-C
 219          */
 220         do_LJ   = (nbln->shift & NBNXN_CI_DO_LJ(0));
 221         do_coul = (nbln->shift & NBNXN_CI_DO_COUL(0));
 222         half_LJ = ((nbln->shift & NBNXN_CI_HALF_LJ(0)) || !do_LJ) && do_coul;
 223
 224 #ifdef CALC_ENERGIES
 225 #ifndef ENERGY_GROUPS
 226         Vvdw_ci = 0;
 227         Vc_ci   = 0;
 228 #else
 229         for (i = 0; i < UNROLLI; i++)
 230         {
 231             egp_sh_i[i] = ((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)*nbat->nenergrp;
 232         }
 233 #endif
 234 #endif
 235
 236         for (i = 0; i < UNROLLI; i++)
 237         {
 238             for (d = 0; d < DIM; d++)
 239             {
 240                 xi[i*XI_STRIDE+d] = x[(ci*UNROLLI+i)*X_STRIDE+d] + shiftvec[ishf+d];
 241                 fi[i*FI_STRIDE+d] = 0;
 242             }
 243         }
 244
 245         if (do_coul)
 246         {
 247 #ifdef CALC_ENERGIES
 248             real Vc_sub_self;
 249
 250 #ifdef CALC_COUL_RF
 251             Vc_sub_self = 0.5*c_rf;
 252 #endif
 253 #ifdef CALC_COUL_TAB
 254 #ifdef GMX_DOUBLE
 255             Vc_sub_self = 0.5*tab_coul_V[0];
 256 #else
 257             Vc_sub_self = 0.5*tab_coul_FDV0[2];
 258 #endif
 259 #endif
 260 #endif
 261
 262             for (i = 0; i < UNROLLI; i++)
 263             {
 264                 qi[i] = facel*q[ci*UNROLLI+i];
 265
 266 #ifdef CALC_ENERGIES
 267                 if (l_cj[nbln->cj_ind_start].cj == ci_sh)
 268                 {
 269 #ifdef ENERGY_GROUPS
 270                     Vc[egp_sh_i[i]+((nbat->energrp[ci]>>(i*nbat->neg_2log)) & egp_mask)]
 271 #else
 272                     Vc[0]
 273 #endif
 274                         -= qi[i]*q[ci*UNROLLI+i]*Vc_sub_self;
 275                 }
 276 #endif
 277             }
 278         }
 279
 280         cjind = cjind0;
 281         while (cjind < cjind1 && nbl->cj[cjind].excl != 0xffff)
 282         {
 283 #define CHECK_EXCLS
 284             if (half_LJ)
 285             {
 286 #define CALC_COULOMB
 287 #define HALF_LJ
 288 #include "nbnxn_kernel_ref_inner.h"
 289 #undef HALF_LJ
 290 #undef CALC_COULOMB
 291             }
 292             /* cppcheck-suppress duplicateBranch */
 293             else if (do_coul)
 294             {
 295 #define CALC_COULOMB
 296 #include "nbnxn_kernel_ref_inner.h"
 297 #undef CALC_COULOMB
 298             }
 299             else
 300             {
 301 #include "nbnxn_kernel_ref_inner.h"
 302             }
 303 #undef CHECK_EXCLS
 304             cjind++;
 305         }
 306
 307         for (; (cjind < cjind1); cjind++)
 308         {
 309             if (half_LJ)
 310             {
 311 #define CALC_COULOMB
 312 #define HALF_LJ
 313 #include "nbnxn_kernel_ref_inner.h"
 314 #undef HALF_LJ
 315 #undef CALC_COULOMB
 316             }
 317             /* cppcheck-suppress duplicateBranch */
 318             else if (do_coul)
 319             {
 320 #define CALC_COULOMB
 321 #include "nbnxn_kernel_ref_inner.h"
 322 #undef CALC_COULOMB
 323             }
 324             else
 325             {
 326 #include "nbnxn_kernel_ref_inner.h"
 327             }
 328         }
 329         ninner += cjind1 - cjind0;
 330
 331         /* Add accumulated i-forces to the force array */
 332         for (i = 0; i < UNROLLI; i++)
 333         {
 334             for (d = 0; d < DIM; d++)
 335             {
 336                 f[(ci*UNROLLI+i)*F_STRIDE+d] += fi[i*FI_STRIDE+d];
 337             }
 338         }
 339 #ifdef CALC_SHIFTFORCES
 340         if (fshift != NULL)
 341         {
 342             /* Add i forces to shifted force list */
 343             for (i = 0; i < UNROLLI; i++)
 344             {
 345                 for (d = 0; d < DIM; d++)
 346                 {
 347                     fshift[ishf+d] += fi[i*FI_STRIDE+d];
 348                 }
 349             }
 350         }
 351 #endif
 352
 353 #ifdef CALC_ENERGIES
 354 #ifndef ENERGY_GROUPS
 355         *Vvdw += Vvdw_ci;
 356         *Vc   += Vc_ci;
 357 #endif
 358 #endif
 359     }
 360
 361 #ifdef COUNT_PAIRS
 362     printf("atom pairs %d\n", npair);
 363 #endif
 364 }
 365
 366 #undef CALC_SHIFTFORCES
 367
 368 #undef X_STRIDE
 369 #undef F_STRIDE
 370 #undef XI_STRIDE
 371 #undef FI_STRIDE
 372
 373 #undef UNROLLI
 374 #undef UNROLLJ