src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forcerec.h"
  51 #include "gromacs/mdtypes/md_enums.h"
  52 #include "gromacs/utility/fatalerror.h"
  53
  54 void
  55 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
  56                           rvec * gmx_restrict              xx,
  57                           rvec * gmx_restrict              ff,
  58                           t_forcerec * gmx_restrict        fr,
  59                           const t_mdatoms * gmx_restrict   mdatoms,
  60                           nb_kernel_data_t * gmx_restrict  kernel_data,
  61                           t_nrnb * gmx_restrict            nrnb)
  62 {
  63
  64 #define  STATE_A  0
  65 #define  STATE_B  1
  66 #define  NSTATES  2
  67     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
  68     real          shX, shY, shZ;
  69     real          tx, ty, tz, Fscal;
  70     double        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
  71     double        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
  72     real          rinv6, r, rtC, rtV;
  73     real          iqA, iqB;
  74     real          qq[NSTATES], vctot;
  75     int           ntiA, ntiB, tj[NSTATES];
  76     real          Vvdw6, Vvdw12, vvtot;
  77     real          ix, iy, iz, fix, fiy, fiz;
  78     real          dx, dy, dz, rsq, rinv;
  79     real          c6[NSTATES], c12[NSTATES], c6grid;
  80     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
  81     double        dvdl_coul, dvdl_vdw;
  82     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
  83     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
  84     double        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
  85     real          sigma2[NSTATES], sigma_pow[NSTATES];
  86     int           do_tab, tab_elemsize = 0;
  87     int           n0, n1C, n1V, nnn;
  88     real          Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
  89     int           icoul, ivdw;
  90     int           nri;
  91     const int *   iinr;
  92     const int *   jindex;
  93     const int *   jjnr;
  94     const int *   shift;
  95     const int *   gid;
  96     const int *   typeA;
  97     const int *   typeB;
  98     int           ntype;
  99     const real *  shiftvec;
 100     real *        fshift;
 101     real          tabscale = 0;
 102     const real *  VFtab    = nullptr;
 103     const real *  x;
 104     real *        f;
 105     real          facel, krf, crf;
 106     const real *  chargeA;
 107     const real *  chargeB;
 108     real          sigma6_min, sigma6_def, lam_power, sc_r_power;
 109     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
 110     real          ewcljrsq, ewclj, ewclj2, exponent, poly, vvdw_disp, vvdw_rep, sh_lj_ewald;
 111     real          ewclj6;
 112     const real *  nbfp, *nbfp_grid;
 113     real *        dvdl;
 114     real *        Vv;
 115     real *        Vc;
 116     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
 117     real          rcoulomb, rvdw, sh_invrc6;
 118     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
 119     gmx_bool      bEwald, bEwaldLJ;
 120     real          rcutoff_max2;
 121     const real *  tab_ewald_F_lj = nullptr;
 122     const real *  tab_ewald_V_lj = nullptr;
 123     real          d, d2, sw, dsw, rinvcorr;
 124     real          elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
 125     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 126     gmx_bool      bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
 127     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
 128     const real *  ewtab = nullptr;
 129     int           ewitab;
 130     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
 131
 132     const real    onetwelfth  = 1.0/12.0;
 133     const real    onesixth    = 1.0/6.0;
 134     const real    zero        = 0.0;
 135     const real    half        = 0.5;
 136     const real    one         = 1.0;
 137     const real    two         = 2.0;
 138     const real    six         = 6.0;
 139     const real    fourtyeight = 48.0;
 140
 141     /* Extract pointer to non-bonded interaction constants */
 142     const interaction_const_t *ic = fr->ic;
 143
 144     x                   = xx[0];
 145     f                   = ff[0];
 146
 147     fshift              = fr->fshift[0];
 148
 149     nri                 = nlist->nri;
 150     iinr                = nlist->iinr;
 151     jindex              = nlist->jindex;
 152     jjnr                = nlist->jjnr;
 153     icoul               = nlist->ielec;
 154     ivdw                = nlist->ivdw;
 155     shift               = nlist->shift;
 156     gid                 = nlist->gid;
 157
 158     shiftvec            = fr->shift_vec[0];
 159     chargeA             = mdatoms->chargeA;
 160     chargeB             = mdatoms->chargeB;
 161     facel               = fr->ic->epsfac;
 162     krf                 = ic->k_rf;
 163     crf                 = ic->c_rf;
 164     Vc                  = kernel_data->energygrp_elec;
 165     typeA               = mdatoms->typeA;
 166     typeB               = mdatoms->typeB;
 167     ntype               = fr->ntype;
 168     nbfp                = fr->nbfp;
 169     nbfp_grid           = fr->ljpme_c6grid;
 170     Vv                  = kernel_data->energygrp_vdw;
 171     lambda_coul         = kernel_data->lambda[efptCOUL];
 172     lambda_vdw          = kernel_data->lambda[efptVDW];
 173     dvdl                = kernel_data->dvdl;
 174     alpha_coul          = fr->sc_alphacoul;
 175     alpha_vdw           = fr->sc_alphavdw;
 176     lam_power           = fr->sc_power;
 177     sc_r_power          = fr->sc_r_power;
 178     sigma6_def          = fr->sc_sigma6_def;
 179     sigma6_min          = fr->sc_sigma6_min;
 180     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 181     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 182     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 183
 184     rcoulomb            = ic->rcoulomb;
 185     rvdw                = ic->rvdw;
 186     sh_invrc6           = ic->sh_invrc6;
 187     sh_lj_ewald         = ic->sh_lj_ewald;
 188     ewclj               = ic->ewaldcoeff_lj;
 189     ewclj2              = ewclj*ewclj;
 190     ewclj6              = ewclj2*ewclj2*ewclj2;
 191
 192     if (ic->coulomb_modifier == eintmodPOTSWITCH)
 193     {
 194         d               = ic->rcoulomb - ic->rcoulomb_switch;
 195         elec_swV3       = -10.0/(d*d*d);
 196         elec_swV4       =  15.0/(d*d*d*d);
 197         elec_swV5       =  -6.0/(d*d*d*d*d);
 198         elec_swF2       = -30.0/(d*d*d);
 199         elec_swF3       =  60.0/(d*d*d*d);
 200         elec_swF4       = -30.0/(d*d*d*d*d);
 201     }
 202     else
 203     {
 204         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 205         elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
 206     }
 207
 208     if (ic->vdw_modifier == eintmodPOTSWITCH)
 209     {
 210         d               = ic->rvdw - ic->rvdw_switch;
 211         vdw_swV3        = -10.0/(d*d*d);
 212         vdw_swV4        =  15.0/(d*d*d*d);
 213         vdw_swV5        =  -6.0/(d*d*d*d*d);
 214         vdw_swF2        = -30.0/(d*d*d);
 215         vdw_swF3        =  60.0/(d*d*d*d);
 216         vdw_swF4        = -30.0/(d*d*d*d*d);
 217     }
 218     else
 219     {
 220         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 221         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 222     }
 223
 224     if (fr->cutoff_scheme == ecutsVERLET)
 225     {
 226         const interaction_const_t *ic = fr->ic;
 227
 228         if (EVDW_PME(ic->vdwtype))
 229         {
 230             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
 231         }
 232         else
 233         {
 234             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
 235         }
 236
 237         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 238         {
 239             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 240         }
 241         else if (EEL_PME_EWALD(ic->eeltype))
 242         {
 243             icoul        = GMX_NBKERNEL_ELEC_EWALD;
 244         }
 245         else
 246         {
 247             gmx_incons("Unsupported eeltype with Verlet and free-energy");
 248         }
 249
 250         bExactElecCutoff = TRUE;
 251         bExactVdwCutoff  = TRUE;
 252     }
 253     else
 254     {
 255         bExactElecCutoff = (ic->coulomb_modifier != eintmodNONE) || ic->eeltype == eelRF_ZERO;
 256         bExactVdwCutoff  = (ic->vdw_modifier != eintmodNONE);
 257     }
 258
 259     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
 260     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
 261     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
 262
 263     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
 264     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
 265
 266     if (bEwald || bEwaldLJ)
 267     {
 268         sh_ewald       = ic->sh_ewald;
 269         ewtab          = ic->tabq_coul_FDV0;
 270         ewtabscale     = ic->tabq_scale;
 271         ewtabhalfspace = half/ewtabscale;
 272         tab_ewald_F_lj = ic->tabq_vdw_F;
 273         tab_ewald_V_lj = ic->tabq_vdw_V;
 274     }
 275
 276     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 277      * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
 278      * can apply the small trick of subtracting the _reciprocal_ space contribution
 279      * in this kernel, and instead apply the free energy interaction to the 1/r
 280      * (standard coulomb) interaction.
 281      *
 282      * However, we cannot use this approach for switch-modified since we would then
 283      * effectively end up evaluating a significantly different interaction here compared to the
 284      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 285      * position than what the user requested, or by switching different
 286      * things (1/r rather than short-range Ewald). For these settings, we just
 287      * use the traditional short-range Ewald interaction in that case.
 288      */
 289     bConvertEwaldToCoulomb = (bEwald && (ic->coulomb_modifier != eintmodPOTSWITCH));
 290     /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
 291      * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
 292      */
 293     bConvertLJEwaldToLJ6   = (bEwaldLJ && (ic->vdw_modifier   != eintmodPOTSWITCH));
 294
 295     /* We currently don't implement exclusion correction, needed with the Verlet cut-off scheme, without conversion */
 296     if (fr->cutoff_scheme == ecutsVERLET &&
 297         ((bEwald   && !bConvertEwaldToCoulomb) ||
 298          (bEwaldLJ && !bConvertLJEwaldToLJ6)))
 299     {
 300         gmx_incons("Unimplemented non-bonded setup");
 301     }
 302
 303     /* fix compiler warnings */
 304     n1C   = n1V   = 0;
 305     epsC  = epsV  = 0;
 306     eps2C = eps2V = 0;
 307
 308     dvdl_coul  = 0;
 309     dvdl_vdw   = 0;
 310
 311     /* Lambda factor for state A, 1-lambda*/
 312     LFC[STATE_A] = one - lambda_coul;
 313     LFV[STATE_A] = one - lambda_vdw;
 314
 315     /* Lambda factor for state B, lambda*/
 316     LFC[STATE_B] = lambda_coul;
 317     LFV[STATE_B] = lambda_vdw;
 318
 319     /*derivative of the lambda factor for state A and B */
 320     DLF[STATE_A] = -1;
 321     DLF[STATE_B] = 1;
 322
 323     for (i = 0; i < NSTATES; i++)
 324     {
 325         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 326         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 327         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 328         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 329     }
 330     /* precalculate */
 331     sigma2_def = std::cbrt(sigma6_def);
 332     sigma2_min = std::cbrt(sigma6_min);
 333
 334     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 335
 336     do_tab = static_cast<int>(icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
 337                               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 338     if (do_tab)
 339     {
 340         tabscale         = kernel_data->table_elec_vdw->scale;
 341         VFtab            = kernel_data->table_elec_vdw->data;
 342         /* we always use the combined table here */
 343         tab_elemsize     = kernel_data->table_elec_vdw->stride;
 344     }
 345
 346     for (n = 0; (n < nri); n++)
 347     {
 348         int npair_within_cutoff;
 349
 350         npair_within_cutoff = 0;
 351
 352         is3              = 3*shift[n];
 353         shX              = shiftvec[is3];
 354         shY              = shiftvec[is3+1];
 355         shZ              = shiftvec[is3+2];
 356         nj0              = jindex[n];
 357         nj1              = jindex[n+1];
 358         ii               = iinr[n];
 359         ii3              = 3*ii;
 360         ix               = shX + x[ii3+0];
 361         iy               = shY + x[ii3+1];
 362         iz               = shZ + x[ii3+2];
 363         iqA              = facel*chargeA[ii];
 364         iqB              = facel*chargeB[ii];
 365         ntiA             = 2*ntype*typeA[ii];
 366         ntiB             = 2*ntype*typeB[ii];
 367         vctot            = 0;
 368         vvtot            = 0;
 369         fix              = 0;
 370         fiy              = 0;
 371         fiz              = 0;
 372
 373         for (k = nj0; (k < nj1); k++)
 374         {
 375             jnr              = jjnr[k];
 376             j3               = 3*jnr;
 377             dx               = ix - x[j3];
 378             dy               = iy - x[j3+1];
 379             dz               = iz - x[j3+2];
 380             rsq              = dx*dx + dy*dy + dz*dz;
 381
 382             if (bExactCutoffAll && rsq >= rcutoff_max2)
 383             {
 384                 /* We save significant time by skipping all code below.
 385                  * Note that with soft-core interactions, the actual cut-off
 386                  * check might be different. But since the soft-core distance
 387                  * is always larger than r, checking on r here is safe.
 388                  */
 389                 continue;
 390             }
 391             npair_within_cutoff++;
 392
 393             if (rsq > 0)
 394             {
 395                 /* Note that unlike in the nbnxn kernels, we do not need
 396                  * to clamp the value of rsq before taking the invsqrt
 397                  * to avoid NaN in the LJ calculation, since here we do
 398                  * not calculate LJ interactions when C6 and C12 are zero.
 399                  */
 400
 401                 rinv         = gmx::invsqrt(rsq);
 402                 r            = rsq*rinv;
 403             }
 404             else
 405             {
 406                 /* The force at r=0 is zero, because of symmetry.
 407                  * But note that the potential is in general non-zero,
 408                  * since the soft-cored r will be non-zero.
 409                  */
 410                 rinv         = 0;
 411                 r            = 0;
 412             }
 413
 414             if (sc_r_power == six)
 415             {
 416                 rpm2             = rsq*rsq;  /* r4 */
 417                 rp               = rpm2*rsq; /* r6 */
 418             }
 419             else if (sc_r_power == fourtyeight)
 420             {
 421                 rp               = rsq*rsq*rsq; /* r6 */
 422                 rp               = rp*rp;       /* r12 */
 423                 rp               = rp*rp;       /* r24 */
 424                 rp               = rp*rp;       /* r48 */
 425                 rpm2             = rp/rsq;      /* r46 */
 426             }
 427             else
 428             {
 429                 rp             = std::pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
 430                 rpm2           = rp/rsq;
 431             }
 432
 433             Fscal = 0;
 434
 435             qq[STATE_A]      = iqA*chargeA[jnr];
 436             qq[STATE_B]      = iqB*chargeB[jnr];
 437
 438             tj[STATE_A]      = ntiA+2*typeA[jnr];
 439             tj[STATE_B]      = ntiB+2*typeB[jnr];
 440
 441             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 442             {
 443                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 444                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 445
 446                 for (i = 0; i < NSTATES; i++)
 447                 {
 448                     c12[i]             = nbfp[tj[i]+1];
 449                     if ((c6[i] > 0) && (c12[i] > 0))
 450                     {
 451                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 452                         sigma6[i]       = half*c12[i]/c6[i];
 453                         sigma2[i]       = std::cbrt(sigma6[i]);
 454                         /* should be able to get rid of cbrt call eventually.  Will require agreement on
 455                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 456                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 457                         {
 458                             sigma6[i] = sigma6_min;
 459                             sigma2[i] = sigma2_min;
 460                         }
 461                     }
 462                     else
 463                     {
 464                         sigma6[i]       = sigma6_def;
 465                         sigma2[i]       = sigma2_def;
 466                     }
 467                     if (sc_r_power == six)
 468                     {
 469                         sigma_pow[i]    = sigma6[i];
 470                     }
 471                     else if (sc_r_power == fourtyeight)
 472                     {
 473                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 474                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 475                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 476                     }
 477                     else
 478                     {    /* not really supported as input, but in here for testing the general case*/
 479                         sigma_pow[i]    = std::pow(sigma2[i], sc_r_power/2);
 480                     }
 481                 }
 482
 483                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 484                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 485                 {
 486                     alpha_vdw_eff    = 0;
 487                     alpha_coul_eff   = 0;
 488                 }
 489                 else
 490                 {
 491                     alpha_vdw_eff    = alpha_vdw;
 492                     alpha_coul_eff   = alpha_coul;
 493                 }
 494
 495                 for (i = 0; i < NSTATES; i++)
 496                 {
 497                     FscalC[i]    = 0;
 498                     FscalV[i]    = 0;
 499                     Vcoul[i]     = 0;
 500                     Vvdw[i]      = 0;
 501
 502                     /* Only spend time on A or B state if it is non-zero */
 503                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 504                     {
 505                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 506                         rpinvC         = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 507                         rinvC          = std::pow(rpinvC, one/sc_r_power);
 508                         rC             = one/rinvC;
 509
 510                         rpinvV         = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 511                         rinvV          = std::pow(rpinvV, one/sc_r_power);
 512                         rV             = one/rinvV;
 513
 514                         if (do_tab)
 515                         {
 516                             rtC        = rC*tabscale;
 517                             n0         = rtC;
 518                             epsC       = rtC-n0;
 519                             eps2C      = epsC*epsC;
 520                             n1C        = tab_elemsize*n0;
 521
 522                             rtV        = rV*tabscale;
 523                             n0         = rtV;
 524                             epsV       = rtV-n0;
 525                             eps2V      = epsV*epsV;
 526                             n1V        = tab_elemsize*n0;
 527                         }
 528
 529                         /* Only process the coulomb interactions if we have charges,
 530                          * and if we either include all entries in the list (no cutoff
 531                          * used in the kernel), or if we are within the cutoff.
 532                          */
 533                         bComputeElecInteraction = !bExactElecCutoff ||
 534                             ( bConvertEwaldToCoulomb && r < rcoulomb) ||
 535                             (!bConvertEwaldToCoulomb && rC < rcoulomb);
 536
 537                         if ( (qq[i] != 0) && bComputeElecInteraction)
 538                         {
 539                             switch (icoul)
 540                             {
 541                                 case GMX_NBKERNEL_ELEC_COULOMB:
 542                                     /* simple cutoff */
 543                                     Vcoul[i]   = qq[i]*rinvC;
 544                                     FscalC[i]  = Vcoul[i];
 545                                     /* The shift for the Coulomb potential is stored in
 546                                      * the RF parameter c_rf, which is 0 without shift.
 547                                      */
 548                                     Vcoul[i]  -= qq[i]*ic->c_rf;
 549                                     break;
 550
 551                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 552                                     /* reaction-field */
 553                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
 554                                     FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
 555                                     break;
 556
 557                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 558                                     /* non-Ewald tabulated coulomb */
 559                                     nnn        = n1C;
 560                                     Y          = VFtab[nnn];
 561                                     F          = VFtab[nnn+1];
 562                                     Geps       = epsC*VFtab[nnn+2];
 563                                     Heps2      = eps2C*VFtab[nnn+3];
 564                                     Fp         = F+Geps+Heps2;
 565                                     VV         = Y+epsC*Fp;
 566                                     FF         = Fp+Geps+two*Heps2;
 567                                     Vcoul[i]   = qq[i]*VV;
 568                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
 569                                     break;
 570
 571                                 case GMX_NBKERNEL_ELEC_EWALD:
 572                                     if (bConvertEwaldToCoulomb)
 573                                     {
 574                                         /* Ewald FEP is done only on the 1/r part */
 575                                         Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
 576                                         FscalC[i]  = qq[i]*rinvC;
 577                                     }
 578                                     else
 579                                     {
 580                                         ewrt      = rC*ewtabscale;
 581                                         ewitab    = static_cast<int>(ewrt);
 582                                         eweps     = ewrt-ewitab;
 583                                         ewitab    = 4*ewitab;
 584                                         FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
 585                                         rinvcorr  = rinvC-sh_ewald;
 586                                         Vcoul[i]  = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
 587                                         FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
 588                                     }
 589                                     break;
 590
 591                                 case GMX_NBKERNEL_ELEC_NONE:
 592                                     FscalC[i]  = zero;
 593                                     Vcoul[i]   = zero;
 594                                     break;
 595
 596                                 default:
 597                                     gmx_incons("Invalid icoul in free energy kernel");
 598                             }
 599
 600                             if (ic->coulomb_modifier == eintmodPOTSWITCH)
 601                             {
 602                                 d                = rC - ic->rcoulomb_switch;
 603                                 d                = (d > zero) ? d : zero;
 604                                 d2               = d*d;
 605                                 sw               = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
 606                                 dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
 607
 608                                 FscalC[i]        = FscalC[i]*sw - rC*Vcoul[i]*dsw;
 609                                 Vcoul[i]        *= sw;
 610
 611                                 FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : zero;
 612                                 Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : zero;
 613                             }
 614                         }
 615
 616                         /* Only process the VDW interactions if we have
 617                          * some non-zero parameters, and if we either
 618                          * include all entries in the list (no cutoff used
 619                          * in the kernel), or if we are within the cutoff.
 620                          */
 621                         bComputeVdwInteraction = !bExactVdwCutoff ||
 622                             ( bConvertLJEwaldToLJ6 && r < rvdw) ||
 623                             (!bConvertLJEwaldToLJ6 && rV < rvdw);
 624                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
 625                         {
 626                             switch (ivdw)
 627                             {
 628                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
 629                                     /* cutoff LJ */
 630                                     if (sc_r_power == six)
 631                                     {
 632                                         rinv6            = rpinvV;
 633                                     }
 634                                     else
 635                                     {
 636                                         rinv6            = rinvV*rinvV;
 637                                         rinv6            = rinv6*rinv6*rinv6;
 638                                     }
 639                                     Vvdw6            = c6[i]*rinv6;
 640                                     Vvdw12           = c12[i]*rinv6*rinv6;
 641
 642                                     Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
 643                                                          - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
 644                                     FscalV[i]        = Vvdw12 - Vvdw6;
 645                                     break;
 646
 647                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
 648                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
 649                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 650                                     /* Table LJ */
 651                                     nnn = n1V+4;
 652                                     /* dispersion */
 653                                     Y          = VFtab[nnn];
 654                                     F          = VFtab[nnn+1];
 655                                     Geps       = epsV*VFtab[nnn+2];
 656                                     Heps2      = eps2V*VFtab[nnn+3];
 657                                     Fp         = F+Geps+Heps2;
 658                                     VV         = Y+epsV*Fp;
 659                                     FF         = Fp+Geps+two*Heps2;
 660                                     Vvdw[i]   += c6[i]*VV;
 661                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
 662
 663                                     /* repulsion */
 664                                     Y          = VFtab[nnn+4];
 665                                     F          = VFtab[nnn+5];
 666                                     Geps       = epsV*VFtab[nnn+6];
 667                                     Heps2      = eps2V*VFtab[nnn+7];
 668                                     Fp         = F+Geps+Heps2;
 669                                     VV         = Y+epsV*Fp;
 670                                     FF         = Fp+Geps+two*Heps2;
 671                                     Vvdw[i]   += c12[i]*VV;
 672                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
 673                                     break;
 674
 675                                 case GMX_NBKERNEL_VDW_LJEWALD:
 676                                     if (sc_r_power == six)
 677                                     {
 678                                         rinv6            = rpinvV;
 679                                     }
 680                                     else
 681                                     {
 682                                         rinv6            = rinvV*rinvV;
 683                                         rinv6            = rinv6*rinv6*rinv6;
 684                                     }
 685                                     c6grid           = nbfp_grid[tj[i]];
 686
 687                                     if (bConvertLJEwaldToLJ6)
 688                                     {
 689                                         /* cutoff LJ */
 690                                         Vvdw6            = c6[i]*rinv6;
 691                                         Vvdw12           = c12[i]*rinv6*rinv6;
 692
 693                                         Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
 694                                                              - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
 695                                         FscalV[i]        = Vvdw12 - Vvdw6;
 696                                     }
 697                                     else
 698                                     {
 699                                         /* Normal LJ-PME */
 700                                         ewcljrsq         = ewclj2*rV*rV;
 701                                         exponent         = std::exp(-ewcljrsq);
 702                                         poly             = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);
 703                                         vvdw_disp        = (c6[i]-c6grid*(one-poly))*rinv6;
 704                                         vvdw_rep         = c12[i]*rinv6*rinv6;
 705                                         FscalV[i]        = vvdw_rep - vvdw_disp - c6grid*onesixth*exponent*ewclj6;
 706                                         Vvdw[i]          = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/six;
 707                                     }
 708                                     break;
 709
 710                                 case GMX_NBKERNEL_VDW_NONE:
 711                                     Vvdw[i]    = zero;
 712                                     FscalV[i]  = zero;
 713                                     break;
 714
 715                                 default:
 716                                     gmx_incons("Invalid ivdw in free energy kernel");
 717                             }
 718
 719                             if (ic->vdw_modifier == eintmodPOTSWITCH)
 720                             {
 721                                 d                = rV - ic->rvdw_switch;
 722                                 d                = (d > zero) ? d : zero;
 723                                 d2               = d*d;
 724                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
 725                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
 726
 727                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
 728                                 Vvdw[i]         *= sw;
 729
 730                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
 731                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
 732                             }
 733                         }
 734
 735                         /* FscalC (and FscalV) now contain: dV/drC * rC
 736                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 737                          * Further down we first multiply by r^p-2 and then by
 738                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 739                          */
 740                         FscalC[i] *= rpinvC;
 741                         FscalV[i] *= rpinvV;
 742                     }
 743                 }
 744
 745                 /* Assemble A and B states */
 746                 for (i = 0; i < NSTATES; i++)
 747                 {
 748                     vctot         += LFC[i]*Vcoul[i];
 749                     vvtot         += LFV[i]*Vvdw[i];
 750
 751                     Fscal         += LFC[i]*FscalC[i]*rpm2;
 752                     Fscal         += LFV[i]*FscalV[i]*rpm2;
 753
 754                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 755                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 756                 }
 757             }
 758             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 759             {
 760                 /* For excluded pairs, which are only in this pair list when
 761                  * using the Verlet scheme, we don't use soft-core.
 762                  * The group scheme also doesn't soft-core for these.
 763                  * As there is no singularity, there is no need for soft-core.
 764                  */
 765                 VV = krf*rsq - crf;
 766                 FF = -two*krf;
 767
 768                 if (ii == jnr)
 769                 {
 770                     VV *= half;
 771                 }
 772
 773                 for (i = 0; i < NSTATES; i++)
 774                 {
 775                     vctot      += LFC[i]*qq[i]*VV;
 776                     Fscal      += LFC[i]*qq[i]*FF;
 777                     dvdl_coul  += DLF[i]*qq[i]*VV;
 778                 }
 779             }
 780
 781             if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
 782             {
 783                 /* See comment in the preamble. When using Ewald interactions
 784                  * (unless we use a switch modifier) we subtract the reciprocal-space
 785                  * Ewald component here which made it possible to apply the free
 786                  * energy interaction to 1/r (vanilla coulomb short-range part)
 787                  * above. This gets us closer to the ideal case of applying
 788                  * the softcore to the entire electrostatic interaction,
 789                  * including the reciprocal-space component.
 790                  */
 791                 real v_lr, f_lr;
 792
 793                 ewrt      = r*ewtabscale;
 794                 ewitab    = static_cast<int>(ewrt);
 795                 eweps     = ewrt-ewitab;
 796                 ewitab    = 4*ewitab;
 797                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
 798                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
 799                 f_lr     *= rinv;
 800
 801                 /* Note that any possible Ewald shift has already been applied in
 802                  * the normal interaction part above.
 803                  */
 804
 805                 if (ii == jnr)
 806                 {
 807                     /* If we get here, the i particle (ii) has itself (jnr)
 808                      * in its neighborlist. This can only happen with the Verlet
 809                      * scheme, and corresponds to a self-interaction that will
 810                      * occur twice. Scale it down by 50% to only include it once.
 811                      */
 812                     v_lr *= half;
 813                 }
 814
 815                 for (i = 0; i < NSTATES; i++)
 816                 {
 817                     vctot      -= LFC[i]*qq[i]*v_lr;
 818                     Fscal      -= LFC[i]*qq[i]*f_lr;
 819                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
 820                 }
 821             }
 822
 823             if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
 824             {
 825                 /* See comment in the preamble. When using LJ-Ewald interactions
 826                  * (unless we use a switch modifier) we subtract the reciprocal-space
 827                  * Ewald component here which made it possible to apply the free
 828                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 829                  * above. This gets us closer to the ideal case of applying
 830                  * the softcore to the entire VdW interaction,
 831                  * including the reciprocal-space component.
 832                  */
 833                 /* We could also use the analytical form here
 834                  * iso a table, but that can cause issues for
 835                  * r close to 0 for non-interacting pairs.
 836                  */
 837                 real rs, frac, f_lr;
 838                 int  ri;
 839
 840                 rs     = rsq*rinv*ewtabscale;
 841                 ri     = static_cast<int>(rs);
 842                 frac   = rs - ri;
 843                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 844                 /* TODO: Currently the Ewald LJ table does not contain
 845                  * the factor 1/6, we should add this.
 846                  */
 847                 FF     = f_lr*rinv/six;
 848                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
 849
 850                 if (ii == jnr)
 851                 {
 852                     /* If we get here, the i particle (ii) has itself (jnr)
 853                      * in its neighborlist. This can only happen with the Verlet
 854                      * scheme, and corresponds to a self-interaction that will
 855                      * occur twice. Scale it down by 50% to only include it once.
 856                      */
 857                     VV *= half;
 858                 }
 859
 860                 for (i = 0; i < NSTATES; i++)
 861                 {
 862                     c6grid      = nbfp_grid[tj[i]];
 863                     vvtot      += LFV[i]*c6grid*VV;
 864                     Fscal      += LFV[i]*c6grid*FF;
 865                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
 866                 }
 867             }
 868
 869             if (bDoForces)
 870             {
 871                 tx         = Fscal*dx;
 872                 ty         = Fscal*dy;
 873                 tz         = Fscal*dz;
 874                 fix        = fix + tx;
 875                 fiy        = fiy + ty;
 876                 fiz        = fiz + tz;
 877                 /* OpenMP atomics are expensive, but this kernels is also
 878                  * expensive, so we can take this hit, instead of using
 879                  * thread-local output buffers and extra reduction.
 880                  *
 881                  * All the OpenMP regions in this file are trivial and should
 882                  * not throw, so no need for try/catch.
 883                  */
 884 #pragma omp atomic
 885                 f[j3]     -= tx;
 886 #pragma omp atomic
 887                 f[j3+1]   -= ty;
 888 #pragma omp atomic
 889                 f[j3+2]   -= tz;
 890             }
 891         }
 892
 893         /* The atomics below are expensive with many OpenMP threads.
 894          * Here unperturbed i-particles will usually only have a few
 895          * (perturbed) j-particles in the list. Thus with a buffered list
 896          * we can skip a significant number of i-reductions with a check.
 897          */
 898         if (npair_within_cutoff > 0)
 899         {
 900             if (bDoForces)
 901             {
 902 #pragma omp atomic
 903                 f[ii3]        += fix;
 904 #pragma omp atomic
 905                 f[ii3+1]      += fiy;
 906 #pragma omp atomic
 907                 f[ii3+2]      += fiz;
 908             }
 909             if (bDoShiftForces)
 910             {
 911 #pragma omp atomic
 912                 fshift[is3]   += fix;
 913 #pragma omp atomic
 914                 fshift[is3+1] += fiy;
 915 #pragma omp atomic
 916                 fshift[is3+2] += fiz;
 917             }
 918             if (bDoPotential)
 919             {
 920                 ggid               = gid[n];
 921 #pragma omp atomic
 922                 Vc[ggid]          += vctot;
 923 #pragma omp atomic
 924                 Vv[ggid]          += vvtot;
 925             }
 926         }
 927     }
 928
 929 #pragma omp atomic
 930     dvdl[efptCOUL]     += dvdl_coul;
 931  #pragma omp atomic
 932     dvdl[efptVDW]      += dvdl_vdw;
 933
 934     /* Estimate flops, average for free energy stuff:
 935      * 12  flops per outer iteration
 936      * 150 flops per inner iteration
 937      */
 938 #pragma omp atomic
 939     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 940 }