src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nb_free_energy.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/gmxlib/nrnb.h"
  46 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  47 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  48 #include "gromacs/math/functions.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/mdtypes/forcerec.h"
  51 #include "gromacs/mdtypes/md_enums.h"
  52 #include "gromacs/utility/fatalerror.h"
  53
  54
  55 //! Enum for templating the soft-core treatment in the kernel
  56 enum class SoftCoreTreatment
  57 {
  58     None,    //!< No soft-core
  59     RPower6, //!< Soft-core with r-power = 6
  60     RPower48 //!< Soft-core with r-power = 48
  61 };
  62
  63 //! Most treatments are fine with float in mixed-precision mode.
  64 template <SoftCoreTreatment softCoreTreatment>
  65 struct SoftCoreReal
  66 {
  67     //! Real type for soft-core calculations
  68     using Real = real;
  69 };
  70
  71 //! This treatment requires double precision for some computations.
  72 template <>
  73 struct SoftCoreReal<SoftCoreTreatment::RPower48>
  74 {
  75     //! Real type for soft-core calculations
  76     using Real = double;
  77 };
  78
  79 //! Templated free-energy non-bonded kernel
  80 template<SoftCoreTreatment softCoreTreatment>
  81 static void
  82 nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
  83                       rvec * gmx_restrict              xx,
  84                       rvec * gmx_restrict              ff,
  85                       t_forcerec * gmx_restrict        fr,
  86                       const t_mdatoms * gmx_restrict   mdatoms,
  87                       nb_kernel_data_t * gmx_restrict  kernel_data,
  88                       t_nrnb * gmx_restrict            nrnb)
  89 {
  90     using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
  91
  92     constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
  93
  94 #define  STATE_A  0
  95 #define  STATE_B  1
  96 #define  NSTATES  2
  97     int           i, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
  98     real          shX, shY, shZ;
  99     real          tx, ty, tz, Fscal;
 100     SCReal        FscalC[NSTATES], FscalV[NSTATES];  /* Needs double for sc_power==48 */
 101     SCReal        Vcoul[NSTATES], Vvdw[NSTATES];     /* Needs double for sc_power==48 */
 102     real          rinv6, r, rtC, rtV;
 103     real          iqA, iqB;
 104     real          qq[NSTATES], vctot;
 105     int           ntiA, ntiB, tj[NSTATES];
 106     real          Vvdw6, Vvdw12, vvtot;
 107     real          ix, iy, iz, fix, fiy, fiz;
 108     real          dx, dy, dz, rsq, rinv;
 109     real          c6[NSTATES], c12[NSTATES], c6grid;
 110     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
 111     SCReal        dvdl_coul, dvdl_vdw;
 112     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
 113     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff;
 114     SCReal        rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV; /* Needs double for sc_power==48 */
 115     real          sigma_pow[NSTATES];
 116     int           do_tab, tab_elemsize = 0;
 117     int           n0, n1C, n1V, nnn;
 118     real          Y, F, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
 119     int           icoul, ivdw;
 120     int           nri;
 121     const int *   iinr;
 122     const int *   jindex;
 123     const int *   jjnr;
 124     const int *   shift;
 125     const int *   gid;
 126     const int *   typeA;
 127     const int *   typeB;
 128     int           ntype;
 129     const real *  shiftvec;
 130     real *        fshift;
 131     real          tabscale = 0;
 132     const real *  VFtab    = nullptr;
 133     const real *  x;
 134     real *        f;
 135     real          facel, krf, crf;
 136     const real *  chargeA;
 137     const real *  chargeB;
 138     real          sigma6_min, sigma6_def, lam_power;
 139     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw;
 140     real          sh_lj_ewald;
 141     const real *  nbfp, *nbfp_grid;
 142     real *        dvdl;
 143     real *        Vv;
 144     real *        Vc;
 145     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
 146     real          rcoulomb, rvdw, sh_invrc6;
 147     gmx_bool      bEwald, bEwaldLJ;
 148     real          rcutoff_max2;
 149     const real *  tab_ewald_F_lj = nullptr;
 150     const real *  tab_ewald_V_lj = nullptr;
 151     real          d, d2, sw, dsw;
 152     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 153     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
 154     const real *  ewtab = nullptr;
 155     int           ewitab;
 156     real          ewrt, eweps, ewtabscale = 0, ewtabhalfspace = 0, sh_ewald = 0;
 157
 158     const real    onetwelfth  = 1.0/12.0;
 159     const real    onesixth    = 1.0/6.0;
 160     const real    zero        = 0.0;
 161     const real    half        = 0.5;
 162     const real    one         = 1.0;
 163     const real    two         = 2.0;
 164     const real    six         = 6.0;
 165
 166     /* Extract pointer to non-bonded interaction constants */
 167     const interaction_const_t *ic = fr->ic;
 168
 169     x                   = xx[0];
 170     f                   = ff[0];
 171
 172     fshift              = fr->fshift[0];
 173
 174     nri                 = nlist->nri;
 175     iinr                = nlist->iinr;
 176     jindex              = nlist->jindex;
 177     jjnr                = nlist->jjnr;
 178     shift               = nlist->shift;
 179     gid                 = nlist->gid;
 180
 181     shiftvec            = fr->shift_vec[0];
 182     chargeA             = mdatoms->chargeA;
 183     chargeB             = mdatoms->chargeB;
 184     facel               = fr->ic->epsfac;
 185     krf                 = ic->k_rf;
 186     crf                 = ic->c_rf;
 187     Vc                  = kernel_data->energygrp_elec;
 188     typeA               = mdatoms->typeA;
 189     typeB               = mdatoms->typeB;
 190     ntype               = fr->ntype;
 191     nbfp                = fr->nbfp;
 192     nbfp_grid           = fr->ljpme_c6grid;
 193     Vv                  = kernel_data->energygrp_vdw;
 194     lambda_coul         = kernel_data->lambda[efptCOUL];
 195     lambda_vdw          = kernel_data->lambda[efptVDW];
 196     dvdl                = kernel_data->dvdl;
 197     alpha_coul          = fr->sc_alphacoul;
 198     alpha_vdw           = fr->sc_alphavdw;
 199     lam_power           = fr->sc_power;
 200     sigma6_def          = fr->sc_sigma6_def;
 201     sigma6_min          = fr->sc_sigma6_min;
 202     bDoForces           = ((kernel_data->flags & GMX_NONBONDED_DO_FORCE) != 0);
 203     bDoShiftForces      = ((kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
 204     bDoPotential        = ((kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 205
 206     rcoulomb            = ic->rcoulomb;
 207     rvdw                = ic->rvdw;
 208     sh_invrc6           = ic->sh_invrc6;
 209     sh_lj_ewald         = ic->sh_lj_ewald;
 210
 211     // Note that the nbnxm kernels do not support Coulomb potential switching at all
 212     GMX_ASSERT(ic->coulomb_modifier != eintmodPOTSWITCH,
 213                "Potential switching is not supported for Coulomb with FEP");
 214
 215     if (ic->vdw_modifier == eintmodPOTSWITCH)
 216     {
 217         d               = ic->rvdw - ic->rvdw_switch;
 218         vdw_swV3        = -10.0/(d*d*d);
 219         vdw_swV4        =  15.0/(d*d*d*d);
 220         vdw_swV5        =  -6.0/(d*d*d*d*d);
 221         vdw_swF2        = -30.0/(d*d*d);
 222         vdw_swF3        =  60.0/(d*d*d*d);
 223         vdw_swF4        = -30.0/(d*d*d*d*d);
 224     }
 225     else
 226     {
 227         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 228         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 229     }
 230
 231     if (EVDW_PME(ic->vdwtype))
 232     {
 233         ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
 234     }
 235     else
 236     {
 237         ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
 238     }
 239
 240     if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 241     {
 242         icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 243     }
 244     else if (EEL_PME_EWALD(ic->eeltype))
 245     {
 246         icoul        = GMX_NBKERNEL_ELEC_EWALD;
 247     }
 248     else
 249     {
 250         gmx_incons("Unsupported eeltype with Verlet and free-energy");
 251     }
 252
 253     rcutoff_max2    = std::max(ic->rcoulomb, ic->rvdw);
 254     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
 255
 256     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
 257     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
 258
 259     if (bEwald || bEwaldLJ)
 260     {
 261         sh_ewald       = ic->sh_ewald;
 262         ewtab          = ic->tabq_coul_FDV0;
 263         ewtabscale     = ic->tabq_scale;
 264         ewtabhalfspace = half/ewtabscale;
 265         tab_ewald_F_lj = ic->tabq_vdw_F;
 266         tab_ewald_V_lj = ic->tabq_vdw_V;
 267     }
 268
 269     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 270      * reciprocal space. When we use non-switched Ewald interactions, we
 271      * assume the soft-coring does not significantly affect the grid contribution
 272      * and apply the soft-core only to the full 1/r (- shift) pair contribution.
 273      *
 274      * However, we cannot use this approach for switch-modified since we would then
 275      * effectively end up evaluating a significantly different interaction here compared to the
 276      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 277      * position than what the user requested, or by switching different
 278      * things (1/r rather than short-range Ewald). For these settings, we just
 279      * use the traditional short-range Ewald interaction in that case.
 280      */
 281     GMX_RELEASE_ASSERT(!(bEwald   && ic->coulomb_modifier == eintmodPOTSWITCH) &&
 282                        !(bEwaldLJ && ic->vdw_modifier   == eintmodPOTSWITCH),
 283                        "Can not apply soft-core to switched Ewald potentials");
 284
 285     /* fix compiler warnings */
 286     n1C   = n1V   = 0;
 287     epsC  = epsV  = 0;
 288     eps2C = eps2V = 0;
 289
 290     dvdl_coul  = 0;
 291     dvdl_vdw   = 0;
 292
 293     /* Lambda factor for state A, 1-lambda*/
 294     LFC[STATE_A] = one - lambda_coul;
 295     LFV[STATE_A] = one - lambda_vdw;
 296
 297     /* Lambda factor for state B, lambda*/
 298     LFC[STATE_B] = lambda_coul;
 299     LFV[STATE_B] = lambda_vdw;
 300
 301     /*derivative of the lambda factor for state A and B */
 302     DLF[STATE_A] = -1;
 303     DLF[STATE_B] = 1;
 304
 305     constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
 306     for (i = 0; i < NSTATES; i++)
 307     {
 308         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 309         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 310         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 311         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 312     }
 313
 314     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 315
 316     do_tab = static_cast<int>(icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
 317                               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 318     if (do_tab)
 319     {
 320         tabscale         = kernel_data->table_elec_vdw->scale;
 321         VFtab            = kernel_data->table_elec_vdw->data;
 322         /* we always use the combined table here */
 323         tab_elemsize     = kernel_data->table_elec_vdw->stride;
 324     }
 325
 326     for (n = 0; (n < nri); n++)
 327     {
 328         int npair_within_cutoff;
 329
 330         npair_within_cutoff = 0;
 331
 332         is3              = 3*shift[n];
 333         shX              = shiftvec[is3];
 334         shY              = shiftvec[is3+1];
 335         shZ              = shiftvec[is3+2];
 336         nj0              = jindex[n];
 337         nj1              = jindex[n+1];
 338         ii               = iinr[n];
 339         ii3              = 3*ii;
 340         ix               = shX + x[ii3+0];
 341         iy               = shY + x[ii3+1];
 342         iz               = shZ + x[ii3+2];
 343         iqA              = facel*chargeA[ii];
 344         iqB              = facel*chargeB[ii];
 345         ntiA             = 2*ntype*typeA[ii];
 346         ntiB             = 2*ntype*typeB[ii];
 347         vctot            = 0;
 348         vvtot            = 0;
 349         fix              = 0;
 350         fiy              = 0;
 351         fiz              = 0;
 352
 353         for (k = nj0; (k < nj1); k++)
 354         {
 355             jnr              = jjnr[k];
 356             j3               = 3*jnr;
 357             dx               = ix - x[j3];
 358             dy               = iy - x[j3+1];
 359             dz               = iz - x[j3+2];
 360             rsq              = dx*dx + dy*dy + dz*dz;
 361
 362             if (rsq >= rcutoff_max2)
 363             {
 364                 /* We save significant time by skipping all code below.
 365                  * Note that with soft-core interactions, the actual cut-off
 366                  * check might be different. But since the soft-core distance
 367                  * is always larger than r, checking on r here is safe.
 368                  */
 369                 continue;
 370             }
 371             npair_within_cutoff++;
 372
 373             if (rsq > 0)
 374             {
 375                 /* Note that unlike in the nbnxn kernels, we do not need
 376                  * to clamp the value of rsq before taking the invsqrt
 377                  * to avoid NaN in the LJ calculation, since here we do
 378                  * not calculate LJ interactions when C6 and C12 are zero.
 379                  */
 380
 381                 rinv         = gmx::invsqrt(rsq);
 382                 r            = rsq*rinv;
 383             }
 384             else
 385             {
 386                 /* The force at r=0 is zero, because of symmetry.
 387                  * But note that the potential is in general non-zero,
 388                  * since the soft-cored r will be non-zero.
 389                  */
 390                 rinv         = 0;
 391                 r            = 0;
 392             }
 393
 394             if (softCoreTreatment == SoftCoreTreatment::None)
 395             {
 396                 /* The soft-core power p will not affect the results
 397                  * with not using soft-core, so we use power of 0 which gives
 398                  * the simplest math and cheapest code.
 399                  */
 400                 rpm2         = rinv*rinv;
 401                 rp           = 1;
 402             }
 403             if (softCoreTreatment == SoftCoreTreatment::RPower6)
 404             {
 405                 rpm2         = rsq*rsq;  /* r4 */
 406                 rp           = rpm2*rsq; /* r6 */
 407             }
 408             if (softCoreTreatment == SoftCoreTreatment::RPower48)
 409             {
 410                 rp           = rsq*rsq*rsq; /* r6 */
 411                 rp           = rp*rp;       /* r12 */
 412                 rp           = rp*rp;       /* r24 */
 413                 rp           = rp*rp;       /* r48 */
 414                 rpm2         = rp/rsq;      /* r46 */
 415             }
 416
 417             Fscal = 0;
 418
 419             qq[STATE_A]      = iqA*chargeA[jnr];
 420             qq[STATE_B]      = iqB*chargeB[jnr];
 421
 422             tj[STATE_A]      = ntiA+2*typeA[jnr];
 423             tj[STATE_B]      = ntiB+2*typeB[jnr];
 424
 425             if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
 426             {
 427                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 428                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 429
 430                 for (i = 0; i < NSTATES; i++)
 431                 {
 432                     c12[i]             = nbfp[tj[i]+1];
 433                     if (useSoftCore)
 434                     {
 435                         if ((c6[i] > 0) && (c12[i] > 0))
 436                         {
 437                             /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 438                             sigma6[i]       = half*c12[i]/c6[i];
 439                             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 440                             {
 441                                 sigma6[i] = sigma6_min;
 442                             }
 443                         }
 444                         else
 445                         {
 446                             sigma6[i]       = sigma6_def;
 447                         }
 448                         if (softCoreTreatment == SoftCoreTreatment::RPower6)
 449                         {
 450                             sigma_pow[i]    = sigma6[i];
 451                         }
 452                         else
 453                         {
 454                             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 455                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 456                             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 457                         }
 458                     }
 459                 }
 460
 461                 if (useSoftCore)
 462                 {
 463                     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 464                     if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 465                     {
 466                         alpha_vdw_eff    = 0;
 467                         alpha_coul_eff   = 0;
 468                     }
 469                     else
 470                     {
 471                         alpha_vdw_eff    = alpha_vdw;
 472                         alpha_coul_eff   = alpha_coul;
 473                     }
 474                 }
 475
 476                 for (i = 0; i < NSTATES; i++)
 477                 {
 478                     FscalC[i]    = 0;
 479                     FscalV[i]    = 0;
 480                     Vcoul[i]     = 0;
 481                     Vvdw[i]      = 0;
 482
 483                     /* Only spend time on A or B state if it is non-zero */
 484                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 485                     {
 486                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 487                         if (useSoftCore)
 488                         {
 489                             rpinvC     = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 490                             rinvC      = std::pow(rpinvC, one/sc_r_power);
 491                             rC         = one/rinvC;
 492
 493                             rpinvV     = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 494                             rinvV      = std::pow(rpinvV, one/sc_r_power);
 495                             rV         = one/rinvV;
 496                         }
 497                         else
 498                         {
 499                             rpinvC     = 1;
 500                             rinvC      = rinv;
 501                             rC         = r;
 502
 503                             rpinvV     = 1;
 504                             rinvV      = rinv;
 505                             rV         = r;
 506                         }
 507
 508                         if (do_tab)
 509                         {
 510                             rtC        = rC*tabscale;
 511                             n0         = rtC;
 512                             epsC       = rtC-n0;
 513                             eps2C      = epsC*epsC;
 514                             n1C        = tab_elemsize*n0;
 515
 516                             rtV        = rV*tabscale;
 517                             n0         = rtV;
 518                             epsV       = rtV-n0;
 519                             eps2V      = epsV*epsV;
 520                             n1V        = tab_elemsize*n0;
 521                         }
 522
 523                         /* Only process the coulomb interactions if we have charges,
 524                          * and if we either include all entries in the list (no cutoff
 525                          * used in the kernel), or if we are within the cutoff.
 526                          */
 527                         bComputeElecInteraction =
 528                             ( bEwald && r < rcoulomb) ||
 529                             (!bEwald && rC < rcoulomb);
 530
 531                         if ( (qq[i] != 0) && bComputeElecInteraction)
 532                         {
 533                             switch (icoul)
 534                             {
 535                                 case GMX_NBKERNEL_ELEC_COULOMB:
 536                                     /* simple cutoff */
 537                                     Vcoul[i]   = qq[i]*rinvC;
 538                                     FscalC[i]  = Vcoul[i];
 539                                     /* The shift for the Coulomb potential is stored in
 540                                      * the RF parameter c_rf, which is 0 without shift.
 541                                      */
 542                                     Vcoul[i]  -= qq[i]*ic->c_rf;
 543                                     break;
 544
 545                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 546                                     /* reaction-field */
 547                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
 548                                     FscalC[i]  = qq[i]*(rinvC - two*krf*rC*rC);
 549                                     break;
 550
 551                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 552                                     /* non-Ewald tabulated coulomb */
 553                                     nnn        = n1C;
 554                                     Y          = VFtab[nnn];
 555                                     F          = VFtab[nnn+1];
 556                                     Geps       = epsC*VFtab[nnn+2];
 557                                     Heps2      = eps2C*VFtab[nnn+3];
 558                                     Fp         = F+Geps+Heps2;
 559                                     VV         = Y+epsC*Fp;
 560                                     FF         = Fp+Geps+two*Heps2;
 561                                     Vcoul[i]   = qq[i]*VV;
 562                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
 563                                     break;
 564
 565                                 case GMX_NBKERNEL_ELEC_EWALD:
 566                                     /* Ewald FEP is done only on the 1/r part */
 567                                     Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
 568                                     FscalC[i]  = qq[i]*rinvC;
 569                                     break;
 570
 571                                 case GMX_NBKERNEL_ELEC_NONE:
 572                                     FscalC[i]  = zero;
 573                                     Vcoul[i]   = zero;
 574                                     break;
 575
 576                                 default:
 577                                     gmx_incons("Invalid icoul in free energy kernel");
 578                             }
 579                         }
 580
 581                         /* Only process the VDW interactions if we have
 582                          * some non-zero parameters, and if we either
 583                          * include all entries in the list (no cutoff used
 584                          * in the kernel), or if we are within the cutoff.
 585                          */
 586                         bComputeVdwInteraction =
 587                             ( bEwaldLJ && r < rvdw) ||
 588                             (!bEwaldLJ && rV < rvdw);
 589                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
 590                         {
 591                             switch (ivdw)
 592                             {
 593                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
 594                                     /* cutoff LJ */
 595                                     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 596                                     {
 597                                         rinv6        = rpinvV;
 598                                     }
 599                                     else
 600                                     {
 601                                         rinv6        = rinvV*rinvV;
 602                                         rinv6        = rinv6*rinv6*rinv6;
 603                                     }
 604                                     Vvdw6            = c6[i]*rinv6;
 605                                     Vvdw12           = c12[i]*rinv6*rinv6;
 606
 607                                     Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
 608                                                          - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
 609                                     FscalV[i]        = Vvdw12 - Vvdw6;
 610                                     break;
 611
 612                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
 613                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
 614                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 615                                     /* Table LJ */
 616                                     nnn = n1V+4;
 617                                     /* dispersion */
 618                                     Y          = VFtab[nnn];
 619                                     F          = VFtab[nnn+1];
 620                                     Geps       = epsV*VFtab[nnn+2];
 621                                     Heps2      = eps2V*VFtab[nnn+3];
 622                                     Fp         = F+Geps+Heps2;
 623                                     VV         = Y+epsV*Fp;
 624                                     FF         = Fp+Geps+two*Heps2;
 625                                     Vvdw[i]   += c6[i]*VV;
 626                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
 627
 628                                     /* repulsion */
 629                                     Y          = VFtab[nnn+4];
 630                                     F          = VFtab[nnn+5];
 631                                     Geps       = epsV*VFtab[nnn+6];
 632                                     Heps2      = eps2V*VFtab[nnn+7];
 633                                     Fp         = F+Geps+Heps2;
 634                                     VV         = Y+epsV*Fp;
 635                                     FF         = Fp+Geps+two*Heps2;
 636                                     Vvdw[i]   += c12[i]*VV;
 637                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
 638                                     break;
 639
 640                                 case GMX_NBKERNEL_VDW_LJEWALD:
 641                                     if (softCoreTreatment == SoftCoreTreatment::RPower6)
 642                                     {
 643                                         rinv6            = rpinvV;
 644                                     }
 645                                     else
 646                                     {
 647                                         rinv6            = rinvV*rinvV;
 648                                         rinv6            = rinv6*rinv6*rinv6;
 649                                     }
 650                                     c6grid           = nbfp_grid[tj[i]];
 651
 652                                     /* cutoff LJ */
 653                                     Vvdw6            = c6[i]*rinv6;
 654                                     Vvdw12           = c12[i]*rinv6*rinv6;
 655
 656                                     Vvdw[i]          = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
 657                                                          - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
 658                                     FscalV[i]        = Vvdw12 - Vvdw6;
 659                                     break;
 660
 661                                 case GMX_NBKERNEL_VDW_NONE:
 662                                     Vvdw[i]    = zero;
 663                                     FscalV[i]  = zero;
 664                                     break;
 665
 666                                 default:
 667                                     gmx_incons("Invalid ivdw in free energy kernel");
 668                             }
 669
 670                             if (ic->vdw_modifier == eintmodPOTSWITCH)
 671                             {
 672                                 d                = rV - ic->rvdw_switch;
 673                                 d                = (d > zero) ? d : zero;
 674                                 d2               = d*d;
 675                                 sw               = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
 676                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
 677
 678                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
 679                                 Vvdw[i]         *= sw;
 680
 681                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : zero;
 682                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : zero;
 683                             }
 684                         }
 685
 686                         /* FscalC (and FscalV) now contain: dV/drC * rC
 687                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 688                          * Further down we first multiply by r^p-2 and then by
 689                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 690                          */
 691                         FscalC[i] *= rpinvC;
 692                         FscalV[i] *= rpinvV;
 693                     }
 694                 }
 695
 696                 /* Assemble A and B states */
 697                 for (i = 0; i < NSTATES; i++)
 698                 {
 699                     vctot         += LFC[i]*Vcoul[i];
 700                     vvtot         += LFV[i]*Vvdw[i];
 701
 702                     Fscal     += LFC[i]*FscalC[i]*rpm2;
 703                     Fscal     += LFV[i]*FscalV[i]*rpm2;
 704
 705                     if (useSoftCore)
 706                     {
 707                         dvdl_coul += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 708                         dvdl_vdw  += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 709                     }
 710                     else
 711                     {
 712                         dvdl_coul += Vcoul[i]*DLF[i];
 713                         dvdl_vdw  += Vvdw[i]*DLF[i];
 714                     }
 715                 }
 716             }
 717             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 718             {
 719                 /* For excluded pairs, which are only in this pair list when
 720                  * using the Verlet scheme, we don't use soft-core.
 721                  * The group scheme also doesn't soft-core for these.
 722                  * As there is no singularity, there is no need for soft-core.
 723                  */
 724                 VV = krf*rsq - crf;
 725                 FF = -two*krf;
 726
 727                 if (ii == jnr)
 728                 {
 729                     VV *= half;
 730                 }
 731
 732                 for (i = 0; i < NSTATES; i++)
 733                 {
 734                     vctot      += LFC[i]*qq[i]*VV;
 735                     Fscal      += LFC[i]*qq[i]*FF;
 736                     dvdl_coul  += DLF[i]*qq[i]*VV;
 737                 }
 738             }
 739
 740             if (bEwald && r < rcoulomb)
 741             {
 742                 /* See comment in the preamble. When using Ewald interactions
 743                  * (unless we use a switch modifier) we subtract the reciprocal-space
 744                  * Ewald component here which made it possible to apply the free
 745                  * energy interaction to 1/r (vanilla coulomb short-range part)
 746                  * above. This gets us closer to the ideal case of applying
 747                  * the softcore to the entire electrostatic interaction,
 748                  * including the reciprocal-space component.
 749                  */
 750                 real v_lr, f_lr;
 751
 752                 ewrt      = r*ewtabscale;
 753                 ewitab    = static_cast<int>(ewrt);
 754                 eweps     = ewrt-ewitab;
 755                 ewitab    = 4*ewitab;
 756                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
 757                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
 758                 f_lr     *= rinv;
 759
 760                 /* Note that any possible Ewald shift has already been applied in
 761                  * the normal interaction part above.
 762                  */
 763
 764                 if (ii == jnr)
 765                 {
 766                     /* If we get here, the i particle (ii) has itself (jnr)
 767                      * in its neighborlist. This can only happen with the Verlet
 768                      * scheme, and corresponds to a self-interaction that will
 769                      * occur twice. Scale it down by 50% to only include it once.
 770                      */
 771                     v_lr *= half;
 772                 }
 773
 774                 for (i = 0; i < NSTATES; i++)
 775                 {
 776                     vctot      -= LFC[i]*qq[i]*v_lr;
 777                     Fscal      -= LFC[i]*qq[i]*f_lr;
 778                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
 779                 }
 780             }
 781
 782             if (bEwaldLJ && r < rvdw)
 783             {
 784                 /* See comment in the preamble. When using LJ-Ewald interactions
 785                  * (unless we use a switch modifier) we subtract the reciprocal-space
 786                  * Ewald component here which made it possible to apply the free
 787                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 788                  * above. This gets us closer to the ideal case of applying
 789                  * the softcore to the entire VdW interaction,
 790                  * including the reciprocal-space component.
 791                  */
 792                 /* We could also use the analytical form here
 793                  * iso a table, but that can cause issues for
 794                  * r close to 0 for non-interacting pairs.
 795                  */
 796                 real rs, frac, f_lr;
 797                 int  ri;
 798
 799                 rs     = rsq*rinv*ewtabscale;
 800                 ri     = static_cast<int>(rs);
 801                 frac   = rs - ri;
 802                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 803                 /* TODO: Currently the Ewald LJ table does not contain
 804                  * the factor 1/6, we should add this.
 805                  */
 806                 FF     = f_lr*rinv/six;
 807                 VV     = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
 808
 809                 if (ii == jnr)
 810                 {
 811                     /* If we get here, the i particle (ii) has itself (jnr)
 812                      * in its neighborlist. This can only happen with the Verlet
 813                      * scheme, and corresponds to a self-interaction that will
 814                      * occur twice. Scale it down by 50% to only include it once.
 815                      */
 816                     VV *= half;
 817                 }
 818
 819                 for (i = 0; i < NSTATES; i++)
 820                 {
 821                     c6grid      = nbfp_grid[tj[i]];
 822                     vvtot      += LFV[i]*c6grid*VV;
 823                     Fscal      += LFV[i]*c6grid*FF;
 824                     dvdl_vdw   += (DLF[i]*c6grid)*VV;
 825                 }
 826             }
 827
 828             if (bDoForces)
 829             {
 830                 tx         = Fscal*dx;
 831                 ty         = Fscal*dy;
 832                 tz         = Fscal*dz;
 833                 fix        = fix + tx;
 834                 fiy        = fiy + ty;
 835                 fiz        = fiz + tz;
 836                 /* OpenMP atomics are expensive, but this kernels is also
 837                  * expensive, so we can take this hit, instead of using
 838                  * thread-local output buffers and extra reduction.
 839                  *
 840                  * All the OpenMP regions in this file are trivial and should
 841                  * not throw, so no need for try/catch.
 842                  */
 843 #pragma omp atomic
 844                 f[j3]     -= tx;
 845 #pragma omp atomic
 846                 f[j3+1]   -= ty;
 847 #pragma omp atomic
 848                 f[j3+2]   -= tz;
 849             }
 850         }
 851
 852         /* The atomics below are expensive with many OpenMP threads.
 853          * Here unperturbed i-particles will usually only have a few
 854          * (perturbed) j-particles in the list. Thus with a buffered list
 855          * we can skip a significant number of i-reductions with a check.
 856          */
 857         if (npair_within_cutoff > 0)
 858         {
 859             if (bDoForces)
 860             {
 861 #pragma omp atomic
 862                 f[ii3]        += fix;
 863 #pragma omp atomic
 864                 f[ii3+1]      += fiy;
 865 #pragma omp atomic
 866                 f[ii3+2]      += fiz;
 867             }
 868             if (bDoShiftForces)
 869             {
 870 #pragma omp atomic
 871                 fshift[is3]   += fix;
 872 #pragma omp atomic
 873                 fshift[is3+1] += fiy;
 874 #pragma omp atomic
 875                 fshift[is3+2] += fiz;
 876             }
 877             if (bDoPotential)
 878             {
 879                 ggid               = gid[n];
 880 #pragma omp atomic
 881                 Vc[ggid]          += vctot;
 882 #pragma omp atomic
 883                 Vv[ggid]          += vvtot;
 884             }
 885         }
 886     }
 887
 888 #pragma omp atomic
 889     dvdl[efptCOUL]     += dvdl_coul;
 890  #pragma omp atomic
 891     dvdl[efptVDW]      += dvdl_vdw;
 892
 893     /* Estimate flops, average for free energy stuff:
 894      * 12  flops per outer iteration
 895      * 150 flops per inner iteration
 896      */
 897 #pragma omp atomic
 898     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 899 }
 900
 901 void gmx_nb_free_energy_kernel(const t_nblist   *nlist,
 902                                rvec             *xx,
 903                                rvec             *ff,
 904                                t_forcerec       *fr,
 905                                const t_mdatoms  *mdatoms,
 906                                nb_kernel_data_t *kernel_data,
 907                                t_nrnb           *nrnb)
 908 {
 909     if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
 910     {
 911         nb_free_energy_kernel<SoftCoreTreatment::None>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 912     }
 913     else if (fr->sc_r_power == 6.0_real)
 914     {
 915         nb_free_energy_kernel<SoftCoreTreatment::RPower6>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 916     }
 917     else if (fr->sc_r_power == 48.0_real)
 918     {
 919         nb_free_energy_kernel<SoftCoreTreatment::RPower48>(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
 920     }
 921     else
 922     {
 923         GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
 924     }
 925 }