src/gromacs/gmxlib/nonbonded/nb_free_energy.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42
  43 #include "gromacs/math/vec.h"
  44 #include "typedefs.h"
  45 #include "nonbonded.h"
  46 #include "nb_kernel.h"
  47 #include "nrnb.h"
  48 #include "macros.h"
  49 #include "nb_free_energy.h"
  50
  51 #include "gromacs/utility/fatalerror.h"
  52
  53 void
  54 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
  55                           rvec * gmx_restrict              xx,
  56                           rvec * gmx_restrict              ff,
  57                           t_forcerec * gmx_restrict        fr,
  58                           const t_mdatoms * gmx_restrict   mdatoms,
  59                           nb_kernel_data_t * gmx_restrict  kernel_data,
  60                           t_nrnb * gmx_restrict            nrnb)
  61 {
  62
  63 #define  STATE_A  0
  64 #define  STATE_B  1
  65 #define  NSTATES  2
  66     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
  67     real          shX, shY, shZ;
  68     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
  69     real          Vcoul[NSTATES], Vvdw[NSTATES];
  70     real          rinv6, r, rt, rtC, rtV;
  71     real          iqA, iqB;
  72     real          qq[NSTATES], vctot, krsq;
  73     int           ntiA, ntiB, tj[NSTATES];
  74     real          Vvdw6, Vvdw12, vvtot;
  75     real          ix, iy, iz, fix, fiy, fiz;
  76     real          dx, dy, dz, rsq, rinv;
  77     real          c6[NSTATES], c12[NSTATES], c6grid;
  78     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
  79     double        dvdl_coul, dvdl_vdw;
  80     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
  81     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
  82     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
  83     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
  84     int           do_tab, tab_elemsize;
  85     int           n0, n1C, n1V, nnn;
  86     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
  87     int           icoul, ivdw;
  88     int           nri;
  89     const int *   iinr;
  90     const int *   jindex;
  91     const int *   jjnr;
  92     const int *   shift;
  93     const int *   gid;
  94     const int *   typeA;
  95     const int *   typeB;
  96     int           ntype;
  97     const real *  shiftvec;
  98     real          dvdl_part;
  99     real *        fshift;
 100     real          tabscale = 0;
 101     const real *  VFtab    = NULL;
 102     const real *  x;
 103     real *        f;
 104     real          facel, krf, crf;
 105     const real *  chargeA;
 106     const real *  chargeB;
 107     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
 108     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
 109     const real *  nbfp, *nbfp_grid;
 110     real *        dvdl;
 111     real *        Vv;
 112     real *        Vc;
 113     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
 114     real          rcoulomb, rvdw, sh_invrc6;
 115     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll;
 116     gmx_bool      bEwald, bEwaldLJ;
 117     real          rcutoff_max2;
 118     const real *  tab_ewald_F_lj;
 119     const real *  tab_ewald_V_lj;
 120     real          d, d2, sw, dsw, rinvcorr;
 121     real          elec_swV3, elec_swV4, elec_swV5, elec_swF2, elec_swF3, elec_swF4;
 122     real          vdw_swV3, vdw_swV4, vdw_swV5, vdw_swF2, vdw_swF3, vdw_swF4;
 123     gmx_bool      bConvertEwaldToCoulomb, bConvertLJEwaldToLJ6;
 124     gmx_bool      bComputeVdwInteraction, bComputeElecInteraction;
 125     const real *  ewtab;
 126     int           ewitab;
 127     real          ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald;
 128
 129     sh_ewald            = fr->ic->sh_ewald;
 130     ewtab               = fr->ic->tabq_coul_FDV0;
 131     ewtabscale          = fr->ic->tabq_scale;
 132     ewtabhalfspace      = 0.5/ewtabscale;
 133     tab_ewald_F_lj      = fr->ic->tabq_vdw_F;
 134     tab_ewald_V_lj      = fr->ic->tabq_vdw_V;
 135
 136     x                   = xx[0];
 137     f                   = ff[0];
 138
 139     fshift              = fr->fshift[0];
 140
 141     nri                 = nlist->nri;
 142     iinr                = nlist->iinr;
 143     jindex              = nlist->jindex;
 144     jjnr                = nlist->jjnr;
 145     icoul               = nlist->ielec;
 146     ivdw                = nlist->ivdw;
 147     shift               = nlist->shift;
 148     gid                 = nlist->gid;
 149
 150     shiftvec            = fr->shift_vec[0];
 151     chargeA             = mdatoms->chargeA;
 152     chargeB             = mdatoms->chargeB;
 153     facel               = fr->epsfac;
 154     krf                 = fr->k_rf;
 155     crf                 = fr->c_rf;
 156     ewc_lj              = fr->ewaldcoeff_lj;
 157     Vc                  = kernel_data->energygrp_elec;
 158     typeA               = mdatoms->typeA;
 159     typeB               = mdatoms->typeB;
 160     ntype               = fr->ntype;
 161     nbfp                = fr->nbfp;
 162     nbfp_grid           = fr->ljpme_c6grid;
 163     Vv                  = kernel_data->energygrp_vdw;
 164     lambda_coul         = kernel_data->lambda[efptCOUL];
 165     lambda_vdw          = kernel_data->lambda[efptVDW];
 166     dvdl                = kernel_data->dvdl;
 167     alpha_coul          = fr->sc_alphacoul;
 168     alpha_vdw           = fr->sc_alphavdw;
 169     lam_power           = fr->sc_power;
 170     sc_r_power          = fr->sc_r_power;
 171     sigma6_def          = fr->sc_sigma6_def;
 172     sigma6_min          = fr->sc_sigma6_min;
 173     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
 174     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
 175     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
 176
 177     rcoulomb            = fr->rcoulomb;
 178     rvdw                = fr->rvdw;
 179     sh_invrc6           = fr->ic->sh_invrc6;
 180
 181     if (fr->coulomb_modifier == eintmodPOTSWITCH)
 182     {
 183         d               = fr->rcoulomb-fr->rcoulomb_switch;
 184         elec_swV3       = -10.0/(d*d*d);
 185         elec_swV4       =  15.0/(d*d*d*d);
 186         elec_swV5       =  -6.0/(d*d*d*d*d);
 187         elec_swF2       = -30.0/(d*d*d);
 188         elec_swF3       =  60.0/(d*d*d*d);
 189         elec_swF4       = -30.0/(d*d*d*d*d);
 190     }
 191     else
 192     {
 193         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 194         elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
 195     }
 196
 197     if (fr->vdw_modifier == eintmodPOTSWITCH)
 198     {
 199         d               = fr->rvdw-fr->rvdw_switch;
 200         vdw_swV3        = -10.0/(d*d*d);
 201         vdw_swV4        =  15.0/(d*d*d*d);
 202         vdw_swV5        =  -6.0/(d*d*d*d*d);
 203         vdw_swF2        = -30.0/(d*d*d);
 204         vdw_swF3        =  60.0/(d*d*d*d);
 205         vdw_swF4        = -30.0/(d*d*d*d*d);
 206     }
 207     else
 208     {
 209         /* Avoid warnings from stupid compilers (looking at you, Clang!) */
 210         vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
 211     }
 212
 213     if (fr->cutoff_scheme == ecutsVERLET)
 214     {
 215         const interaction_const_t *ic;
 216
 217         ic = fr->ic;
 218         if (EVDW_PME(ic->vdwtype))
 219         {
 220             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
 221         }
 222         else
 223         {
 224             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
 225         }
 226
 227         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 228         {
 229             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 230         }
 231         else if (EEL_PME_EWALD(ic->eeltype))
 232         {
 233             icoul        = GMX_NBKERNEL_ELEC_EWALD;
 234         }
 235         else
 236         {
 237             gmx_incons("Unsupported eeltype with Verlet and free-energy");
 238         }
 239
 240         bExactElecCutoff = TRUE;
 241         bExactVdwCutoff  = TRUE;
 242     }
 243     else
 244     {
 245         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
 246         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
 247     }
 248
 249     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
 250     rcutoff_max2    = max(fr->rcoulomb, fr->rvdw);
 251     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
 252
 253     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
 254     bEwaldLJ        = (ivdw == GMX_NBKERNEL_VDW_LJEWALD);
 255
 256     /* For Ewald/PME interactions we cannot easily apply the soft-core component to
 257      * reciprocal space. When we use vanilla (not switch/shift) Ewald interactions, we
 258      * can apply the small trick of subtracting the _reciprocal_ space contribution
 259      * in this kernel, and instead apply the free energy interaction to the 1/r
 260      * (standard coulomb) interaction.
 261      *
 262      * However, we cannot use this approach for switch-modified since we would then
 263      * effectively end up evaluating a significantly different interaction here compared to the
 264      * normal (non-free-energy) kernels, either by applying a cutoff at a different
 265      * position than what the user requested, or by switching different
 266      * things (1/r rather than short-range Ewald). For these settings, we just
 267      * use the traditional short-range Ewald interaction in that case.
 268      */
 269     bConvertEwaldToCoulomb = (bEwald && (fr->coulomb_modifier != eintmodPOTSWITCH));
 270     /* For now the below will always be true (since LJ-PME only works with Shift in Gromacs-5.0),
 271      * but writing it this way means we stay in sync with coulomb, and it avoids future bugs.
 272      */
 273     bConvertLJEwaldToLJ6   = (bEwaldLJ && (fr->vdw_modifier   != eintmodPOTSWITCH));
 274
 275     /* fix compiler warnings */
 276     nj1   = 0;
 277     n1C   = n1V   = 0;
 278     epsC  = epsV  = 0;
 279     eps2C = eps2V = 0;
 280
 281     dvdl_coul  = 0;
 282     dvdl_vdw   = 0;
 283
 284     /* Lambda factor for state A, 1-lambda*/
 285     LFC[STATE_A] = 1.0 - lambda_coul;
 286     LFV[STATE_A] = 1.0 - lambda_vdw;
 287
 288     /* Lambda factor for state B, lambda*/
 289     LFC[STATE_B] = lambda_coul;
 290     LFV[STATE_B] = lambda_vdw;
 291
 292     /*derivative of the lambda factor for state A and B */
 293     DLF[STATE_A] = -1;
 294     DLF[STATE_B] = 1;
 295
 296     for (i = 0; i < NSTATES; i++)
 297     {
 298         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 299         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 300         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 301         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 302     }
 303     /* precalculate */
 304     sigma2_def = pow(sigma6_def, 1.0/3.0);
 305     sigma2_min = pow(sigma6_min, 1.0/3.0);
 306
 307     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 308
 309     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
 310               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 311     if (do_tab)
 312     {
 313         tabscale         = kernel_data->table_elec_vdw->scale;
 314         VFtab            = kernel_data->table_elec_vdw->data;
 315         /* we always use the combined table here */
 316         tab_elemsize     = 12;
 317     }
 318
 319     for (n = 0; (n < nri); n++)
 320     {
 321         int npair_within_cutoff;
 322
 323         npair_within_cutoff = 0;
 324
 325         is3              = 3*shift[n];
 326         shX              = shiftvec[is3];
 327         shY              = shiftvec[is3+1];
 328         shZ              = shiftvec[is3+2];
 329         nj0              = jindex[n];
 330         nj1              = jindex[n+1];
 331         ii               = iinr[n];
 332         ii3              = 3*ii;
 333         ix               = shX + x[ii3+0];
 334         iy               = shY + x[ii3+1];
 335         iz               = shZ + x[ii3+2];
 336         iqA              = facel*chargeA[ii];
 337         iqB              = facel*chargeB[ii];
 338         ntiA             = 2*ntype*typeA[ii];
 339         ntiB             = 2*ntype*typeB[ii];
 340         vctot            = 0;
 341         vvtot            = 0;
 342         fix              = 0;
 343         fiy              = 0;
 344         fiz              = 0;
 345
 346         for (k = nj0; (k < nj1); k++)
 347         {
 348             jnr              = jjnr[k];
 349             j3               = 3*jnr;
 350             dx               = ix - x[j3];
 351             dy               = iy - x[j3+1];
 352             dz               = iz - x[j3+2];
 353             rsq              = dx*dx + dy*dy + dz*dz;
 354
 355             if (bExactCutoffAll && rsq >= rcutoff_max2)
 356             {
 357                 /* We save significant time by skipping all code below.
 358                  * Note that with soft-core interactions, the actual cut-off
 359                  * check might be different. But since the soft-core distance
 360                  * is always larger than r, checking on r here is safe.
 361                  */
 362                 continue;
 363             }
 364             npair_within_cutoff++;
 365
 366             if (rsq > 0)
 367             {
 368                 rinv         = gmx_invsqrt(rsq);
 369                 r            = rsq*rinv;
 370             }
 371             else
 372             {
 373                 /* The force at r=0 is zero, because of symmetry.
 374                  * But note that the potential is in general non-zero,
 375                  * since the soft-cored r will be non-zero.
 376                  */
 377                 rinv         = 0;
 378                 r            = 0;
 379             }
 380
 381             if (sc_r_power == 6.0)
 382             {
 383                 rpm2             = rsq*rsq;  /* r4 */
 384                 rp               = rpm2*rsq; /* r6 */
 385             }
 386             else if (sc_r_power == 48.0)
 387             {
 388                 rp               = rsq*rsq*rsq; /* r6 */
 389                 rp               = rp*rp;       /* r12 */
 390                 rp               = rp*rp;       /* r24 */
 391                 rp               = rp*rp;       /* r48 */
 392                 rpm2             = rp/rsq;      /* r46 */
 393             }
 394             else
 395             {
 396                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
 397                 rpm2           = rp/rsq;
 398             }
 399
 400             Fscal = 0;
 401
 402             qq[STATE_A]      = iqA*chargeA[jnr];
 403             qq[STATE_B]      = iqB*chargeB[jnr];
 404
 405             tj[STATE_A]      = ntiA+2*typeA[jnr];
 406             tj[STATE_B]      = ntiB+2*typeB[jnr];
 407
 408             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
 409             {
 410                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 411                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 412
 413                 for (i = 0; i < NSTATES; i++)
 414                 {
 415                     c12[i]             = nbfp[tj[i]+1];
 416                     if ((c6[i] > 0) && (c12[i] > 0))
 417                     {
 418                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 419                         sigma6[i]       = 0.5*c12[i]/c6[i];
 420                         sigma2[i]       = pow(sigma6[i], 1.0/3.0);
 421                         /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 422                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 423                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 424                         {
 425                             sigma6[i] = sigma6_min;
 426                             sigma2[i] = sigma2_min;
 427                         }
 428                     }
 429                     else
 430                     {
 431                         sigma6[i]       = sigma6_def;
 432                         sigma2[i]       = sigma2_def;
 433                     }
 434                     if (sc_r_power == 6.0)
 435                     {
 436                         sigma_pow[i]    = sigma6[i];
 437                         sigma_powm2[i]  = sigma6[i]/sigma2[i];
 438                     }
 439                     else if (sc_r_power == 48.0)
 440                     {
 441                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 442                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 443                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 444                         sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 445                     }
 446                     else
 447                     {    /* not really supported as input, but in here for testing the general case*/
 448                         sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 449                         sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 450                     }
 451                 }
 452
 453                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 454                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 455                 {
 456                     alpha_vdw_eff    = 0;
 457                     alpha_coul_eff   = 0;
 458                 }
 459                 else
 460                 {
 461                     alpha_vdw_eff    = alpha_vdw;
 462                     alpha_coul_eff   = alpha_coul;
 463                 }
 464
 465                 for (i = 0; i < NSTATES; i++)
 466                 {
 467                     FscalC[i]    = 0;
 468                     FscalV[i]    = 0;
 469                     Vcoul[i]     = 0;
 470                     Vvdw[i]      = 0;
 471
 472                     /* Only spend time on A or B state if it is non-zero */
 473                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 474                     {
 475                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 476                         rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 477                         rinvC          = pow(rpinvC, 1.0/sc_r_power);
 478                         rC             = 1.0/rinvC;
 479
 480                         rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 481                         rinvV          = pow(rpinvV, 1.0/sc_r_power);
 482                         rV             = 1.0/rinvV;
 483
 484                         if (do_tab)
 485                         {
 486                             rtC        = rC*tabscale;
 487                             n0         = rtC;
 488                             epsC       = rtC-n0;
 489                             eps2C      = epsC*epsC;
 490                             n1C        = tab_elemsize*n0;
 491
 492                             rtV        = rV*tabscale;
 493                             n0         = rtV;
 494                             epsV       = rtV-n0;
 495                             eps2V      = epsV*epsV;
 496                             n1V        = tab_elemsize*n0;
 497                         }
 498
 499                         /* Only process the coulomb interactions if we have charges,
 500                          * and if we either include all entries in the list (no cutoff
 501                          * used in the kernel), or if we are within the cutoff.
 502                          */
 503                         bComputeElecInteraction = !bExactElecCutoff ||
 504                             ( bConvertEwaldToCoulomb && r < rcoulomb) ||
 505                             (!bConvertEwaldToCoulomb && rC < rcoulomb);
 506
 507                         if ( (qq[i] != 0) && bComputeElecInteraction)
 508                         {
 509                             switch (icoul)
 510                             {
 511                                 case GMX_NBKERNEL_ELEC_COULOMB:
 512                                     /* simple cutoff */
 513                                     Vcoul[i]   = qq[i]*rinvC;
 514                                     FscalC[i]  = Vcoul[i];
 515                                     /* The shift for the Coulomb potential is stored in
 516                                      * the RF parameter c_rf, which is 0 without shift
 517                                      */
 518                                     Vcoul[i]  -= qq[i]*fr->ic->c_rf;
 519                                     break;
 520
 521                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 522                                     /* reaction-field */
 523                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
 524                                     FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
 525                                     break;
 526
 527                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 528                                     /* non-Ewald tabulated coulomb */
 529                                     nnn        = n1C;
 530                                     Y          = VFtab[nnn];
 531                                     F          = VFtab[nnn+1];
 532                                     Geps       = epsC*VFtab[nnn+2];
 533                                     Heps2      = eps2C*VFtab[nnn+3];
 534                                     Fp         = F+Geps+Heps2;
 535                                     VV         = Y+epsC*Fp;
 536                                     FF         = Fp+Geps+2.0*Heps2;
 537                                     Vcoul[i]   = qq[i]*VV;
 538                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
 539                                     break;
 540
 541                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
 542                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
 543                                     break;
 544
 545                                 case GMX_NBKERNEL_ELEC_EWALD:
 546                                     if (bConvertEwaldToCoulomb)
 547                                     {
 548                                         /* Ewald FEP is done only on the 1/r part */
 549                                         Vcoul[i]   = qq[i]*(rinvC-sh_ewald);
 550                                         FscalC[i]  = qq[i]*rinvC;
 551                                     }
 552                                     else
 553                                     {
 554                                         ewrt      = rC*ewtabscale;
 555                                         ewitab    = (int) ewrt;
 556                                         eweps     = ewrt-ewitab;
 557                                         ewitab    = 4*ewitab;
 558                                         FscalC[i] = ewtab[ewitab]+eweps*ewtab[ewitab+1];
 559                                         rinvcorr  = rinvC-sh_ewald;
 560                                         Vcoul[i]  = qq[i]*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+FscalC[i])));
 561                                         FscalC[i] = qq[i]*(rinvC-rC*FscalC[i]);
 562                                     }
 563                                     break;
 564
 565                                 case GMX_NBKERNEL_ELEC_NONE:
 566                                     FscalC[i]  = 0.0;
 567                                     Vcoul[i]   = 0.0;
 568                                     break;
 569
 570                                 default:
 571                                     gmx_incons("Invalid icoul in free energy kernel");
 572                                     break;
 573                             }
 574
 575                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
 576                             {
 577                                 d                = rC-fr->rcoulomb_switch;
 578                                 d                = (d > 0.0) ? d : 0.0;
 579                                 d2               = d*d;
 580                                 sw               = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
 581                                 dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
 582
 583                                 FscalC[i]        = FscalC[i]*sw - rC*Vcoul[i]*dsw;
 584                                 Vcoul[i]        *= sw;
 585
 586                                 FscalC[i]        = (rC < rcoulomb) ? FscalC[i] : 0.0;
 587                                 Vcoul[i]         = (rC < rcoulomb) ? Vcoul[i] : 0.0;
 588                             }
 589                         }
 590
 591                         /* Only process the VDW interactions if we have
 592                          * some non-zero parameters, and if we either
 593                          * include all entries in the list (no cutoff used
 594                          * in the kernel), or if we are within the cutoff.
 595                          */
 596                         bComputeVdwInteraction = !bExactVdwCutoff ||
 597                             ( bConvertLJEwaldToLJ6 && r < rvdw) ||
 598                             (!bConvertLJEwaldToLJ6 && rV < rvdw);
 599                         if ((c6[i] != 0 || c12[i] != 0) && bComputeVdwInteraction)
 600                         {
 601                             switch (ivdw)
 602                             {
 603                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
 604                                 case GMX_NBKERNEL_VDW_LJEWALD:
 605                                     /* cutoff LJ */
 606                                     if (sc_r_power == 6.0)
 607                                     {
 608                                         rinv6            = rpinvV;
 609                                     }
 610                                     else
 611                                     {
 612                                         rinv6            = pow(rinvV, 6.0);
 613                                     }
 614                                     Vvdw6            = c6[i]*rinv6;
 615                                     Vvdw12           = c12[i]*rinv6*rinv6;
 616                                     if (fr->vdw_modifier == eintmodPOTSHIFT)
 617                                     {
 618                                         Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
 619                                                              -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
 620                                     }
 621                                     else
 622                                     {
 623                                         Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
 624                                     }
 625                                     FscalV[i]        = Vvdw12 - Vvdw6;
 626                                     break;
 627
 628                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
 629                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
 630                                     break;
 631
 632                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 633                                     /* Table LJ */
 634                                     nnn = n1V+4;
 635                                     /* dispersion */
 636                                     Y          = VFtab[nnn];
 637                                     F          = VFtab[nnn+1];
 638                                     Geps       = epsV*VFtab[nnn+2];
 639                                     Heps2      = eps2V*VFtab[nnn+3];
 640                                     Fp         = F+Geps+Heps2;
 641                                     VV         = Y+epsV*Fp;
 642                                     FF         = Fp+Geps+2.0*Heps2;
 643                                     Vvdw[i]   += c6[i]*VV;
 644                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
 645
 646                                     /* repulsion */
 647                                     Y          = VFtab[nnn+4];
 648                                     F          = VFtab[nnn+5];
 649                                     Geps       = epsV*VFtab[nnn+6];
 650                                     Heps2      = eps2V*VFtab[nnn+7];
 651                                     Fp         = F+Geps+Heps2;
 652                                     VV         = Y+epsV*Fp;
 653                                     FF         = Fp+Geps+2.0*Heps2;
 654                                     Vvdw[i]   += c12[i]*VV;
 655                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
 656                                     break;
 657
 658                                 case GMX_NBKERNEL_VDW_NONE:
 659                                     Vvdw[i]    = 0.0;
 660                                     FscalV[i]  = 0.0;
 661                                     break;
 662
 663                                 default:
 664                                     gmx_incons("Invalid ivdw in free energy kernel");
 665                                     break;
 666                             }
 667
 668                             if (fr->vdw_modifier == eintmodPOTSWITCH)
 669                             {
 670                                 d                = rV-fr->rvdw_switch;
 671                                 d                = (d > 0.0) ? d : 0.0;
 672                                 d2               = d*d;
 673                                 sw               = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
 674                                 dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
 675
 676                                 FscalV[i]        = FscalV[i]*sw - rV*Vvdw[i]*dsw;
 677                                 Vvdw[i]         *= sw;
 678
 679                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : 0.0;
 680                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : 0.0;
 681                             }
 682                         }
 683
 684                         /* FscalC (and FscalV) now contain: dV/drC * rC
 685                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 686                          * Further down we first multiply by r^p-2 and then by
 687                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 688                          */
 689                         FscalC[i] *= rpinvC;
 690                         FscalV[i] *= rpinvV;
 691                     }
 692                 }
 693
 694                 /* Assemble A and B states */
 695                 for (i = 0; i < NSTATES; i++)
 696                 {
 697                     vctot         += LFC[i]*Vcoul[i];
 698                     vvtot         += LFV[i]*Vvdw[i];
 699
 700                     Fscal         += LFC[i]*FscalC[i]*rpm2;
 701                     Fscal         += LFV[i]*FscalV[i]*rpm2;
 702
 703                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 704                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 705                 }
 706             }
 707             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 708             {
 709                 /* For excluded pairs, which are only in this pair list when
 710                  * using the Verlet scheme, we don't use soft-core.
 711                  * The group scheme also doesn't soft-core for these.
 712                  * As there is no singularity, there is no need for soft-core.
 713                  */
 714                 VV = krf*rsq - crf;
 715                 FF = -2.0*krf;
 716
 717                 if (ii == jnr)
 718                 {
 719                     VV *= 0.5;
 720                 }
 721
 722                 for (i = 0; i < NSTATES; i++)
 723                 {
 724                     vctot      += LFC[i]*qq[i]*VV;
 725                     Fscal      += LFC[i]*qq[i]*FF;
 726                     dvdl_coul  += DLF[i]*qq[i]*VV;
 727                 }
 728             }
 729
 730             if (bConvertEwaldToCoulomb && ( !bExactElecCutoff || r < rcoulomb ) )
 731             {
 732                 /* See comment in the preamble. When using Ewald interactions
 733                  * (unless we use a switch modifier) we subtract the reciprocal-space
 734                  * Ewald component here which made it possible to apply the free
 735                  * energy interaction to 1/r (vanilla coulomb short-range part)
 736                  * above. This gets us closer to the ideal case of applying
 737                  * the softcore to the entire electrostatic interaction,
 738                  * including the reciprocal-space component.
 739                  */
 740                 real v_lr, f_lr;
 741
 742                 ewrt      = r*ewtabscale;
 743                 ewitab    = (int) ewrt;
 744                 eweps     = ewrt-ewitab;
 745                 ewitab    = 4*ewitab;
 746                 f_lr      = ewtab[ewitab]+eweps*ewtab[ewitab+1];
 747                 v_lr      = (ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+f_lr));
 748                 f_lr     *= rinv;
 749
 750                 if (ii == jnr)
 751                 {
 752                     /* If we get here, the i particle (ii) has itself (jnr)
 753                      * in its neighborlist. This can only happen with the Verlet
 754                      * scheme, and corresponds to a self-interaction that will
 755                      * occur twice. Scale it down by 50% to only include it once.
 756                      */
 757                     v_lr *= 0.5;
 758                 }
 759
 760                 for (i = 0; i < NSTATES; i++)
 761                 {
 762                     vctot      -= LFC[i]*qq[i]*v_lr;
 763                     Fscal      -= LFC[i]*qq[i]*f_lr;
 764                     dvdl_coul  -= (DLF[i]*qq[i])*v_lr;
 765                 }
 766             }
 767
 768             if (bConvertLJEwaldToLJ6 && (!bExactVdwCutoff || r < rvdw))
 769             {
 770                 /* See comment in the preamble. When using LJ-Ewald interactions
 771                  * (unless we use a switch modifier) we subtract the reciprocal-space
 772                  * Ewald component here which made it possible to apply the free
 773                  * energy interaction to r^-6 (vanilla LJ6 short-range part)
 774                  * above. This gets us closer to the ideal case of applying
 775                  * the softcore to the entire VdW interaction,
 776                  * including the reciprocal-space component.
 777                  */
 778                 real rs, frac, f_lr;
 779                 int  ri;
 780
 781                 rs     = rsq*rinv*ewtabscale;
 782                 ri     = (int)rs;
 783                 frac   = rs - ri;
 784                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 785                 FF     = f_lr*rinv;
 786                 VV     = tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr);
 787
 788                 if (ii == jnr)
 789                 {
 790                     /* If we get here, the i particle (ii) has itself (jnr)
 791                      * in its neighborlist. This can only happen with the Verlet
 792                      * scheme, and corresponds to a self-interaction that will
 793                      * occur twice. Scale it down by 50% to only include it once.
 794                      */
 795                     VV *= 0.5;
 796                 }
 797
 798                 for (i = 0; i < NSTATES; i++)
 799                 {
 800                     c6grid      = nbfp_grid[tj[i]];
 801                     vvtot      += LFV[i]*c6grid*VV*(1.0/6.0);
 802                     Fscal      += LFV[i]*c6grid*FF*(1.0/6.0);
 803                     dvdl_vdw   += (DLF[i]*c6grid)*VV*(1.0/6.0);
 804                 }
 805
 806             }
 807
 808             if (bDoForces)
 809             {
 810                 tx         = Fscal*dx;
 811                 ty         = Fscal*dy;
 812                 tz         = Fscal*dz;
 813                 fix        = fix + tx;
 814                 fiy        = fiy + ty;
 815                 fiz        = fiz + tz;
 816                 /* OpenMP atomics are expensive, but this kernels is also
 817                  * expensive, so we can take this hit, instead of using
 818                  * thread-local output buffers and extra reduction.
 819                  */
 820 #pragma omp atomic
 821                 f[j3]     -= tx;
 822 #pragma omp atomic
 823                 f[j3+1]   -= ty;
 824 #pragma omp atomic
 825                 f[j3+2]   -= tz;
 826             }
 827         }
 828
 829         /* The atomics below are expensive with many OpenMP threads.
 830          * Here unperturbed i-particles will usually only have a few
 831          * (perturbed) j-particles in the list. Thus with a buffered list
 832          * we can skip a significant number of i-reductions with a check.
 833          */
 834         if (npair_within_cutoff > 0)
 835         {
 836             if (bDoForces)
 837             {
 838 #pragma omp atomic
 839                 f[ii3]        += fix;
 840 #pragma omp atomic
 841                 f[ii3+1]      += fiy;
 842 #pragma omp atomic
 843                 f[ii3+2]      += fiz;
 844             }
 845             if (bDoShiftForces)
 846             {
 847 #pragma omp atomic
 848                 fshift[is3]   += fix;
 849 #pragma omp atomic
 850                 fshift[is3+1] += fiy;
 851 #pragma omp atomic
 852                 fshift[is3+2] += fiz;
 853             }
 854             if (bDoPotential)
 855             {
 856                 ggid               = gid[n];
 857 #pragma omp atomic
 858                 Vc[ggid]          += vctot;
 859 #pragma omp atomic
 860                 Vv[ggid]          += vvtot;
 861             }
 862         }
 863     }
 864
 865 #pragma omp atomic
 866     dvdl[efptCOUL]     += dvdl_coul;
 867  #pragma omp atomic
 868     dvdl[efptVDW]      += dvdl_vdw;
 869
 870     /* Estimate flops, average for free energy stuff:
 871      * 12  flops per outer iteration
 872      * 150 flops per inner iteration
 873      */
 874 #pragma omp atomic
 875     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 876 }
 877
 878 real
 879 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
 880                                real tabscale, real *vftab,
 881                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
 882                                real LFC[2], real LFV[2], real DLF[2],
 883                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
 884                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
 885                                real *velectot, real *vvdwtot, real *dvdl)
 886 {
 887     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 888     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
 889     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
 890     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 891     real       fscal_vdw[2], fscal_elec[2];
 892     real       velec[2], vvdw[2];
 893     int        i, ntab;
 894
 895     qq[0]    = qqA;
 896     qq[1]    = qqB;
 897     c6[0]    = c6A;
 898     c6[1]    = c6B;
 899     c12[0]   = c12A;
 900     c12[1]   = c12B;
 901
 902     if (sc_r_power == 6.0)
 903     {
 904         rpm2             = r2*r2;   /* r4 */
 905         rp               = rpm2*r2; /* r6 */
 906     }
 907     else if (sc_r_power == 48.0)
 908     {
 909         rp               = r2*r2*r2; /* r6 */
 910         rp               = rp*rp;    /* r12 */
 911         rp               = rp*rp;    /* r24 */
 912         rp               = rp*rp;    /* r48 */
 913         rpm2             = rp/r2;    /* r46 */
 914     }
 915     else
 916     {
 917         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
 918         rpm2           = rp/r2;
 919     }
 920
 921     /* Loop over state A(0) and B(1) */
 922     for (i = 0; i < 2; i++)
 923     {
 924         if ((c6[i] > 0) && (c12[i] > 0))
 925         {
 926             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 927              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 928              */
 929             sigma6[i]       = 0.5*c12[i]/c6[i];
 930             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
 931             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 932                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
 933             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 934             {
 935                 sigma6[i] = sigma6_min;
 936                 sigma2[i] = sigma2_min;
 937             }
 938         }
 939         else
 940         {
 941             sigma6[i]       = sigma6_def;
 942             sigma2[i]       = sigma2_def;
 943         }
 944         if (sc_r_power == 6.0)
 945         {
 946             sigma_pow[i]    = sigma6[i];
 947             sigma_powm2[i]  = sigma6[i]/sigma2[i];
 948         }
 949         else if (sc_r_power == 48.0)
 950         {
 951             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 952             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 953             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 954             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 955         }
 956         else
 957         {    /* not really supported as input, but in here for testing the general case*/
 958             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 959             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 960         }
 961     }
 962
 963     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 964     if ((c12[0] > 0) && (c12[1] > 0))
 965     {
 966         alpha_vdw_eff    = 0;
 967         alpha_coul_eff   = 0;
 968     }
 969     else
 970     {
 971         alpha_vdw_eff    = alpha_vdw;
 972         alpha_coul_eff   = alpha_coul;
 973     }
 974
 975     /* Loop over A and B states again */
 976     for (i = 0; i < 2; i++)
 977     {
 978         fscal_elec[i] = 0;
 979         fscal_vdw[i]  = 0;
 980         velec[i]      = 0;
 981         vvdw[i]       = 0;
 982
 983         /* Only spend time on A or B state if it is non-zero */
 984         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 985         {
 986             /* Coulomb */
 987             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 988             r_coul           = pow(rpinv, -1.0/sc_r_power);
 989
 990             /* Electrostatics table lookup data */
 991             rtab             = r_coul*tabscale;
 992             ntab             = rtab;
 993             eps              = rtab-ntab;
 994             eps2             = eps*eps;
 995             ntab             = 12*ntab;
 996             /* Electrostatics */
 997             Y                = vftab[ntab];
 998             F                = vftab[ntab+1];
 999             Geps             = eps*vftab[ntab+2];
1000             Heps2            = eps2*vftab[ntab+3];
1001             Fp               = F+Geps+Heps2;
1002             VV               = Y+eps*Fp;
1003             FF               = Fp+Geps+2.0*Heps2;
1004             velec[i]         = qq[i]*VV;
1005             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
1006
1007             /* Vdw */
1008             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
1009             r_vdw            = pow(rpinv, -1.0/sc_r_power);
1010             /* Vdw table lookup data */
1011             rtab             = r_vdw*tabscale;
1012             ntab             = rtab;
1013             eps              = rtab-ntab;
1014             eps2             = eps*eps;
1015             ntab             = 12*ntab;
1016             /* Dispersion */
1017             Y                = vftab[ntab+4];
1018             F                = vftab[ntab+5];
1019             Geps             = eps*vftab[ntab+6];
1020             Heps2            = eps2*vftab[ntab+7];
1021             Fp               = F+Geps+Heps2;
1022             VV               = Y+eps*Fp;
1023             FF               = Fp+Geps+2.0*Heps2;
1024             vvdw[i]          = c6[i]*VV;
1025             fscal_vdw[i]     = -c6[i]*FF;
1026
1027             /* Repulsion */
1028             Y                = vftab[ntab+8];
1029             F                = vftab[ntab+9];
1030             Geps             = eps*vftab[ntab+10];
1031             Heps2            = eps2*vftab[ntab+11];
1032             Fp               = F+Geps+Heps2;
1033             VV               = Y+eps*Fp;
1034             FF               = Fp+Geps+2.0*Heps2;
1035             vvdw[i]         += c12[i]*VV;
1036             fscal_vdw[i]    -= c12[i]*FF;
1037             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
1038         }
1039     }
1040     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
1041     /* Assemble A and B states */
1042     velecsum  = 0;
1043     vvdwsum   = 0;
1044     dvdl_coul = 0;
1045     dvdl_vdw  = 0;
1046     fscal     = 0;
1047     for (i = 0; i < 2; i++)
1048     {
1049         velecsum      += LFC[i]*velec[i];
1050         vvdwsum       += LFV[i]*vvdw[i];
1051
1052         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
1053
1054         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
1055         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
1056     }
1057
1058     dvdl[efptCOUL]     += dvdl_coul;
1059     dvdl[efptVDW]      += dvdl_vdw;
1060
1061     *velectot           = velecsum;
1062     *vvdwtot            = vvdwsum;
1063
1064     return fscal;
1065 }