src/gromacs/gmxlib/nonbonded/nb_free_energy.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42
  43 #include "vec.h"
  44 #include "typedefs.h"
  45 #include "nonbonded.h"
  46 #include "nb_kernel.h"
  47 #include "nrnb.h"
  48 #include "macros.h"
  49 #include "nb_free_energy.h"
  50
  51 void
  52 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
  53                           rvec * gmx_restrict              xx,
  54                           rvec * gmx_restrict              ff,
  55                           t_forcerec * gmx_restrict        fr,
  56                           const t_mdatoms * gmx_restrict   mdatoms,
  57                           nb_kernel_data_t * gmx_restrict  kernel_data,
  58                           t_nrnb * gmx_restrict            nrnb)
  59 {
  60
  61 #define  STATE_A  0
  62 #define  STATE_B  1
  63 #define  NSTATES  2
  64     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
  65     real          shX, shY, shZ;
  66     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
  67     real          Vcoul[NSTATES], Vvdw[NSTATES];
  68     real          rinv6, r, rt, rtC, rtV;
  69     real          iqA, iqB;
  70     real          qq[NSTATES], vctot, krsq;
  71     int           ntiA, ntiB, tj[NSTATES];
  72     real          Vvdw6, Vvdw12, vvtot;
  73     real          ix, iy, iz, fix, fiy, fiz;
  74     real          dx, dy, dz, rsq, rinv;
  75     real          c6[NSTATES], c12[NSTATES], c6grid[NSTATES];
  76     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
  77     double        dvdl_coul, dvdl_vdw;
  78     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
  79     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
  80     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
  81     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
  82     int           do_tab, tab_elemsize;
  83     int           n0, n1C, n1V, nnn;
  84     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
  85     int           icoul, ivdw;
  86     int           nri;
  87     const int *   iinr;
  88     const int *   jindex;
  89     const int *   jjnr;
  90     const int *   shift;
  91     const int *   gid;
  92     const int *   typeA;
  93     const int *   typeB;
  94     int           ntype;
  95     const real *  shiftvec;
  96     real          dvdl_part;
  97     real *        fshift;
  98     real          tabscale = 0;
  99     const real *  VFtab    = NULL;
 100     const real *  x;
 101     real *        f;
 102     real          facel, krf, crf;
 103     const real *  chargeA;
 104     const real *  chargeB;
 105     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
 106     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
 107     const real *  nbfp, *nbfp_grid;
 108     real *        dvdl;
 109     real *        Vv;
 110     real *        Vc;
 111     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
 112     real          rcoulomb, sh_ewald;
 113     real          rvdw, sh_invrc6;
 114     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald;
 115     real          rcutoff_max2;
 116     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
 117     const real *  tab_ewald_F;
 118     const real *  tab_ewald_V;
 119     const real *  tab_ewald_F_lj;
 120     const real *  tab_ewald_V_lj;
 121     real          tab_ewald_scale, tab_ewald_halfsp;
 122
 123     x                   = xx[0];
 124     f                   = ff[0];
 125
 126     fshift              = fr->fshift[0];
 127
 128     nri                 = nlist->nri;
 129     iinr                = nlist->iinr;
 130     jindex              = nlist->jindex;
 131     jjnr                = nlist->jjnr;
 132     icoul               = nlist->ielec;
 133     ivdw                = nlist->ivdw;
 134     shift               = nlist->shift;
 135     gid                 = nlist->gid;
 136
 137     shiftvec            = fr->shift_vec[0];
 138     chargeA             = mdatoms->chargeA;
 139     chargeB             = mdatoms->chargeB;
 140     facel               = fr->epsfac;
 141     krf                 = fr->k_rf;
 142     crf                 = fr->c_rf;
 143     ewc_lj              = fr->ewaldcoeff_lj;
 144     Vc                  = kernel_data->energygrp_elec;
 145     typeA               = mdatoms->typeA;
 146     typeB               = mdatoms->typeB;
 147     ntype               = fr->ntype;
 148     nbfp                = fr->nbfp;
 149     nbfp_grid           = fr->ljpme_c6grid;
 150     Vv                  = kernel_data->energygrp_vdw;
 151     lambda_coul         = kernel_data->lambda[efptCOUL];
 152     lambda_vdw          = kernel_data->lambda[efptVDW];
 153     dvdl                = kernel_data->dvdl;
 154     alpha_coul          = fr->sc_alphacoul;
 155     alpha_vdw           = fr->sc_alphavdw;
 156     lam_power           = fr->sc_power;
 157     sc_r_power          = fr->sc_r_power;
 158     sigma6_def          = fr->sc_sigma6_def;
 159     sigma6_min          = fr->sc_sigma6_min;
 160     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
 161     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
 162     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
 163
 164     rcoulomb            = fr->rcoulomb;
 165     sh_ewald            = fr->ic->sh_ewald;
 166     rvdw                = fr->rvdw;
 167     sh_invrc6           = fr->ic->sh_invrc6;
 168
 169     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
 170     tab_ewald_F         = fr->ic->tabq_coul_F;
 171     tab_ewald_V         = fr->ic->tabq_coul_V;
 172     tab_ewald_scale     = fr->ic->tabq_scale;
 173     tab_ewald_F_lj      = fr->ic->tabq_vdw_F;
 174     tab_ewald_V_lj      = fr->ic->tabq_vdw_V;
 175     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
 176
 177     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
 178     {
 179         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
 180         rcutoff2        = rcutoff*rcutoff;
 181         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
 182         d               = rcutoff-rswitch;
 183         swV3            = -10.0/(d*d*d);
 184         swV4            =  15.0/(d*d*d*d);
 185         swV5            =  -6.0/(d*d*d*d*d);
 186         swF2            = -30.0/(d*d*d);
 187         swF3            =  60.0/(d*d*d*d);
 188         swF4            = -30.0/(d*d*d*d*d);
 189     }
 190     else
 191     {
 192         /* Stupid compilers dont realize these variables will not be used */
 193         rswitch         = 0.0;
 194         swV3            = 0.0;
 195         swV4            = 0.0;
 196         swV5            = 0.0;
 197         swF2            = 0.0;
 198         swF3            = 0.0;
 199         swF4            = 0.0;
 200     }
 201
 202     if (fr->cutoff_scheme == ecutsVERLET)
 203     {
 204         const interaction_const_t *ic;
 205
 206         ic = fr->ic;
 207         if (EVDW_PME(ic->vdwtype))
 208         {
 209             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
 210         }
 211         else
 212         {
 213             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
 214         }
 215
 216         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 217         {
 218             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 219         }
 220         else if (EEL_PME_EWALD(ic->eeltype))
 221         {
 222             icoul        = GMX_NBKERNEL_ELEC_EWALD;
 223         }
 224         else
 225         {
 226             gmx_incons("Unsupported eeltype with Verlet and free-energy");
 227         }
 228
 229         bExactElecCutoff = TRUE;
 230         bExactVdwCutoff  = TRUE;
 231     }
 232     else
 233     {
 234         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
 235         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
 236     }
 237
 238     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
 239     rcutoff_max2    = max(fr->rcoulomb, fr->rvdw);
 240     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
 241
 242     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
 243
 244     /* fix compiler warnings */
 245     nj1   = 0;
 246     n1C   = n1V   = 0;
 247     epsC  = epsV  = 0;
 248     eps2C = eps2V = 0;
 249
 250     dvdl_coul  = 0;
 251     dvdl_vdw   = 0;
 252
 253     /* Lambda factor for state A, 1-lambda*/
 254     LFC[STATE_A] = 1.0 - lambda_coul;
 255     LFV[STATE_A] = 1.0 - lambda_vdw;
 256
 257     /* Lambda factor for state B, lambda*/
 258     LFC[STATE_B] = lambda_coul;
 259     LFV[STATE_B] = lambda_vdw;
 260
 261     /*derivative of the lambda factor for state A and B */
 262     DLF[STATE_A] = -1;
 263     DLF[STATE_B] = 1;
 264
 265     for (i = 0; i < NSTATES; i++)
 266     {
 267         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 268         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 269         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 270         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 271     }
 272     /* precalculate */
 273     sigma2_def = pow(sigma6_def, 1.0/3.0);
 274     sigma2_min = pow(sigma6_min, 1.0/3.0);
 275
 276     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 277
 278     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
 279               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 280     if (do_tab)
 281     {
 282         tabscale         = kernel_data->table_elec_vdw->scale;
 283         VFtab            = kernel_data->table_elec_vdw->data;
 284         /* we always use the combined table here */
 285         tab_elemsize     = 12;
 286     }
 287
 288     for (n = 0; (n < nri); n++)
 289     {
 290         int npair_within_cutoff;
 291
 292         npair_within_cutoff = 0;
 293
 294         is3              = 3*shift[n];
 295         shX              = shiftvec[is3];
 296         shY              = shiftvec[is3+1];
 297         shZ              = shiftvec[is3+2];
 298         nj0              = jindex[n];
 299         nj1              = jindex[n+1];
 300         ii               = iinr[n];
 301         ii3              = 3*ii;
 302         ix               = shX + x[ii3+0];
 303         iy               = shY + x[ii3+1];
 304         iz               = shZ + x[ii3+2];
 305         iqA              = facel*chargeA[ii];
 306         iqB              = facel*chargeB[ii];
 307         ntiA             = 2*ntype*typeA[ii];
 308         ntiB             = 2*ntype*typeB[ii];
 309         vctot            = 0;
 310         vvtot            = 0;
 311         fix              = 0;
 312         fiy              = 0;
 313         fiz              = 0;
 314
 315         for (k = nj0; (k < nj1); k++)
 316         {
 317             jnr              = jjnr[k];
 318             j3               = 3*jnr;
 319             dx               = ix - x[j3];
 320             dy               = iy - x[j3+1];
 321             dz               = iz - x[j3+2];
 322             rsq              = dx*dx + dy*dy + dz*dz;
 323
 324             if (bExactCutoffAll && rsq >= rcutoff_max2)
 325             {
 326                 /* We save significant time by skipping all code below.
 327                  * Note that with soft-core interactions, the actual cut-off
 328                  * check might be different. But since the soft-core distance
 329                  * is always larger than r, checking on r here is safe.
 330                  */
 331                 continue;
 332             }
 333             npair_within_cutoff++;
 334
 335             if (rsq > 0)
 336             {
 337                 rinv         = gmx_invsqrt(rsq);
 338                 r            = rsq*rinv;
 339             }
 340             else
 341             {
 342                 /* The force at r=0 is zero, because of symmetry.
 343                  * But note that the potential is in general non-zero,
 344                  * since the soft-cored r will be non-zero.
 345                  */
 346                 rinv         = 0;
 347                 r            = 0;
 348             }
 349
 350             if (sc_r_power == 6.0)
 351             {
 352                 rpm2             = rsq*rsq;  /* r4 */
 353                 rp               = rpm2*rsq; /* r6 */
 354             }
 355             else if (sc_r_power == 48.0)
 356             {
 357                 rp               = rsq*rsq*rsq; /* r6 */
 358                 rp               = rp*rp;       /* r12 */
 359                 rp               = rp*rp;       /* r24 */
 360                 rp               = rp*rp;       /* r48 */
 361                 rpm2             = rp/rsq;      /* r46 */
 362             }
 363             else
 364             {
 365                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
 366                 rpm2           = rp/rsq;
 367             }
 368
 369             Fscal = 0;
 370
 371             qq[STATE_A]      = iqA*chargeA[jnr];
 372             qq[STATE_B]      = iqB*chargeB[jnr];
 373
 374             tj[STATE_A]      = ntiA+2*typeA[jnr];
 375             tj[STATE_B]      = ntiB+2*typeB[jnr];
 376
 377             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD)
 378             {
 379                 c6grid[STATE_A] = nbfp_grid[tj[STATE_A]];
 380                 c6grid[STATE_B] = nbfp_grid[tj[STATE_B]];
 381             }
 382
 383             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
 384             {
 385                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 386                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 387
 388                 for (i = 0; i < NSTATES; i++)
 389                 {
 390                     c12[i]             = nbfp[tj[i]+1];
 391                     if ((c6[i] > 0) && (c12[i] > 0))
 392                     {
 393                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 394                         sigma6[i]       = 0.5*c12[i]/c6[i];
 395                         sigma2[i]       = pow(sigma6[i], 1.0/3.0);
 396                         /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 397                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 398                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 399                         {
 400                             sigma6[i] = sigma6_min;
 401                             sigma2[i] = sigma2_min;
 402                         }
 403                     }
 404                     else
 405                     {
 406                         sigma6[i]       = sigma6_def;
 407                         sigma2[i]       = sigma2_def;
 408                     }
 409                     if (sc_r_power == 6.0)
 410                     {
 411                         sigma_pow[i]    = sigma6[i];
 412                         sigma_powm2[i]  = sigma6[i]/sigma2[i];
 413                     }
 414                     else if (sc_r_power == 48.0)
 415                     {
 416                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 417                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 418                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 419                         sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 420                     }
 421                     else
 422                     {    /* not really supported as input, but in here for testing the general case*/
 423                         sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 424                         sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 425                     }
 426                 }
 427
 428                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 429                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 430                 {
 431                     alpha_vdw_eff    = 0;
 432                     alpha_coul_eff   = 0;
 433                 }
 434                 else
 435                 {
 436                     alpha_vdw_eff    = alpha_vdw;
 437                     alpha_coul_eff   = alpha_coul;
 438                 }
 439
 440                 for (i = 0; i < NSTATES; i++)
 441                 {
 442                     FscalC[i]    = 0;
 443                     FscalV[i]    = 0;
 444                     Vcoul[i]     = 0;
 445                     Vvdw[i]      = 0;
 446
 447                     /* Only spend time on A or B state if it is non-zero */
 448                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 449                     {
 450                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 451                         rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 452                         rinvC          = pow(rpinvC, 1.0/sc_r_power);
 453                         rC             = 1.0/rinvC;
 454
 455                         rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 456                         rinvV          = pow(rpinvV, 1.0/sc_r_power);
 457                         rV             = 1.0/rinvV;
 458
 459                         if (do_tab)
 460                         {
 461                             rtC        = rC*tabscale;
 462                             n0         = rtC;
 463                             epsC       = rtC-n0;
 464                             eps2C      = epsC*epsC;
 465                             n1C        = tab_elemsize*n0;
 466
 467                             rtV        = rV*tabscale;
 468                             n0         = rtV;
 469                             epsV       = rtV-n0;
 470                             eps2V      = epsV*epsV;
 471                             n1V        = tab_elemsize*n0;
 472                         }
 473
 474                         /* With Ewald and soft-core we should put the cut-off on r,
 475                          * not on the soft-cored rC, as the real-space and
 476                          * reciprocal space contributions should (almost) cancel.
 477                          */
 478                         if (qq[i] != 0 &&
 479                             !(bExactElecCutoff &&
 480                               ((!bEwald && rC >= rcoulomb) ||
 481                                (bEwald && r >= rcoulomb))))
 482                         {
 483                             switch (icoul)
 484                             {
 485                                 case GMX_NBKERNEL_ELEC_COULOMB:
 486                                     /* simple cutoff */
 487                                     Vcoul[i]   = qq[i]*rinvC;
 488                                     FscalC[i]  = Vcoul[i];
 489                                     break;
 490
 491                                 case GMX_NBKERNEL_ELEC_EWALD:
 492                                     /* Ewald FEP is done only on the 1/r part */
 493                                     Vcoul[i]   = qq[i]*(rinvC - sh_ewald);
 494                                     FscalC[i]  = Vcoul[i];
 495                                     break;
 496
 497                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 498                                     /* reaction-field */
 499                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
 500                                     FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
 501                                     break;
 502
 503                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 504                                     /* non-Ewald tabulated coulomb */
 505                                     nnn        = n1C;
 506                                     Y          = VFtab[nnn];
 507                                     F          = VFtab[nnn+1];
 508                                     Geps       = epsC*VFtab[nnn+2];
 509                                     Heps2      = eps2C*VFtab[nnn+3];
 510                                     Fp         = F+Geps+Heps2;
 511                                     VV         = Y+epsC*Fp;
 512                                     FF         = Fp+Geps+2.0*Heps2;
 513                                     Vcoul[i]   = qq[i]*VV;
 514                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
 515                                     break;
 516
 517                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
 518                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
 519                                     break;
 520
 521                                 case GMX_NBKERNEL_ELEC_NONE:
 522                                     FscalC[i]  = 0.0;
 523                                     Vcoul[i]   = 0.0;
 524                                     break;
 525
 526                                 default:
 527                                     gmx_incons("Invalid icoul in free energy kernel");
 528                                     break;
 529                             }
 530
 531                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
 532                             {
 533                                 d                = rC-rswitch;
 534                                 d                = (d > 0.0) ? d : 0.0;
 535                                 d2               = d*d;
 536                                 sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 537                                 dsw              = d2*(swF2+d*(swF3+d*swF4));
 538
 539                                 Vcoul[i]  *= sw;
 540                                 FscalC[i]  = FscalC[i]*sw + Vcoul[i]*dsw;
 541                             }
 542                         }
 543
 544                         if ((c6[i] != 0 || c12[i] != 0) &&
 545                             !(bExactVdwCutoff &&
 546                               ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) ||
 547                                (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw))))
 548                         {
 549                             switch (ivdw)
 550                             {
 551                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
 552                                 case GMX_NBKERNEL_VDW_LJEWALD:
 553                                     /* cutoff LJ */
 554                                     if (sc_r_power == 6.0)
 555                                     {
 556                                         rinv6            = rpinvV;
 557                                     }
 558                                     else
 559                                     {
 560                                         rinv6            = pow(rinvV, 6.0);
 561                                     }
 562                                     Vvdw6            = c6[i]*rinv6;
 563                                     Vvdw12           = c12[i]*rinv6*rinv6;
 564                                     if (fr->vdw_modifier == eintmodPOTSHIFT)
 565                                     {
 566                                         Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
 567                                                              -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
 568                                     }
 569                                     else
 570                                     {
 571                                         Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
 572                                     }
 573                                     FscalV[i]        = Vvdw12 - Vvdw6;
 574                                     break;
 575
 576                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
 577                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
 578                                     break;
 579
 580                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 581                                     /* Table LJ */
 582                                     nnn = n1V+4;
 583                                     /* dispersion */
 584                                     Y          = VFtab[nnn];
 585                                     F          = VFtab[nnn+1];
 586                                     Geps       = epsV*VFtab[nnn+2];
 587                                     Heps2      = eps2V*VFtab[nnn+3];
 588                                     Fp         = F+Geps+Heps2;
 589                                     VV         = Y+epsV*Fp;
 590                                     FF         = Fp+Geps+2.0*Heps2;
 591                                     Vvdw[i]   += c6[i]*VV;
 592                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
 593
 594                                     /* repulsion */
 595                                     Y          = VFtab[nnn+4];
 596                                     F          = VFtab[nnn+5];
 597                                     Geps       = epsV*VFtab[nnn+6];
 598                                     Heps2      = eps2V*VFtab[nnn+7];
 599                                     Fp         = F+Geps+Heps2;
 600                                     VV         = Y+epsV*Fp;
 601                                     FF         = Fp+Geps+2.0*Heps2;
 602                                     Vvdw[i]   += c12[i]*VV;
 603                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
 604                                     break;
 605
 606                                 case GMX_NBKERNEL_VDW_NONE:
 607                                     Vvdw[i]    = 0.0;
 608                                     FscalV[i]  = 0.0;
 609                                     break;
 610
 611                                 default:
 612                                     gmx_incons("Invalid ivdw in free energy kernel");
 613                                     break;
 614                             }
 615
 616                             if (fr->vdw_modifier == eintmodPOTSWITCH)
 617                             {
 618                                 d          = rV-rswitch;
 619                                 d          = (d > 0.0) ? d : 0.0;
 620                                 d2         = d*d;
 621                                 sw         = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 622                                 dsw        = d2*(swF2+d*(swF3+d*swF4));
 623
 624                                 Vvdw[i]   *= sw;
 625                                 FscalV[i]  = FscalV[i]*sw + Vvdw[i]*dsw;
 626
 627                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : 0.0;
 628                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : 0.0;
 629                             }
 630                         }
 631
 632                         /* FscalC (and FscalV) now contain: dV/drC * rC
 633                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 634                          * Further down we first multiply by r^p-2 and then by
 635                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 636                          */
 637                         FscalC[i] *= rpinvC;
 638                         FscalV[i] *= rpinvV;
 639                     }
 640                 }
 641
 642                 /* Assemble A and B states */
 643                 for (i = 0; i < NSTATES; i++)
 644                 {
 645                     vctot         += LFC[i]*Vcoul[i];
 646                     vvtot         += LFV[i]*Vvdw[i];
 647
 648                     Fscal         += LFC[i]*FscalC[i]*rpm2;
 649                     Fscal         += LFV[i]*FscalV[i]*rpm2;
 650
 651                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 652                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 653                 }
 654             }
 655             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 656             {
 657                 /* For excluded pairs, which are only in this pair list when
 658                  * using the Verlet scheme, we don't use soft-core.
 659                  * The group scheme also doesn't soft-core for these.
 660                  * As there is no singularity, there is no need for soft-core.
 661                  */
 662                 VV = krf*rsq - crf;
 663                 FF = -2.0*krf;
 664
 665                 if (ii == jnr)
 666                 {
 667                     VV *= 0.5;
 668                 }
 669
 670                 for (i = 0; i < NSTATES; i++)
 671                 {
 672                     vctot      += LFC[i]*qq[i]*VV;
 673                     Fscal      += LFC[i]*qq[i]*FF;
 674                     dvdl_coul  += DLF[i]*qq[i]*VV;
 675                 }
 676             }
 677
 678             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
 679                 !(bExactElecCutoff && r >= rcoulomb))
 680             {
 681                 /* Because we compute the soft-core normally,
 682                  * we have to remove the Ewald short range portion.
 683                  * Done outside of the states loop because this part
 684                  * doesn't depend on the scaled R.
 685                  */
 686                 real rs, frac, f_lr;
 687                 int  ri;
 688
 689                 rs     = rsq*rinv*tab_ewald_scale;
 690                 ri     = (int)rs;
 691                 frac   = rs - ri;
 692                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
 693                 FF     = f_lr*rinv;
 694                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
 695
 696                 if (ii == jnr)
 697                 {
 698                     VV   *= 0.5;
 699                 }
 700
 701                 for (i = 0; i < NSTATES; i++)
 702                 {
 703                     vctot      -= LFC[i]*qq[i]*VV;
 704                     Fscal      -= LFC[i]*qq[i]*FF;
 705                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
 706                 }
 707             }
 708
 709             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD &&
 710                 !(bExactVdwCutoff && r >= rvdw))
 711             {
 712                 real rs, frac, f_lr;
 713                 int  ri;
 714
 715                 rs     = rsq*rinv*tab_ewald_scale;
 716                 ri     = (int)rs;
 717                 frac   = rs - ri;
 718                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 719                 FF     = f_lr*rinv;
 720                 VV     = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr);
 721                 for (i = 0; i < NSTATES; i++)
 722                 {
 723                     vvtot      += LFV[i]*c6grid[i]*VV*(1.0/6.0);
 724                     Fscal      += LFV[i]*c6grid[i]*FF*(1.0/6.0);
 725                     dvdl_vdw   += (DLF[i]*c6grid[i])*VV*(1.0/6.0);
 726                 }
 727
 728             }
 729
 730             if (bDoForces)
 731             {
 732                 tx         = Fscal*dx;
 733                 ty         = Fscal*dy;
 734                 tz         = Fscal*dz;
 735                 fix        = fix + tx;
 736                 fiy        = fiy + ty;
 737                 fiz        = fiz + tz;
 738                 /* OpenMP atomics are expensive, but this kernels is also
 739                  * expensive, so we can take this hit, instead of using
 740                  * thread-local output buffers and extra reduction.
 741                  */
 742 #pragma omp atomic
 743                 f[j3]     -= tx;
 744 #pragma omp atomic
 745                 f[j3+1]   -= ty;
 746 #pragma omp atomic
 747                 f[j3+2]   -= tz;
 748             }
 749         }
 750
 751         /* The atomics below are expensive with many OpenMP threads.
 752          * Here unperturbed i-particles will usually only have a few
 753          * (perturbed) j-particles in the list. Thus with a buffered list
 754          * we can skip a significant number of i-reductions with a check.
 755          */
 756         if (npair_within_cutoff > 0)
 757         {
 758             if (bDoForces)
 759             {
 760 #pragma omp atomic
 761                 f[ii3]        += fix;
 762 #pragma omp atomic
 763                 f[ii3+1]      += fiy;
 764 #pragma omp atomic
 765                 f[ii3+2]      += fiz;
 766             }
 767             if (bDoShiftForces)
 768             {
 769 #pragma omp atomic
 770                 fshift[is3]   += fix;
 771 #pragma omp atomic
 772                 fshift[is3+1] += fiy;
 773 #pragma omp atomic
 774                 fshift[is3+2] += fiz;
 775             }
 776             if (bDoPotential)
 777             {
 778                 ggid               = gid[n];
 779 #pragma omp atomic
 780                 Vc[ggid]          += vctot;
 781 #pragma omp atomic
 782                 Vv[ggid]          += vvtot;
 783             }
 784         }
 785     }
 786
 787 #pragma omp atomic
 788     dvdl[efptCOUL]     += dvdl_coul;
 789  #pragma omp atomic
 790     dvdl[efptVDW]      += dvdl_vdw;
 791
 792     /* Estimate flops, average for free energy stuff:
 793      * 12  flops per outer iteration
 794      * 150 flops per inner iteration
 795      */
 796     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 797 }
 798
 799 real
 800 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
 801                                real tabscale, real *vftab,
 802                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
 803                                real LFC[2], real LFV[2], real DLF[2],
 804                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
 805                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
 806                                real *velectot, real *vvdwtot, real *dvdl)
 807 {
 808     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 809     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
 810     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
 811     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 812     real       fscal_vdw[2], fscal_elec[2];
 813     real       velec[2], vvdw[2];
 814     int        i, ntab;
 815
 816     qq[0]    = qqA;
 817     qq[1]    = qqB;
 818     c6[0]    = c6A;
 819     c6[1]    = c6B;
 820     c12[0]   = c12A;
 821     c12[1]   = c12B;
 822
 823     if (sc_r_power == 6.0)
 824     {
 825         rpm2             = r2*r2;   /* r4 */
 826         rp               = rpm2*r2; /* r6 */
 827     }
 828     else if (sc_r_power == 48.0)
 829     {
 830         rp               = r2*r2*r2; /* r6 */
 831         rp               = rp*rp;    /* r12 */
 832         rp               = rp*rp;    /* r24 */
 833         rp               = rp*rp;    /* r48 */
 834         rpm2             = rp/r2;    /* r46 */
 835     }
 836     else
 837     {
 838         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
 839         rpm2           = rp/r2;
 840     }
 841
 842     /* Loop over state A(0) and B(1) */
 843     for (i = 0; i < 2; i++)
 844     {
 845         if ((c6[i] > 0) && (c12[i] > 0))
 846         {
 847             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 848              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 849              */
 850             sigma6[i]       = 0.5*c12[i]/c6[i];
 851             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
 852             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 853                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
 854             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 855             {
 856                 sigma6[i] = sigma6_min;
 857                 sigma2[i] = sigma2_min;
 858             }
 859         }
 860         else
 861         {
 862             sigma6[i]       = sigma6_def;
 863             sigma2[i]       = sigma2_def;
 864         }
 865         if (sc_r_power == 6.0)
 866         {
 867             sigma_pow[i]    = sigma6[i];
 868             sigma_powm2[i]  = sigma6[i]/sigma2[i];
 869         }
 870         else if (sc_r_power == 48.0)
 871         {
 872             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 873             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 874             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 875             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 876         }
 877         else
 878         {    /* not really supported as input, but in here for testing the general case*/
 879             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 880             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 881         }
 882     }
 883
 884     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 885     if ((c12[0] > 0) && (c12[1] > 0))
 886     {
 887         alpha_vdw_eff    = 0;
 888         alpha_coul_eff   = 0;
 889     }
 890     else
 891     {
 892         alpha_vdw_eff    = alpha_vdw;
 893         alpha_coul_eff   = alpha_coul;
 894     }
 895
 896     /* Loop over A and B states again */
 897     for (i = 0; i < 2; i++)
 898     {
 899         fscal_elec[i] = 0;
 900         fscal_vdw[i]  = 0;
 901         velec[i]      = 0;
 902         vvdw[i]       = 0;
 903
 904         /* Only spend time on A or B state if it is non-zero */
 905         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 906         {
 907             /* Coulomb */
 908             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 909             r_coul           = pow(rpinv, -1.0/sc_r_power);
 910
 911             /* Electrostatics table lookup data */
 912             rtab             = r_coul*tabscale;
 913             ntab             = rtab;
 914             eps              = rtab-ntab;
 915             eps2             = eps*eps;
 916             ntab             = 12*ntab;
 917             /* Electrostatics */
 918             Y                = vftab[ntab];
 919             F                = vftab[ntab+1];
 920             Geps             = eps*vftab[ntab+2];
 921             Heps2            = eps2*vftab[ntab+3];
 922             Fp               = F+Geps+Heps2;
 923             VV               = Y+eps*Fp;
 924             FF               = Fp+Geps+2.0*Heps2;
 925             velec[i]         = qq[i]*VV;
 926             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
 927
 928             /* Vdw */
 929             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 930             r_vdw            = pow(rpinv, -1.0/sc_r_power);
 931             /* Vdw table lookup data */
 932             rtab             = r_vdw*tabscale;
 933             ntab             = rtab;
 934             eps              = rtab-ntab;
 935             eps2             = eps*eps;
 936             ntab             = 12*ntab;
 937             /* Dispersion */
 938             Y                = vftab[ntab+4];
 939             F                = vftab[ntab+5];
 940             Geps             = eps*vftab[ntab+6];
 941             Heps2            = eps2*vftab[ntab+7];
 942             Fp               = F+Geps+Heps2;
 943             VV               = Y+eps*Fp;
 944             FF               = Fp+Geps+2.0*Heps2;
 945             vvdw[i]          = c6[i]*VV;
 946             fscal_vdw[i]     = -c6[i]*FF;
 947
 948             /* Repulsion */
 949             Y                = vftab[ntab+8];
 950             F                = vftab[ntab+9];
 951             Geps             = eps*vftab[ntab+10];
 952             Heps2            = eps2*vftab[ntab+11];
 953             Fp               = F+Geps+Heps2;
 954             VV               = Y+eps*Fp;
 955             FF               = Fp+Geps+2.0*Heps2;
 956             vvdw[i]         += c12[i]*VV;
 957             fscal_vdw[i]    -= c12[i]*FF;
 958             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
 959         }
 960     }
 961     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 962     /* Assemble A and B states */
 963     velecsum  = 0;
 964     vvdwsum   = 0;
 965     dvdl_coul = 0;
 966     dvdl_vdw  = 0;
 967     fscal     = 0;
 968     for (i = 0; i < 2; i++)
 969     {
 970         velecsum      += LFC[i]*velec[i];
 971         vvdwsum       += LFV[i]*vvdw[i];
 972
 973         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
 974
 975         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
 976         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
 977     }
 978
 979     dvdl[efptCOUL]     += dvdl_coul;
 980     dvdl[efptVDW]      += dvdl_vdw;
 981
 982     *velectot           = velecsum;
 983     *vvdwtot            = vvdwsum;
 984
 985     return fscal;
 986 }