src/gromacs/gmxlib/nonbonded/nb_free_energy.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42
  43 #include "gromacs/math/vec.h"
  44 #include "typedefs.h"
  45 #include "nonbonded.h"
  46 #include "nb_kernel.h"
  47 #include "nrnb.h"
  48 #include "macros.h"
  49 #include "nb_free_energy.h"
  50
  51 #include "gromacs/utility/fatalerror.h"
  52
  53 void
  54 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
  55                           rvec * gmx_restrict              xx,
  56                           rvec * gmx_restrict              ff,
  57                           t_forcerec * gmx_restrict        fr,
  58                           const t_mdatoms * gmx_restrict   mdatoms,
  59                           nb_kernel_data_t * gmx_restrict  kernel_data,
  60                           t_nrnb * gmx_restrict            nrnb)
  61 {
  62
  63 #define  STATE_A  0
  64 #define  STATE_B  1
  65 #define  NSTATES  2
  66     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
  67     real          shX, shY, shZ;
  68     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
  69     real          Vcoul[NSTATES], Vvdw[NSTATES];
  70     real          rinv6, r, rt, rtC, rtV;
  71     real          iqA, iqB;
  72     real          qq[NSTATES], vctot, krsq;
  73     int           ntiA, ntiB, tj[NSTATES];
  74     real          Vvdw6, Vvdw12, vvtot;
  75     real          ix, iy, iz, fix, fiy, fiz;
  76     real          dx, dy, dz, rsq, rinv;
  77     real          c6[NSTATES], c12[NSTATES], c6grid[NSTATES];
  78     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
  79     double        dvdl_coul, dvdl_vdw;
  80     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
  81     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
  82     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
  83     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
  84     int           do_tab, tab_elemsize;
  85     int           n0, n1C, n1V, nnn;
  86     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
  87     int           icoul, ivdw;
  88     int           nri;
  89     const int *   iinr;
  90     const int *   jindex;
  91     const int *   jjnr;
  92     const int *   shift;
  93     const int *   gid;
  94     const int *   typeA;
  95     const int *   typeB;
  96     int           ntype;
  97     const real *  shiftvec;
  98     real          dvdl_part;
  99     real *        fshift;
 100     real          tabscale = 0;
 101     const real *  VFtab    = NULL;
 102     const real *  x;
 103     real *        f;
 104     real          facel, krf, crf;
 105     const real *  chargeA;
 106     const real *  chargeB;
 107     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
 108     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
 109     const real *  nbfp, *nbfp_grid;
 110     real *        dvdl;
 111     real *        Vv;
 112     real *        Vc;
 113     gmx_bool      bDoForces, bDoShiftForces, bDoPotential;
 114     real          rcoulomb, sh_ewald;
 115     real          rvdw, sh_invrc6;
 116     gmx_bool      bExactElecCutoff, bExactVdwCutoff, bExactCutoffAll, bEwald;
 117     real          rcutoff_max2;
 118     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
 119     const real *  tab_ewald_F;
 120     const real *  tab_ewald_V;
 121     const real *  tab_ewald_F_lj;
 122     const real *  tab_ewald_V_lj;
 123     real          tab_ewald_scale, tab_ewald_halfsp;
 124
 125     x                   = xx[0];
 126     f                   = ff[0];
 127
 128     fshift              = fr->fshift[0];
 129
 130     nri                 = nlist->nri;
 131     iinr                = nlist->iinr;
 132     jindex              = nlist->jindex;
 133     jjnr                = nlist->jjnr;
 134     icoul               = nlist->ielec;
 135     ivdw                = nlist->ivdw;
 136     shift               = nlist->shift;
 137     gid                 = nlist->gid;
 138
 139     shiftvec            = fr->shift_vec[0];
 140     chargeA             = mdatoms->chargeA;
 141     chargeB             = mdatoms->chargeB;
 142     facel               = fr->epsfac;
 143     krf                 = fr->k_rf;
 144     crf                 = fr->c_rf;
 145     ewc_lj              = fr->ewaldcoeff_lj;
 146     Vc                  = kernel_data->energygrp_elec;
 147     typeA               = mdatoms->typeA;
 148     typeB               = mdatoms->typeB;
 149     ntype               = fr->ntype;
 150     nbfp                = fr->nbfp;
 151     nbfp_grid           = fr->ljpme_c6grid;
 152     Vv                  = kernel_data->energygrp_vdw;
 153     lambda_coul         = kernel_data->lambda[efptCOUL];
 154     lambda_vdw          = kernel_data->lambda[efptVDW];
 155     dvdl                = kernel_data->dvdl;
 156     alpha_coul          = fr->sc_alphacoul;
 157     alpha_vdw           = fr->sc_alphavdw;
 158     lam_power           = fr->sc_power;
 159     sc_r_power          = fr->sc_r_power;
 160     sigma6_def          = fr->sc_sigma6_def;
 161     sigma6_min          = fr->sc_sigma6_min;
 162     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
 163     bDoShiftForces      = kernel_data->flags & GMX_NONBONDED_DO_SHIFTFORCE;
 164     bDoPotential        = kernel_data->flags & GMX_NONBONDED_DO_POTENTIAL;
 165
 166     rcoulomb            = fr->rcoulomb;
 167     sh_ewald            = fr->ic->sh_ewald;
 168     rvdw                = fr->rvdw;
 169     sh_invrc6           = fr->ic->sh_invrc6;
 170
 171     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
 172     tab_ewald_F         = fr->ic->tabq_coul_F;
 173     tab_ewald_V         = fr->ic->tabq_coul_V;
 174     tab_ewald_scale     = fr->ic->tabq_scale;
 175     tab_ewald_F_lj      = fr->ic->tabq_vdw_F;
 176     tab_ewald_V_lj      = fr->ic->tabq_vdw_V;
 177     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
 178
 179     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
 180     {
 181         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
 182         rcutoff2        = rcutoff*rcutoff;
 183         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
 184         d               = rcutoff-rswitch;
 185         swV3            = -10.0/(d*d*d);
 186         swV4            =  15.0/(d*d*d*d);
 187         swV5            =  -6.0/(d*d*d*d*d);
 188         swF2            = -30.0/(d*d*d);
 189         swF3            =  60.0/(d*d*d*d);
 190         swF4            = -30.0/(d*d*d*d*d);
 191     }
 192     else
 193     {
 194         /* Stupid compilers dont realize these variables will not be used */
 195         rswitch         = 0.0;
 196         swV3            = 0.0;
 197         swV4            = 0.0;
 198         swV5            = 0.0;
 199         swF2            = 0.0;
 200         swF3            = 0.0;
 201         swF4            = 0.0;
 202     }
 203
 204     if (fr->cutoff_scheme == ecutsVERLET)
 205     {
 206         const interaction_const_t *ic;
 207
 208         ic = fr->ic;
 209         if (EVDW_PME(ic->vdwtype))
 210         {
 211             ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
 212         }
 213         else
 214         {
 215             ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
 216         }
 217
 218         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
 219         {
 220             icoul        = GMX_NBKERNEL_ELEC_REACTIONFIELD;
 221         }
 222         else if (EEL_PME_EWALD(ic->eeltype))
 223         {
 224             icoul        = GMX_NBKERNEL_ELEC_EWALD;
 225         }
 226         else
 227         {
 228             gmx_incons("Unsupported eeltype with Verlet and free-energy");
 229         }
 230
 231         bExactElecCutoff = TRUE;
 232         bExactVdwCutoff  = TRUE;
 233     }
 234     else
 235     {
 236         bExactElecCutoff = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
 237         bExactVdwCutoff  = (fr->vdw_modifier != eintmodNONE);
 238     }
 239
 240     bExactCutoffAll = (bExactElecCutoff && bExactVdwCutoff);
 241     rcutoff_max2    = max(fr->rcoulomb, fr->rvdw);
 242     rcutoff_max2    = rcutoff_max2*rcutoff_max2;
 243
 244     bEwald          = (icoul == GMX_NBKERNEL_ELEC_EWALD);
 245
 246     /* fix compiler warnings */
 247     nj1   = 0;
 248     n1C   = n1V   = 0;
 249     epsC  = epsV  = 0;
 250     eps2C = eps2V = 0;
 251
 252     dvdl_coul  = 0;
 253     dvdl_vdw   = 0;
 254
 255     /* Lambda factor for state A, 1-lambda*/
 256     LFC[STATE_A] = 1.0 - lambda_coul;
 257     LFV[STATE_A] = 1.0 - lambda_vdw;
 258
 259     /* Lambda factor for state B, lambda*/
 260     LFC[STATE_B] = lambda_coul;
 261     LFV[STATE_B] = lambda_vdw;
 262
 263     /*derivative of the lambda factor for state A and B */
 264     DLF[STATE_A] = -1;
 265     DLF[STATE_B] = 1;
 266
 267     for (i = 0; i < NSTATES; i++)
 268     {
 269         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 270         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 271         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 272         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 273     }
 274     /* precalculate */
 275     sigma2_def = pow(sigma6_def, 1.0/3.0);
 276     sigma2_min = pow(sigma6_min, 1.0/3.0);
 277
 278     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 279
 280     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
 281               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 282     if (do_tab)
 283     {
 284         tabscale         = kernel_data->table_elec_vdw->scale;
 285         VFtab            = kernel_data->table_elec_vdw->data;
 286         /* we always use the combined table here */
 287         tab_elemsize     = 12;
 288     }
 289
 290     for (n = 0; (n < nri); n++)
 291     {
 292         int npair_within_cutoff;
 293
 294         npair_within_cutoff = 0;
 295
 296         is3              = 3*shift[n];
 297         shX              = shiftvec[is3];
 298         shY              = shiftvec[is3+1];
 299         shZ              = shiftvec[is3+2];
 300         nj0              = jindex[n];
 301         nj1              = jindex[n+1];
 302         ii               = iinr[n];
 303         ii3              = 3*ii;
 304         ix               = shX + x[ii3+0];
 305         iy               = shY + x[ii3+1];
 306         iz               = shZ + x[ii3+2];
 307         iqA              = facel*chargeA[ii];
 308         iqB              = facel*chargeB[ii];
 309         ntiA             = 2*ntype*typeA[ii];
 310         ntiB             = 2*ntype*typeB[ii];
 311         vctot            = 0;
 312         vvtot            = 0;
 313         fix              = 0;
 314         fiy              = 0;
 315         fiz              = 0;
 316
 317         for (k = nj0; (k < nj1); k++)
 318         {
 319             jnr              = jjnr[k];
 320             j3               = 3*jnr;
 321             dx               = ix - x[j3];
 322             dy               = iy - x[j3+1];
 323             dz               = iz - x[j3+2];
 324             rsq              = dx*dx + dy*dy + dz*dz;
 325
 326             if (bExactCutoffAll && rsq >= rcutoff_max2)
 327             {
 328                 /* We save significant time by skipping all code below.
 329                  * Note that with soft-core interactions, the actual cut-off
 330                  * check might be different. But since the soft-core distance
 331                  * is always larger than r, checking on r here is safe.
 332                  */
 333                 continue;
 334             }
 335             npair_within_cutoff++;
 336
 337             if (rsq > 0)
 338             {
 339                 rinv         = gmx_invsqrt(rsq);
 340                 r            = rsq*rinv;
 341             }
 342             else
 343             {
 344                 /* The force at r=0 is zero, because of symmetry.
 345                  * But note that the potential is in general non-zero,
 346                  * since the soft-cored r will be non-zero.
 347                  */
 348                 rinv         = 0;
 349                 r            = 0;
 350             }
 351
 352             if (sc_r_power == 6.0)
 353             {
 354                 rpm2             = rsq*rsq;  /* r4 */
 355                 rp               = rpm2*rsq; /* r6 */
 356             }
 357             else if (sc_r_power == 48.0)
 358             {
 359                 rp               = rsq*rsq*rsq; /* r6 */
 360                 rp               = rp*rp;       /* r12 */
 361                 rp               = rp*rp;       /* r24 */
 362                 rp               = rp*rp;       /* r48 */
 363                 rpm2             = rp/rsq;      /* r46 */
 364             }
 365             else
 366             {
 367                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
 368                 rpm2           = rp/rsq;
 369             }
 370
 371             Fscal = 0;
 372
 373             qq[STATE_A]      = iqA*chargeA[jnr];
 374             qq[STATE_B]      = iqB*chargeB[jnr];
 375
 376             tj[STATE_A]      = ntiA+2*typeA[jnr];
 377             tj[STATE_B]      = ntiB+2*typeB[jnr];
 378
 379             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD)
 380             {
 381                 c6grid[STATE_A] = nbfp_grid[tj[STATE_A]];
 382                 c6grid[STATE_B] = nbfp_grid[tj[STATE_B]];
 383             }
 384
 385             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
 386             {
 387                 c6[STATE_A]      = nbfp[tj[STATE_A]];
 388                 c6[STATE_B]      = nbfp[tj[STATE_B]];
 389
 390                 for (i = 0; i < NSTATES; i++)
 391                 {
 392                     c12[i]             = nbfp[tj[i]+1];
 393                     if ((c6[i] > 0) && (c12[i] > 0))
 394                     {
 395                         /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 396                         sigma6[i]       = 0.5*c12[i]/c6[i];
 397                         sigma2[i]       = pow(sigma6[i], 1.0/3.0);
 398                         /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 399                            what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 400                         if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 401                         {
 402                             sigma6[i] = sigma6_min;
 403                             sigma2[i] = sigma2_min;
 404                         }
 405                     }
 406                     else
 407                     {
 408                         sigma6[i]       = sigma6_def;
 409                         sigma2[i]       = sigma2_def;
 410                     }
 411                     if (sc_r_power == 6.0)
 412                     {
 413                         sigma_pow[i]    = sigma6[i];
 414                         sigma_powm2[i]  = sigma6[i]/sigma2[i];
 415                     }
 416                     else if (sc_r_power == 48.0)
 417                     {
 418                         sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 419                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 420                         sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 421                         sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 422                     }
 423                     else
 424                     {    /* not really supported as input, but in here for testing the general case*/
 425                         sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 426                         sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 427                     }
 428                 }
 429
 430                 /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 431                 if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 432                 {
 433                     alpha_vdw_eff    = 0;
 434                     alpha_coul_eff   = 0;
 435                 }
 436                 else
 437                 {
 438                     alpha_vdw_eff    = alpha_vdw;
 439                     alpha_coul_eff   = alpha_coul;
 440                 }
 441
 442                 for (i = 0; i < NSTATES; i++)
 443                 {
 444                     FscalC[i]    = 0;
 445                     FscalV[i]    = 0;
 446                     Vcoul[i]     = 0;
 447                     Vvdw[i]      = 0;
 448
 449                     /* Only spend time on A or B state if it is non-zero */
 450                     if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 451                     {
 452                         /* this section has to be inside the loop because of the dependence on sigma_pow */
 453                         rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 454                         rinvC          = pow(rpinvC, 1.0/sc_r_power);
 455                         rC             = 1.0/rinvC;
 456
 457                         rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 458                         rinvV          = pow(rpinvV, 1.0/sc_r_power);
 459                         rV             = 1.0/rinvV;
 460
 461                         if (do_tab)
 462                         {
 463                             rtC        = rC*tabscale;
 464                             n0         = rtC;
 465                             epsC       = rtC-n0;
 466                             eps2C      = epsC*epsC;
 467                             n1C        = tab_elemsize*n0;
 468
 469                             rtV        = rV*tabscale;
 470                             n0         = rtV;
 471                             epsV       = rtV-n0;
 472                             eps2V      = epsV*epsV;
 473                             n1V        = tab_elemsize*n0;
 474                         }
 475
 476                         /* With Ewald and soft-core we should put the cut-off on r,
 477                          * not on the soft-cored rC, as the real-space and
 478                          * reciprocal space contributions should (almost) cancel.
 479                          */
 480                         if (qq[i] != 0 &&
 481                             !(bExactElecCutoff &&
 482                               ((!bEwald && rC >= rcoulomb) ||
 483                                (bEwald && r >= rcoulomb))))
 484                         {
 485                             switch (icoul)
 486                             {
 487                                 case GMX_NBKERNEL_ELEC_COULOMB:
 488                                     /* simple cutoff */
 489                                     Vcoul[i]   = qq[i]*rinvC;
 490                                     FscalC[i]  = Vcoul[i];
 491                                     break;
 492
 493                                 case GMX_NBKERNEL_ELEC_EWALD:
 494                                     /* Ewald FEP is done only on the 1/r part */
 495                                     Vcoul[i]   = qq[i]*(rinvC - sh_ewald);
 496                                     FscalC[i]  = Vcoul[i];
 497                                     break;
 498
 499                                 case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 500                                     /* reaction-field */
 501                                     Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
 502                                     FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
 503                                     break;
 504
 505                                 case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 506                                     /* non-Ewald tabulated coulomb */
 507                                     nnn        = n1C;
 508                                     Y          = VFtab[nnn];
 509                                     F          = VFtab[nnn+1];
 510                                     Geps       = epsC*VFtab[nnn+2];
 511                                     Heps2      = eps2C*VFtab[nnn+3];
 512                                     Fp         = F+Geps+Heps2;
 513                                     VV         = Y+epsC*Fp;
 514                                     FF         = Fp+Geps+2.0*Heps2;
 515                                     Vcoul[i]   = qq[i]*VV;
 516                                     FscalC[i]  = -qq[i]*tabscale*FF*rC;
 517                                     break;
 518
 519                                 case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
 520                                     gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
 521                                     break;
 522
 523                                 case GMX_NBKERNEL_ELEC_NONE:
 524                                     FscalC[i]  = 0.0;
 525                                     Vcoul[i]   = 0.0;
 526                                     break;
 527
 528                                 default:
 529                                     gmx_incons("Invalid icoul in free energy kernel");
 530                                     break;
 531                             }
 532
 533                             if (fr->coulomb_modifier == eintmodPOTSWITCH)
 534                             {
 535                                 d                = rC-rswitch;
 536                                 d                = (d > 0.0) ? d : 0.0;
 537                                 d2               = d*d;
 538                                 sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 539                                 dsw              = d2*(swF2+d*(swF3+d*swF4));
 540
 541                                 Vcoul[i]  *= sw;
 542                                 FscalC[i]  = FscalC[i]*sw + Vcoul[i]*dsw;
 543                             }
 544                         }
 545
 546                         if ((c6[i] != 0 || c12[i] != 0) &&
 547                             !(bExactVdwCutoff &&
 548                               ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) ||
 549                                (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw))))
 550                         {
 551                             switch (ivdw)
 552                             {
 553                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
 554                                 case GMX_NBKERNEL_VDW_LJEWALD:
 555                                     /* cutoff LJ */
 556                                     if (sc_r_power == 6.0)
 557                                     {
 558                                         rinv6            = rpinvV;
 559                                     }
 560                                     else
 561                                     {
 562                                         rinv6            = pow(rinvV, 6.0);
 563                                     }
 564                                     Vvdw6            = c6[i]*rinv6;
 565                                     Vvdw12           = c12[i]*rinv6*rinv6;
 566                                     if (fr->vdw_modifier == eintmodPOTSHIFT)
 567                                     {
 568                                         Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
 569                                                              -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
 570                                     }
 571                                     else
 572                                     {
 573                                         Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
 574                                     }
 575                                     FscalV[i]        = Vvdw12 - Vvdw6;
 576                                     break;
 577
 578                                 case GMX_NBKERNEL_VDW_BUCKINGHAM:
 579                                     gmx_fatal(FARGS, "Buckingham free energy not supported.");
 580                                     break;
 581
 582                                 case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 583                                     /* Table LJ */
 584                                     nnn = n1V+4;
 585                                     /* dispersion */
 586                                     Y          = VFtab[nnn];
 587                                     F          = VFtab[nnn+1];
 588                                     Geps       = epsV*VFtab[nnn+2];
 589                                     Heps2      = eps2V*VFtab[nnn+3];
 590                                     Fp         = F+Geps+Heps2;
 591                                     VV         = Y+epsV*Fp;
 592                                     FF         = Fp+Geps+2.0*Heps2;
 593                                     Vvdw[i]   += c6[i]*VV;
 594                                     FscalV[i] -= c6[i]*tabscale*FF*rV;
 595
 596                                     /* repulsion */
 597                                     Y          = VFtab[nnn+4];
 598                                     F          = VFtab[nnn+5];
 599                                     Geps       = epsV*VFtab[nnn+6];
 600                                     Heps2      = eps2V*VFtab[nnn+7];
 601                                     Fp         = F+Geps+Heps2;
 602                                     VV         = Y+epsV*Fp;
 603                                     FF         = Fp+Geps+2.0*Heps2;
 604                                     Vvdw[i]   += c12[i]*VV;
 605                                     FscalV[i] -= c12[i]*tabscale*FF*rV;
 606                                     break;
 607
 608                                 case GMX_NBKERNEL_VDW_NONE:
 609                                     Vvdw[i]    = 0.0;
 610                                     FscalV[i]  = 0.0;
 611                                     break;
 612
 613                                 default:
 614                                     gmx_incons("Invalid ivdw in free energy kernel");
 615                                     break;
 616                             }
 617
 618                             if (fr->vdw_modifier == eintmodPOTSWITCH)
 619                             {
 620                                 d          = rV-rswitch;
 621                                 d          = (d > 0.0) ? d : 0.0;
 622                                 d2         = d*d;
 623                                 sw         = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 624                                 dsw        = d2*(swF2+d*(swF3+d*swF4));
 625
 626                                 Vvdw[i]   *= sw;
 627                                 FscalV[i]  = FscalV[i]*sw + Vvdw[i]*dsw;
 628
 629                                 FscalV[i]  = (rV < rvdw) ? FscalV[i] : 0.0;
 630                                 Vvdw[i]    = (rV < rvdw) ? Vvdw[i] : 0.0;
 631                             }
 632                         }
 633
 634                         /* FscalC (and FscalV) now contain: dV/drC * rC
 635                          * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 636                          * Further down we first multiply by r^p-2 and then by
 637                          * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 638                          */
 639                         FscalC[i] *= rpinvC;
 640                         FscalV[i] *= rpinvV;
 641                     }
 642                 }
 643
 644                 /* Assemble A and B states */
 645                 for (i = 0; i < NSTATES; i++)
 646                 {
 647                     vctot         += LFC[i]*Vcoul[i];
 648                     vvtot         += LFV[i]*Vvdw[i];
 649
 650                     Fscal         += LFC[i]*FscalC[i]*rpm2;
 651                     Fscal         += LFV[i]*FscalV[i]*rpm2;
 652
 653                     dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 654                     dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 655                 }
 656             }
 657             else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
 658             {
 659                 /* For excluded pairs, which are only in this pair list when
 660                  * using the Verlet scheme, we don't use soft-core.
 661                  * The group scheme also doesn't soft-core for these.
 662                  * As there is no singularity, there is no need for soft-core.
 663                  */
 664                 VV = krf*rsq - crf;
 665                 FF = -2.0*krf;
 666
 667                 if (ii == jnr)
 668                 {
 669                     VV *= 0.5;
 670                 }
 671
 672                 for (i = 0; i < NSTATES; i++)
 673                 {
 674                     vctot      += LFC[i]*qq[i]*VV;
 675                     Fscal      += LFC[i]*qq[i]*FF;
 676                     dvdl_coul  += DLF[i]*qq[i]*VV;
 677                 }
 678             }
 679
 680             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
 681                 !(bExactElecCutoff && r >= rcoulomb))
 682             {
 683                 /* Because we compute the soft-core normally,
 684                  * we have to remove the Ewald short range portion.
 685                  * Done outside of the states loop because this part
 686                  * doesn't depend on the scaled R.
 687                  */
 688                 real rs, frac, f_lr;
 689                 int  ri;
 690
 691                 rs     = rsq*rinv*tab_ewald_scale;
 692                 ri     = (int)rs;
 693                 frac   = rs - ri;
 694                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
 695                 FF     = f_lr*rinv;
 696                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
 697
 698                 if (ii == jnr)
 699                 {
 700                     VV   *= 0.5;
 701                 }
 702
 703                 for (i = 0; i < NSTATES; i++)
 704                 {
 705                     vctot      -= LFC[i]*qq[i]*VV;
 706                     Fscal      -= LFC[i]*qq[i]*FF;
 707                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
 708                 }
 709             }
 710
 711             if (ivdw == GMX_NBKERNEL_VDW_LJEWALD &&
 712                 !(bExactVdwCutoff && r >= rvdw))
 713             {
 714                 real rs, frac, f_lr;
 715                 int  ri;
 716
 717                 rs     = rsq*rinv*tab_ewald_scale;
 718                 ri     = (int)rs;
 719                 frac   = rs - ri;
 720                 f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
 721                 FF     = f_lr*rinv;
 722                 VV     = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr);
 723                 for (i = 0; i < NSTATES; i++)
 724                 {
 725                     vvtot      += LFV[i]*c6grid[i]*VV*(1.0/6.0);
 726                     Fscal      += LFV[i]*c6grid[i]*FF*(1.0/6.0);
 727                     dvdl_vdw   += (DLF[i]*c6grid[i])*VV*(1.0/6.0);
 728                 }
 729
 730             }
 731
 732             if (bDoForces)
 733             {
 734                 tx         = Fscal*dx;
 735                 ty         = Fscal*dy;
 736                 tz         = Fscal*dz;
 737                 fix        = fix + tx;
 738                 fiy        = fiy + ty;
 739                 fiz        = fiz + tz;
 740                 /* OpenMP atomics are expensive, but this kernels is also
 741                  * expensive, so we can take this hit, instead of using
 742                  * thread-local output buffers and extra reduction.
 743                  */
 744 #pragma omp atomic
 745                 f[j3]     -= tx;
 746 #pragma omp atomic
 747                 f[j3+1]   -= ty;
 748 #pragma omp atomic
 749                 f[j3+2]   -= tz;
 750             }
 751         }
 752
 753         /* The atomics below are expensive with many OpenMP threads.
 754          * Here unperturbed i-particles will usually only have a few
 755          * (perturbed) j-particles in the list. Thus with a buffered list
 756          * we can skip a significant number of i-reductions with a check.
 757          */
 758         if (npair_within_cutoff > 0)
 759         {
 760             if (bDoForces)
 761             {
 762 #pragma omp atomic
 763                 f[ii3]        += fix;
 764 #pragma omp atomic
 765                 f[ii3+1]      += fiy;
 766 #pragma omp atomic
 767                 f[ii3+2]      += fiz;
 768             }
 769             if (bDoShiftForces)
 770             {
 771 #pragma omp atomic
 772                 fshift[is3]   += fix;
 773 #pragma omp atomic
 774                 fshift[is3+1] += fiy;
 775 #pragma omp atomic
 776                 fshift[is3+2] += fiz;
 777             }
 778             if (bDoPotential)
 779             {
 780                 ggid               = gid[n];
 781 #pragma omp atomic
 782                 Vc[ggid]          += vctot;
 783 #pragma omp atomic
 784                 Vv[ggid]          += vvtot;
 785             }
 786         }
 787     }
 788
 789 #pragma omp atomic
 790     dvdl[efptCOUL]     += dvdl_coul;
 791  #pragma omp atomic
 792     dvdl[efptVDW]      += dvdl_vdw;
 793
 794     /* Estimate flops, average for free energy stuff:
 795      * 12  flops per outer iteration
 796      * 150 flops per inner iteration
 797      */
 798 #pragma omp atomic
 799     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 800 }
 801
 802 real
 803 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
 804                                real tabscale, real *vftab,
 805                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
 806                                real LFC[2], real LFV[2], real DLF[2],
 807                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
 808                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
 809                                real *velectot, real *vvdwtot, real *dvdl)
 810 {
 811     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 812     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
 813     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
 814     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 815     real       fscal_vdw[2], fscal_elec[2];
 816     real       velec[2], vvdw[2];
 817     int        i, ntab;
 818
 819     qq[0]    = qqA;
 820     qq[1]    = qqB;
 821     c6[0]    = c6A;
 822     c6[1]    = c6B;
 823     c12[0]   = c12A;
 824     c12[1]   = c12B;
 825
 826     if (sc_r_power == 6.0)
 827     {
 828         rpm2             = r2*r2;   /* r4 */
 829         rp               = rpm2*r2; /* r6 */
 830     }
 831     else if (sc_r_power == 48.0)
 832     {
 833         rp               = r2*r2*r2; /* r6 */
 834         rp               = rp*rp;    /* r12 */
 835         rp               = rp*rp;    /* r24 */
 836         rp               = rp*rp;    /* r48 */
 837         rpm2             = rp/r2;    /* r46 */
 838     }
 839     else
 840     {
 841         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
 842         rpm2           = rp/r2;
 843     }
 844
 845     /* Loop over state A(0) and B(1) */
 846     for (i = 0; i < 2; i++)
 847     {
 848         if ((c6[i] > 0) && (c12[i] > 0))
 849         {
 850             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 851              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 852              */
 853             sigma6[i]       = 0.5*c12[i]/c6[i];
 854             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
 855             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 856                what data to store externally.  Can't be fixed without larger scale changes, so not 5.0 */
 857             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 858             {
 859                 sigma6[i] = sigma6_min;
 860                 sigma2[i] = sigma2_min;
 861             }
 862         }
 863         else
 864         {
 865             sigma6[i]       = sigma6_def;
 866             sigma2[i]       = sigma2_def;
 867         }
 868         if (sc_r_power == 6.0)
 869         {
 870             sigma_pow[i]    = sigma6[i];
 871             sigma_powm2[i]  = sigma6[i]/sigma2[i];
 872         }
 873         else if (sc_r_power == 48.0)
 874         {
 875             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 876             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 877             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 878             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 879         }
 880         else
 881         {    /* not really supported as input, but in here for testing the general case*/
 882             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 883             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 884         }
 885     }
 886
 887     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 888     if ((c12[0] > 0) && (c12[1] > 0))
 889     {
 890         alpha_vdw_eff    = 0;
 891         alpha_coul_eff   = 0;
 892     }
 893     else
 894     {
 895         alpha_vdw_eff    = alpha_vdw;
 896         alpha_coul_eff   = alpha_coul;
 897     }
 898
 899     /* Loop over A and B states again */
 900     for (i = 0; i < 2; i++)
 901     {
 902         fscal_elec[i] = 0;
 903         fscal_vdw[i]  = 0;
 904         velec[i]      = 0;
 905         vvdw[i]       = 0;
 906
 907         /* Only spend time on A or B state if it is non-zero */
 908         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 909         {
 910             /* Coulomb */
 911             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 912             r_coul           = pow(rpinv, -1.0/sc_r_power);
 913
 914             /* Electrostatics table lookup data */
 915             rtab             = r_coul*tabscale;
 916             ntab             = rtab;
 917             eps              = rtab-ntab;
 918             eps2             = eps*eps;
 919             ntab             = 12*ntab;
 920             /* Electrostatics */
 921             Y                = vftab[ntab];
 922             F                = vftab[ntab+1];
 923             Geps             = eps*vftab[ntab+2];
 924             Heps2            = eps2*vftab[ntab+3];
 925             Fp               = F+Geps+Heps2;
 926             VV               = Y+eps*Fp;
 927             FF               = Fp+Geps+2.0*Heps2;
 928             velec[i]         = qq[i]*VV;
 929             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
 930
 931             /* Vdw */
 932             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 933             r_vdw            = pow(rpinv, -1.0/sc_r_power);
 934             /* Vdw table lookup data */
 935             rtab             = r_vdw*tabscale;
 936             ntab             = rtab;
 937             eps              = rtab-ntab;
 938             eps2             = eps*eps;
 939             ntab             = 12*ntab;
 940             /* Dispersion */
 941             Y                = vftab[ntab+4];
 942             F                = vftab[ntab+5];
 943             Geps             = eps*vftab[ntab+6];
 944             Heps2            = eps2*vftab[ntab+7];
 945             Fp               = F+Geps+Heps2;
 946             VV               = Y+eps*Fp;
 947             FF               = Fp+Geps+2.0*Heps2;
 948             vvdw[i]          = c6[i]*VV;
 949             fscal_vdw[i]     = -c6[i]*FF;
 950
 951             /* Repulsion */
 952             Y                = vftab[ntab+8];
 953             F                = vftab[ntab+9];
 954             Geps             = eps*vftab[ntab+10];
 955             Heps2            = eps2*vftab[ntab+11];
 956             Fp               = F+Geps+Heps2;
 957             VV               = Y+eps*Fp;
 958             FF               = Fp+Geps+2.0*Heps2;
 959             vvdw[i]         += c12[i]*VV;
 960             fscal_vdw[i]    -= c12[i]*FF;
 961             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
 962         }
 963     }
 964     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 965     /* Assemble A and B states */
 966     velecsum  = 0;
 967     vvdwsum   = 0;
 968     dvdl_coul = 0;
 969     dvdl_vdw  = 0;
 970     fscal     = 0;
 971     for (i = 0; i < 2; i++)
 972     {
 973         velecsum      += LFC[i]*velec[i];
 974         vvdwsum       += LFV[i]*vvdw[i];
 975
 976         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
 977
 978         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
 979         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
 980     }
 981
 982     dvdl[efptCOUL]     += dvdl_coul;
 983     dvdl[efptVDW]      += dvdl_vdw;
 984
 985     *velectot           = velecsum;
 986     *vvdwtot            = vvdwsum;
 987
 988     return fscal;
 989 }