src/gromacs/gmxlib/nonbonded/nb_free_energy.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42
  43 #include "vec.h"
  44 #include "typedefs.h"
  45 #include "nonbonded.h"
  46 #include "nb_kernel.h"
  47 #include "nrnb.h"
  48 #include "nb_free_energy.h"
  49
  50 void
  51 gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
  52                           rvec * gmx_restrict              xx,
  53                           rvec * gmx_restrict              ff,
  54                           t_forcerec * gmx_restrict        fr,
  55                           const t_mdatoms * gmx_restrict   mdatoms,
  56                           nb_kernel_data_t * gmx_restrict  kernel_data,
  57                           t_nrnb * gmx_restrict            nrnb)
  58 {
  59
  60 #define  STATE_A  0
  61 #define  STATE_B  1
  62 #define  NSTATES  2
  63     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
  64     real          shX, shY, shZ;
  65     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
  66     real          Vcoul[NSTATES], Vvdw[NSTATES];
  67     real          rinv6, r, rt, rtC, rtV;
  68     real          iqA, iqB;
  69     real          qq[NSTATES], vctot, krsq;
  70     int           ntiA, ntiB, tj[NSTATES];
  71     real          Vvdw6, Vvdw12, vvtot;
  72     real          ix, iy, iz, fix, fiy, fiz;
  73     real          dx, dy, dz, rsq, rinv;
  74     real          c6[NSTATES], c12[NSTATES];
  75     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
  76     double        dvdl_coul, dvdl_vdw;
  77     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
  78     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
  79     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
  80     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
  81     int           do_tab, tab_elemsize;
  82     int           n0, n1C, n1V, nnn;
  83     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
  84     int           icoul, ivdw;
  85     int           nri;
  86     const int *   iinr;
  87     const int *   jindex;
  88     const int *   jjnr;
  89     const int *   shift;
  90     const int *   gid;
  91     const int *   typeA;
  92     const int *   typeB;
  93     int           ntype;
  94     const real *  shiftvec;
  95     real          dvdl_part;
  96     real *        fshift;
  97     real          tabscale;
  98     const real *  VFtab;
  99     const real *  x;
 100     real *        f;
 101     real          facel, krf, crf;
 102     const real *  chargeA;
 103     const real *  chargeB;
 104     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
 105     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
 106     const real *  nbfp;
 107     real *        dvdl;
 108     real *        Vv;
 109     real *        Vc;
 110     gmx_bool      bDoForces;
 111     real          rcoulomb, rvdw, sh_invrc6;
 112     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
 113     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
 114     const real *  tab_ewald_F;
 115     const real *  tab_ewald_V;
 116     real          tab_ewald_scale, tab_ewald_halfsp;
 117
 118     x                   = xx[0];
 119     f                   = ff[0];
 120
 121     fshift              = fr->fshift[0];
 122
 123     nri                 = nlist->nri;
 124     iinr                = nlist->iinr;
 125     jindex              = nlist->jindex;
 126     jjnr                = nlist->jjnr;
 127     icoul               = nlist->ielec;
 128     ivdw                = nlist->ivdw;
 129     shift               = nlist->shift;
 130     gid                 = nlist->gid;
 131
 132     shiftvec            = fr->shift_vec[0];
 133     chargeA             = mdatoms->chargeA;
 134     chargeB             = mdatoms->chargeB;
 135     facel               = fr->epsfac;
 136     krf                 = fr->k_rf;
 137     crf                 = fr->c_rf;
 138     ewc                 = fr->ewaldcoeff_q;
 139     Vc                  = kernel_data->energygrp_elec;
 140     typeA               = mdatoms->typeA;
 141     typeB               = mdatoms->typeB;
 142     ntype               = fr->ntype;
 143     nbfp                = fr->nbfp;
 144     Vv                  = kernel_data->energygrp_vdw;
 145     tabscale            = kernel_data->table_elec_vdw->scale;
 146     VFtab               = kernel_data->table_elec_vdw->data;
 147     lambda_coul         = kernel_data->lambda[efptCOUL];
 148     lambda_vdw          = kernel_data->lambda[efptVDW];
 149     dvdl                = kernel_data->dvdl;
 150     alpha_coul          = fr->sc_alphacoul;
 151     alpha_vdw           = fr->sc_alphavdw;
 152     lam_power           = fr->sc_power;
 153     sc_r_power          = fr->sc_r_power;
 154     sigma6_def          = fr->sc_sigma6_def;
 155     sigma6_min          = fr->sc_sigma6_min;
 156     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
 157
 158     rcoulomb            = fr->rcoulomb;
 159     rvdw                = fr->rvdw;
 160     sh_invrc6           = fr->ic->sh_invrc6;
 161
 162     /* Ewald (PME) reciprocal force and energy quadratic spline tables */
 163     tab_ewald_F         = fr->ic->tabq_coul_F;
 164     tab_ewald_V         = fr->ic->tabq_coul_V;
 165     tab_ewald_scale     = fr->ic->tabq_scale;
 166     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
 167
 168     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
 169     {
 170         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
 171         rcutoff2        = rcutoff*rcutoff;
 172         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
 173         d               = rcutoff-rswitch;
 174         swV3            = -10.0/(d*d*d);
 175         swV4            =  15.0/(d*d*d*d);
 176         swV5            =  -6.0/(d*d*d*d*d);
 177         swF2            = -30.0/(d*d*d);
 178         swF3            =  60.0/(d*d*d*d);
 179         swF4            = -30.0/(d*d*d*d*d);
 180     }
 181     else
 182     {
 183         /* Stupid compilers dont realize these variables will not be used */
 184         rswitch         = 0.0;
 185         swV3            = 0.0;
 186         swV4            = 0.0;
 187         swV5            = 0.0;
 188         swF2            = 0.0;
 189         swF3            = 0.0;
 190         swF4            = 0.0;
 191     }
 192
 193     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
 194     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
 195
 196     /* fix compiler warnings */
 197     nj1   = 0;
 198     n1C   = n1V   = 0;
 199     epsC  = epsV  = 0;
 200     eps2C = eps2V = 0;
 201
 202     dvdl_coul  = 0;
 203     dvdl_vdw   = 0;
 204
 205     /* Lambda factor for state A, 1-lambda*/
 206     LFC[STATE_A] = 1.0 - lambda_coul;
 207     LFV[STATE_A] = 1.0 - lambda_vdw;
 208
 209     /* Lambda factor for state B, lambda*/
 210     LFC[STATE_B] = lambda_coul;
 211     LFV[STATE_B] = lambda_vdw;
 212
 213     /*derivative of the lambda factor for state A and B */
 214     DLF[STATE_A] = -1;
 215     DLF[STATE_B] = 1;
 216
 217     for (i = 0; i < NSTATES; i++)
 218     {
 219         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 220         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 221         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 222         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 223     }
 224     /* precalculate */
 225     sigma2_def = pow(sigma6_def, 1.0/3.0);
 226     sigma2_min = pow(sigma6_min, 1.0/3.0);
 227
 228     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 229
 230     do_tab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE ||
 231               ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 232
 233     /* we always use the combined table here */
 234     tab_elemsize = 12;
 235
 236     for (n = 0; (n < nri); n++)
 237     {
 238         is3              = 3*shift[n];
 239         shX              = shiftvec[is3];
 240         shY              = shiftvec[is3+1];
 241         shZ              = shiftvec[is3+2];
 242         nj0              = jindex[n];
 243         nj1              = jindex[n+1];
 244         ii               = iinr[n];
 245         ii3              = 3*ii;
 246         ix               = shX + x[ii3+0];
 247         iy               = shY + x[ii3+1];
 248         iz               = shZ + x[ii3+2];
 249         iqA              = facel*chargeA[ii];
 250         iqB              = facel*chargeB[ii];
 251         ntiA             = 2*ntype*typeA[ii];
 252         ntiB             = 2*ntype*typeB[ii];
 253         vctot            = 0;
 254         vvtot            = 0;
 255         fix              = 0;
 256         fiy              = 0;
 257         fiz              = 0;
 258
 259         for (k = nj0; (k < nj1); k++)
 260         {
 261             jnr              = jjnr[k];
 262             j3               = 3*jnr;
 263             dx               = ix - x[j3];
 264             dy               = iy - x[j3+1];
 265             dz               = iz - x[j3+2];
 266             rsq              = dx*dx+dy*dy+dz*dz;
 267             rinv             = gmx_invsqrt(rsq);
 268             r                = rsq*rinv;
 269             if (sc_r_power == 6.0)
 270             {
 271                 rpm2             = rsq*rsq;  /* r4 */
 272                 rp               = rpm2*rsq; /* r6 */
 273             }
 274             else if (sc_r_power == 48.0)
 275             {
 276                 rp               = rsq*rsq*rsq; /* r6 */
 277                 rp               = rp*rp;       /* r12 */
 278                 rp               = rp*rp;       /* r24 */
 279                 rp               = rp*rp;       /* r48 */
 280                 rpm2             = rp/rsq;      /* r46 */
 281             }
 282             else
 283             {
 284                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
 285                 rpm2           = rp/rsq;
 286             }
 287
 288             tj[STATE_A]      = ntiA+2*typeA[jnr];
 289             tj[STATE_B]      = ntiB+2*typeB[jnr];
 290             qq[STATE_A]      = iqA*chargeA[jnr];
 291             qq[STATE_B]      = iqB*chargeB[jnr];
 292
 293             for (i = 0; i < NSTATES; i++)
 294             {
 295
 296                 c6[i]              = nbfp[tj[i]];
 297                 c12[i]             = nbfp[tj[i]+1];
 298                 if ((c6[i] > 0) && (c12[i] > 0))
 299                 {
 300                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 301                     sigma6[i]       = 0.5*c12[i]/c6[i];
 302                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
 303                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 304                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 305                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 306                     {
 307                         sigma6[i] = sigma6_min;
 308                         sigma2[i] = sigma2_min;
 309                     }
 310                 }
 311                 else
 312                 {
 313                     sigma6[i]       = sigma6_def;
 314                     sigma2[i]       = sigma2_def;
 315                 }
 316                 if (sc_r_power == 6.0)
 317                 {
 318                     sigma_pow[i]    = sigma6[i];
 319                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
 320                 }
 321                 else if (sc_r_power == 48.0)
 322                 {
 323                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 324                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 325                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 326                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 327                 }
 328                 else
 329                 {    /* not really supported as input, but in here for testing the general case*/
 330                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 331                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 332                 }
 333             }
 334
 335             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 336             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 337             {
 338                 alpha_vdw_eff    = 0;
 339                 alpha_coul_eff   = 0;
 340             }
 341             else
 342             {
 343                 alpha_vdw_eff    = alpha_vdw;
 344                 alpha_coul_eff   = alpha_coul;
 345             }
 346
 347             for (i = 0; i < NSTATES; i++)
 348             {
 349                 FscalC[i]    = 0;
 350                 FscalV[i]    = 0;
 351                 Vcoul[i]     = 0;
 352                 Vvdw[i]      = 0;
 353
 354                 /* Only spend time on A or B state if it is non-zero */
 355                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 356                 {
 357
 358                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
 359                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 360                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
 361                     rC             = 1.0/rinvC;
 362
 363                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 364                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
 365                     rV             = 1.0/rinvV;
 366
 367                     if (do_tab)
 368                     {
 369                         rtC        = rC*tabscale;
 370                         n0         = rtC;
 371                         epsC       = rtC-n0;
 372                         eps2C      = epsC*epsC;
 373                         n1C        = tab_elemsize*n0;
 374
 375                         rtV        = rV*tabscale;
 376                         n0         = rtV;
 377                         epsV       = rtV-n0;
 378                         eps2V      = epsV*epsV;
 379                         n1V        = tab_elemsize*n0;
 380                     }
 381
 382                     /* With Ewald and soft-core we should put the cut-off on r,
 383                      * not on the soft-cored rC, as the real-space and
 384                      * reciprocal space contributions should (almost) cancel.
 385                      */
 386                     if (qq[i] != 0 &&
 387                         !(bExactElecCutoff &&
 388                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
 389                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
 390                     {
 391                         switch (icoul)
 392                         {
 393                             case GMX_NBKERNEL_ELEC_COULOMB:
 394                             case GMX_NBKERNEL_ELEC_EWALD:
 395                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
 396                                 Vcoul[i]   = qq[i]*rinvC;
 397                                 FscalC[i]  = Vcoul[i];
 398                                 break;
 399
 400                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 401                                 /* reaction-field */
 402                                 Vcoul[i]   = qq[i]*(rinvC + krf*rC*rC-crf);
 403                                 FscalC[i]  = qq[i]*(rinvC - 2.0*krf*rC*rC);
 404                                 break;
 405
 406                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 407                                 /* non-Ewald tabulated coulomb */
 408                                 nnn        = n1C;
 409                                 Y          = VFtab[nnn];
 410                                 F          = VFtab[nnn+1];
 411                                 Geps       = epsC*VFtab[nnn+2];
 412                                 Heps2      = eps2C*VFtab[nnn+3];
 413                                 Fp         = F+Geps+Heps2;
 414                                 VV         = Y+epsC*Fp;
 415                                 FF         = Fp+Geps+2.0*Heps2;
 416                                 Vcoul[i]   = qq[i]*VV;
 417                                 FscalC[i]  = -qq[i]*tabscale*FF*rC;
 418                                 break;
 419
 420                             case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
 421                                 gmx_fatal(FARGS, "Free energy and GB not implemented.\n");
 422                                 break;
 423
 424                             case GMX_NBKERNEL_ELEC_NONE:
 425                                 FscalC[i]  = 0.0;
 426                                 Vcoul[i]   = 0.0;
 427                                 break;
 428
 429                             default:
 430                                 gmx_incons("Invalid icoul in free energy kernel");
 431                                 break;
 432                         }
 433
 434                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
 435                         {
 436                             d                = rC-rswitch;
 437                             d                = (d > 0.0) ? d : 0.0;
 438                             d2               = d*d;
 439                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 440                             dsw              = d2*(swF2+d*(swF3+d*swF4));
 441
 442                             Vcoul[i]        *= sw;
 443                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
 444                         }
 445                     }
 446
 447                     if ((c6[i] != 0 || c12[i] != 0) &&
 448                         !(bExactVdwCutoff && rV >= rvdw))
 449                     {
 450                         switch (ivdw)
 451                         {
 452                             case GMX_NBKERNEL_VDW_LENNARDJONES:
 453                                 /* cutoff LJ */
 454                                 if (sc_r_power == 6.0)
 455                                 {
 456                                     rinv6            = rpinvV;
 457                                 }
 458                                 else
 459                                 {
 460                                     rinv6            = pow(rinvV, 6.0);
 461                                 }
 462                                 Vvdw6            = c6[i]*rinv6;
 463                                 Vvdw12           = c12[i]*rinv6*rinv6;
 464                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
 465                                 {
 466                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
 467                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
 468                                 }
 469                                 else
 470                                 {
 471                                     Vvdw[i]          = Vvdw12*(1.0/12.0) - Vvdw6*(1.0/6.0);
 472                                 }
 473                                 FscalV[i]        = Vvdw12 - Vvdw6;
 474                                 break;
 475
 476                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
 477                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
 478                                 break;
 479
 480                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 481                                 /* Table LJ */
 482                                 nnn = n1V+4;
 483                                 /* dispersion */
 484                                 Y          = VFtab[nnn];
 485                                 F          = VFtab[nnn+1];
 486                                 Geps       = epsV*VFtab[nnn+2];
 487                                 Heps2      = eps2V*VFtab[nnn+3];
 488                                 Fp         = F+Geps+Heps2;
 489                                 VV         = Y+epsV*Fp;
 490                                 FF         = Fp+Geps+2.0*Heps2;
 491                                 Vvdw[i]   += c6[i]*VV;
 492                                 FscalV[i] -= c6[i]*tabscale*FF*rV;
 493
 494                                 /* repulsion */
 495                                 Y          = VFtab[nnn+4];
 496                                 F          = VFtab[nnn+5];
 497                                 Geps       = epsV*VFtab[nnn+6];
 498                                 Heps2      = eps2V*VFtab[nnn+7];
 499                                 Fp         = F+Geps+Heps2;
 500                                 VV         = Y+epsV*Fp;
 501                                 FF         = Fp+Geps+2.0*Heps2;
 502                                 Vvdw[i]   += c12[i]*VV;
 503                                 FscalV[i] -= c12[i]*tabscale*FF*rV;
 504                                 break;
 505
 506                             case GMX_NBKERNEL_VDW_NONE:
 507                                 Vvdw[i]    = 0.0;
 508                                 FscalV[i]  = 0.0;
 509                                 break;
 510
 511                             default:
 512                                 gmx_incons("Invalid ivdw in free energy kernel");
 513                                 break;
 514                         }
 515
 516                         if (fr->vdw_modifier == eintmodPOTSWITCH)
 517                         {
 518                             d                = rV-rswitch;
 519                             d                = (d > 0.0) ? d : 0.0;
 520                             d2               = d*d;
 521                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 522                             dsw              = d2*(swF2+d*(swF3+d*swF4));
 523
 524                             Vvdw[i]         *= sw;
 525                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
 526
 527                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
 528                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
 529                         }
 530                     }
 531
 532                     /* FscalC (and FscalV) now contain: dV/drC * rC
 533                      * Now we multiply by rC^-p, so it will be: dV/drC * rC^1-p
 534                      * Further down we first multiply by r^p-2 and then by
 535                      * the vector r, which in total gives: dV/drC * (r/rC)^1-p
 536                      */
 537                     FscalC[i] *= rpinvC;
 538                     FscalV[i] *= rpinvV;
 539                 }
 540             }
 541
 542             Fscal = 0;
 543
 544             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
 545                 !(bExactElecCutoff && r >= rcoulomb))
 546             {
 547                 /* Because we compute the soft-core normally,
 548                  * we have to remove the Ewald short range portion.
 549                  * Done outside of the states loop because this part
 550                  * doesn't depend on the scaled R.
 551                  */
 552                 real rs, frac, f_lr;
 553                 int  ri;
 554
 555                 rs     = rsq*rinv*tab_ewald_scale;
 556                 ri     = (int)rs;
 557                 frac   = rs - ri;
 558                 f_lr   = (1 - frac)*tab_ewald_F[ri] + frac*tab_ewald_F[ri+1];
 559                 FF     = f_lr*rinv;
 560                 VV     = tab_ewald_V[ri] - tab_ewald_halfsp*frac*(tab_ewald_F[ri] + f_lr);
 561
 562                 for (i = 0; i < NSTATES; i++)
 563                 {
 564                     vctot      -= LFC[i]*qq[i]*VV;
 565                     Fscal      -= LFC[i]*qq[i]*FF;
 566                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
 567                 }
 568             }
 569
 570             /* Assemble A and B states */
 571             for (i = 0; i < NSTATES; i++)
 572             {
 573                 vctot         += LFC[i]*Vcoul[i];
 574                 vvtot         += LFV[i]*Vvdw[i];
 575
 576                 Fscal         += LFC[i]*FscalC[i]*rpm2;
 577                 Fscal         += LFV[i]*FscalV[i]*rpm2;
 578
 579                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 580                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 581             }
 582
 583             if (bDoForces)
 584             {
 585                 tx         = Fscal*dx;
 586                 ty         = Fscal*dy;
 587                 tz         = Fscal*dz;
 588                 fix        = fix + tx;
 589                 fiy        = fiy + ty;
 590                 fiz        = fiz + tz;
 591                 f[j3]      = f[j3]   - tx;
 592                 f[j3+1]    = f[j3+1] - ty;
 593                 f[j3+2]    = f[j3+2] - tz;
 594             }
 595         }
 596
 597         if (bDoForces)
 598         {
 599             f[ii3]         = f[ii3]        + fix;
 600             f[ii3+1]       = f[ii3+1]      + fiy;
 601             f[ii3+2]       = f[ii3+2]      + fiz;
 602             fshift[is3]    = fshift[is3]   + fix;
 603             fshift[is3+1]  = fshift[is3+1] + fiy;
 604             fshift[is3+2]  = fshift[is3+2] + fiz;
 605         }
 606         ggid               = gid[n];
 607         Vc[ggid]           = Vc[ggid] + vctot;
 608         Vv[ggid]           = Vv[ggid] + vvtot;
 609     }
 610
 611     dvdl[efptCOUL]     += dvdl_coul;
 612     dvdl[efptVDW]      += dvdl_vdw;
 613
 614     /* Estimate flops, average for free energy stuff:
 615      * 12  flops per outer iteration
 616      * 150 flops per inner iteration
 617      */
 618     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 619 }
 620
 621 real
 622 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
 623                                real tabscale, real *vftab,
 624                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
 625                                real LFC[2], real LFV[2], real DLF[2],
 626                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
 627                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
 628                                real *velectot, real *vvdwtot, real *dvdl)
 629 {
 630     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 631     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
 632     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
 633     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 634     real       fscal_vdw[2], fscal_elec[2];
 635     real       velec[2], vvdw[2];
 636     int        i, ntab;
 637
 638     qq[0]    = qqA;
 639     qq[1]    = qqB;
 640     c6[0]    = c6A;
 641     c6[1]    = c6B;
 642     c12[0]   = c12A;
 643     c12[1]   = c12B;
 644
 645     if (sc_r_power == 6.0)
 646     {
 647         rpm2             = r2*r2;   /* r4 */
 648         rp               = rpm2*r2; /* r6 */
 649     }
 650     else if (sc_r_power == 48.0)
 651     {
 652         rp               = r2*r2*r2; /* r6 */
 653         rp               = rp*rp;    /* r12 */
 654         rp               = rp*rp;    /* r24 */
 655         rp               = rp*rp;    /* r48 */
 656         rpm2             = rp/r2;    /* r46 */
 657     }
 658     else
 659     {
 660         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
 661         rpm2           = rp/r2;
 662     }
 663
 664     /* Loop over state A(0) and B(1) */
 665     for (i = 0; i < 2; i++)
 666     {
 667         if ((c6[i] > 0) && (c12[i] > 0))
 668         {
 669             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 670              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 671              */
 672             sigma6[i]       = 0.5*c12[i]/c6[i];
 673             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
 674             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 675                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 676             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 677             {
 678                 sigma6[i] = sigma6_min;
 679                 sigma2[i] = sigma2_min;
 680             }
 681         }
 682         else
 683         {
 684             sigma6[i]       = sigma6_def;
 685             sigma2[i]       = sigma2_def;
 686         }
 687         if (sc_r_power == 6.0)
 688         {
 689             sigma_pow[i]    = sigma6[i];
 690             sigma_powm2[i]  = sigma6[i]/sigma2[i];
 691         }
 692         else if (sc_r_power == 48.0)
 693         {
 694             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 695             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 696             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 697             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 698         }
 699         else
 700         {    /* not really supported as input, but in here for testing the general case*/
 701             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 702             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 703         }
 704     }
 705
 706     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 707     if ((c12[0] > 0) && (c12[1] > 0))
 708     {
 709         alpha_vdw_eff    = 0;
 710         alpha_coul_eff   = 0;
 711     }
 712     else
 713     {
 714         alpha_vdw_eff    = alpha_vdw;
 715         alpha_coul_eff   = alpha_coul;
 716     }
 717
 718     /* Loop over A and B states again */
 719     for (i = 0; i < 2; i++)
 720     {
 721         fscal_elec[i] = 0;
 722         fscal_vdw[i]  = 0;
 723         velec[i]      = 0;
 724         vvdw[i]       = 0;
 725
 726         /* Only spend time on A or B state if it is non-zero */
 727         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 728         {
 729             /* Coulomb */
 730             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 731             r_coul           = pow(rpinv, -1.0/sc_r_power);
 732
 733             /* Electrostatics table lookup data */
 734             rtab             = r_coul*tabscale;
 735             ntab             = rtab;
 736             eps              = rtab-ntab;
 737             eps2             = eps*eps;
 738             ntab             = 12*ntab;
 739             /* Electrostatics */
 740             Y                = vftab[ntab];
 741             F                = vftab[ntab+1];
 742             Geps             = eps*vftab[ntab+2];
 743             Heps2            = eps2*vftab[ntab+3];
 744             Fp               = F+Geps+Heps2;
 745             VV               = Y+eps*Fp;
 746             FF               = Fp+Geps+2.0*Heps2;
 747             velec[i]         = qq[i]*VV;
 748             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
 749
 750             /* Vdw */
 751             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 752             r_vdw            = pow(rpinv, -1.0/sc_r_power);
 753             /* Vdw table lookup data */
 754             rtab             = r_vdw*tabscale;
 755             ntab             = rtab;
 756             eps              = rtab-ntab;
 757             eps2             = eps*eps;
 758             ntab             = 12*ntab;
 759             /* Dispersion */
 760             Y                = vftab[ntab+4];
 761             F                = vftab[ntab+5];
 762             Geps             = eps*vftab[ntab+6];
 763             Heps2            = eps2*vftab[ntab+7];
 764             Fp               = F+Geps+Heps2;
 765             VV               = Y+eps*Fp;
 766             FF               = Fp+Geps+2.0*Heps2;
 767             vvdw[i]          = c6[i]*VV;
 768             fscal_vdw[i]     = -c6[i]*FF;
 769
 770             /* Repulsion */
 771             Y                = vftab[ntab+8];
 772             F                = vftab[ntab+9];
 773             Geps             = eps*vftab[ntab+10];
 774             Heps2            = eps2*vftab[ntab+11];
 775             Fp               = F+Geps+Heps2;
 776             VV               = Y+eps*Fp;
 777             FF               = Fp+Geps+2.0*Heps2;
 778             vvdw[i]         += c12[i]*VV;
 779             fscal_vdw[i]    -= c12[i]*FF;
 780             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
 781         }
 782     }
 783     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 784     /* Assemble A and B states */
 785     velecsum  = 0;
 786     vvdwsum   = 0;
 787     dvdl_coul = 0;
 788     dvdl_vdw  = 0;
 789     fscal     = 0;
 790     for (i = 0; i < 2; i++)
 791     {
 792         velecsum      += LFC[i]*velec[i];
 793         vvdwsum       += LFV[i]*vvdw[i];
 794
 795         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
 796
 797         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
 798         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
 799     }
 800
 801     dvdl[efptCOUL]     += dvdl_coul;
 802     dvdl[efptVDW]      += dvdl_vdw;
 803
 804     *velectot           = velecsum;
 805     *vvdwtot            = vvdwsum;
 806
 807     return fscal;
 808 }