src/gromacs/gmxlib/nonbonded/nb_free_energy.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROningen Mixture of Alchemy and Childrens' Stories
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41
  42 #include "vec.h"
  43 #include "typedefs.h"
  44 #include "nonbonded.h"
  45 #include "nb_kernel.h"
  46 #include "nrnb.h"
  47
  48 void
  49 gmx_nb_free_energy_kernel(t_nblist *                nlist,
  50                           rvec *                    xx,
  51                           rvec *                    ff,
  52                           t_forcerec *              fr,
  53                           t_mdatoms *               mdatoms,
  54                           nb_kernel_data_t *        kernel_data,
  55                           t_nrnb *                  nrnb)
  56 {
  57
  58 #define  STATE_A  0
  59 #define  STATE_B  1
  60 #define  NSTATES  2
  61     int           i, j, n, ii, is3, ii3, k, nj0, nj1, jnr, j3, ggid;
  62     real          shX, shY, shZ;
  63     real          Fscal, FscalC[NSTATES], FscalV[NSTATES], tx, ty, tz;
  64     real          Vcoul[NSTATES], Vvdw[NSTATES];
  65     real          rinv6, r, rt, rtC, rtV;
  66     real          iqA, iqB;
  67     real          qq[NSTATES], vctot, krsq;
  68     int           ntiA, ntiB, tj[NSTATES];
  69     real          Vvdw6, Vvdw12, vvtot;
  70     real          ix, iy, iz, fix, fiy, fiz;
  71     real          dx, dy, dz, rsq, rinv;
  72     real          c6[NSTATES], c12[NSTATES];
  73     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
  74     double        dvdl_coul, dvdl_vdw;
  75     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
  76     real          sigma6[NSTATES], alpha_vdw_eff, alpha_coul_eff, sigma2_def, sigma2_min;
  77     real          rp, rpm2, rC, rV, rinvC, rpinvC, rinvV, rpinvV;
  78     real          sigma2[NSTATES], sigma_pow[NSTATES], sigma_powm2[NSTATES], rs, rs2;
  79     int           do_coultab, do_vdwtab, do_tab, tab_elemsize;
  80     int           n0, n1C, n1V, nnn;
  81     real          Y, F, G, H, Fp, Geps, Heps2, epsC, eps2C, epsV, eps2V, VV, FF;
  82     int           icoul, ivdw;
  83     int           nri;
  84     int *         iinr;
  85     int *         jindex;
  86     int *         jjnr;
  87     int *         shift;
  88     int *         gid;
  89     int *         typeA;
  90     int *         typeB;
  91     int           ntype;
  92     real *        shiftvec;
  93     real          dvdl_part;
  94     real *        fshift;
  95     real          tabscale;
  96     real *        VFtab;
  97     real *        x;
  98     real *        f;
  99     real          facel, krf, crf;
 100     real *        chargeA;
 101     real *        chargeB;
 102     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
 103     real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
 104     real *        nbfp;
 105     real *        dvdl;
 106     real *        Vv;
 107     real *        Vc;
 108     gmx_bool      bDoForces;
 109     real          rcoulomb, rvdw, sh_invrc6;
 110     gmx_bool      bExactElecCutoff, bExactVdwCutoff;
 111     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
 112
 113     x                   = xx[0];
 114     f                   = ff[0];
 115
 116     fshift              = fr->fshift[0];
 117     Vc                  = kernel_data->energygrp_elec;
 118     Vv                  = kernel_data->energygrp_vdw;
 119     tabscale            = kernel_data->table_elec_vdw->scale;
 120     VFtab               = kernel_data->table_elec_vdw->data;
 121
 122     nri                 = nlist->nri;
 123     iinr                = nlist->iinr;
 124     jindex              = nlist->jindex;
 125     jjnr                = nlist->jjnr;
 126     icoul               = nlist->ielec;
 127     ivdw                = nlist->ivdw;
 128     shift               = nlist->shift;
 129     gid                 = nlist->gid;
 130
 131     shiftvec            = fr->shift_vec[0];
 132     chargeA             = mdatoms->chargeA;
 133     chargeB             = mdatoms->chargeB;
 134     facel               = fr->epsfac;
 135     krf                 = fr->k_rf;
 136     crf                 = fr->c_rf;
 137     ewc                 = fr->ewaldcoeff;
 138     Vc                  = kernel_data->energygrp_elec;
 139     typeA               = mdatoms->typeA;
 140     typeB               = mdatoms->typeB;
 141     ntype               = fr->ntype;
 142     nbfp                = fr->nbfp;
 143     Vv                  = kernel_data->energygrp_vdw;
 144     tabscale            = kernel_data->table_elec_vdw->scale;
 145     VFtab               = kernel_data->table_elec_vdw->data;
 146     lambda_coul         = kernel_data->lambda[efptCOUL];
 147     lambda_vdw          = kernel_data->lambda[efptVDW];
 148     dvdl                = kernel_data->dvdl;
 149     alpha_coul          = fr->sc_alphacoul;
 150     alpha_vdw           = fr->sc_alphavdw;
 151     lam_power           = fr->sc_power;
 152     sc_r_power          = fr->sc_r_power;
 153     sigma6_def          = fr->sc_sigma6_def;
 154     sigma6_min          = fr->sc_sigma6_min;
 155     bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
 156
 157     rcoulomb            = fr->rcoulomb;
 158     rvdw                = fr->rvdw;
 159     sh_invrc6           = fr->ic->sh_invrc6;
 160
 161     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
 162     {
 163         rcutoff         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
 164         rcutoff2        = rcutoff*rcutoff;
 165         rswitch         = (fr->coulomb_modifier == eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
 166         d               = rcutoff-rswitch;
 167         swV3            = -10.0/(d*d*d);
 168         swV4            =  15.0/(d*d*d*d);
 169         swV5            =  -6.0/(d*d*d*d*d);
 170         swF2            = -30.0/(d*d*d);
 171         swF3            =  60.0/(d*d*d*d);
 172         swF4            = -30.0/(d*d*d*d*d);
 173     }
 174     else
 175     {
 176         /* Stupid compilers dont realize these variables will not be used */
 177         rswitch         = 0.0;
 178         swV3            = 0.0;
 179         swV4            = 0.0;
 180         swV5            = 0.0;
 181         swF2            = 0.0;
 182         swF3            = 0.0;
 183         swF4            = 0.0;
 184     }
 185
 186     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
 187     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
 188
 189     /* fix compiler warnings */
 190     nj1   = 0;
 191     n1C   = n1V   = 0;
 192     epsC  = epsV  = 0;
 193     eps2C = eps2V = 0;
 194
 195     dvdl_coul  = 0;
 196     dvdl_vdw   = 0;
 197
 198     /* Lambda factor for state A, 1-lambda*/
 199     LFC[STATE_A] = 1.0 - lambda_coul;
 200     LFV[STATE_A] = 1.0 - lambda_vdw;
 201
 202     /* Lambda factor for state B, lambda*/
 203     LFC[STATE_B] = lambda_coul;
 204     LFV[STATE_B] = lambda_vdw;
 205
 206     /*derivative of the lambda factor for state A and B */
 207     DLF[STATE_A] = -1;
 208     DLF[STATE_B] = 1;
 209
 210     for (i = 0; i < NSTATES; i++)
 211     {
 212         lfac_coul[i]  = (lam_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 213         dlfac_coul[i] = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFC[i]) : 1);
 214         lfac_vdw[i]   = (lam_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 215         dlfac_vdw[i]  = DLF[i]*lam_power/sc_r_power*(lam_power == 2 ? (1-LFV[i]) : 1);
 216     }
 217     /* precalculate */
 218     sigma2_def = pow(sigma6_def, 1.0/3.0);
 219     sigma2_min = pow(sigma6_min, 1.0/3.0);
 220
 221     /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
 222
 223     do_coultab = (icoul == GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
 224     do_vdwtab  = (ivdw == GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
 225
 226     do_tab = do_coultab || do_vdwtab;
 227
 228     /* we always use the combined table here */
 229     tab_elemsize = 12;
 230
 231     for (n = 0; (n < nri); n++)
 232     {
 233         is3              = 3*shift[n];
 234         shX              = shiftvec[is3];
 235         shY              = shiftvec[is3+1];
 236         shZ              = shiftvec[is3+2];
 237         nj0              = jindex[n];
 238         nj1              = jindex[n+1];
 239         ii               = iinr[n];
 240         ii3              = 3*ii;
 241         ix               = shX + x[ii3+0];
 242         iy               = shY + x[ii3+1];
 243         iz               = shZ + x[ii3+2];
 244         iqA              = facel*chargeA[ii];
 245         iqB              = facel*chargeB[ii];
 246         ntiA             = 2*ntype*typeA[ii];
 247         ntiB             = 2*ntype*typeB[ii];
 248         vctot            = 0;
 249         vvtot            = 0;
 250         fix              = 0;
 251         fiy              = 0;
 252         fiz              = 0;
 253
 254         for (k = nj0; (k < nj1); k++)
 255         {
 256             jnr              = jjnr[k];
 257             j3               = 3*jnr;
 258             dx               = ix - x[j3];
 259             dy               = iy - x[j3+1];
 260             dz               = iz - x[j3+2];
 261             rsq              = dx*dx+dy*dy+dz*dz;
 262             rinv             = gmx_invsqrt(rsq);
 263             r                = rsq*rinv;
 264             if (sc_r_power == 6.0)
 265             {
 266                 rpm2             = rsq*rsq;  /* r4 */
 267                 rp               = rpm2*rsq; /* r6 */
 268             }
 269             else if (sc_r_power == 48.0)
 270             {
 271                 rp               = rsq*rsq*rsq; /* r6 */
 272                 rp               = rp*rp;       /* r12 */
 273                 rp               = rp*rp;       /* r24 */
 274                 rp               = rp*rp;       /* r48 */
 275                 rpm2             = rp/rsq;      /* r46 */
 276             }
 277             else
 278             {
 279                 rp             = pow(r, sc_r_power);  /* not currently supported as input, but can handle it */
 280                 rpm2           = rp/rsq;
 281             }
 282
 283             tj[STATE_A]      = ntiA+2*typeA[jnr];
 284             tj[STATE_B]      = ntiB+2*typeB[jnr];
 285             qq[STATE_A]      = iqA*chargeA[jnr];
 286             qq[STATE_B]      = iqB*chargeB[jnr];
 287
 288             for (i = 0; i < NSTATES; i++)
 289             {
 290
 291                 c6[i]              = nbfp[tj[i]];
 292                 c12[i]             = nbfp[tj[i]+1];
 293                 if ((c6[i] > 0) && (c12[i] > 0))
 294                 {
 295                     /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
 296                     sigma6[i]       = 0.5*c12[i]/c6[i];
 297                     sigma2[i]       = pow(sigma6[i], 1.0/3.0);
 298                     /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 299                        what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 300                     if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 301                     {
 302                         sigma6[i] = sigma6_min;
 303                         sigma2[i] = sigma2_min;
 304                     }
 305                 }
 306                 else
 307                 {
 308                     sigma6[i]       = sigma6_def;
 309                     sigma2[i]       = sigma2_def;
 310                 }
 311                 if (sc_r_power == 6.0)
 312                 {
 313                     sigma_pow[i]    = sigma6[i];
 314                     sigma_powm2[i]  = sigma6[i]/sigma2[i];
 315                 }
 316                 else if (sc_r_power == 48.0)
 317                 {
 318                     sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 319                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 320                     sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 321                     sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 322                 }
 323                 else
 324                 {    /* not really supported as input, but in here for testing the general case*/
 325                     sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 326                     sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 327                 }
 328             }
 329
 330             /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 331             if ((c12[STATE_A] > 0) && (c12[STATE_B] > 0))
 332             {
 333                 alpha_vdw_eff    = 0;
 334                 alpha_coul_eff   = 0;
 335             }
 336             else
 337             {
 338                 alpha_vdw_eff    = alpha_vdw;
 339                 alpha_coul_eff   = alpha_coul;
 340             }
 341
 342             for (i = 0; i < NSTATES; i++)
 343             {
 344                 FscalC[i]    = 0;
 345                 FscalV[i]    = 0;
 346                 Vcoul[i]     = 0;
 347                 Vvdw[i]      = 0;
 348
 349                 /* Only spend time on A or B state if it is non-zero */
 350                 if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 351                 {
 352
 353                     /* this section has to be inside the loop becaue of the dependence on sigma_pow */
 354                     rpinvC         = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 355                     rinvC          = pow(rpinvC, 1.0/sc_r_power);
 356                     rC             = 1.0/rinvC;
 357
 358                     rpinvV         = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 359                     rinvV          = pow(rpinvV, 1.0/sc_r_power);
 360                     rV             = 1.0/rinvV;
 361
 362                     if (do_tab)
 363                     {
 364                         rtC        = rC*tabscale;
 365                         n0         = rtC;
 366                         epsC       = rtC-n0;
 367                         eps2C      = epsC*epsC;
 368                         n1C        = tab_elemsize*n0;
 369
 370                         rtV        = rV*tabscale;
 371                         n0         = rtV;
 372                         epsV       = rtV-n0;
 373                         eps2V      = epsV*epsV;
 374                         n1V        = tab_elemsize*n0;
 375                     }
 376
 377                     /* With Ewald and soft-core we should put the cut-off on r,
 378                      * not on the soft-cored rC, as the real-space and
 379                      * reciprocal space contributions should (almost) cancel.
 380                      */
 381                     if (qq[i] != 0 &&
 382                         !(bExactElecCutoff &&
 383                           ((icoul != GMX_NBKERNEL_ELEC_EWALD && rC >= rcoulomb) ||
 384                            (icoul == GMX_NBKERNEL_ELEC_EWALD && r >= rcoulomb))))
 385                     {
 386                         switch (icoul)
 387                         {
 388                             case GMX_NBKERNEL_ELEC_COULOMB:
 389                             case GMX_NBKERNEL_ELEC_EWALD:
 390                                 /* simple cutoff (yes, ewald is done all on direct space for free energy) */
 391                                 Vcoul[i]   = qq[i]*rinvC;
 392                                 FscalC[i]  = Vcoul[i]*rpinvC;
 393                                 break;
 394
 395                             case GMX_NBKERNEL_ELEC_REACTIONFIELD:
 396                                 /* reaction-field */
 397                                 Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
 398                                 FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
 399                                 break;
 400
 401                             case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
 402                                 /* non-Ewald tabulated coulomb */
 403                                 nnn        = n1C;
 404                                 Y          = VFtab[nnn];
 405                                 F          = VFtab[nnn+1];
 406                                 Geps       = epsC*VFtab[nnn+2];
 407                                 Heps2      = eps2C*VFtab[nnn+3];
 408                                 Fp         = F+Geps+Heps2;
 409                                 VV         = Y+epsC*Fp;
 410                                 FF         = Fp+Geps+2.0*Heps2;
 411                                 Vcoul[i]   = qq[i]*VV;
 412                                 FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
 413                                 break;
 414
 415                             default:
 416                                 FscalC[i]  = 0.0;
 417                                 Vcoul[i]   = 0.0;
 418                                 break;
 419                         }
 420
 421                         if (fr->coulomb_modifier == eintmodPOTSWITCH)
 422                         {
 423                             d                = rC-rswitch;
 424                             d                = (d > 0.0) ? d : 0.0;
 425                             d2               = d*d;
 426                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 427                             dsw              = d2*(swF2+d*(swF3+d*swF4));
 428
 429                             Vcoul[i]        *= sw;
 430                             FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
 431                         }
 432                     }
 433
 434                     if ((c6[i] != 0 || c12[i] != 0) &&
 435                         !(bExactVdwCutoff && rV >= rvdw))
 436                     {
 437                         switch (ivdw)
 438                         {
 439                             case GMX_NBKERNEL_VDW_LENNARDJONES:
 440                                 /* cutoff LJ */
 441                                 if (sc_r_power == 6.0)
 442                                 {
 443                                     rinv6            = rpinvV;
 444                                 }
 445                                 else
 446                                 {
 447                                     rinv6            = pow(rinvV, 6.0);
 448                                 }
 449                                 Vvdw6            = c6[i]*rinv6;
 450                                 Vvdw12           = c12[i]*rinv6*rinv6;
 451                                 if (fr->vdw_modifier == eintmodPOTSHIFT)
 452                                 {
 453                                     Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
 454                                                          -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
 455                                 }
 456                                 else
 457                                 {
 458                                     Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
 459                                 }
 460                                 FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
 461                                 break;
 462
 463                             case GMX_NBKERNEL_VDW_BUCKINGHAM:
 464                                 gmx_fatal(FARGS, "Buckingham free energy not supported.");
 465                                 break;
 466
 467                             case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
 468                                 /* Table LJ */
 469                                 nnn = n1V+4;
 470                                 /* dispersion */
 471                                 Y          = VFtab[nnn];
 472                                 F          = VFtab[nnn+1];
 473                                 Geps       = epsV*VFtab[nnn+2];
 474                                 Heps2      = eps2V*VFtab[nnn+3];
 475                                 Fp         = F+Geps+Heps2;
 476                                 VV         = Y+epsV*Fp;
 477                                 FF         = Fp+Geps+2.0*Heps2;
 478                                 Vvdw[i]   += c6[i]*VV;
 479                                 FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
 480
 481                                 /* repulsion */
 482                                 Y          = VFtab[nnn+4];
 483                                 F          = VFtab[nnn+5];
 484                                 Geps       = epsV*VFtab[nnn+6];
 485                                 Heps2      = eps2V*VFtab[nnn+7];
 486                                 Fp         = F+Geps+Heps2;
 487                                 VV         = Y+epsV*Fp;
 488                                 FF         = Fp+Geps+2.0*Heps2;
 489                                 Vvdw[i]   += c12[i]*VV;
 490                                 FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
 491                                 break;
 492
 493                             default:
 494                                 Vvdw[i]    = 0.0;
 495                                 FscalV[i]  = 0.0;
 496                                 break;
 497                         }
 498
 499                         if (fr->vdw_modifier == eintmodPOTSWITCH)
 500                         {
 501                             d                = rV-rswitch;
 502                             d                = (d > 0.0) ? d : 0.0;
 503                             d2               = d*d;
 504                             sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
 505                             dsw              = d2*(swF2+d*(swF3+d*swF4));
 506
 507                             Vvdw[i]         *= sw;
 508                             FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
 509
 510                             FscalV[i]        = (rV < rvdw) ? FscalV[i] : 0.0;
 511                             Vvdw[i]          = (rV < rvdw) ? Vvdw[i] : 0.0;
 512                         }
 513                     }
 514                 }
 515             }
 516
 517             Fscal = 0;
 518
 519             if (icoul == GMX_NBKERNEL_ELEC_EWALD &&
 520                 !(bExactElecCutoff && r >= rcoulomb))
 521             {
 522                 /* because we compute the softcore normally,
 523                    we have to remove the ewald short range portion. Done outside of
 524                    the states loop because this part doesn't depend on the scaled R */
 525
 526 #ifdef GMX_DOUBLE
 527                 /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
 528 #define         R_ERF_R_INACC 0.006
 529 #else
 530                 /* Relative accuracy at R_ERF_R_INACC of 2e-5 */
 531 #define         R_ERF_R_INACC 0.1
 532 #endif
 533                 if (ewc*r > R_ERF_R_INACC)
 534                 {
 535                     VV    = gmx_erf(ewc*r)*rinv;
 536                     FF    = rinv*rinv*(VV - ewc*M_2_SQRTPI*exp(-ewc*ewc*rsq));
 537                 }
 538                 else
 539                 {
 540                     VV    = ewc*M_2_SQRTPI;
 541                     FF    = ewc*ewc*ewc*M_2_SQRTPI*(2.0/3.0 - 0.4*ewc*ewc*rsq);
 542                 }
 543
 544                 for (i = 0; i < NSTATES; i++)
 545                 {
 546                     vctot      -= LFC[i]*qq[i]*VV;
 547                     Fscal      -= LFC[i]*qq[i]*FF;
 548                     dvdl_coul  -= (DLF[i]*qq[i])*VV;
 549                 }
 550             }
 551
 552             /* Assemble A and B states */
 553             for (i = 0; i < NSTATES; i++)
 554             {
 555                 vctot         += LFC[i]*Vcoul[i];
 556                 vvtot         += LFV[i]*Vvdw[i];
 557
 558                 Fscal         += LFC[i]*FscalC[i]*rpm2;
 559                 Fscal         += LFV[i]*FscalV[i]*rpm2;
 560
 561                 dvdl_coul     += Vcoul[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*FscalC[i]*sigma_pow[i];
 562                 dvdl_vdw      += Vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*FscalV[i]*sigma_pow[i];
 563             }
 564
 565             if (bDoForces)
 566             {
 567                 tx         = Fscal*dx;
 568                 ty         = Fscal*dy;
 569                 tz         = Fscal*dz;
 570                 fix        = fix + tx;
 571                 fiy        = fiy + ty;
 572                 fiz        = fiz + tz;
 573                 f[j3]      = f[j3]   - tx;
 574                 f[j3+1]    = f[j3+1] - ty;
 575                 f[j3+2]    = f[j3+2] - tz;
 576             }
 577         }
 578
 579         if (bDoForces)
 580         {
 581             f[ii3]         = f[ii3]        + fix;
 582             f[ii3+1]       = f[ii3+1]      + fiy;
 583             f[ii3+2]       = f[ii3+2]      + fiz;
 584             fshift[is3]    = fshift[is3]   + fix;
 585             fshift[is3+1]  = fshift[is3+1] + fiy;
 586             fshift[is3+2]  = fshift[is3+2] + fiz;
 587         }
 588         ggid               = gid[n];
 589         Vc[ggid]           = Vc[ggid] + vctot;
 590         Vv[ggid]           = Vv[ggid] + vvtot;
 591     }
 592
 593     dvdl[efptCOUL]     += dvdl_coul;
 594     dvdl[efptVDW]      += dvdl_vdw;
 595
 596     /* Estimate flops, average for free energy stuff:
 597      * 12  flops per outer iteration
 598      * 150 flops per inner iteration
 599      */
 600     inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
 601 }
 602
 603 real
 604 nb_free_energy_evaluate_single(real r2, real sc_r_power, real alpha_coul, real alpha_vdw,
 605                                real tabscale, real *vftab,
 606                                real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
 607                                real LFC[2], real LFV[2], real DLF[2],
 608                                real lfac_coul[2], real lfac_vdw[2], real dlfac_coul[2], real dlfac_vdw[2],
 609                                real sigma6_def, real sigma6_min, real sigma2_def, real sigma2_min,
 610                                real *velectot, real *vvdwtot, real *dvdl)
 611 {
 612     real       r, rp, rpm2, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VV, FF, fscal;
 613     real       qq[2], c6[2], c12[2], sigma6[2], sigma2[2], sigma_pow[2], sigma_powm2[2];
 614     real       alpha_coul_eff, alpha_vdw_eff, dvdl_coul, dvdl_vdw;
 615     real       rpinv, r_coul, r_vdw, velecsum, vvdwsum;
 616     real       fscal_vdw[2], fscal_elec[2];
 617     real       velec[2], vvdw[2];
 618     int        i, ntab;
 619
 620     qq[0]    = qqA;
 621     qq[1]    = qqB;
 622     c6[0]    = c6A;
 623     c6[1]    = c6B;
 624     c12[0]   = c12A;
 625     c12[1]   = c12B;
 626
 627     if (sc_r_power == 6.0)
 628     {
 629         rpm2             = r2*r2;   /* r4 */
 630         rp               = rpm2*r2; /* r6 */
 631     }
 632     else if (sc_r_power == 48.0)
 633     {
 634         rp               = r2*r2*r2; /* r6 */
 635         rp               = rp*rp;    /* r12 */
 636         rp               = rp*rp;    /* r24 */
 637         rp               = rp*rp;    /* r48 */
 638         rpm2             = rp/r2;    /* r46 */
 639     }
 640     else
 641     {
 642         rp             = pow(r2, 0.5*sc_r_power);  /* not currently supported as input, but can handle it */
 643         rpm2           = rp/r2;
 644     }
 645
 646     /* Loop over state A(0) and B(1) */
 647     for (i = 0; i < 2; i++)
 648     {
 649         if ((c6[i] > 0) && (c12[i] > 0))
 650         {
 651             /* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
 652              * Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
 653              */
 654             sigma6[i]       = 0.5*c12[i]/c6[i];
 655             sigma2[i]       = pow(0.5*c12[i]/c6[i], 1.0/3.0);
 656             /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
 657                what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
 658             if (sigma6[i] < sigma6_min)   /* for disappearing coul and vdw with soft core at the same time */
 659             {
 660                 sigma6[i] = sigma6_min;
 661                 sigma2[i] = sigma2_min;
 662             }
 663         }
 664         else
 665         {
 666             sigma6[i]       = sigma6_def;
 667             sigma2[i]       = sigma2_def;
 668         }
 669         if (sc_r_power == 6.0)
 670         {
 671             sigma_pow[i]    = sigma6[i];
 672             sigma_powm2[i]  = sigma6[i]/sigma2[i];
 673         }
 674         else if (sc_r_power == 48.0)
 675         {
 676             sigma_pow[i]    = sigma6[i]*sigma6[i];       /* sigma^12 */
 677             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
 678             sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
 679             sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
 680         }
 681         else
 682         {    /* not really supported as input, but in here for testing the general case*/
 683             sigma_pow[i]    = pow(sigma2[i], sc_r_power/2);
 684             sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
 685         }
 686     }
 687
 688     /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
 689     if ((c12[0] > 0) && (c12[1] > 0))
 690     {
 691         alpha_vdw_eff    = 0;
 692         alpha_coul_eff   = 0;
 693     }
 694     else
 695     {
 696         alpha_vdw_eff    = alpha_vdw;
 697         alpha_coul_eff   = alpha_coul;
 698     }
 699
 700     /* Loop over A and B states again */
 701     for (i = 0; i < 2; i++)
 702     {
 703         fscal_elec[i] = 0;
 704         fscal_vdw[i]  = 0;
 705         velec[i]      = 0;
 706         vvdw[i]       = 0;
 707
 708         /* Only spend time on A or B state if it is non-zero */
 709         if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
 710         {
 711             /* Coulomb */
 712             rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
 713             r_coul           = pow(rpinv, -1.0/sc_r_power);
 714
 715             /* Electrostatics table lookup data */
 716             rtab             = r_coul*tabscale;
 717             ntab             = rtab;
 718             eps              = rtab-ntab;
 719             eps2             = eps*eps;
 720             ntab             = 12*ntab;
 721             /* Electrostatics */
 722             Y                = vftab[ntab];
 723             F                = vftab[ntab+1];
 724             Geps             = eps*vftab[ntab+2];
 725             Heps2            = eps2*vftab[ntab+3];
 726             Fp               = F+Geps+Heps2;
 727             VV               = Y+eps*Fp;
 728             FF               = Fp+Geps+2.0*Heps2;
 729             velec[i]         = qq[i]*VV;
 730             fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
 731
 732             /* Vdw */
 733             rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
 734             r_vdw            = pow(rpinv, -1.0/sc_r_power);
 735             /* Vdw table lookup data */
 736             rtab             = r_vdw*tabscale;
 737             ntab             = rtab;
 738             eps              = rtab-ntab;
 739             eps2             = eps*eps;
 740             ntab             = 12*ntab;
 741             /* Dispersion */
 742             Y                = vftab[ntab+4];
 743             F                = vftab[ntab+5];
 744             Geps             = eps*vftab[ntab+6];
 745             Heps2            = eps2*vftab[ntab+7];
 746             Fp               = F+Geps+Heps2;
 747             VV               = Y+eps*Fp;
 748             FF               = Fp+Geps+2.0*Heps2;
 749             vvdw[i]          = c6[i]*VV;
 750             fscal_vdw[i]     = -c6[i]*FF;
 751
 752             /* Repulsion */
 753             Y                = vftab[ntab+8];
 754             F                = vftab[ntab+9];
 755             Geps             = eps*vftab[ntab+10];
 756             Heps2            = eps2*vftab[ntab+11];
 757             Fp               = F+Geps+Heps2;
 758             VV               = Y+eps*Fp;
 759             FF               = Fp+Geps+2.0*Heps2;
 760             vvdw[i]         += c12[i]*VV;
 761             fscal_vdw[i]    -= c12[i]*FF;
 762             fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
 763         }
 764     }
 765     /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
 766     /* Assemble A and B states */
 767     velecsum  = 0;
 768     vvdwsum   = 0;
 769     dvdl_coul = 0;
 770     dvdl_vdw  = 0;
 771     fscal     = 0;
 772     for (i = 0; i < 2; i++)
 773     {
 774         velecsum      += LFC[i]*velec[i];
 775         vvdwsum       += LFV[i]*vvdw[i];
 776
 777         fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
 778
 779         dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
 780         dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
 781     }
 782
 783     dvdl[efptCOUL]     += dvdl_coul;
 784     dvdl[efptVDW]      += dvdl_vdw;
 785
 786     *velectot           = velecsum;
 787     *vvdwtot            = vvdwsum;
 788
 789     return fscal;
 790 }