Free-energy support for LJ-PME
authorChristian Wennberg <christian.wennberg@scilifelab.se>
Tue, 11 Mar 2014 14:44:45 +0000 (15:44 +0100)
committerGerrit Code Review <gerrit@gerrit.gromacs.org>
Sat, 29 Mar 2014 21:59:52 +0000 (22:59 +0100)
Free energy calculations is now supported together with
LJ-PME.

Change-Id: I8f30bbf565d06bb10a5b6be37a3680fbd5cef6af

src/gromacs/gmxlib/nonbonded/nb_free_energy.c
src/gromacs/gmxpreprocess/grompp.c
src/gromacs/legacyheaders/tables.h
src/gromacs/legacyheaders/types/interaction_const.h
src/gromacs/mdlib/force.c
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
src/gromacs/mdlib/pme.c
src/gromacs/mdlib/tables.c

index cb6f5c26a82a9e2d0154a97d1c7e80f05caabb37..f09b0069ba637919e405a4577c404548dc6e6dbf 100644 (file)
@@ -72,7 +72,7 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
     real          Vvdw6, Vvdw12, vvtot;
     real          ix, iy, iz, fix, fiy, fiz;
     real          dx, dy, dz, rsq, rinv;
-    real          c6[NSTATES], c12[NSTATES];
+    real          c6[NSTATES], c12[NSTATES], c6grid[NSTATES];
     real          LFC[NSTATES], LFV[NSTATES], DLF[NSTATES];
     double        dvdl_coul, dvdl_vdw;
     real          lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
@@ -103,8 +103,8 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
     const real *  chargeA;
     const real *  chargeB;
     real          sigma6_min, sigma6_def, lam_power, sc_power, sc_r_power;
-    real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc;
-    const real *  nbfp;
+    real          alpha_coul, alpha_vdw, lambda_coul, lambda_vdw, ewc_lj;
+    const real *  nbfp, *nbfp_grid;
     real *        dvdl;
     real *        Vv;
     real *        Vc;
@@ -116,6 +116,8 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
     real          rcutoff, rcutoff2, rswitch, d, d2, swV3, swV4, swV5, swF2, swF3, swF4, sw, dsw, rinvcorr;
     const real *  tab_ewald_F;
     const real *  tab_ewald_V;
+    const real *  tab_ewald_F_lj;
+    const real *  tab_ewald_V_lj;
     real          tab_ewald_scale, tab_ewald_halfsp;
 
     x                   = xx[0];
@@ -138,12 +140,13 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
     facel               = fr->epsfac;
     krf                 = fr->k_rf;
     crf                 = fr->c_rf;
-    ewc                 = fr->ewaldcoeff_q;
+    ewc_lj              = fr->ewaldcoeff_lj;
     Vc                  = kernel_data->energygrp_elec;
     typeA               = mdatoms->typeA;
     typeB               = mdatoms->typeB;
     ntype               = fr->ntype;
     nbfp                = fr->nbfp;
+    nbfp_grid           = fr->ljpme_c6grid;
     Vv                  = kernel_data->energygrp_vdw;
     lambda_coul         = kernel_data->lambda[efptCOUL];
     lambda_vdw          = kernel_data->lambda[efptVDW];
@@ -167,6 +170,8 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
     tab_ewald_F         = fr->ic->tabq_coul_F;
     tab_ewald_V         = fr->ic->tabq_coul_V;
     tab_ewald_scale     = fr->ic->tabq_scale;
+    tab_ewald_F_lj      = fr->ic->tabq_vdw_F;
+    tab_ewald_V_lj      = fr->ic->tabq_vdw_V;
     tab_ewald_halfsp    = 0.5/tab_ewald_scale;
 
     if (fr->coulomb_modifier == eintmodPOTSWITCH || fr->vdw_modifier == eintmodPOTSWITCH)
@@ -199,8 +204,14 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
         const interaction_const_t *ic;
 
         ic = fr->ic;
-
-        ivdw             = GMX_NBKERNEL_VDW_LENNARDJONES;
+        if (EVDW_PME(ic->vdwtype))
+        {
+            ivdw         = GMX_NBKERNEL_VDW_LJEWALD;
+        }
+        else
+        {
+            ivdw         = GMX_NBKERNEL_VDW_LENNARDJONES;
+        }
 
         if (ic->eeltype == eelCUT || EEL_RF(ic->eeltype))
         {
@@ -360,15 +371,22 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
             qq[STATE_A]      = iqA*chargeA[jnr];
             qq[STATE_B]      = iqB*chargeB[jnr];
 
+            tj[STATE_A]      = ntiA+2*typeA[jnr];
+            tj[STATE_B]      = ntiB+2*typeB[jnr];
+
+            if (ivdw == GMX_NBKERNEL_VDW_LJEWALD)
+            {
+                c6grid[STATE_A] = nbfp_grid[tj[STATE_A]];
+                c6grid[STATE_B] = nbfp_grid[tj[STATE_B]];
+            }
+
             if (nlist->excl_fep == NULL || nlist->excl_fep[k])
             {
-                tj[STATE_A]      = ntiA+2*typeA[jnr];
-                tj[STATE_B]      = ntiB+2*typeB[jnr];
+                c6[STATE_A]      = nbfp[tj[STATE_A]];
+                c6[STATE_B]      = nbfp[tj[STATE_B]];
 
                 for (i = 0; i < NSTATES; i++)
                 {
-
-                    c6[i]              = nbfp[tj[i]];
                     c12[i]             = nbfp[tj[i]+1];
                     if ((c6[i] > 0) && (c12[i] > 0))
                     {
@@ -524,11 +542,14 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
                         }
 
                         if ((c6[i] != 0 || c12[i] != 0) &&
-                            !(bExactVdwCutoff && rV >= rvdw))
+                            !(bExactVdwCutoff &&
+                              ((ivdw != GMX_NBKERNEL_VDW_LJEWALD && rV >= rvdw) ||
+                               (ivdw == GMX_NBKERNEL_VDW_LJEWALD && r >= rvdw))))
                         {
                             switch (ivdw)
                             {
                                 case GMX_NBKERNEL_VDW_LENNARDJONES:
+                                case GMX_NBKERNEL_VDW_LJEWALD:
                                     /* cutoff LJ */
                                     if (sc_r_power == 6.0)
                                     {
@@ -685,6 +706,27 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict    nlist,
                 }
             }
 
+            if (ivdw == GMX_NBKERNEL_VDW_LJEWALD &&
+                !(bExactVdwCutoff && r >= rvdw))
+            {
+                real rs, frac, f_lr;
+                int  ri;
+
+                rs     = rsq*rinv*tab_ewald_scale;
+                ri     = (int)rs;
+                frac   = rs - ri;
+                f_lr   = (1 - frac)*tab_ewald_F_lj[ri] + frac*tab_ewald_F_lj[ri+1];
+                FF     = f_lr*rinv;
+                VV     = tab_ewald_V_lj[ri] - tab_ewald_halfsp*frac*(tab_ewald_F_lj[ri] + f_lr);
+                for (i = 0; i < NSTATES; i++)
+                {
+                    vvtot      += LFV[i]*c6grid[i]*VV*(1.0/6.0);
+                    Fscal      += LFV[i]*c6grid[i]*FF*(1.0/6.0);
+                    dvdl_vdw   += (DLF[i]*c6grid[i])*VV*(1.0/6.0);
+                }
+
+            }
+
             if (bDoForces)
             {
                 tx         = Fscal*dx;
index e6964a9ad6c06546260af41bc3f48d33a52fc6a8..e27a8810ca9bae67da06c16cd0a2b5c95aef0691 100644 (file)
@@ -1962,11 +1962,6 @@ int gmx_grompp(int argc, char *argv[])
        potentially conflict if not handled correctly. */
     if (ir->efep != efepNO)
     {
-        if (EVDW_PME(ir->vdwtype))
-        {
-            gmx_fatal(FARGS, "LJ-PME not implemented together with free energy calculations!");
-        }
-
         state.fep_state = ir->fepvals->init_fep_state;
         for (i = 0; i < efptNR; i++)
         {
index e15f3c71f6cfbc5d69321e1ccdc6c9e9b724b2e9..affbf51ac9e0b300324eebc9a01082fb11c3a269 100644 (file)
@@ -44,12 +44,18 @@ extern "C" {
 }
 #endif
 
-void table_spline3_fill_ewald_lr(real *table_F,
-                                 real *table_V,
-                                 real *table_FDV0,
-                                 int   ntab,
-                                 real  dx,
-                                 real  beta);
+typedef double (*real_space_grid_contribution_computer)(double, double);
+/* Function pointer used to tell table_spline3_fill_ewald_lr whether it
+ * should calculate the grid contribution for electrostatics or LJ.
+ */
+
+void table_spline3_fill_ewald_lr(real                                 *table_F,
+                                 real                                 *table_V,
+                                 real                                 *table_FDV0,
+                                 int                                   ntab,
+                                 real                                  dx,
+                                 real                                  beta,
+                                 real_space_grid_contribution_computer v_lr);
 /* Fill tables of ntab points with spacing dr with the ewald long-range
  * (mesh) force.
  * There are three separate tables with format FDV0, F, and V.
@@ -61,6 +67,11 @@ void table_spline3_fill_ewald_lr(real *table_F,
 real ewald_spline3_table_scale(real ewaldcoeff, real rc);
 /* Return the scaling for the Ewald quadratic spline tables. */
 
+double v_q_ewald_lr(double beta, double r);
+/* Return the real space grid contribution for Ewald*/
+
+double v_lj_ewald_lr(double beta, double r);
+/* Return the real space grid contribution for LJ-Ewald*/
 
 #ifdef __cplusplus
 }
index b5f6e79055fc174a5b4d024e2dfc0e8a1ffce672..1f1a50ae63c69ad57508c92702ed3d79808edbed 100644 (file)
@@ -117,6 +117,16 @@ typedef struct {
        entry quadruplets are: F[i], F[i+1]-F[i], V[i], 0,
        this is used with single precision x86 SIMD for aligned loads */
     real *tabq_coul_FDV0;
+
+    /* Vdw force table for LJ-PME, size of array is tabq_size (when used) */
+    real *tabq_vdw_F;
+    /* Vdw energy table for LJ-PME, size of array is tabq_size (when used) */
+    real *tabq_vdw_V;
+    /* Vdw force+energy table for LJ-PME, size of array is tabq_size*4, entry
+       quadruplets are: F[i], F[i+1]-F[i], V[i], 0, this is used with
+       single precision x86 SIMD for aligned loads */
+    real *tabq_vdw_FDV0;
+
 } interaction_const_t;
 
 #ifdef __cplusplus
index 50c661ff0dd44547d50d31097668cb1c5ca43800..d16e1a3c86e4885a9c620c649384b793b96aff4e 100644 (file)
@@ -535,16 +535,17 @@ void do_force_lowlevel(FILE       *fplog,   gmx_int64_t step,
                     }
                     *dvdlt_q  = 0;
                     *dvdlt_lj = 0;
+
                     ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
                                        cr, t, fr,
                                        md->chargeA,
                                        md->nChargePerturbed ? md->chargeB : NULL,
                                        md->sqrt_c6A,
-                                       md->nChargePerturbed ? md->sqrt_c6B : NULL,
+                                       md->nTypePerturbed ? md->sqrt_c6B : NULL,
                                        md->sigmaA,
-                                       md->nChargePerturbed ? md->sigmaB : NULL,
+                                       md->nTypePerturbed ? md->sigmaB : NULL,
                                        md->sigma3A,
-                                       md->nChargePerturbed ? md->sigma3B : NULL,
+                                       md->nTypePerturbed ? md->sigma3B : NULL,
                                        ir->cutoff_scheme != ecutsVERLET,
                                        excl, x, bSB ? boxs : box, mu_tot,
                                        ir->ewald_geometry,
index 4c46b4804e461f09b6290242636a27ac77dd3c52..019a6826f07ff41dbd3f364676dcccc172bd3b67 100644 (file)
@@ -1887,12 +1887,28 @@ static void init_ewald_f_table(interaction_const_t *ic,
     sfree_aligned(ic->tabq_coul_F);
     sfree_aligned(ic->tabq_coul_V);
 
-    /* Create the original table data in FDV0 */
-    snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
-    snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
-    snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
-    table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
-                                ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
+    sfree_aligned(ic->tabq_vdw_FDV0);
+    sfree_aligned(ic->tabq_vdw_F);
+    sfree_aligned(ic->tabq_vdw_V);
+
+    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
+    {
+        /* Create the original table data in FDV0 */
+        snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
+        snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
+        snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
+        table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
+                                    ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
+    }
+
+    if (EVDW_PME(ic->vdwtype))
+    {
+        snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
+        snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
+        snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
+        table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
+                                    ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
+    }
 }
 
 void init_interaction_const_tables(FILE                *fp,
@@ -1902,7 +1918,7 @@ void init_interaction_const_tables(FILE                *fp,
 {
     real spacing;
 
-    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
+    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
     {
         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
 
index 57d9def08ec0317bcbc6ba273a611652226dcf46..32dc486a4ba96a84fa1a963b5175268337381d01 100644 (file)
@@ -126,7 +126,7 @@ static void init_ewald_coulomb_force_table(cu_nbparam_t          *nbp,
     pmalloc((void**)&ftmp, tabsize*sizeof(*ftmp));
 
     table_spline3_fill_ewald_lr(ftmp, NULL, NULL, tabsize,
-                                1/tabscale, nbp->ewald_beta);
+                                1/tabscale, nbp->ewald_beta, v_q_ewald_lr);
 
     /* If the table pointer == NULL the table is generated the first time =>
        the array pointer will be saved to nbparam and the texture is bound.
index 3d8809e8b360aa38ad061e30cc67bf0d030b6977..bd2a5a6a591f721293ee8f86c3ddcb913aea2082 100644 (file)
@@ -304,13 +304,10 @@ typedef struct gmx_pme {
     int        nthread;       /* The number of threads doing PME on our rank */
 
     gmx_bool   bPPnode;       /* Node also does particle-particle forces */
-
     gmx_bool   bFEP;          /* Compute Free energy contribution */
     gmx_bool   bFEP_q;
     gmx_bool   bFEP_lj;
-
     int        nkx, nky, nkz; /* Grid dimensions */
-
     gmx_bool   bP3M;          /* Do P3M: optimize the influence function */
     int        pme_order;
     real       epsilon_r;
@@ -4683,14 +4680,12 @@ do_redist_pos_coeffs(gmx_pme_t pme, t_commrec *cr, int start, int homenr,
     }
 }
 
-/* TODO: when adding free-energy support, sigmaB will no longer be
- * unused */
 int gmx_pme_do(gmx_pme_t pme,
                int start,       int homenr,
                rvec x[],        rvec f[],
                real *chargeA,   real *chargeB,
                real *c6A,       real *c6B,
-               real *sigmaA,    real gmx_unused *sigmaB,
+               real *sigmaA,    real *sigmaB,
                matrix box, t_commrec *cr,
                int  maxshift_x, int maxshift_y,
                t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
@@ -4701,7 +4696,7 @@ int gmx_pme_do(gmx_pme_t pme,
                real *dvdlambda_q, real *dvdlambda_lj,
                int flags)
 {
-    int     d, i, j, ntot, npme, grid_index, max_grid_index;
+    int     d, i, j, k, ntot, npme, grid_index, max_grid_index;
     int     nx, ny, nz;
     int     n_d, local_ny;
     pme_atomcomm_t *atc = NULL;
@@ -4719,6 +4714,8 @@ int gmx_pme_do(gmx_pme_t pme,
     t_complex * cfftgrid;
     int     thread;
     gmx_bool bFirst, bDoSplines;
+    int fep_state;
+    int fep_states_lj           = pme->bFEP_lj ? 2 : 1;
     const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
     const gmx_bool bCalcF       = flags & GMX_PME_CALC_F;
 
@@ -5019,227 +5016,252 @@ int gmx_pme_do(gmx_pme_t pme,
 
     if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB)
     {
-        real *local_c6 = NULL, *local_sigma = NULL;
-        if (pme->nnodes == 1)
-        {
-            if (pme->lb_buf1 == NULL)
-            {
-                pme->lb_buf_nalloc = pme->atc[0].n;
-                snew(pme->lb_buf1, pme->lb_buf_nalloc);
-            }
-            pme->atc[0].coefficient = pme->lb_buf1;
-            local_c6                = c6A;
-            local_sigma             = sigmaA;
-        }
-        else
+        /* Loop over A- and B-state if we are doing FEP */
+        for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
         {
-            atc = &pme->atc[0];
-
-            wallcycle_start(wcycle, ewcPME_REDISTXF);
-
-            do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, c6A);
-            if (pme->lb_buf_nalloc < atc->n)
-            {
-                pme->lb_buf_nalloc = atc->nalloc;
-                srenew(pme->lb_buf1, pme->lb_buf_nalloc);
-                srenew(pme->lb_buf2, pme->lb_buf_nalloc);
-            }
-            local_c6 = pme->lb_buf1;
-            for (i = 0; i < atc->n; ++i)
-            {
-                local_c6[i] = atc->coefficient[i];
-            }
-            where();
-
-            do_redist_pos_coeffs(pme, cr, start, homenr, FALSE, x, sigmaA);
-            local_sigma = pme->lb_buf2;
-            for (i = 0; i < atc->n; ++i)
+            real *local_c6 = NULL, *local_sigma = NULL, *RedistC6 = NULL, *RedistSigma = NULL;
+            if (pme->nnodes == 1)
             {
-                local_sigma[i] = atc->coefficient[i];
+                if (pme->lb_buf1 == NULL)
+                {
+                    pme->lb_buf_nalloc = pme->atc[0].n;
+                    snew(pme->lb_buf1, pme->lb_buf_nalloc);
+                }
+                pme->atc[0].coefficient = pme->lb_buf1;
+                switch (fep_state)
+                {
+                    case 0:
+                        local_c6      = c6A;
+                        local_sigma   = sigmaA;
+                        break;
+                    case 1:
+                        local_c6      = c6B;
+                        local_sigma   = sigmaB;
+                        break;
+                    default:
+                        gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
+                }
             }
-            where();
-
-            wallcycle_stop(wcycle, ewcPME_REDISTXF);
-        }
-        calc_initial_lb_coeffs(pme, local_c6, local_sigma);
-
-        /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
-        for (grid_index = 2; grid_index < 9; ++grid_index)
-        {
-            /* Unpack structure */
-            pmegrid    = &pme->pmegrid[grid_index];
-            fftgrid    = pme->fftgrid[grid_index];
-            cfftgrid   = pme->cfftgrid[grid_index];
-            pfft_setup = pme->pfft_setup[grid_index];
-            calc_next_lb_coeffs(pme, local_sigma);
-            grid = pmegrid->grid.grid;
-            where();
-
-            if (flags & GMX_PME_SPREAD)
+            else
             {
-                wallcycle_start(wcycle, ewcPME_SPREADGATHER);
-                /* Spread the c6 on a grid */
-                spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+                atc = &pme->atc[0];
+                switch (fep_state)
+                {
+                    case 0:
+                        RedistC6      = c6A;
+                        RedistSigma   = sigmaA;
+                        break;
+                    case 1:
+                        RedistC6      = c6B;
+                        RedistSigma   = sigmaB;
+                        break;
+                    default:
+                        gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
+                }
+                wallcycle_start(wcycle, ewcPME_REDISTXF);
 
-                if (bFirst)
+                do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, RedistC6);
+                if (pme->lb_buf_nalloc < atc->n)
                 {
-                    inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+                    pme->lb_buf_nalloc = atc->nalloc;
+                    srenew(pme->lb_buf1, pme->lb_buf_nalloc);
+                    srenew(pme->lb_buf2, pme->lb_buf_nalloc);
                 }
-                inc_nrnb(nrnb, eNR_SPREADBSP,
-                         pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
-                if (pme->nthread == 1)
+                local_c6 = pme->lb_buf1;
+                for (i = 0; i < atc->n; ++i)
                 {
-                    wrap_periodic_pmegrid(pme, grid);
+                    local_c6[i] = atc->coefficient[i];
+                }
+                where();
 
-                    /* sum contributions to local grid from other nodes */
-#ifdef GMX_MPI
-                    if (pme->nnodes > 1)
-                    {
-                        gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
-                        where();
-                    }
-#endif
-                    copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
+                do_redist_pos_coeffs(pme, cr, start, homenr, FALSE, x, RedistSigma);
+                local_sigma = pme->lb_buf2;
+                for (i = 0; i < atc->n; ++i)
+                {
+                    local_sigma[i] = atc->coefficient[i];
                 }
+                where();
 
-                wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
+                wallcycle_stop(wcycle, ewcPME_REDISTXF);
             }
+            calc_initial_lb_coeffs(pme, local_c6, local_sigma);
 
-            /*Here we start a large thread parallel region*/
-#pragma omp parallel num_threads(pme->nthread) private(thread)
+            /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
+            for (grid_index = 2; grid_index < 9; ++grid_index)
             {
-                thread = gmx_omp_get_thread_num();
-                if (flags & GMX_PME_SOLVE)
+                /* Unpack structure */
+                pmegrid    = &pme->pmegrid[grid_index];
+                fftgrid    = pme->fftgrid[grid_index];
+                cfftgrid   = pme->cfftgrid[grid_index];
+                pfft_setup = pme->pfft_setup[grid_index];
+                calc_next_lb_coeffs(pme, local_sigma);
+                grid = pmegrid->grid.grid;
+                where();
+
+                if (flags & GMX_PME_SPREAD)
                 {
-                    /* do 3d-fft */
-                    if (thread == 0)
+                    wallcycle_start(wcycle, ewcPME_SPREADGATHER);
+                    /* Spread the c6 on a grid */
+                    spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+
+                    if (bFirst)
                     {
-                        wallcycle_start(wcycle, ewcPME_FFT);
+                        inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
                     }
 
-                    gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
-                                               thread, wcycle);
-                    if (thread == 0)
+                    inc_nrnb(nrnb, eNR_SPREADBSP,
+                             pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+                    if (pme->nthread == 1)
                     {
-                        wallcycle_stop(wcycle, ewcPME_FFT);
+                        wrap_periodic_pmegrid(pme, grid);
+                        /* sum contributions to local grid from other nodes */
+#ifdef GMX_MPI
+                        if (pme->nnodes > 1)
+                        {
+                            gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_FORWARD);
+                            where();
+                        }
+#endif
+                        copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
                     }
-                    where();
+                    wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
                 }
-            }
-            bFirst = FALSE;
-        }
-        if (flags & GMX_PME_SOLVE)
-        {
-            int loop_count;
-            /* solve in k-space for our local cells */
+                /*Here we start a large thread parallel region*/
 #pragma omp parallel num_threads(pme->nthread) private(thread)
-            {
-                thread = gmx_omp_get_thread_num();
-                if (thread == 0)
                 {
-                    wallcycle_start(wcycle, ewcLJPME);
-                }
+                    thread = gmx_omp_get_thread_num();
+                    if (flags & GMX_PME_SOLVE)
+                    {
+                        /* do 3d-fft */
+                        if (thread == 0)
+                        {
+                            wallcycle_start(wcycle, ewcPME_FFT);
+                        }
 
-                loop_count =
-                    solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj,
-                                     box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
-                                     bCalcEnerVir,
-                                     pme->nthread, thread);
-                if (thread == 0)
-                {
-                    wallcycle_stop(wcycle, ewcLJPME);
-                    where();
-                    inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+                        gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX,
+                                                   thread, wcycle);
+                        if (thread == 0)
+                        {
+                            wallcycle_stop(wcycle, ewcPME_FFT);
+                        }
+                        where();
+                    }
                 }
+                bFirst = FALSE;
             }
-        }
-
-        if (bCalcEnerVir)
-        {
-            /* This should only be called on the master thread and
-             * after the threads have synchronized.
-             */
-            get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[2], vir_AB[2]);
-        }
-
-        if (bCalcF)
-        {
-            bFirst = !(flags & GMX_PME_DO_COULOMB);
-            calc_initial_lb_coeffs(pme, local_c6, local_sigma);
-            for (grid_index = 8; grid_index >= 2; --grid_index)
+            if (flags & GMX_PME_SOLVE)
             {
-                /* Unpack structure */
-                pmegrid    = &pme->pmegrid[grid_index];
-                fftgrid    = pme->fftgrid[grid_index];
-                cfftgrid   = pme->cfftgrid[grid_index];
-                pfft_setup = pme->pfft_setup[grid_index];
-                grid       = pmegrid->grid.grid;
-                calc_next_lb_coeffs(pme, local_sigma);
-                where();
+                int loop_count;
+                /* solve in k-space for our local cells */
 #pragma omp parallel num_threads(pme->nthread) private(thread)
                 {
                     thread = gmx_omp_get_thread_num();
-                    /* do 3d-invfft */
                     if (thread == 0)
                     {
-                        where();
-                        wallcycle_start(wcycle, ewcPME_FFT);
+                        wallcycle_start(wcycle, ewcLJPME);
                     }
 
-                    gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
-                                               thread, wcycle);
+                    loop_count =
+                        solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj,
+                                         box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
+                                         bCalcEnerVir,
+                                         pme->nthread, thread);
                     if (thread == 0)
                     {
-                        wallcycle_stop(wcycle, ewcPME_FFT);
-
+                        wallcycle_stop(wcycle, ewcLJPME);
                         where();
+                        inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
+                    }
+                }
+            }
+
+            if (bCalcEnerVir)
+            {
+                /* This should only be called on the master thread and
+                 * after the threads have synchronized.
+                 */
+                get_pme_ener_vir_lj(pme, pme->nthread, &energy_AB[2+fep_state], vir_AB[2+fep_state]);
+            }
 
-                        if (pme->nodeid == 0)
+            if (bCalcF)
+            {
+                bFirst = !(flags & GMX_PME_DO_COULOMB);
+                calc_initial_lb_coeffs(pme, local_c6, local_sigma);
+                for (grid_index = 8; grid_index >= 2; --grid_index)
+                {
+                    /* Unpack structure */
+                    pmegrid    = &pme->pmegrid[grid_index];
+                    fftgrid    = pme->fftgrid[grid_index];
+                    cfftgrid   = pme->cfftgrid[grid_index];
+                    pfft_setup = pme->pfft_setup[grid_index];
+                    grid       = pmegrid->grid.grid;
+                    calc_next_lb_coeffs(pme, local_sigma);
+                    where();
+#pragma omp parallel num_threads(pme->nthread) private(thread)
+                    {
+                        thread = gmx_omp_get_thread_num();
+                        /* do 3d-invfft */
+                        if (thread == 0)
                         {
-                            ntot  = pme->nkx*pme->nky*pme->nkz;
-                            npme  = ntot*log((real)ntot)/log(2.0);
-                            inc_nrnb(nrnb, eNR_FFT, 2*npme);
+                            where();
+                            wallcycle_start(wcycle, ewcPME_FFT);
+                        }
+
+                        gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL,
+                                                   thread, wcycle);
+                        if (thread == 0)
+                        {
+                            wallcycle_stop(wcycle, ewcPME_FFT);
+
+                            where();
+
+                            if (pme->nodeid == 0)
+                            {
+                                ntot  = pme->nkx*pme->nky*pme->nkz;
+                                npme  = ntot*log((real)ntot)/log(2.0);
+                                inc_nrnb(nrnb, eNR_FFT, 2*npme);
+                            }
+                            wallcycle_start(wcycle, ewcPME_SPREADGATHER);
                         }
-                        wallcycle_start(wcycle, ewcPME_SPREADGATHER);
-                    }
 
-                    copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
+                        copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
 
-                } /*#pragma omp parallel*/
+                    } /*#pragma omp parallel*/
 
-                /* distribute local grid to all nodes */
+                    /* distribute local grid to all nodes */
 #ifdef GMX_MPI
-                if (pme->nnodes > 1)
-                {
-                    gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
-                }
+                    if (pme->nnodes > 1)
+                    {
+                        gmx_sum_qgrid_dd(pme, grid, GMX_SUM_GRID_BACKWARD);
+                    }
 #endif
-                where();
+                    where();
 
-                unwrap_periodic_pmegrid(pme, grid);
+                    unwrap_periodic_pmegrid(pme, grid);
 
-                /* interpolate forces for our local atoms */
-                where();
-                bClearF = (bFirst && PAR(cr));
-                scale   = pme->bFEP ? (grid_index < 9 ? 1.0-lambda_lj : lambda_lj) : 1.0;
-                scale  *= lb_scale_factor[grid_index-2];
+                    /* interpolate forces for our local atoms */
+                    where();
+                    bClearF = (bFirst && PAR(cr));
+                    scale   = pme->bFEP ? (fep_state < 1 ? 1.0-lambda_lj : lambda_lj) : 1.0;
+                    scale  *= lb_scale_factor[grid_index-2];
 #pragma omp parallel for num_threads(pme->nthread) schedule(static)
-                for (thread = 0; thread < pme->nthread; thread++)
-                {
-                    gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
-                                      &pme->atc[0].spline[thread],
-                                      scale);
-                }
-                where();
+                    for (thread = 0; thread < pme->nthread; thread++)
+                    {
+                        gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
+                                          &pme->atc[0].spline[thread],
+                                          scale);
+                    }
+                    where();
 
-                inc_nrnb(nrnb, eNR_GATHERFBSP,
-                         pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
-                wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
+                    inc_nrnb(nrnb, eNR_GATHERFBSP,
+                             pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+                    wallcycle_stop(wcycle, ewcPME_SPREADGATHER);
 
-                bFirst = FALSE;
-            } /*for (grid_index = 8; grid_index >= 2; --grid_index)*/
-        }     /*if (bCalcF)*/
-    }         /*if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB) */
+                    bFirst = FALSE;
+                } /* for (grid_index = 8; grid_index >= 2; --grid_index) */
+            }     /* if (bCalcF) */
+        }         /* for (fep_state = 0; fep_state < fep_states_lj; ++fep_state) */
+    }             /* if ((flags & GMX_PME_DO_LJ) && pme->ljpme_combination_rule == eljpmeLB) */
 
     if (bCalcF && pme->nnodes > 1)
     {
index 5c2391ea5c542c1f0770615e913e8724afb7f852..61f69e2cff6837909ffc75ff854761180bdcbf42 100644 (file)
@@ -132,8 +132,7 @@ typedef struct {
 #define pow4(x) ((x)*(x)*(x)*(x))
 #define pow5(x) ((x)*(x)*(x)*(x)*(x))
 
-
-static double v_ewald_lr(double beta, double r)
+double v_q_ewald_lr(double beta, double r)
 {
     if (r == 0)
     {
@@ -145,12 +144,31 @@ static double v_ewald_lr(double beta, double r)
     }
 }
 
-void table_spline3_fill_ewald_lr(real *table_f,
-                                 real *table_v,
-                                 real *table_fdv0,
-                                 int   ntab,
-                                 real  dx,
-                                 real  beta)
+double v_lj_ewald_lr(double beta, double r)
+{
+    double br, br2, br4, r6, factor;
+    if (r == 0)
+    {
+        return pow(beta, 6)/6;
+    }
+    else
+    {
+        br     = beta*r;
+        br2    = br*br;
+        br4    = br2*br2;
+        r6     = pow(r, 6.0);
+        factor = (1.0 - exp(-br2)*(1 + br2 + 0.5*br4))/r6;
+        return factor;
+    }
+}
+
+void table_spline3_fill_ewald_lr(real                                 *table_f,
+                                 real                                 *table_v,
+                                 real                                 *table_fdv0,
+                                 int                                   ntab,
+                                 real                                  dx,
+                                 real                                  beta,
+                                 real_space_grid_contribution_computer v_lr)
 {
     real     tab_max;
     int      i, i_inrange;
@@ -159,6 +177,10 @@ void table_spline3_fill_ewald_lr(real *table_f,
     double   v_r0, v_r1, v_inrange, vi, a0, a1, a2dx;
     double   x_r0;
 
+    /* This function is called using either v_ewald_lr or v_lj_ewald_lr as a function argument
+     * depending on wether we should create electrostatic or Lennard-Jones Ewald tables.
+     */
+
     if (ntab < 2)
     {
         gmx_fatal(FARGS, "Can not make a spline table with less than 2 points");
@@ -184,7 +206,7 @@ void table_spline3_fill_ewald_lr(real *table_f,
     {
         x_r0 = i*dx;
 
-        v_r0 = v_ewald_lr(beta, x_r0);
+        v_r0 = (*v_lr)(beta, x_r0);
 
         if (!bOutOfRange)
         {
@@ -210,7 +232,7 @@ void table_spline3_fill_ewald_lr(real *table_f,
         }
 
         /* Get the potential at table point i-1 */
-        v_r1 = v_ewald_lr(beta, (i-1)*dx);
+        v_r1 = (*v_lr)(beta, (i-1)*dx);
 
         if (v_r1 != v_r1 || v_r1 < -tab_max || v_r1 > tab_max)
         {
@@ -222,7 +244,7 @@ void table_spline3_fill_ewald_lr(real *table_f,
             /* Calculate the average second derivative times dx over interval i-1 to i.
              * Using the function values at the end points and in the middle.
              */
-            a2dx = (v_r0 + v_r1 - 2*v_ewald_lr(beta, x_r0-0.5*dx))/(0.25*dx);
+            a2dx = (v_r0 + v_r1 - 2*(*v_lr)(beta, x_r0-0.5*dx))/(0.25*dx);
             /* Set the derivative of the spline to match the difference in potential
              * over the interval plus the average effect of the quadratic term.
              * This is the essential step for minimizing the error in the force.