From: Berk Hess <hess@kth.se>
Date: Wed, 12 Mar 2014 07:23:19 +0000 (+0100)
Subject: CUDA cut-off kernels now shift exclusion energies
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=5c13f5e8f699e5f1aefc37c31ea34a05344b190b;p=alexxy%2Fgromacs.git

CUDA cut-off kernels now shift exclusion energies

This is to make the GPU kernels consistent with the CPU nbnxn
kernels, which for eeltype=cut-off modifier=potential-shift
effectively do reaction-field with epsilon_rf=1.
Also implemented shifting of the Coulomb potential for the group
cut-off scheme for non-excluded pairs only.
The manual now explains these details.

Also fixed the generic group kernel with an exact cutoff for
either Coulomb or VdW. I think this was supposed to not be supported
but neither grompp nor mdrun checked for this. The fix is trivial.

Change-Id: I48bff73587e43338162f90fa7d526e1909ce5ad1
---

diff --git a/manual/forcefield.tex b/manual/forcefield.tex
index 0b66aee2d6..1d35f33116 100644
--- a/manual/forcefield.tex
+++ b/manual/forcefield.tex
@@ -178,6 +178,8 @@ The force derived from this potential is:
 \ve{F}_i(\rvij) = f \frac{q_i q_j}{\epsr\rij^2}\rnorm
 \eeq
 
+A plain Coulomb interaction should only be used without cut-off or when all pairs fall within the cut-off, since there is an abrupt, large change in the force at the cut-off. In case you do want to use a cut-off, the potential can be shifted by a constant to make the potential the integral of the force. With the group cut-off scheme, this shift is only applied to non-excluded pairs. With the Verlet cut-off scheme, the shift is also applied to excluded pairs and self interactions, which makes the potential equivalent to a reaction-field with $\epsrf=1$ (see below).
+
 In {\gromacs} the  relative \swapindex{dielectric}{constant} 
 \normindex{$\epsr$}
 may be set in the in the input for {\tt grompp}. 
diff --git a/src/gromacs/gmxlib/nonbonded/nb_generic.c b/src/gromacs/gmxlib/nonbonded/nb_generic.c
index e81ca49723..fbf88de132 100644
--- a/src/gromacs/gmxlib/nonbonded/nb_generic.c
+++ b/src/gromacs/gmxlib/nonbonded/nb_generic.c
@@ -154,7 +154,7 @@ gmx_nb_generic_kernel(t_nblist *                nlist,
 
     bExactElecCutoff    = (fr->coulomb_modifier != eintmodNONE) || fr->eeltype == eelRF_ZERO;
     bExactVdwCutoff     = (fr->vdw_modifier != eintmodNONE);
-    bExactCutoff        = bExactElecCutoff || bExactVdwCutoff;
+    bExactCutoff        = bExactElecCutoff && bExactVdwCutoff;
 
     if (bExactCutoff)
     {
@@ -222,7 +222,7 @@ gmx_nb_generic_kernel(t_nblist *                nlist,
             velec            = 0;
             vvdw             = 0;
 
-            if (bExactCutoff && rsq > rcutoff2)
+            if (bExactCutoff && rsq >= rcutoff2)
             {
                 continue;
             }
@@ -251,6 +251,10 @@ gmx_nb_generic_kernel(t_nblist *                nlist,
                         /* Vanilla cutoff coulomb */
                         velec            = qq*rinv;
                         felec            = velec*rinvsq;
+                        /* The shift for the Coulomb potential is stored in
+                         * the RF parameter c_rf, which is 0 without shift
+                         */
+                        velec           -= qq*fr->ic->c_rf;
                         break;
 
                     case GMX_NBKERNEL_ELEC_REACTIONFIELD:
@@ -309,8 +313,8 @@ gmx_nb_generic_kernel(t_nblist *                nlist,
                 }
                 if (bExactElecCutoff)
                 {
-                    felec            = (rsq <= rcoulomb2) ? felec : 0.0;
-                    velec            = (rsq <= rcoulomb2) ? velec : 0.0;
+                    felec            = (rsq < rcoulomb2) ? felec : 0.0;
+                    velec            = (rsq < rcoulomb2) ? velec : 0.0;
                 }
                 vctot           += velec;
             } /* End of coulomb interactions */
@@ -408,8 +412,8 @@ gmx_nb_generic_kernel(t_nblist *                nlist,
                 }
                 if (bExactVdwCutoff)
                 {
-                    fvdw             = (rsq <= rvdw2) ? fvdw : 0.0;
-                    vvdw             = (rsq <= rvdw2) ? vvdw : 0.0;
+                    fvdw             = (rsq < rvdw2) ? fvdw : 0.0;
+                    vvdw             = (rsq < rvdw2) ? vvdw : 0.0;
                 }
                 vvdwtot         += vvdw;
             } /* end VdW interactions */
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh
index 07520285d9..84634d5e7e 100644
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh
@@ -51,9 +51,10 @@
 #define EL_EWALD_ANY
 #endif
 
-#if defined EL_EWALD_ANY || defined EL_RF || defined LJ_EWALD
+#if defined EL_EWALD_ANY || defined EL_RF || defined LJ_EWALD || (defined EL_CUTOFF && defined CALC_ENERGIES)
 /* Macro to control the calculation of exclusion forces in the kernel
- * We do that with Ewald (elec/vdw) and RF.
+ * We do that with Ewald (elec/vdw) and RF. Cut-off only has exclusion
+ * energy terms.
  *
  * Note: convenience macro, needs to be undef-ed at the end of the file.
  */
@@ -223,7 +224,7 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
         /* we have the diagonal: add the charge and LJ self interaction energy term */
         for (i = 0; i < NCL_PER_SUPERCL; i++)
         {
-#if defined EL_EWALD_ANY || defined EL_RF
+#if defined EL_EWALD_ANY || defined EL_RF || defined EL_CUTOFF
             qi    = xqib[i * CL_SIZE + tidxi].w;
             E_el += qi*qi;
 #endif
@@ -244,14 +245,14 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
         E_lj *= 0.5f*ONE_SIXTH_F*lje_coeff6_6;
 #endif  /* LJ_EWALD */
 
-#if defined EL_EWALD_ANY || defined EL_RF
+#if defined EL_EWALD_ANY || defined EL_RF || defined EL_CUTOFF
         E_el /= CL_SIZE;
-#ifdef EL_RF
+#if defined EL_RF || defined EL_CUTOFF
         E_el *= -nbparam.epsfac*0.5f*c_rf;
 #else
         E_el *= -nbparam.epsfac*beta*M_FLOAT_1_SQRTPI; /* last factor 1/sqrt(pi) */
 #endif
-#endif                                                 /* EL_EWALD_ANY || defined EL_RF */
+#endif                                                 /* EL_EWALD_ANY || defined EL_RF || defined EL_CUTOFF */
     }
 #endif                                                 /* EXCLUSION_FORCES */
 
@@ -308,7 +309,7 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
                     fcj_buf = make_float3(0.0f);
 
                     /* The PME and RF kernels don't unroll with CUDA <v4.1. */
-#if !defined PRUNE_NBL && !(CUDA_VERSION < 4010 && (defined EL_EWALD_ANY || defined EL_RF))
+#if !defined PRUNE_NBL && !(CUDA_VERSION < 4010 && defined EXCLUSION_FORCES)
 #pragma unroll 8
 #endif
                     for (i = 0; i < NCL_PER_SUPERCL; i++)
@@ -436,8 +437,12 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
 
 
 #ifdef EL_CUTOFF
+#ifdef EXCLUSION_FORCES
+                                F_invr  += qi * qj_f * int_bit * inv_r2 * inv_r;
+#else
                                 F_invr  += qi * qj_f * inv_r2 * inv_r;
 #endif
+#endif
 #ifdef EL_RF
                                 F_invr  += qi * qj_f * (int_bit*inv_r2 * inv_r - two_k_rf);
 #endif
@@ -455,7 +460,7 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda)
 
 #ifdef CALC_ENERGIES
 #ifdef EL_CUTOFF
-                                E_el    += qi * qj_f * (inv_r - c_rf);
+                                E_el    += qi * qj_f * (int_bit*inv_r - c_rf);
 #endif
 #ifdef EL_RF
                                 E_el    += qi * qj_f * (int_bit*inv_r + 0.5f * two_k_rf * r2 - c_rf);