Merge release-4-6 into master

[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_cuda / nbnxn_cuda_data_mgmt.cu
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index 3475ad96da5f9a648271373bbd62aaf5952d4996..19c0b2d4f4d865786162b4f8f049ba68fa344d33 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -177,10 +177,65 @@ static void init_atomdata_first(cu_atomdata_t *ad, int ntypes)
      ad->nalloc = -1;
  }
  
+/*! Selects the Ewald kernel type, analytical on SM 3.0 and later, tabulated on
+    earlier GPUs, single or twin cut-off. */
+static int pick_ewald_kernel_type(bool                   bTwinCut,
+                                  const cuda_dev_info_t *dev_info)
+{
+    bool bUseAnalyticalEwald, bForceAnalyticalEwald, bForceTabulatedEwald;
+    int  kernel_type;
+
+    /* Benchmarking/development environment variables to force the use of
+       analytical or tabulated Ewald kernel. */
+    bForceAnalyticalEwald = (getenv("GMX_CUDA_NB_ANA_EWALD") != NULL);
+    bForceTabulatedEwald  = (getenv("GMX_CUDA_NB_TAB_EWALD") != NULL);
+
+    if (bForceAnalyticalEwald && bForceTabulatedEwald)
+    {
+        gmx_incons("Both analytical and tabulated Ewald CUDA non-bonded kernels "
+                   "requested through environment variables.");
+    }
+
+    /* By default, on SM 3.0 and later use analytical Ewald, on earlier tabulated. */
+    if ((dev_info->prop.major >= 3 || bForceAnalyticalEwald) && !bForceTabulatedEwald)
+    {
+        bUseAnalyticalEwald = true;
+
+        if (debug)
+        {
+            fprintf(debug, "Using analytical Ewald CUDA kernels\n");
+        }
+    }
+    else
+    {
+        bUseAnalyticalEwald = false;
+
+        if (debug)
+        {
+            fprintf(debug, "Using tabulated Ewald CUDA kernels\n");
+        }
+    }
+
+    /* Use twin cut-off kernels if requested by bTwinCut or the env. var.
+       forces it (use it for debugging/benchmarking only). */
+    if (!bTwinCut && (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == NULL))
+    {
+        kernel_type = bUseAnalyticalEwald ? eelCuEWALD_ANA : eelCuEWALD_TAB;
+    }
+    else
+    {
+        kernel_type = bUseAnalyticalEwald ? eelCuEWALD_ANA_TWIN : eelCuEWALD_TAB_TWIN;
+    }
+
+    return kernel_type;
+}
+
+
  /*! Initializes the nonbonded parameter data structure. */
  static void init_nbparam(cu_nbparam_t *nbp,
                           const interaction_const_t *ic,
-                         const nonbonded_verlet_t *nbv)
+                         const nonbonded_verlet_t *nbv,
+                         const cuda_dev_info_t *dev_info)
  {
      cudaError_t stat;
      int         ntypes, nnbfp;
@@ -207,16 +262,8 @@ static void init_nbparam(cu_nbparam_t *nbp,
      }
      else if ((EEL_PME(ic->eeltype) || ic->eeltype==eelEWALD))
      {
-        /* Initially rcoulomb == rvdw, so it's surely not twin cut-off, unless
-           forced by the env. var. (used only for benchmarking). */
-        if (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == NULL)
-        {
-            nbp->eeltype = eelCuEWALD;
-        }
-        else
-        {
-            nbp->eeltype = eelCuEWALD_TWIN;
-        }
+        /* Initially rcoulomb == rvdw, so it's surely not twin cut-off. */
+        nbp->eeltype = pick_ewald_kernel_type(false, dev_info);
      }
      else
      {
@@ -225,9 +272,9 @@ static void init_nbparam(cu_nbparam_t *nbp,
      }
  
      /* generate table for PME */
-    if (nbp->eeltype == eelCuEWALD)
+    nbp->coulomb_tab = NULL;
+    if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN)
      {
-        nbp->coulomb_tab = NULL;
          init_ewald_coulomb_force_table(nbp);
      }
  
@@ -253,17 +300,8 @@ void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t cu_nb,
      nbp->rcoulomb_sq    = ic->rcoulomb * ic->rcoulomb;
      nbp->ewald_beta     = ic->ewaldcoeff;
  
-    /* When switching to/from twin cut-off, the electrostatics type needs updating.
-       (The env. var. that forces twin cut-off is for benchmarking only!) */
-    if (ic->rcoulomb == ic->rvdw &&
-        getenv("GMX_CUDA_NB_EWALD_TWINCUT") == NULL)
-    {
-        nbp->eeltype = eelCuEWALD;
-    }
-    else
-    {
-        nbp->eeltype = eelCuEWALD_TWIN;
-    }
+    nbp->eeltype        = pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw,
+                                                 cu_nb->dev_info);
  
      init_ewald_coulomb_force_table(cu_nb->nbparam);
  }
@@ -606,7 +644,7 @@ void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t cu_nb,
                             const nonbonded_verlet_t *nbv)
  {
      init_atomdata_first(cu_nb->atdat, nbv->grp[0].nbat->ntype);
-    init_nbparam(cu_nb->nbparam, ic, nbv);
+    init_nbparam(cu_nb->nbparam, ic, nbv, cu_nb->dev_info);
  
      /* clear energy and shift force outputs */
      nbnxn_cuda_clear_e_fshift(cu_nb);
@@ -801,7 +839,7 @@ void nbnxn_cuda_free(FILE *fplog, nbnxn_cuda_ptr_t cu_nb)
      plist_nl    = cu_nb->plist[eintNonlocal];
      timers      = cu_nb->timers;
  
-    if (nbparam->eeltype == eelCuEWALD || nbparam->eeltype == eelCuEWALD_TWIN)
+    if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN)
      {
        stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref());
        CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab failed");