Merge branch release-5-0

[alexxy/gromacs.git] / src / gromacs / mdlib / forcerec.cpp
diff --git a/src/gromacs/mdlib/forcerec.c b/src/gromacs/mdlib/forcerec.cpp

similarity index 95%

rename from src/gromacs/mdlib/forcerec.c

rename to src/gromacs/mdlib/forcerec.cpp

index 1ec1c2b171b3a282638d0cccbe1d59ec97834ab9..3f6b1874a7ba4aa1d3cacf039344e28e52517220 100644 (file)
--- a/src/gromacs/mdlib/forcerec.c
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -34,53 +34,53 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "config.h"
  
+#include <assert.h>
  #include <math.h>
  #include <string.h>
-#include <assert.h>
-#include "sysstuff.h"
-#include "typedefs.h"
-#include "types/commrec.h"
-#include "vec.h"
+
+#include "gromacs/domdec/domdec.h"
+#include "gromacs/ewald/ewald.h"
+#include "gromacs/gmxlib/cuda_tools/pmalloc_cuda.h"
+#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
+#include "gromacs/legacyheaders/copyrite.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_detect_hardware.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/inputrec.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/md_support.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nonbonded.h"
+#include "gromacs/legacyheaders/ns.h"
+#include "gromacs/legacyheaders/qmmm.h"
+#include "gromacs/legacyheaders/tables.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
+#include "gromacs/math/calculate-ewald-splitting-coefficient.h"
+#include "gromacs/math/units.h"
  #include "gromacs/math/utilities.h"
-#include "macros.h"
-#include "gromacs/utility/smalloc.h"
-#include "macros.h"
-#include "gmx_fatal.h"
-#include "physics.h"
-#include "force.h"
-#include "tables.h"
-#include "nonbonded.h"
-#include "invblock.h"
-#include "names.h"
-#include "network.h"
-#include "pbc.h"
-#include "ns.h"
-#include "mshift.h"
-#include "txtdump.h"
-#include "coulomb.h"
-#include "md_support.h"
-#include "md_logging.h"
-#include "domdec.h"
-#include "qmmm.h"
-#include "copyrite.h"
-#include "mtop_util.h"
-#include "nbnxn_simd.h"
-#include "nbnxn_search.h"
-#include "nbnxn_atomdata.h"
-#include "nbnxn_consts.h"
-#include "gmx_omp_nthreads.h"
-#include "gmx_detect_hardware.h"
-#include "inputrec.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/forcerec-threading.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_atomdata.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/mdlib/nbnxn_simd.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/pbc.h"
  #include "gromacs/simd/simd.h"
-
-#include "types/nbnxn_cuda_types_ext.h"
-#include "gpu_utils.h"
-#include "nbnxn_cuda_data_mgmt.h"
-#include "pmalloc_cuda.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
  
  t_forcerec *mk_forcerec(void)
  {
@@ -155,9 +155,10 @@ static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
  
  static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
  {
-    int   i, j, k, atnr;
-    real  c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
-    real *grid;
+    int        i, j, k, atnr;
+    real       c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
+    real      *grid;
+    const real oneOverSix = 1.0 / 6.0;
  
      /* For LJ-PME simulations, we correct the energies with the reciprocal space
       * inside of the cut-off. To do this the non-bonded kernels needs to have
@@ -178,8 +179,8 @@ static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
              if (fr->ljpme_combination_rule == eljpmeLB
                  && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
              {
-                sigmai = pow(c12i / c6i, 1.0/6.0);
-                sigmaj = pow(c12j / c6j, 1.0/6.0);
+                sigmai = pow(c12i / c6i, oneOverSix);
+                sigmaj = pow(c12j / c6j, oneOverSix);
                  epsi   = c6i * c6i / c12i;
                  epsj   = c6j * c6j / c12j;
                  c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
@@ -195,10 +196,11 @@ static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
  
  static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
  {
-    real *nbfp;
-    int   i, j, k, atnr;
-    real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
-    real  c6, c12;
+    real      *nbfp;
+    int        i, j, atnr;
+    real       c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
+    real       c6, c12;
+    const real oneOverSix = 1.0 / 6.0;
  
      atnr = idef->atnr;
      snew(nbfp, 2*atnr*atnr);
@@ -215,8 +217,8 @@ static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
              if (comb_rule == eCOMB_ARITHMETIC
                  && !gmx_numzero(c6) && !gmx_numzero(c12))
              {
-                sigmai = pow(c12i / c6i, 1.0/6.0);
-                sigmaj = pow(c12j / c6j, 1.0/6.0);
+                sigmai = pow(c12i / c6i, oneOverSix);
+                sigmaj = pow(c12j / c6j, oneOverSix);
                  epsi   = c6i * c6i / c12i;
                  epsj   = c6j * c6j / c12j;
                  c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
@@ -264,7 +266,6 @@ check_solvent_cg(const gmx_moltype_t    *molt,
                   int                     cginfo,
                   int                    *cg_sp)
  {
-    const t_blocka       *excl;
      t_atom               *atom;
      int                   j, k;
      int                   j0, j1, nj;
@@ -509,9 +510,8 @@ check_solvent(FILE  *                fp,
                cginfo_mb_t           *cginfo_mb)
  {
      const t_block     *   cgs;
-    const t_block     *   mols;
      const gmx_moltype_t  *molt;
-    int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
+    int                   mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
      int                   n_solvent_parameters;
      solvent_parameters_t *solvent_parameters;
      int                 **cg_sp;
@@ -522,14 +522,11 @@ check_solvent(FILE  *                fp,
          fprintf(debug, "Going to determine what solvent types we have.\n");
      }
  
-    mols = &mtop->mols;
-
      n_solvent_parameters = 0;
      solvent_parameters   = NULL;
      /* Allocate temporary array for solvent type */
      snew(cg_sp, mtop->nmolblock);
  
-    cg_offset = 0;
      at_offset = 0;
      for (mb = 0; mb < mtop->nmolblock; mb++)
      {
@@ -557,7 +554,6 @@ check_solvent(FILE  *                fp,
                                   &cg_sp[mb][cgm+cg_mol]);
              }
          }
-        cg_offset += cgs->nr;
          at_offset += cgs->index[cgs->nr];
      }
  
@@ -585,10 +581,6 @@ check_solvent(FILE  *                fp,
          bestsol = esolNO;
      }
  
-#ifdef DISABLE_WATER_NLIST
-    bestsol = esolNO;
-#endif
-
      fr->nWatMol = 0;
      for (mb = 0; mb < mtop->nmolblock; mb++)
      {
@@ -637,14 +629,13 @@ static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
      cginfo_mb_t          *cginfo_mb;
      gmx_bool             *type_VDW;
      int                  *cginfo;
-    int                   cg_offset, a_offset, cgm, am;
-    int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
+    int                   cg_offset, a_offset;
+    int                   mb, m, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
      int                  *a_con;
      int                   ftype;
      int                   ia;
      gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
  
-    ncg_tot = ncg_mtop(mtop);
      snew(cginfo_mb, mtop->nmolblock);
  
      snew(type_VDW, fr->ntype);
@@ -679,10 +670,9 @@ static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
           * Otherwise we make an array of #mol times #cgs per molecule.
           */
          bId = TRUE;
-        am  = 0;
          for (m = 0; m < molb->nmol; m++)
          {
-            am = m*cgs->index[cgs->nr];
+            int am = m*cgs->index[cgs->nr];
              for (cg = 0; cg < cgs->nr; cg++)
              {
                  a0 = cgs->index[cg];
@@ -736,8 +726,8 @@ static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
  
          for (m = 0; m < (bId ? 1 : molb->nmol); m++)
          {
-            cgm = m*cgs->nr;
-            am  = m*cgs->index[cgs->nr];
+            int cgm = m*cgs->nr;
+            int am  = m*cgs->index[cgs->nr];
              for (cg = 0; cg < cgs->nr; cg++)
              {
                  a0 = cgs->index[cg];
@@ -991,7 +981,7 @@ void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
  {
      const t_atoms  *atoms, *atoms_tpi;
      const t_blocka *excl;
-    int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
+    int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
      gmx_int64_t     npair, npair_ij, tmpi, tmpj;
      double          csix, ctwelve;
      int             ntp, *typecount;
@@ -1788,7 +1778,8 @@ static void pick_nbnxn_kernel(FILE                *fp,
      }
  }
  
-static void pick_nbnxn_resources(const t_commrec     *cr,
+static void pick_nbnxn_resources(FILE                *fp,
+                                 const t_commrec     *cr,
                                   const gmx_hw_info_t *hwinfo,
                                   gmx_bool             bDoNonbonded,
                                   gmx_bool            *bUseGPU,
@@ -1823,7 +1814,7 @@ static void pick_nbnxn_resources(const t_commrec     *cr,
      {
          /* Each PP node will use the intra-node id-th device from the
           * list of detected/selected GPUs. */
-        if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
+        if (!init_gpu(fp, cr->rank_pp_intranode, gpu_err_str,
                        &hwinfo->gpu_info, gpu_opt))
          {
              /* At this point the init should never fail as we made sure that
@@ -1919,8 +1910,6 @@ void init_interaction_const_tables(FILE                *fp,
                                     gmx_bool             bUsesSimpleTables,
                                     real                 rtab)
  {
-    real spacing;
-
      if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
      {
          init_ewald_f_table(ic, bUsesSimpleTables, rtab);
@@ -1982,6 +1971,8 @@ init_interaction_const(FILE                       *fp,
  {
      interaction_const_t *ic;
      gmx_bool             bUsesSimpleTables = TRUE;
+    const real           minusSix          = -6.0;
+    const real           minusTwelve       = -12.0;
  
      snew(ic, 1);
  
@@ -2008,14 +1999,14 @@ init_interaction_const(FILE                       *fp,
      {
          case eintmodPOTSHIFT:
              /* Only shift the potential, don't touch the force */
-            ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
-            ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
+            ic->dispersion_shift.cpot = -pow(ic->rvdw, minusSix);
+            ic->repulsion_shift.cpot  = -pow(ic->rvdw, minusTwelve);
              if (EVDW_PME(ic->vdwtype))
              {
                  real crc2;
  
                  crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
-                ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
+                ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, minusSix);
              }
              break;
          case eintmodFORCESWITCH:
@@ -2150,7 +2141,7 @@ static void init_nb_verlet(FILE                *fp,
  
      snew(nbv, 1);
  
-    pick_nbnxn_resources(cr, fr->hwinfo,
+    pick_nbnxn_resources(fp, cr, fr->hwinfo,
                           fr->bNonbonded,
                           &nbv->bUseGPU,
                           &bEmulateGPU,
@@ -2311,6 +2302,11 @@ static void init_nb_verlet(FILE                *fp,
      }
  }
  
+gmx_bool usingGpu(nonbonded_verlet_t *nbv)
+{
+    return nbv != NULL && nbv->bUseGPU;
+}
+
  void init_forcerec(FILE              *fp,
                     const output_env_t oenv,
                     t_forcerec        *fr,
@@ -2327,7 +2323,7 @@ void init_forcerec(FILE              *fp,
                     gmx_bool           bNoSolvOpt,
                     real               print_force)
  {
-    int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
+    int            i, m, negp_pp, negptable, egi, egj;
      real           rtab;
      char          *env;
      double         dbl;
@@ -2335,7 +2331,6 @@ void init_forcerec(FILE              *fp,
      gmx_bool       bGenericKernelOnly;
      gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
      gmx_bool       bFEP_NonBonded;
-    t_nblists     *nbl;
      int           *nm_ind, egp_flags;
  
      if (fr->hwinfo == NULL)
@@ -2352,8 +2347,6 @@ void init_forcerec(FILE              *fp,
  
      fr->bDomDec = DOMAINDECOMP(cr);
  
-    natoms = mtop->natoms;
-
      if (check_box(ir->ePBC, box))
      {
          gmx_fatal(FARGS, check_box(ir->ePBC, box));
@@ -2445,7 +2438,7 @@ void init_forcerec(FILE              *fp,
      if (env != NULL)
      {
          dbl = 0;
-        sscanf(env, "%lf", &dbl);
+        sscanf(env, "%20lf", &dbl);
          fr->sc_sigma6_min = pow(dbl, 6);
          if (fp)
          {
@@ -3093,7 +3086,6 @@ void init_forcerec(FILE              *fp,
                      egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
                      if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
                      {
-                        nbl = &(fr->nblists[m]);
                          if (fr->nnblists > 1)
                          {
                              fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
@@ -3324,3 +3316,46 @@ void forcerec_set_excl_load(t_forcerec           *fr,
          fr->excl_load[t] = i;
      }
  }
+
+/* Frees GPU memory and destroys the CUDA context.
+ *
+ * Note that this function needs to be called even if GPUs are not used
+ * in this run because the PME ranks have no knowledge of whether GPUs
+ * are used or not, but all ranks need to enter the barrier below.
+ */
+void free_gpu_resources(const t_forcerec     *fr,
+                        const t_commrec      *cr,
+                        const gmx_gpu_info_t *gpu_info,
+                        const gmx_gpu_opt_t  *gpu_opt)
+{
+    gmx_bool bIsPPrankUsingGPU;
+    char     gpu_err_str[STRLEN];
+
+    bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
+
+    if (bIsPPrankUsingGPU)
+    {
+        /* free nbnxn data in GPU memory */
+        nbnxn_cuda_free(fr->nbv->cu_nbv);
+
+        /* With tMPI we need to wait for all ranks to finish deallocation before
+         * destroying the context in free_gpu() as some ranks may be sharing
+         * GPU and context.
+         * Note: as only PP ranks need to free GPU resources, so it is safe to
+         * not call the barrier on PME ranks.
+         */
+#ifdef GMX_THREAD_MPI
+        if (PAR(cr))
+        {
+            gmx_barrier(cr);
+        }
+#endif  /* GMX_THREAD_MPI */
+
+        /* uninitialize GPU (by destroying the context) */
+        if (!free_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
+        {
+            gmx_warning("On rank %d failed to free GPU #%d: %s",
+                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
+        }
+    }
+}