Merge branch release-2020 into master

[alexxy/gromacs.git] / src / gromacs / mdlib / forcerec.cpp
diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp

index cb2e49a4dd08470c6a45bb01942caf12743db8ef..2ccb5601bee14a679a1d2bfa1af743153fd19f12 100644 (file)
--- a/src/gromacs/mdlib/forcerec.cpp
+++ b/src/gromacs/mdlib/forcerec.cpp
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013-2020, by the GROMACS development team, led by
+ * Copyright (c) 2013-2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -38,8 +38,6 @@
  
  #include "forcerec.h"
  
-#include "config.h"
-
  #include <cassert>
  #include <cmath>
  #include <cstdlib>
@@ -76,9 +74,11 @@
  #include "gromacs/mdlib/wall.h"
  #include "gromacs/mdtypes/commrec.h"
  #include "gromacs/mdtypes/fcdata.h"
+#include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/mdtypes/group.h"
  #include "gromacs/mdtypes/iforceprovider.h"
  #include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/interaction_const.h"
  #include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/nbnxm/gpu_data_mgmt.h"
  #include "gromacs/nbnxm/nbnxm.h"
@@ -98,22 +98,19 @@
  #include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/strconvert.h"
  
-/*! \brief environment variable to enable GPU P2P communication */
-static const bool c_enableGpuPmePpComms =
-        (getenv("GMX_GPU_PME_PP_COMMS") != nullptr) && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
-
-static real* mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM)
+static std::vector<real> mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM)
  {
-    real* nbfp;
-    int   i, j, k, atnr;
+    std::vector<real> nbfp;
+    int               atnr;
  
      atnr = idef->atnr;
      if (bBHAM)
      {
-        snew(nbfp, 3 * atnr * atnr);
-        for (i = k = 0; (i < atnr); i++)
+        nbfp.resize(3 * atnr * atnr);
+        int k = 0;
+        for (int i = 0; (i < atnr); i++)
          {
-            for (j = 0; (j < atnr); j++, k++)
+            for (int j = 0; (j < atnr); j++, k++)
              {
                  BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
                  BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
@@ -124,10 +121,11 @@ static real* mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM)
      }
      else
      {
-        snew(nbfp, 2 * atnr * atnr);
-        for (i = k = 0; (i < atnr); i++)
+        nbfp.resize(2 * atnr * atnr);
+        int k = 0;
+        for (int i = 0; (i < atnr); i++)
          {
-            for (j = 0; (j < atnr); j++, k++)
+            for (int j = 0; (j < atnr); j++, k++)
              {
                  /* nbfp now includes the 6.0/12.0 derivative prefactors */
                  C6(nbfp, atnr, i, j)  = idef->iparams[k].lj.c6 * 6.0;
@@ -186,14 +184,10 @@ enum
      acSETTLE
  };
  
-static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, gmx_bool* bFEP_NonBonded)
+static std::vector<cginfo_mb_t> init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr)
  {
-    cginfo_mb_t* cginfo_mb;
-    gmx_bool*    type_VDW;
-    int*         cginfo;
-    int*         a_con;
-
-    snew(cginfo_mb, mtop->molblock.size());
+    gmx_bool* type_VDW;
+    int*      a_con;
  
      snew(type_VDW, fr->ntype);
      for (int ai = 0; ai < fr->ntype; ai++)
@@ -206,14 +200,13 @@ static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr,
          }
      }
  
-    *bFEP_NonBonded = FALSE;
-
-    int a_offset = 0;
+    std::vector<cginfo_mb_t> cginfoPerMolblock;
+    int                      a_offset = 0;
      for (size_t mb = 0; mb < mtop->molblock.size(); mb++)
      {
          const gmx_molblock_t& molb = mtop->molblock[mb];
          const gmx_moltype_t&  molt = mtop->moltype[molb.type];
-        const t_blocka&       excl = molt.excls;
+        const auto&           excl = molt.excls;
  
          /* Check if the cginfo is identical for all molecules in this block.
           * If so, we only need an array of the size of one molecule.
@@ -241,11 +234,12 @@ static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr,
              }
          }
  
-        cginfo_mb[mb].cg_start = a_offset;
-        cginfo_mb[mb].cg_end   = a_offset + molb.nmol * molt.atoms.nr;
-        cginfo_mb[mb].cg_mod   = (bId ? 1 : molb.nmol) * molt.atoms.nr;
-        snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
-        cginfo = cginfo_mb[mb].cginfo;
+        cginfo_mb_t cginfo_mb;
+        cginfo_mb.cg_start = a_offset;
+        cginfo_mb.cg_end   = a_offset + molb.nmol * molt.atoms.nr;
+        cginfo_mb.cg_mod   = (bId ? 1 : molb.nmol) * molt.atoms.nr;
+        cginfo_mb.cginfo.resize(cginfo_mb.cg_mod);
+        gmx::ArrayRef<int> cginfo = cginfo_mb.cginfo;
  
          /* Set constraints flags for constrained atoms */
          snew(a_con, molt.atoms.nr);
@@ -285,9 +279,9 @@ static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr,
  
                  bool haveExclusions = false;
                  /* Loop over all the exclusions of atom ai */
-                for (int j = excl.index[a]; j < excl.index[a + 1]; j++)
+                for (const int j : excl[a])
                  {
-                    if (excl.a[j] != a)
+                    if (j != a)
                      {
                          haveExclusions = true;
                          break;
@@ -315,21 +309,22 @@ static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr,
                  if (fr->efep != efepNO && PERTURBED(atom))
                  {
                      SET_CGINFO_FEP(atomInfo);
-                    *bFEP_NonBonded = TRUE;
                  }
              }
          }
  
          sfree(a_con);
  
+        cginfoPerMolblock.push_back(cginfo_mb);
+
          a_offset += molb.nmol * molt.atoms.nr;
      }
      sfree(type_VDW);
  
-    return cginfo_mb;
+    return cginfoPerMolblock;
  }
  
-static std::vector<int> cginfo_expand(const int nmb, const cginfo_mb_t* cgi_mb)
+static std::vector<int> cginfo_expand(const int nmb, gmx::ArrayRef<const cginfo_mb_t> cgi_mb)
  {
      const int ncg = cgi_mb[nmb - 1].cg_end;
  
@@ -348,19 +343,6 @@ static std::vector<int> cginfo_expand(const int nmb, const cginfo_mb_t* cgi_mb)
      return cginfo;
  }
  
-static void done_cginfo_mb(cginfo_mb_t* cginfo_mb, int numMolBlocks)
-{
-    if (cginfo_mb == nullptr)
-    {
-        return;
-    }
-    for (int mb = 0; mb < numMolBlocks; ++mb)
-    {
-        sfree(cginfo_mb[mb].cginfo);
-    }
-    sfree(cginfo_mb);
-}
-
  /* Sets the sum of charges (squared) and C6 in the system in fr.
   * Returns whether the system has a net charge.
   */
@@ -1016,24 +998,14 @@ void init_forcerec(FILE*                            fp,
                     const char*                      tabfn,
                     const char*                      tabpfn,
                     gmx::ArrayRef<const std::string> tabbfnm,
-                   const gmx_hw_info_t&             hardwareInfo,
-                   const gmx_device_info_t*         deviceInfo,
-                   const bool                       useGpuForBonded,
-                   const bool                       pmeOnlyRankUsesGpu,
-                   real                             print_force,
-                   gmx_wallcycle*                   wcycle)
+                   real                             print_force)
  {
-    real     rtab;
-    char*    env;
-    double   dbl;
-    gmx_bool bFEP_NonBonded;
-
      /* By default we turn SIMD kernels on, but it might be turned off further down... */
      fr->use_simd_kernels = TRUE;
  
-    if (check_box(ir->ePBC, box))
+    if (check_box(ir->pbcType, box))
      {
-        gmx_fatal(FARGS, "%s", check_box(ir->ePBC, box));
+        gmx_fatal(FARGS, "%s", check_box(ir->pbcType, box));
      }
  
      /* Test particle insertion ? */
@@ -1091,10 +1063,10 @@ void init_forcerec(FILE*                            fp,
      fr->sc_r_power    = ir->fepvals->sc_r_power;
      fr->sc_sigma6_def = gmx::power6(ir->fepvals->sc_sigma);
  
-    env = getenv("GMX_SCSIGMA_MIN");
+    char* env = getenv("GMX_SCSIGMA_MIN");
      if (env != nullptr)
      {
-        dbl = 0;
+        double dbl = 0;
          sscanf(env, "%20lf", &dbl);
          fr->sc_sigma6_min = gmx::power6(dbl);
          if (fp)
@@ -1131,10 +1103,10 @@ void init_forcerec(FILE*                            fp,
  
      /* Neighbour searching stuff */
      fr->cutoff_scheme = ir->cutoff_scheme;
-    fr->ePBC          = ir->ePBC;
+    fr->pbcType       = ir->pbcType;
  
      /* Determine if we will do PBC for distances in bonded interactions */
-    if (fr->ePBC == epbcNONE)
+    if (fr->pbcType == PbcType::No)
      {
          fr->bMolPBC = FALSE;
      }
@@ -1154,7 +1126,7 @@ void init_forcerec(FILE*                            fp,
          }
          else
          {
-            fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
+            fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->pbcType);
  
              if (useEwaldSurfaceCorrection && !dd_moleculesAreAlwaysWhole(*cr->dd))
              {
@@ -1283,7 +1255,7 @@ void init_forcerec(FILE*                            fp,
  
      fr->shiftForces.resize(SHIFTS);
  
-    if (fr->nbfp == nullptr)
+    if (fr->nbfp.empty())
      {
          fr->ntype = mtop->ffparams.atnr;
          fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
@@ -1341,7 +1313,7 @@ void init_forcerec(FILE*                            fp,
       * in that case grompp should already have checked that we do not need
       * normal tables and we only generate tables for 1-4 interactions.
       */
-    rtab = ir->rlist + ir->tabext;
+    real rtab = ir->rlist + ir->tabext;
  
      /* We want to use unmodified tables for 1-4 coulombic
       * interactions, so we must in general have an extra set of
@@ -1409,7 +1381,7 @@ void init_forcerec(FILE*                            fp,
      }
  
      /* Set all the static charge group info */
-    fr->cginfo_mb = init_cginfo_mb(mtop, fr, &bFEP_NonBonded);
+    fr->cginfo_mb = init_cginfo_mb(mtop, fr);
      if (!DOMAINDECOMP(cr))
      {
          fr->cginfo = cginfo_expand(mtop->molblock.size(), fr->cginfo_mb);
@@ -1429,43 +1401,10 @@ void init_forcerec(FILE*                            fp,
      fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
      snew(fr->ewc_t, fr->nthread_ewc);
  
-    if (fr->cutoff_scheme == ecutsVERLET)
-    {
-        // We checked the cut-offs in grompp, but double-check here.
-        // We have PME+LJcutoff kernels for rcoulomb>rvdw.
-        if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT)
-        {
-            GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw,
-                               "With Verlet lists and PME we should have rcoulomb>=rvdw");
-        }
-        else
-        {
-            GMX_RELEASE_ASSERT(
-                    ir->rcoulomb == ir->rvdw,
-                    "With Verlet lists and no PME rcoulomb and rvdw should be identical");
-        }
-
-        fr->nbv = Nbnxm::init_nb_verlet(mdlog, bFEP_NonBonded, ir, fr, cr, hardwareInfo, deviceInfo,
-                                        mtop, box, wcycle);
-
-        if (useGpuForBonded)
-        {
-            auto stream = havePPDomainDecomposition(cr)
-                                  ? Nbnxm::gpu_get_command_stream(
-                                            fr->nbv->gpu_nbv, gmx::InteractionLocality::NonLocal)
-                                  : Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv,
-                                                                  gmx::InteractionLocality::Local);
-            // TODO the heap allocation is only needed while
-            // t_forcerec lacks a constructor.
-            fr->gpuBonded = new gmx::GpuBonded(mtop->ffparams, stream, wcycle);
-        }
-    }
-
      if (ir->eDispCorr != edispcNO)
      {
          fr->dispersionCorrection = std::make_unique<DispersionCorrection>(
-                *mtop, *ir, fr->bBHAM, fr->ntype,
-                gmx::arrayRefFromArray(fr->nbfp, fr->ntype * fr->ntype * 2), *fr->ic, tabfn);
+                *mtop, *ir, fr->bBHAM, fr->ntype, fr->nbfp, *fr->ic, tabfn);
          fr->dispersionCorrection->print(mdlog);
      }
  
@@ -1477,76 +1416,14 @@ void init_forcerec(FILE*                            fp,
           */
          fprintf(fp, "\n");
      }
-
-    if (pmeOnlyRankUsesGpu && c_enableGpuPmePpComms)
-    {
-        fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(cr->mpi_comm_mysim, cr->dd->pme_nodeid);
-    }
  }
  
  t_forcerec::t_forcerec() = default;
  
-t_forcerec::~t_forcerec() = default;
-
-/* Frees GPU memory and sets a tMPI node barrier.
- *
- * Note that this function needs to be called even if GPUs are not used
- * in this run because the PME ranks have no knowledge of whether GPUs
- * are used or not, but all ranks need to enter the barrier below.
- * \todo Remove physical node barrier from this function after making sure
- * that it's not needed anymore (with a shared GPU run).
- */
-void free_gpu_resources(t_forcerec*                          fr,
-                        const gmx::PhysicalNodeCommunicator& physicalNodeCommunicator,
-                        const gmx_gpu_info_t&                gpu_info)
+t_forcerec::~t_forcerec()
  {
-    bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->useGpu();
-
-    /* stop the GPU profiler (only CUDA) */
-    if (gpu_info.n_dev > 0)
-    {
-        stopGpuProfiler();
-    }
-
-    if (isPPrankUsingGPU)
-    {
-        /* Free data in GPU memory and pinned memory before destroying the GPU context */
-        fr->nbv.reset();
-
-        delete fr->gpuBonded;
-        fr->gpuBonded = nullptr;
-    }
-
-    /* With tMPI we need to wait for all ranks to finish deallocation before
-     * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
-     * GPU and context.
-     *
-     * This is not a concern in OpenCL where we use one context per rank which
-     * is freed in nbnxn_gpu_free().
-     *
-     * Note: it is safe to not call the barrier on the ranks which do not use GPU,
-     * but it is easier and more futureproof to call it on the whole node.
-     */
-    if (GMX_THREAD_MPI)
-    {
-        physicalNodeCommunicator.barrier();
-    }
-}
-
-void done_forcerec(t_forcerec* fr, int numMolBlocks)
-{
-    if (fr == nullptr)
-    {
-        // PME-only ranks don't have a forcerec
-        return;
-    }
-    done_cginfo_mb(fr->cginfo_mb, numMolBlocks);
-    sfree(fr->nbfp);
-    delete fr->ic;
-    sfree(fr->shift_vec);
-    sfree(fr->ewc_t);
-    tear_down_bonded_threading(fr->bondedThreading);
-    GMX_RELEASE_ASSERT(fr->gpuBonded == nullptr, "Should have been deleted earlier, when used");
-    fr->bondedThreading = nullptr;
-    delete fr;
+    /* Note: This code will disappear when types are converted to C++ */
+    sfree(shift_vec);
+    sfree(ewc_t);
+    tear_down_bonded_threading(bondedThreading);
  }