Disable PME Mixed mode with FEP

[alexxy/gromacs.git] / src / gromacs / taskassignment / decidegpuusage.cpp
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp

index c184f89a7698117d27fd597f90c11179fff99b75..bc9af8d4e09ade7668c959c3345e9723dc00e5e8 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -1,7 +1,8 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -56,8 +57,9 @@
  #include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/hardwaretopology.h"
  #include "gromacs/hardware/hw_info.h"
+#include "gromacs/listed_forces/gpubonded.h"
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/update_constrain_cuda.h"
+#include "gromacs/mdlib/update_constrain_gpu.h"
  #include "gromacs/mdtypes/commrec.h"
  #include "gromacs/mdtypes/inputrec.h"
  #include "gromacs/mdtypes/md_enums.h"
@@ -84,16 +86,21 @@ namespace
  const char* g_specifyEverythingFormatString =
          "When you use mdrun -gputasks, %s must be set to non-default "
          "values, so that the device IDs can be interpreted correctly."
-#if GMX_GPU != GMX_GPU_NONE
+#if GMX_GPU
          " If you simply want to restrict which GPUs are used, then it is "
          "better to use mdrun -gpu_id. Otherwise, setting the "
-#    if GMX_GPU == GMX_GPU_CUDA
+#    if GMX_GPU_CUDA
          "CUDA_VISIBLE_DEVICES"
-#    elif GMX_GPU == GMX_GPU_OPENCL
+#    elif GMX_GPU_OPENCL
          // Technically there is no portable way to do this offered by the
          // OpenCL standard, but the only current relevant case for GROMACS
          // is AMD OpenCL, which offers this variable.
          "GPU_DEVICE_ORDINAL"
+#    elif GMX_GPU_SYCL
+        // As with OpenCL, there is no portable way to do it.
+        // Intel reference: https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
+        // While SYCL_DEVICE_FILTER is a better option, as of 2021.1-beta10 it is not yet supported.
+        "SYCL_DEVICE_ALLOWLIST"
  #    else
  #        error "Unreachable branch"
  #    endif
@@ -105,7 +112,7 @@ const char* g_specifyEverythingFormatString =
  } // namespace
  
  bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        nonbondedTarget,
-                                                     const std::vector<int>& gpuIdsToUse,
+                                                     const int               numDevicesToUse,
                                                       const std::vector<int>& userGpuTaskAssignment,
                                                       const EmulateGpuNonbonded emulateGpuNonbonded,
                                                       const bool buildSupportsNonbondedOnGpu,
@@ -141,29 +148,33 @@ bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        non
      // Because this is thread-MPI, we already know about the GPUs that
      // all potential ranks can use, and can use that in a global
      // decision that will later be consistent.
-    auto haveGpus = !gpuIdsToUse.empty();
-
      // If we get here, then the user permitted or required GPUs.
-    return haveGpus;
+    return (numDevicesToUse > 0);
  }
  
  bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
                                                 const TaskTarget        pmeTarget,
-                                               const std::vector<int>& gpuIdsToUse,
+                                               const TaskTarget        pmeFftTarget,
+                                               const int               numDevicesToUse,
                                                 const std::vector<int>& userGpuTaskAssignment,
                                                 const gmx_hw_info_t&    hardwareInfo,
                                                 const t_inputrec&       inputrec,
-                                               const gmx_mtop_t&       mtop,
                                                 const int               numRanksPerSimulation,
                                                 const int               numPmeRanksPerSimulation)
  {
      // First, exclude all cases where we can't run PME on GPUs.
      if ((pmeTarget == TaskTarget::Cpu) || !useGpuForNonbonded || !pme_gpu_supports_build(nullptr)
-        || !pme_gpu_supports_hardware(hardwareInfo, nullptr)
-        || !pme_gpu_supports_input(inputrec, mtop, nullptr))
+        || !pme_gpu_supports_hardware(hardwareInfo, nullptr) || !pme_gpu_supports_input(inputrec, nullptr))
+    {
+        // PME can't run on a GPU. If the user required that, we issue an error later.
+        return false;
+    }
+
+    if (pmeFftTarget == TaskTarget::Cpu && !pme_gpu_mixed_mode_supports_input(inputrec, nullptr))
      {
-        // PME can't run on a GPU. If the user required that, we issue
-        // an error later.
+        /* User requested PME FFT on CPU, but the current system is not compatible with Mixed mode,
+         * so we don't use GPUs at all.
+         * If the user had -pme gpu, we issue an error later. */
          return false;
      }
  
@@ -220,7 +231,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuFor
      {
          // PME can run well on a GPU shared with NB, and we permit
          // mdrun to default to try that.
-        return !gpuIdsToUse.empty();
+        return numDevicesToUse > 0;
      }
  
      if (numRanksPerSimulation < 1)
@@ -228,7 +239,7 @@ bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuFor
          // Full automated mode for thread-MPI (the default). PME can
          // run well on a GPU shared with NB, and we permit mdrun to
          // default to it if there is only one GPU available.
-        return (gpuIdsToUse.size() == 1);
+        return (numDevicesToUse == 1);
      }
  
      // Not enough support for PME on GPUs for anything else
@@ -319,15 +330,15 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget          nonbondedTarge
  
      // If we get here, then the user permitted GPUs, which we should
      // use for nonbonded interactions.
-    return gpusWereDetected;
+    return buildSupportsNonbondedOnGpu && gpusWereDetected;
  }
  
  bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                    const TaskTarget        pmeTarget,
+                                  const TaskTarget        pmeFftTarget,
                                    const std::vector<int>& userGpuTaskAssignment,
                                    const gmx_hw_info_t&    hardwareInfo,
                                    const t_inputrec&       inputrec,
-                                  const gmx_mtop_t&       mtop,
                                    const int               numRanksPerSimulation,
                                    const int               numPmeRanksPerSimulation,
                                    const bool              gpusWereDetected)
@@ -364,7 +375,7 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
          }
          return false;
      }
-    if (!pme_gpu_supports_input(inputrec, mtop, &message))
+    if (!pme_gpu_supports_input(inputrec, &message))
      {
          if (pmeTarget == TaskTarget::Gpu)
          {
@@ -372,6 +383,16 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
          }
          return false;
      }
+    if (pmeFftTarget == TaskTarget::Cpu && !pme_gpu_mixed_mode_supports_input(inputrec, &message))
+    {
+        /* User requested PME FFT on CPU, but the current system is not compatible with Mixed mode,
+         * so we don't use GPUs at all. */
+        if (pmeTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(NotImplementedError("Cannot compute PME interactions in Mixed mode, because " + message));
+        }
+        return false;
+    }
  
      if (pmeTarget == TaskTarget::Cpu)
      {
@@ -428,27 +449,67 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
      return false;
  }
  
-bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
-                                     const bool       useGpuForPme,
-                                     const TaskTarget bondedTarget,
-                                     const bool       canUseGpuForBonded,
-                                     const bool       usingLJPme,
-                                     const bool       usingElecPmeOrEwald,
-                                     const int        numPmeRanksPerSimulation,
-                                     const bool       gpusWereDetected)
+
+PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
+{ // Maps the PME GPU decision and the requested FFT target onto a PmeRunMode value.
+    if (!EEL_PME(inputrec.coulombtype)) // EEL_PME() is false for this coulombtype: no PME run mode applies
+    {
+        return PmeRunMode::None;
+    }
+
+    if (useGpuForPme) // PME was assigned to the GPU
+    {
+        if (pmeFftTarget == TaskTarget::Cpu) // FFT explicitly targeted at the CPU selects Mixed mode
+        {
+            return PmeRunMode::Mixed;
+        }
+        else // any other FFT target yields the GPU run mode
+        {
+            return PmeRunMode::GPU;
+        }
+    }
+    else // PME was not assigned to the GPU
+    {
+        if (pmeFftTarget == TaskTarget::Gpu) // FFT on GPU requested without PME on GPU: reported through gmx_fatal
+        {
+            gmx_fatal(FARGS,
+                      "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
+                      "on CPU you should not be using -pmefft.");
+        }
+        return PmeRunMode::CPU; // NOTE(review): presumably gmx_fatal does not return, so the FFT target is not Gpu here - confirm
+    }
+}
+
+bool decideWhetherToUseGpusForBonded(bool              useGpuForNonbonded,
+                                     bool              useGpuForPme,
+                                     TaskTarget        bondedTarget,
+                                     const t_inputrec& inputrec,
+                                     const gmx_mtop_t& mtop,
+                                     int               numPmeRanksPerSimulation,
+                                     bool              gpusWereDetected)
  {
      if (bondedTarget == TaskTarget::Cpu)
      {
          return false;
      }
  
-    if (!canUseGpuForBonded)
+    std::string errorMessage;
+
+    if (!buildSupportsGpuBondeds(&errorMessage))
      {
          if (bondedTarget == TaskTarget::Gpu)
          {
-            GMX_THROW(InconsistentInputError(
-                    "Bonded interactions on the GPU were required, but not supported for these "
-                    "simulation settings. Change your settings, or do not require using GPUs."));
+            GMX_THROW(InconsistentInputError(errorMessage.c_str()));
+        }
+
+        return false;
+    }
+
+    if (!inputSupportsGpuBondeds(inputrec, mtop, &errorMessage))
+    {
+        if (bondedTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(InconsistentInputError(errorMessage.c_str()));
          }
  
          return false;
@@ -484,75 +545,86 @@ bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
      // Note that here we assume that the auto setting of PME ranks will not
      // choose seperate PME ranks when nonBonded are assigned to the GPU.
      bool usingOurCpuForPmeOrEwald =
-            (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
+            (EVDW_PME(inputrec.vdwtype)
+             || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
  
      return gpusWereDetected && usingOurCpuForPmeOrEwald;
  }
  
-bool decideWhetherToUseGpuForUpdate(const bool           forceGpuUpdateDefault,
-                                    const bool           isDomainDecomposition,
-                                    const bool           useUpdateGroups,
-                                    const PmeRunMode     pmeRunMode,
-                                    const bool           havePmeOnlyRank,
-                                    const bool           useGpuForNonbonded,
-                                    const TaskTarget     updateTarget,
-                                    const bool           gpusWereDetected,
-                                    const t_inputrec&    inputrec,
-                                    const gmx_mtop_t&    mtop,
-                                    const bool           useEssentialDynamics,
-                                    const bool           doOrientationRestraints,
-                                    const bool           useReplicaExchange,
-                                    const bool           doRerun,
-                                    const gmx::MDLogger& mdlog)
+bool decideWhetherToUseGpuForUpdate(const bool                     isDomainDecomposition,
+                                    const bool                     useUpdateGroups,
+                                    const PmeRunMode               pmeRunMode,
+                                    const bool                     havePmeOnlyRank,
+                                    const bool                     useGpuForNonbonded,
+                                    const TaskTarget               updateTarget,
+                                    const bool                     gpusWereDetected,
+                                    const t_inputrec&              inputrec,
+                                    const gmx_mtop_t&              mtop,
+                                    const bool                     useEssentialDynamics,
+                                    const bool                     doOrientationRestraints,
+                                    const bool                     useReplicaExchange,
+                                    const bool                     haveFrozenAtoms,
+                                    const bool                     doRerun,
+                                    const DevelopmentFeatureFlags& devFlags,
+                                    const gmx::MDLogger&           mdlog)
  {
  
      // '-update cpu' overrides the environment variable, '-update auto' does not
-    if (updateTarget == TaskTarget::Cpu || (updateTarget == TaskTarget::Auto && !forceGpuUpdateDefault))
+    if (updateTarget == TaskTarget::Cpu
+        || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
      {
          return false;
      }
  
      const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
+    const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
  
      std::string errorMessage;
  
      if (isDomainDecomposition)
      {
-        if (!forceGpuUpdateDefault)
-        {
-            errorMessage += "Domain decomposition is not supported.\n ";
-        }
-        else if (hasAnyConstraints && !useUpdateGroups)
+        if (hasAnyConstraints && !useUpdateGroups)
          {
              errorMessage +=
-                    "Domain decomposition is only supported with constraints when update groups "
+                    "Domain decomposition is only supported with constraints when update "
+                    "groups "
                      "are used. This means constraining all bonds is not supported, except for "
                      "small molecules, and box sizes close to half the pair-list cutoff are not "
                      "supported.\n ";
          }
      }
+
+    if (havePmeOnlyRank)
+    {
+        if (pmeUsesCpu)
+        {
+            errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
+        }
+    }
+
+    if (inputrec.useMts)
+    {
+        errorMessage += "Multiple time stepping is not supported.\n";
+    }
+
      if (inputrec.eConstrAlg == econtSHAKE && hasAnyConstraints && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
      {
          errorMessage += "SHAKE constraints are not supported.\n";
      }
      // Using the GPU-version of update if:
-    // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread), or
+    // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
      // 2. Non-bonded interactions are on the GPU.
-    if (pmeRunMode == PmeRunMode::CPU && !useGpuForNonbonded)
+    if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
      {
          errorMessage +=
                  "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
      }
-    // Since only direct GPU communications are supported with GPU update, PME should be fully offloaded in DD and PME only cases.
-    if (pmeRunMode != PmeRunMode::GPU && (isDomainDecomposition || havePmeOnlyRank))
-    {
-        errorMessage += "PME should run on GPU.\n";
-    }
+
      if (!gpusWereDetected)
      {
          errorMessage += "Compatible GPUs must have been found.\n";
      }
-    if (GMX_GPU != GMX_GPU_CUDA)
+    if (!GMX_GPU_CUDA)
      {
          errorMessage += "Only a CUDA build is supported.\n";
      }
@@ -564,9 +636,12 @@ bool decideWhetherToUseGpuForUpdate(const bool           forceGpuUpdateDefault,
      {
          errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
      }
-    if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN || inputrec.epc == epcBERENDSEN))
+    if (!(inputrec.epc == epcNO || inputrec.epc == epcPARRINELLORAHMAN
+          || inputrec.epc == epcBERENDSEN || inputrec.epc == epcCRESCALE))
      {
-        errorMessage += "Only Parrinello-Rahman and Berendsen pressure coupling are supported.\n";
+        errorMessage +=
+                "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
+                "supported.\n";
      }
      if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
      {
@@ -581,7 +656,7 @@ bool decideWhetherToUseGpuForUpdate(const bool           forceGpuUpdateDefault,
      {
          errorMessage += "Essential dynamics is not supported.\n";
      }
-    if (inputrec.bPull && pull_have_constraint(inputrec.pull))
+    if (inputrec.bPull && pull_have_constraint(*inputrec.pull))
      {
          errorMessage += "Constraints pulling is not supported.\n";
      }
@@ -590,10 +665,14 @@ bool decideWhetherToUseGpuForUpdate(const bool           forceGpuUpdateDefault,
          // The graph is needed, but not supported
          errorMessage += "Orientation restraints are not supported.\n";
      }
-    if (inputrec.efep != efepNO)
+    if (inputrec.efep != efepNO && (haveFepPerturbedMasses(mtop) || havePerturbedConstraints(mtop)))
      {
-        // Actually all free-energy options except for mass and constraint perturbation are supported
-        errorMessage += "Free energy perturbations are not supported.\n";
+        errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
+    }
+    const auto particleTypes = gmx_mtop_particletype_count(mtop);
+    if (particleTypes[eptShell] > 0)
+    {
+        errorMessage += "Shells are not supported.\n";
      }
      if (useReplicaExchange)
      {
@@ -612,18 +691,23 @@ bool decideWhetherToUseGpuForUpdate(const bool           forceGpuUpdateDefault,
      // does not support it, the actual CUDA LINCS code does support it
      if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
      {
-        errorMessage += "Non-connecting constraints are not supported";
+        errorMessage += "Non-connecting constraints are not supported\n";
      }
-    if (!UpdateConstrainCuda::isNumCoupledConstraintsSupported(mtop))
+    if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
      {
          errorMessage +=
-                "The number of coupled constraints is higher than supported in the CUDA LINCS "
+                "The number of coupled constraints is higher than supported in the GPU LINCS "
                  "code.\n";
      }
+    if (haveFrozenAtoms)
+    {
+        // There is a known bug with frozen atoms and GPU update, see Issue #3920.
+        errorMessage += "Frozen atoms not supported.\n";
+    }
  
      if (!errorMessage.empty())
      {
-        if (updateTarget != TaskTarget::Gpu && forceGpuUpdateDefault)
+        if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
          {
              GMX_LOG(mdlog.warning)
                      .asParagraph()
@@ -644,14 +728,19 @@ bool decideWhetherToUseGpuForUpdate(const bool           forceGpuUpdateDefault,
          return false;
      }
  
-    if (isDomainDecomposition)
-    {
-        return forceGpuUpdateDefault;
-    }
-    else
-    {
-        return (updateTarget == TaskTarget::Gpu || forceGpuUpdateDefault);
-    }
+    return (updateTarget == TaskTarget::Gpu
+            || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
+}
+
+bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
+                                  bool                           havePPDomainDecomposition,
+                                  bool                           useGpuForNonbonded,
+                                  bool                           useModularSimulator,
+                                  bool                           doRerun,
+                                  bool                           haveEnergyMinimization)
+{ // Returns true only when every enabling condition holds and no excluding feature is active.
+    return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded // required: all three must be true
+           && !useModularSimulator && !doRerun && !haveEnergyMinimization; // excluded: any one of these being true gives false
  }
  
  } // namespace gmx