Activate GPU update support in SYCL build
[alexxy/gromacs.git] / src/gromacs/taskassignment/decidegpuusage.cpp
index 148765993527815ea50a04859ba09872a9fa3719..5bac5adcef47df7fd726cdb4fd16aa3d7b02930a 100644 (file)
@@ -1,7 +1,8 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
 #include "gromacs/hardware/detecthardware.h"
 #include "gromacs/hardware/hardwaretopology.h"
 #include "gromacs/hardware/hw_info.h"
+#include "gromacs/listed_forces/listed_forces_gpu.h"
 #include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/mdatoms.h"
+#include "gromacs/mdlib/update_constrain_gpu.h"
 #include "gromacs/mdtypes/commrec.h"
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/mdtypes/mdrunoptions.h"
+#include "gromacs/pulling/pull.h"
 #include "gromacs/taskassignment/taskassignment.h"
+#include "gromacs/topology/mtop_util.h"
 #include "gromacs/topology/topology.h"
 #include "gromacs/utility/baseversion.h"
 #include "gromacs/utility/exceptions.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/logger.h"
+#include "gromacs/utility/message_string_collector.h"
 #include "gromacs/utility/stringutil.h"
 
 
@@ -79,43 +84,48 @@ namespace
 {
 
 //! Helper variable to localise the text of an often repeated message.
-const char * g_specifyEverythingFormatString =
-    "When you use mdrun -gputasks, %s must be set to non-default "
-    "values, so that the device IDs can be interpreted correctly."
-#if GMX_GPU != GMX_GPU_NONE
-    " If you simply want to restrict which GPUs are used, then it is "
-    "better to use mdrun -gpu_id. Otherwise, setting the "
-#  if GMX_GPU == GMX_GPU_CUDA
-    "CUDA_VISIBLE_DEVICES"
-#  elif GMX_GPU == GMX_GPU_OPENCL
-    // Technically there is no portable way to do this offered by the
-    // OpenCL standard, but the only current relevant case for GROMACS
-    // is AMD OpenCL, which offers this variable.
-    "GPU_DEVICE_ORDINAL"
-#  else
-#  error "Unreachable branch"
-#  endif
-    " environment variable in your bash profile or job "
-    "script may be more convenient."
+const char* const g_specifyEverythingFormatString =
+        "When you use mdrun -gputasks, %s must be set to non-default "
+        "values, so that the device IDs can be interpreted correctly."
+#if GMX_GPU
+        " If you simply want to restrict which GPUs are used, then it is "
+        "better to use mdrun -gpu_id. Otherwise, setting the "
+#    if GMX_GPU_CUDA
+        "CUDA_VISIBLE_DEVICES"
+#    elif GMX_GPU_OPENCL
+        // Technically there is no portable way to do this offered by the
+        // OpenCL standard, but the only current relevant case for GROMACS
+        // is AMD OpenCL, which offers this variable.
+        "GPU_DEVICE_ORDINAL"
+#    elif GMX_GPU_SYCL && GMX_SYCL_DPCPP
+        // https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
+        "SYCL_DEVICE_FILTER"
+#    elif GMX_GPU_SYCL && GMX_SYCL_HIPSYCL
+        // Not true if we use hipSYCL over CUDA or IntelLLVM, but in that case the user probably
+        // knows what they are doing.
+        // https://rocmdocs.amd.com/en/latest/Other_Solutions/Other-Solutions.html#hip-environment-variables
+        "HIP_VISIBLE_DEVICES"
+#    else
+#        error "Unreachable branch"
+#    endif
+        " environment variable in your bash profile or job "
+        "script may be more convenient."
 #endif
-;
-
-}   // namespace
-
-bool
-decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbondedTarget,
-                                                const std::vector<int>   &gpuIdsToUse,
-                                                const std::vector<int>   &userGpuTaskAssignment,
-                                                const EmulateGpuNonbonded emulateGpuNonbonded,
-                                                const bool                buildSupportsNonbondedOnGpu,
-                                                const bool                nonbondedOnGpuIsUseful,
-                                                const int                 numRanksPerSimulation)
+        ;
+
+} // namespace
+
+bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget        nonbondedTarget,
+                                                     const bool              haveAvailableDevices,
+                                                     const std::vector<int>& userGpuTaskAssignment,
+                                                     const EmulateGpuNonbonded emulateGpuNonbonded,
+                                                     const bool buildSupportsNonbondedOnGpu,
+                                                     const bool nonbondedOnGpuIsUseful,
+                                                     const int  numRanksPerSimulation)
 {
     // First, exclude all cases where we can't run NB on GPUs.
-    if (nonbondedTarget == TaskTarget::Cpu ||
-        emulateGpuNonbonded == EmulateGpuNonbonded::Yes ||
-        !nonbondedOnGpuIsUseful ||
-        !buildSupportsNonbondedOnGpu)
+    if (nonbondedTarget == TaskTarget::Cpu || emulateGpuNonbonded == EmulateGpuNonbonded::Yes
+        || !nonbondedOnGpuIsUseful || !buildSupportsNonbondedOnGpu)
     {
         // If the user required NB on GPUs, we issue an error later.
         return false;
@@ -126,10 +136,10 @@ decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbon
     if (!userGpuTaskAssignment.empty())
     {
         // Specifying -gputasks requires specifying everything.
-        if (nonbondedTarget == TaskTarget::Auto ||
-            numRanksPerSimulation < 1)
+        if (nonbondedTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
         {
-            GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
+            GMX_THROW(InconsistentInputError(
+                    formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
         }
         return true;
     }
@@ -142,32 +152,65 @@ decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget          nonbon
     // Because this is thread-MPI, we already know about the GPUs that
     // all potential ranks can use, and can use that in a global
     // decision that will later be consistent.
-    auto haveGpus = !gpuIdsToUse.empty();
-
     // If we get here, then the user permitted or required GPUs.
-    return haveGpus;
+    return haveAvailableDevices;
+}
+
+static bool canUseGpusForPme(const bool           useGpuForNonbonded,
+                             const TaskTarget     pmeTarget,
+                             const TaskTarget     pmeFftTarget,
+                             const gmx_hw_info_t& hardwareInfo,
+                             const t_inputrec&    inputrec,
+                             std::string*         errorMessage)
+{
+    if (pmeTarget == TaskTarget::Cpu)
+    {
+        return false;
+    }
+
+    std::string                 tempString;
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("Cannot compute PME interactions on a GPU, because:");
+    errorReasons.appendIf(!useGpuForNonbonded, "Nonbonded interactions must also run on GPUs.");
+    errorReasons.appendIf(!pme_gpu_supports_build(&tempString), tempString);
+    errorReasons.appendIf(!pme_gpu_supports_hardware(hardwareInfo, &tempString), tempString);
+    errorReasons.appendIf(!pme_gpu_supports_input(inputrec, &tempString), tempString);
+    if (pmeFftTarget == TaskTarget::Cpu)
+    {
+        // User requested PME FFT on CPU, so we check whether we are able to use PME Mixed mode.
+        errorReasons.appendIf(!pme_gpu_mixed_mode_supports_input(inputrec, &tempString), tempString);
+    }
+    errorReasons.finishContext();
+
+    if (errorReasons.isEmpty())
+    {
+        return true;
+    }
+    else
+    {
+        if (pmeTarget == TaskTarget::Gpu && errorMessage != nullptr)
+        {
+            *errorMessage = errorReasons.toString();
+        }
+        return false;
+    }
 }
 
-bool
-decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
-                                          const TaskTarget        pmeTarget,
-                                          const std::vector<int> &gpuIdsToUse,
-                                          const std::vector<int> &userGpuTaskAssignment,
-                                          const gmx_hw_info_t    &hardwareInfo,
-                                          const t_inputrec       &inputrec,
-                                          const gmx_mtop_t       &mtop,
-                                          const int               numRanksPerSimulation,
-                                          const int               numPmeRanksPerSimulation)
+bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbonded,
+                                               const TaskTarget        pmeTarget,
+                                               const TaskTarget        pmeFftTarget,
+                                               const int               numDevicesToUse,
+                                               const std::vector<int>& userGpuTaskAssignment,
+                                               const gmx_hw_info_t&    hardwareInfo,
+                                               const t_inputrec&       inputrec,
+                                               const int               numRanksPerSimulation,
+                                               const int               numPmeRanksPerSimulation)
 {
     // First, exclude all cases where we can't run PME on GPUs.
-    if ((pmeTarget == TaskTarget::Cpu) ||
-        !useGpuForNonbonded ||
-        !pme_gpu_supports_build(nullptr) ||
-        !pme_gpu_supports_hardware(hardwareInfo, nullptr) ||
-        !pme_gpu_supports_input(inputrec, mtop, nullptr))
-    {
-        // PME can't run on a GPU. If the user required that, we issue
-        // an error later.
+    if (!canUseGpusForPme(useGpuForNonbonded, pmeTarget, pmeFftTarget, hardwareInfo, inputrec, nullptr))
+    {
+        // PME can't run on a GPU. If the user required that, we issue an error later.
         return false;
     }
 
@@ -180,20 +223,21 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
         // later.
 
         // Specifying -gputasks requires specifying everything.
-        if (pmeTarget == TaskTarget::Auto ||
-            numRanksPerSimulation < 1)
+        if (pmeTarget == TaskTarget::Auto || numRanksPerSimulation < 1)
         {
-            GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
+            GMX_THROW(InconsistentInputError(
+                    formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi")));
         }
 
         // PME on GPUs is only supported in a single case
         if (pmeTarget == TaskTarget::Gpu)
         {
-            if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
-                (numPmeRanksPerSimulation > 1))
+            if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
+                || (numPmeRanksPerSimulation > 1))
             {
-                GMX_THROW(InconsistentInputError
-                              ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
+                GMX_THROW(InconsistentInputError(
+                        "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
+                        "file and use a single PME rank."));
             }
             return true;
         }
@@ -208,13 +252,13 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
 
     if (pmeTarget == TaskTarget::Gpu)
     {
-        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
-            (numPmeRanksPerSimulation > 1))
+        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
+            || (numPmeRanksPerSimulation > 1))
         {
-            GMX_THROW(NotImplementedError
-                          ("PME tasks were required to run on GPUs, but that is not implemented with "
-                          "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
-                          "or permit PME tasks to be assigned to the CPU."));
+            GMX_THROW(NotImplementedError(
+                    "PME tasks were required to run on GPUs, but that is not implemented with "
+                    "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
+                    "or permit PME tasks to be assigned to the CPU."));
         }
         return true;
     }
@@ -223,7 +267,7 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
     {
         // PME can run well on a GPU shared with NB, and we permit
         // mdrun to default to try that.
-        return !gpuIdsToUse.empty();
+        return numDevicesToUse > 0;
     }
 
     if (numRanksPerSimulation < 1)
@@ -231,27 +275,27 @@ decideWhetherToUseGpusForPmeWithThreadMpi(const bool              useGpuForNonbo
         // Full automated mode for thread-MPI (the default). PME can
         // run well on a GPU shared with NB, and we permit mdrun to
         // default to it if there is only one GPU available.
-        return (gpuIdsToUse.size() == 1);
+        return (numDevicesToUse == 1);
     }
 
     // Not enough support for PME on GPUs for anything else
     return false;
 }
 
-bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarget,
-                                        const std::vector<int>    &userGpuTaskAssignment,
-                                        const EmulateGpuNonbonded  emulateGpuNonbonded,
-                                        const bool                 buildSupportsNonbondedOnGpu,
-                                        const bool                 nonbondedOnGpuIsUseful,
-                                        const bool                 gpusWereDetected)
+bool decideWhetherToUseGpusForNonbonded(const TaskTarget          nonbondedTarget,
+                                        const std::vector<int>&   userGpuTaskAssignment,
+                                        const EmulateGpuNonbonded emulateGpuNonbonded,
+                                        const bool                buildSupportsNonbondedOnGpu,
+                                        const bool                nonbondedOnGpuIsUseful,
+                                        const bool                gpusWereDetected)
 {
     if (nonbondedTarget == TaskTarget::Cpu)
     {
         if (!userGpuTaskAssignment.empty())
         {
-            GMX_THROW(InconsistentInputError
-                          ("A GPU task assignment was specified, but nonbonded interactions were "
-                          "assigned to the CPU. Make no more than one of these choices."));
+            GMX_THROW(InconsistentInputError(
+                    "A GPU task assignment was specified, but nonbonded interactions were "
+                    "assigned to the CPU. Make no more than one of these choices."));
         }
 
         return false;
@@ -259,11 +303,11 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
 
     if (!buildSupportsNonbondedOnGpu && nonbondedTarget == TaskTarget::Gpu)
     {
-        GMX_THROW(InconsistentInputError
-                      ("Nonbonded interactions on the GPU were requested with -nb gpu, "
-                      "but the GROMACS binary has been built without GPU support. "
-                      "Either run without selecting GPU options, or recompile GROMACS "
-                      "with GPU support enabled"));
+        GMX_THROW(InconsistentInputError(
+                "Nonbonded interactions on the GPU were requested with -nb gpu, "
+                "but the GROMACS binary has been built without GPU support. "
+                "Either run without selecting GPU options, or recompile GROMACS "
+                "with GPU support enabled"));
     }
 
     // TODO refactor all these TaskTarget::Gpu checks into one place?
@@ -273,14 +317,15 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
     {
         if (nonbondedTarget == TaskTarget::Gpu)
         {
-            GMX_THROW(InconsistentInputError
-                          ("Nonbonded interactions on the GPU were required, which is inconsistent "
-                          "with choosing emulation. Make no more than one of these choices."));
+            GMX_THROW(InconsistentInputError(
+                    "Nonbonded interactions on the GPU were required, which is inconsistent "
+                    "with choosing emulation. Make no more than one of these choices."));
         }
         if (!userGpuTaskAssignment.empty())
         {
-            GMX_THROW(InconsistentInputError
-                          ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
+            GMX_THROW(
+                    InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
+                                           "no more than one of these choices."));
         }
 
         return false;
@@ -290,9 +335,9 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
     {
         if (nonbondedTarget == TaskTarget::Gpu)
         {
-            GMX_THROW(InconsistentInputError
-                          ("Nonbonded interactions on the GPU were required, but not supported for these "
-                          "simulation settings. Change your settings, or do not require using GPUs."));
+            GMX_THROW(InconsistentInputError(
+                    "Nonbonded interactions on the GPU were required, but not supported for these "
+                    "simulation settings. Change your settings, or do not require using GPUs."));
         }
 
         return false;
@@ -303,7 +348,8 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
         // Specifying -gputasks requires specifying everything.
         if (nonbondedTarget == TaskTarget::Auto)
         {
-            GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
+            GMX_THROW(InconsistentInputError(
+                    formatString(g_specifyEverythingFormatString, "-nb and -ntmpi")));
         }
 
         return true;
@@ -320,59 +366,25 @@ bool decideWhetherToUseGpusForNonbonded(const TaskTarget           nonbondedTarg
 
     // If we get here, then the user permitted GPUs, which we should
     // use for nonbonded interactions.
-    return gpusWereDetected;
+    return buildSupportsNonbondedOnGpu && gpusWereDetected;
 }
 
 bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
                                   const TaskTarget        pmeTarget,
-                                  const std::vector<int> &userGpuTaskAssignment,
-                                  const gmx_hw_info_t    &hardwareInfo,
-                                  const t_inputrec       &inputrec,
-                                  const gmx_mtop_t       &mtop,
+                                  const TaskTarget        pmeFftTarget,
+                                  const std::vector<int>& userGpuTaskAssignment,
+                                  const gmx_hw_info_t&    hardwareInfo,
+                                  const t_inputrec&       inputrec,
                                   const int               numRanksPerSimulation,
                                   const int               numPmeRanksPerSimulation,
                                   const bool              gpusWereDetected)
 {
-    if (pmeTarget == TaskTarget::Cpu)
-    {
-        return false;
-    }
-
-    if (!useGpuForNonbonded)
-    {
-        if (pmeTarget == TaskTarget::Gpu)
-        {
-            GMX_THROW(NotImplementedError
-                          ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
-        }
-        return false;
-    }
-
     std::string message;
-    if (!pme_gpu_supports_build(&message))
-    {
-        if (pmeTarget == TaskTarget::Gpu)
-        {
-            GMX_THROW(NotImplementedError
-                          ("Cannot compute PME interactions on a GPU, because " + message));
-        }
-        return false;
-    }
-    if (!pme_gpu_supports_hardware(hardwareInfo, &message))
+    if (!canUseGpusForPme(useGpuForNonbonded, pmeTarget, pmeFftTarget, hardwareInfo, inputrec, &message))
     {
-        if (pmeTarget == TaskTarget::Gpu)
-        {
-            GMX_THROW(NotImplementedError
-                          ("Cannot compute PME interactions on a GPU, because " + message));
-        }
-        return false;
-    }
-    if (!pme_gpu_supports_input(inputrec, mtop, &message))
-    {
-        if (pmeTarget == TaskTarget::Gpu)
+        if (!message.empty())
         {
-            GMX_THROW(NotImplementedError
-                          ("Cannot compute PME interactions on a GPU, because " + message));
+            GMX_THROW(InconsistentInputError(message));
         }
         return false;
     }
@@ -381,9 +393,9 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
     {
         if (!userGpuTaskAssignment.empty())
         {
-            GMX_THROW(InconsistentInputError
-                          ("A GPU task assignment was specified, but PME interactions were "
-                          "assigned to the CPU. Make no more than one of these choices."));
+            GMX_THROW(InconsistentInputError(
+                    "A GPU task assignment was specified, but PME interactions were "
+                    "assigned to the CPU. Make no more than one of these choices."));
         }
 
         return false;
@@ -394,7 +406,8 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
         // Specifying -gputasks requires specifying everything.
         if (pmeTarget == TaskTarget::Auto)
         {
-            GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
+            GMX_THROW(InconsistentInputError(formatString(
+                    g_specifyEverythingFormatString, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
         }
 
         return true;
@@ -407,13 +420,13 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
 
     if (pmeTarget == TaskTarget::Gpu)
     {
-        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0)) ||
-            (numPmeRanksPerSimulation > 1))
+        if (((numRanksPerSimulation > 1) && (numPmeRanksPerSimulation == 0))
+            || (numPmeRanksPerSimulation > 1))
         {
-            GMX_THROW(NotImplementedError
-                          ("PME tasks were required to run on GPUs, but that is not implemented with "
-                          "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
-                          "or permit PME tasks to be assigned to the CPU."));
+            GMX_THROW(NotImplementedError(
+                    "PME tasks were required to run on GPUs, but that is not implemented with "
+                    "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
+                    "or permit PME tasks to be assigned to the CPU."));
         }
         return true;
     }
@@ -431,27 +444,67 @@ bool decideWhetherToUseGpusForPme(const bool              useGpuForNonbonded,
     return false;
 }
 
-bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
-                                     const bool       useGpuForPme,
-                                     const TaskTarget bondedTarget,
-                                     const bool       canUseGpuForBonded,
-                                     const bool       usingLJPme,
-                                     const bool       usingElecPmeOrEwald,
-                                     const int        numPmeRanksPerSimulation,
-                                     const bool       gpusWereDetected)
+
+PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
+{
+    if (!EEL_PME(inputrec.coulombtype))
+    {
+        return PmeRunMode::None;
+    }
+
+    if (useGpuForPme)
+    {
+        if (pmeFftTarget == TaskTarget::Cpu)
+        {
+            return PmeRunMode::Mixed;
+        }
+        else
+        {
+            return PmeRunMode::GPU;
+        }
+    }
+    else
+    {
+        if (pmeFftTarget == TaskTarget::Gpu)
+        {
+            gmx_fatal(FARGS,
+                      "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
+                      "on CPU you should not be using -pmefft.");
+        }
+        return PmeRunMode::CPU;
+    }
+}
+
+bool decideWhetherToUseGpusForBonded(bool              useGpuForNonbonded,
+                                     bool              useGpuForPme,
+                                     TaskTarget        bondedTarget,
+                                     const t_inputrec& inputrec,
+                                     const gmx_mtop_t& mtop,
+                                     int               numPmeRanksPerSimulation,
+                                     bool              gpusWereDetected)
 {
     if (bondedTarget == TaskTarget::Cpu)
     {
         return false;
     }
 
-    if (!canUseGpuForBonded)
+    std::string errorMessage;
+
+    if (!buildSupportsListedForcesGpu(&errorMessage))
+    {
+        if (bondedTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(InconsistentInputError(errorMessage.c_str()));
+        }
+
+        return false;
+    }
+
+    if (!inputSupportsListedForcesGpu(inputrec, mtop, &errorMessage))
     {
         if (bondedTarget == TaskTarget::Gpu)
         {
-            GMX_THROW(InconsistentInputError
-                          ("Bonded interactions on the GPU were required, but not supported for these "
-                          "simulation settings. Change your settings, or do not require using GPUs."));
+            GMX_THROW(InconsistentInputError(errorMessage.c_str()));
         }
 
         return false;
@@ -461,10 +514,10 @@ bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
     {
         if (bondedTarget == TaskTarget::Gpu)
         {
-            GMX_THROW(InconsistentInputError
-                          ("Bonded interactions on the GPU were required, but this requires that "
-                          "short-ranged non-bonded interactions are also run on the GPU. Change "
-                          "your settings, or do not require using GPUs."));
+            GMX_THROW(InconsistentInputError(
+                    "Bonded interactions on the GPU were required, but this requires that "
+                    "short-ranged non-bonded interactions are also run on the GPU. Change "
+                    "your settings, or do not require using GPUs."));
         }
 
         return false;
@@ -485,63 +538,111 @@ bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
     // is busy, for which we currently only check PME or Ewald.
     // (It would be better to dynamically assign bondeds based on timings)
     // Note that here we assume that the auto setting of PME ranks will not
-    // choose seperate PME ranks when nonBonded are assigned to the GPU.
-    bool usingOurCpuForPmeOrEwald = (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
+    // choose separate PME ranks when nonbonded interactions are assigned to the GPU.
+    bool usingOurCpuForPmeOrEwald =
+            (EVDW_PME(inputrec.vdwtype)
+             || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
 
     return gpusWereDetected && usingOurCpuForPmeOrEwald;
 }
 
-bool decideWhetherToUseGpuForUpdate(bool              isDomainDecomposition,
-                                    bool              useGpuForPme,
-                                    bool              useGpuForNonbonded,
-                                    bool              useGpuForBufferOps,
-                                    TaskTarget        updateTarget,
-                                    bool              gpusWereDetected,
-                                    const t_inputrec &inputrec,
-                                    const MDAtoms    &mdatoms,
-                                    bool              useEssentialDynamics,
-                                    bool              doOrientationRestraints,
-                                    bool              doDistanceRestraints)
+bool decideWhetherToUseGpuForUpdate(const bool                     isDomainDecomposition,
+                                    const bool                     useUpdateGroups,
+                                    const PmeRunMode               pmeRunMode,
+                                    const bool                     havePmeOnlyRank,
+                                    const bool                     useGpuForNonbonded,
+                                    const TaskTarget               updateTarget,
+                                    const bool                     gpusWereDetected,
+                                    const t_inputrec&              inputrec,
+                                    const gmx_mtop_t&              mtop,
+                                    const bool                     useEssentialDynamics,
+                                    const bool                     doOrientationRestraints,
+                                    const bool                     haveFrozenAtoms,
+                                    const bool                     doRerun,
+                                    const DevelopmentFeatureFlags& devFlags,
+                                    const gmx::MDLogger&           mdlog)
 {
-    if (updateTarget == TaskTarget::Cpu)
+
+    // '-update cpu' overrides the environment variable; '-update auto' does not.
+    if (updateTarget == TaskTarget::Cpu
+        || (updateTarget == TaskTarget::Auto && !devFlags.forceGpuUpdateDefault))
     {
         return false;
     }
 
+    const bool hasAnyConstraints = gmx_mtop_interaction_count(mtop, IF_CONSTRAINT) > 0;
+    const bool pmeUsesCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
+
     std::string errorMessage;
 
     if (isDomainDecomposition)
     {
-        errorMessage += "Domain decomposition is not supported.\n";
+        if (hasAnyConstraints && !useUpdateGroups)
+        {
+            errorMessage +=
+                    "Domain decomposition is only supported with constraints when update groups "
+                    "are used. This means constraining all bonds is not supported, except for "
+                    "small molecules, and box sizes close to half the pair-list cutoff are not "
+                    "supported.\n";
+        }
+    }
+
+    if (havePmeOnlyRank)
+    {
+        if (pmeUsesCpu)
+        {
+            errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
+        }
+    }
+
+    if (inputrec.useMts)
+    {
+        errorMessage += "Multiple time stepping is not supported.\n";
+    }
+
+    if (inputrec.eConstrAlg == ConstraintAlgorithm::Shake && hasAnyConstraints
+        && gmx_mtop_ftype_count(mtop, F_CONSTR) > 0)
+    {
+        errorMessage += "SHAKE constraints are not supported.\n";
     }
-    // Using the GPU-version of update makes sense if forces are already on the GPU, i.e. if at least:
-    // 1. PME is on the GPU (there should be a copy of coordinates on a GPU in rvec format for PME spread).
-    // 2. Non-bonded interactions and buffer ops are on the GPU.
-    if (!(useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps)))
+    // Using the GPU version of the update makes sense if forces are already on the GPU, i.e. if at least one of:
+    // 1. PME spread runs on the GPU (fully GPU or mixed PME mode), so a copy of the coordinates is already there, or
+    // 2. Non-bonded interactions run on the GPU.
+    if ((pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::None) && !useGpuForNonbonded)
     {
-        errorMessage += "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
+        errorMessage +=
+                "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
     }
     if (!gpusWereDetected)
     {
         errorMessage += "Compatible GPUs must have been found.\n";
     }
-    if (GMX_GPU != GMX_GPU_CUDA)
+    if (!(GMX_GPU_CUDA || GMX_GPU_SYCL))
     {
-        errorMessage += "Only a CUDA build is supported.\n";
+        errorMessage += "Only CUDA and SYCL builds are supported.\n";
     }
-    if (inputrec.eI != eiMD)
+    if (inputrec.eI != IntegrationAlgorithm::MD)
     {
         errorMessage += "Only the md integrator is supported.\n";
     }
-    if (inputrec.etc == etcNOSEHOOVER)
+    if (inputrec.etc == TemperatureCoupling::NoseHoover)
     {
         errorMessage += "Nose-Hoover temperature coupling is not supported.\n";
     }
-    if (inputrec.epc != epcNO && inputrec.epc != epcPARRINELLORAHMAN)
+    if (!(inputrec.epc == PressureCoupling::No || inputrec.epc == PressureCoupling::ParrinelloRahman
+          || inputrec.epc == PressureCoupling::Berendsen || inputrec.epc == PressureCoupling::CRescale))
     {
-        errorMessage += "Only Parrinello-Rahman pressure control is supported.\n";
+        errorMessage +=
+                "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
+                "supported.\n";
     }
-    if (mdatoms.mdatoms()->haveVsites)
+    if (EEL_PME_EWALD(inputrec.coulombtype) && inputrec.epsilon_surface != 0)
+    {
+        // The graph is needed, but not supported
+        errorMessage += "Ewald surface correction is not supported.\n";
+    }
+    if (gmx_mtop_interaction_count(mtop, IF_VSITE) > 0)
     {
         errorMessage += "Virtual sites are not supported.\n";
     }
@@ -549,34 +650,92 @@ bool decideWhetherToUseGpuForUpdate(bool              isDomainDecomposition,
     {
         errorMessage += "Essential dynamics is not supported.\n";
     }
-    if (inputrec.bPull || inputrec.pull)
+    if (inputrec.bPull && pull_have_constraint(*inputrec.pull))
     {
-        errorMessage += "Pulling is not supported.\n";
+        errorMessage += "Constraints pulling is not supported.\n";
     }
     if (doOrientationRestraints)
     {
+        // The graph is needed, but not supported
         errorMessage += "Orientation restraints are not supported.\n";
     }
-    if (doDistanceRestraints)
+    if (inputrec.efep != FreeEnergyPerturbationType::No
+        && (haveFepPerturbedMasses(mtop) || havePerturbedConstraints(mtop)))
+    {
+        errorMessage += "Free energy perturbation for mass and constraints are not supported.\n";
+    }
+    const auto particleTypes = gmx_mtop_particletype_count(mtop);
+    if (particleTypes[ParticleType::Shell] > 0)
+    {
+        errorMessage += "Shells are not supported.\n";
+    }
+    if (inputrec.eSwapCoords != SwapType::No)
+    {
+        errorMessage += "Swapping the coordinates is not supported.\n";
+    }
+    if (doRerun)
+    {
+        errorMessage += "Re-run is not supported.\n";
+    }
+
+    // TODO: F_CONSTRNC is only unsupported because isNumCoupledConstraintsSupported()
+    // does not handle it; the actual CUDA LINCS code does support it.
+    if (gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)
+    {
+        errorMessage += "Non-connecting constraints are not supported\n";
+    }
+    if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop))
+    {
+        errorMessage +=
+                "The number of coupled constraints is higher than supported in the GPU LINCS "
+                "code.\n";
+    }
+    if (hasAnyConstraints && !UpdateConstrainGpu::areConstraintsSupported())
     {
-        errorMessage += "Distance restraints are not supported.\n";
+        errorMessage += "Chosen GPU implementation does not support constraints.\n";
     }
-    if (inputrec.efep != efepNO)
+    if (haveFrozenAtoms)
     {
-        errorMessage += "Free energy perturbations are not supported.\n";
+        // There is a known bug with frozen atoms and GPU update, see Issue #3920.
+        errorMessage += "Frozen atoms not supported.\n";
     }
+
     if (!errorMessage.empty())
     {
-        if (updateTarget == TaskTarget::Gpu)
+        if (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault)
+        {
+            GMX_LOG(mdlog.warning)
+                    .asParagraph()
+                    .appendText(
+                            "Update task on the GPU was required, by the "
+                            "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
+                            "condition(s) were not satisfied:");
+            GMX_LOG(mdlog.warning).asParagraph().appendText(errorMessage.c_str());
+            GMX_LOG(mdlog.warning).asParagraph().appendText("Will use CPU version of update.");
+        }
+        else if (updateTarget == TaskTarget::Gpu)
         {
-            std::string prefix = gmx::formatString("Update task on the GPU was required,\n"
-                                                   "but the following condition(s) were not satisfied:\n");
+            std::string prefix = gmx::formatString(
+                    "Update task on the GPU was required,\n"
+                    "but the following condition(s) were not satisfied:\n");
             GMX_THROW(InconsistentInputError((prefix + errorMessage).c_str()));
         }
         return false;
     }
 
-    return true;
+    return (updateTarget == TaskTarget::Gpu
+            || (updateTarget == TaskTarget::Auto && devFlags.forceGpuUpdateDefault));
+}
+
+bool decideWhetherToUseGpuForHalo(const DevelopmentFeatureFlags& devFlags,
+                                  bool                           havePPDomainDecomposition,
+                                  bool                           useGpuForNonbonded,
+                                  bool                           useModularSimulator,
+                                  bool                           doRerun,
+                                  bool                           haveEnergyMinimization)
+{
+    return havePPDomainDecomposition && devFlags.enableGpuHaloExchange && useGpuForNonbonded
+           && !useModularSimulator && !doRerun && !haveEnergyMinimization;
 }
 
-}  // namespace gmx
+} // namespace gmx
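
The new canUseGpusForPme() helper above gathers every blocking condition with gmx::MessageStringCollector instead of returning at the first failed check, so the user is told all the reasons PME cannot be offloaded at once. Below is a minimal standalone sketch of that collect-then-report pattern; the Collector class is a simplified stand-in written for illustration (not the actual gmx::MessageStringCollector), and the two conditions fed to appendIf() are hypothetical placeholders.

#include <cstdio>
#include <string>
#include <vector>

// Simplified stand-in for gmx::MessageStringCollector, only to show the pattern.
class Collector
{
public:
    void startContext(const std::string& context) { context_ = context; }
    // Record the reason only when the condition is true.
    void appendIf(bool condition, const std::string& reason)
    {
        if (condition)
        {
            reasons_.push_back(reason);
        }
    }
    void finishContext() {} // the real class closes a nested context; a no-op suffices here
    bool isEmpty() const { return reasons_.empty(); }
    std::string toString() const
    {
        std::string text = context_;
        for (const std::string& reason : reasons_)
        {
            text += "\n  " + reason;
        }
        return text;
    }

private:
    std::string              context_;
    std::vector<std::string> reasons_;
};

int main()
{
    // Hypothetical inputs standing in for the real checks in canUseGpusForPme().
    const bool useGpuForNonbonded    = false;
    const bool buildSupportsPmeOnGpu = true;

    Collector errorReasons;
    errorReasons.startContext("Cannot compute PME interactions on a GPU, because:");
    errorReasons.appendIf(!useGpuForNonbonded, "Nonbonded interactions must also run on GPUs.");
    errorReasons.appendIf(!buildSupportsPmeOnGpu, "The build has no GPU PME support.");
    errorReasons.finishContext();

    if (!errorReasons.isEmpty())
    {
        std::fprintf(stderr, "%s\n", errorReasons.toString().c_str());
        return 1;
    }
    return 0;
}

Compiled and run as-is, this prints the context line followed by the one reason whose condition evaluated to true.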
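
Similarly, the decision table in the new determinePmeRunMode() can be exercised in isolation. The sketch below mirrors its branching using local stand-in types (RunMode and Target replace gmx's PmeRunMode and TaskTarget, and the usesPmeElectrostatics flag replaces the EEL_PME(inputrec.coulombtype) check); it is an illustration of the logic added above, not GROMACS code.

#include <cassert>

// Local stand-ins for gmx's PmeRunMode and TaskTarget enums (illustration only).
enum class RunMode
{
    None,  // PME electrostatics not in use
    CPU,
    Mixed, // spread/gather on the GPU, FFTs on the CPU
    GPU
};
enum class Target
{
    Auto,
    Cpu,
    Gpu
};

// Mirrors the branching of determinePmeRunMode() in the diff above.
RunMode pickPmeRunMode(bool usesPmeElectrostatics, bool useGpuForPme, Target pmeFftTarget)
{
    if (!usesPmeElectrostatics)
    {
        return RunMode::None;
    }
    if (useGpuForPme)
    {
        // Keeping the FFTs on the CPU while PME is offloaded gives mixed mode.
        return (pmeFftTarget == Target::Cpu) ? RunMode::Mixed : RunMode::GPU;
    }
    // With PME on the CPU, requesting GPU FFTs is an input error (gmx_fatal in the real code).
    assert(pmeFftTarget != Target::Gpu);
    return RunMode::CPU;
}

int main()
{
    assert(pickPmeRunMode(false, false, Target::Auto) == RunMode::None);
    assert(pickPmeRunMode(true, true, Target::Cpu) == RunMode::Mixed);
    assert(pickPmeRunMode(true, true, Target::Auto) == RunMode::GPU);
    assert(pickPmeRunMode(true, false, Target::Auto) == RunMode::CPU);
    return 0;
}

The asserts spell out the mapping: no PME gives None, PME on the GPU with CPU FFTs gives Mixed, otherwise GPU, and PME on the CPU stays CPU, with a GPU FFT request rejected as an input error.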