Extend task assignment code
[alexxy/gromacs.git] / src / programs / mdrun / runner.cpp
index ed9209d564d6f525ae1ca15ed039a047ff644d23..53c55ff09fe9d25ef69b513ab9e31902911f42d8 100644 (file)
 #include "gromacs/pbcutil/pbc.h"
 #include "gromacs/pulling/pull.h"
 #include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/taskassignment/hardwareassign.h"
+#include "gromacs/taskassignment/decidegpuusage.h"
 #include "gromacs/taskassignment/resourcedivision.h"
+#include "gromacs/taskassignment/taskassignment.h"
 #include "gromacs/taskassignment/usergpuids.h"
 #include "gromacs/timing/wallcycle.h"
 #include "gromacs/topology/mtop_util.h"
 #include "gromacs/utility/pleasecite.h"
 #include "gromacs/utility/programcontext.h"
 #include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringutil.h"
 
 #include "deform.h"
 #include "md.h"
@@ -134,13 +136,6 @@ matrix              deform_init_box_tpx;
 //! MPI variable for use in pressure scaling
 tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 
-#if GMX_THREAD_MPI
-/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
- * the number of threads will get lowered.
- */
-#define MIN_ATOMS_PER_MPI_THREAD    90
-#define MIN_ATOMS_PER_GPU           900
-
 namespace gmx
 {
 
@@ -209,6 +204,7 @@ t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch)
     // Mdrunner.
     spawnedMdrunner.fnm = dup_tfn(this->nfile, fnm);
 
+#if GMX_THREAD_MPI
     /* now spawn new threads that start mdrunner_start_fn(), while
        the main thread returns, we set thread affinity later */
     if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE,
@@ -216,14 +212,15 @@ t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch)
     {
         GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
     }
+#else
+    GMX_UNUSED_VALUE(mdrunner_start_fn);
+#endif
 
     return reinitialize_commrec_for_this_thread(cr);
 }
 
 }      // namespace
 
-#endif /* GMX_THREAD_MPI */
-
 /*! \brief Initialize variables for Verlet scheme simulation */
 static void prepare_verlet_scheme(FILE                           *fplog,
                                   t_commrec                      *cr,
@@ -318,47 +315,12 @@ static void override_nsteps_cmdline(const gmx::MDLogger &mdlog,
 namespace gmx
 {
 
-//! Halt the run if there are inconsistences between user choices to run with GPUs and/or hardware detection.
-static void exitIfCannotForceGpuRun(bool                requirePhysicalGpu,
-                                    EmulateGpuNonbonded emulateGpuNonbonded,
-                                    bool                useVerletScheme,
-                                    bool                compatibleGpusFound)
-{
-    /* Was GPU acceleration either explicitly (-nb gpu) or implicitly
-     * (gpu ID passed) requested? */
-    if (!requirePhysicalGpu)
-    {
-        return;
-    }
-
-    if (GMX_GPU == GMX_GPU_NONE)
-    {
-        gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!",
-                  gmx::getProgramContext().displayName());
-    }
-
-    if (emulateGpuNonbonded == EmulateGpuNonbonded::Yes)
-    {
-        gmx_fatal(FARGS, "GPU emulation cannot be requested together with GPU acceleration!");
-    }
-
-    if (!useVerletScheme)
-    {
-        gmx_fatal(FARGS, "GPU acceleration requested, but can't be used without cutoff-scheme=Verlet");
-    }
-
-    if (!compatibleGpusFound)
-    {
-        gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
-    }
-}
-
-/*! \brief Return whether GPU acceleration is useful with the given settings.
+/*! \brief Return whether GPU acceleration of nonbondeds is useful with the given settings.
  *
  * If not, logs a message about falling back to CPU code. */
-static bool gpuAccelerationIsUseful(const MDLogger   &mdlog,
-                                    const t_inputrec *ir,
-                                    bool              doRerun)
+static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger   &mdlog,
+                                               const t_inputrec *ir,
+                                               bool              doRerun)
 {
     if (doRerun && ir->opts.ngener > 1)
     {
@@ -430,6 +392,31 @@ static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr)
     return builder.build();
 }
 
+//! Make a TaskTarget from an mdrun argument string.
+static TaskTarget findTaskTarget(const char *optionString)
+{
+    TaskTarget returnValue = TaskTarget::Auto;
+
+    if (strncmp(optionString, "auto", 4) == 0)
+    {
+        returnValue = TaskTarget::Auto;
+    }
+    else if (strncmp(optionString, "cpu", 3) == 0)
+    {
+        returnValue = TaskTarget::Cpu;
+    }
+    else if (strncmp(optionString, "gpu", 3) == 0)
+    {
+        returnValue = TaskTarget::Gpu;
+    }
+    else
+    {
+        GMX_ASSERT(false, "Option string should have been checked for sanity already");
+    }
+
+    return returnValue;
+}
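+
+// For illustration only, a sketch of the mapping this helper performs for
+// the option strings mdrun accepts (hypothetical usage, not code in this
+// change):
+//
+//   findTaskTarget("auto") -> TaskTarget::Auto
+//   findTaskTarget("cpu")  -> TaskTarget::Cpu
+//   findTaskTarget("gpu")  -> TaskTarget::Gpu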
+
 int Mdrunner::mdrunner()
 {
     matrix                    box;
@@ -467,27 +454,44 @@ int Mdrunner::mdrunner()
     bool doMembed = opt2bSet("-membed", nfile, fnm);
     bool doRerun  = mdrunOptions.rerun;
 
-    /* Handle GPU-related user options. Later, we check consistency
-     * with things like whether support is compiled, or tMPI thread
-     * count. */
+    // Handle task-assignment related user options.
     EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ?
                                                EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No);
-    std::vector<int>    userGpuIds;
+    std::vector<int>    gpuIdsAvailable;
     try
     {
-        userGpuIds = parseUserGpuIds(hw_opt.gpuIdTaskAssignment);
+        gpuIdsAvailable = parseUserGpuIds(hw_opt.gpuIdsAvailable);
+        // TODO We could put the GPU IDs into a std::set to find
+        // duplicates, but for the small number of IDs involved, this
+        // code is simple and fast.
+        for (size_t i = 0; i != gpuIdsAvailable.size(); ++i)
+        {
+            for (size_t j = i+1; j != gpuIdsAvailable.size(); ++j)
+            {
+                if (gpuIdsAvailable[i] == gpuIdsAvailable[j])
+                {
+                    GMX_THROW(InvalidInputError(formatString("The string of available GPU device IDs '%s' may not contain duplicate device IDs", hw_opt.gpuIdsAvailable.c_str())));
+                }
+            }
+        }
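+
+        // A minimal sketch of the std::set-based alternative mentioned in
+        // the TODO above (illustrative only; would need #include <set>):
+        //
+        //   std::set<int> uniqueIds(gpuIdsAvailable.begin(), gpuIdsAvailable.end());
+        //   if (uniqueIds.size() != gpuIdsAvailable.size())
+        //   {
+        //       GMX_THROW(InvalidInputError("Duplicate GPU device IDs are not allowed"));
+        //   }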
     }
     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 
-    bool                forceUseCpu           = (strncmp(nbpu_opt, "cpu", 3) == 0);
-    if (!userGpuIds.empty() && forceUseCpu)
+    std::vector<int> userGpuTaskAssignment;
+    try
     {
-        gmx_fatal(FARGS, "GPU IDs were specified, and short-ranged interactions were assigned to the CPU. Make no more than one of these choices.");
+        userGpuTaskAssignment = parseUserGpuIds(hw_opt.userGpuTaskAssignment);
     }
-    bool forceUsePhysicalGpu = (strncmp(nbpu_opt, "gpu", 3) == 0) || !userGpuIds.empty();
-    bool tryUsePhysicalGpu   = (strncmp(nbpu_opt, "auto", 4) == 0) && userGpuIds.empty() && (emulateGpuNonbonded == EmulateGpuNonbonded::No);
-    GMX_RELEASE_ASSERT(!(forceUsePhysicalGpu && tryUsePhysicalGpu), "Must either force use of "
-                       "GPUs for short-ranged interactions, or try to use them, not both.");
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+    auto             nonbondedTarget = findTaskTarget(nbpu_opt);
+    // TODO Connect these to actual mdrun arguments and some functionality
+    const char      *pme_opt     = "cpu";
+    auto             pmeTarget   = findTaskTarget(pme_opt);
+
+    // TODO find a sensible home and behaviour for this
+    //const char      *pme_fft_opt = "auto";
+    //auto pmeFftTarget    = findTaskTarget(pme_fft_opt);
+
     const PmeRunMode pmeRunMode = PmeRunMode::CPU;
     //TODO this is a placeholder as PME on GPU is not permitted yet
     //TODO should there exist a PmeRunMode::None value for consistency?
@@ -501,6 +505,33 @@ int Mdrunner::mdrunner()
 
     gmx_print_detected_hardware(fplog, cr, mdlog, hwinfo);
 
+    std::vector<int> gpuIdsToUse;
+    auto             compatibleGpus = getCompatibleGpus(hwinfo->gpu_info);
+    if (gpuIdsAvailable.empty())
+    {
+        gpuIdsToUse = compatibleGpus;
+    }
+    else
+    {
+        for (const auto &availableGpuId : gpuIdsAvailable)
+        {
+            bool availableGpuIsCompatible = false;
+            for (const auto &compatibleGpuId : compatibleGpus)
+            {
+                if (availableGpuId == compatibleGpuId)
+                {
+                    availableGpuIsCompatible = true;
+                    break;
+                }
+            }
+            if (!availableGpuIsCompatible)
+            {
+                gmx_fatal(FARGS, "You limited the set of compatible GPUs to a set that included ID #%d, but that ID is not for a compatible GPU. List only compatible GPUs.", availableGpuId);
+            }
+            gpuIdsToUse.push_back(availableGpuId);
+        }
+    }
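+
+    // For illustration, the inner membership test above is equivalent to this
+    // standard-library sketch (would need #include <algorithm>):
+    //
+    //   bool availableGpuIsCompatible =
+    //       (std::find(compatibleGpus.begin(), compatibleGpus.end(),
+    //                  availableGpuId) != compatibleGpus.end());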
+
     if (fplog != nullptr)
     {
         /* Print references after all software/hardware printing */
@@ -523,45 +554,21 @@ int Mdrunner::mdrunner()
         /* Read (nearly) all data required for the simulation */
         read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, globalState.get(), mtop);
 
-        exitIfCannotForceGpuRun(forceUsePhysicalGpu,
-                                emulateGpuNonbonded,
-                                inputrec->cutoff_scheme == ecutsVERLET,
-                                compatibleGpusFound(hwinfo->gpu_info));
-
-        if (inputrec->cutoff_scheme == ecutsVERLET)
-        {
-            /* TODO This logic could run later, e.g. before -npme -1
-               is handled. If inputrec has already been communicated,
-               then the resulting tryUsePhysicalGpu does not need to
-               be communicated. */
-            if ((tryUsePhysicalGpu || forceUsePhysicalGpu) &&
-                !gpuAccelerationIsUseful(mdlog, inputrec, doRerun))
-            {
-                /* Fallback message printed by nbnxn_acceleration_supported */
-                if (forceUsePhysicalGpu)
-                {
-                    gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings");
-                }
-                tryUsePhysicalGpu = false;
-            }
-        }
-        else
+        if (inputrec->cutoff_scheme != ecutsVERLET)
         {
             if (nstlist_cmdline > 0)
             {
                 gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
             }
 
-            if (compatibleGpusFound(hwinfo->gpu_info))
+            if (!compatibleGpus.empty())
             {
                 GMX_LOG(mdlog.warning).asParagraph().appendText(
                         "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
                         "      To use a GPU, set the mdp option: cutoff-scheme = Verlet");
             }
-            tryUsePhysicalGpu = false;
         }
     }
-    bool nonbondedOnGpu = (tryUsePhysicalGpu || forceUsePhysicalGpu) && compatibleGpusFound(hwinfo->gpu_info);
 
     /* Check and update the hardware options for internal consistency */
     check_and_update_hw_opt_1(&hw_opt, cr, domdecOptions.numPmeRanks);
@@ -570,8 +577,7 @@ int Mdrunner::mdrunner()
     gmx_check_thread_affinity_set(mdlog, cr,
                                   &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 
-#if GMX_THREAD_MPI
-    if (SIMMASTER(cr))
+    if (GMX_THREAD_MPI && SIMMASTER(cr))
     {
         if (domdecOptions.numPmeRanks > 0 && hw_opt.nthreads_tmpi <= 0)
         {
@@ -584,6 +590,26 @@ int Mdrunner::mdrunner()
          */
         check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme);
 
+        bool useGpuForNonbonded = false;
+        bool useGpuForPme       = false;
+        try
+        {
+            // If the user specified the number of ranks, then we must
+            // respect that, but in default mode, we need to allow the
+            // number of GPUs to influence the choice of the number of ranks.
+
+            useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi
+                    (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
+                    inputrec->cutoff_scheme == ecutsVERLET,
+                    gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, doRerun),
+                    hw_opt.nthreads_tmpi);
+            auto inputSystemHasPme = EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype);
+            auto canUseGpuForPme   = inputSystemHasPme && pme_gpu_supports_input(inputrec, nullptr);
+            useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi
+                    (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment,
+                    canUseGpuForPme, hw_opt.nthreads_tmpi);
+        }
+        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
         /* Determine how many thread-MPI ranks to start.
          *
          * TODO Over-writing the user-supplied value here does
@@ -591,9 +617,9 @@ int Mdrunner::mdrunner()
          * correctly. */
         hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo,
                                                 &hw_opt,
-                                                userGpuIds,
-                                                domdecOptions.numPmeRanks,
-                                                nonbondedOnGpu,
+                                                gpuIdsToUse,
+                                                useGpuForNonbonded,
+                                                useGpuForPme,
                                                 inputrec, mtop,
                                                 mdlog,
                                                 doMembed);
@@ -606,16 +632,37 @@ int Mdrunner::mdrunner()
         // reinitialize_commrec_for_this_thread. Find a way to express
         // this better.
     }
-#endif
     /* END OF CAUTION: cr is now reliable */
 
     if (PAR(cr))
     {
         /* now broadcast everything to the non-master nodes/threads: */
         init_parallel(cr, inputrec, mtop);
+    }
 
-        gmx_bcast_sim(sizeof(nonbondedOnGpu), &nonbondedOnGpu, cr);
+    // Now each rank knows the inputrec that SIMMASTER read and used,
+    // and (if applicable) cr->nnodes has been assigned the number of
+    // thread-MPI ranks that have been chosen. The ranks can now all
+    // run the task-deciding functions and will agree on the result
+    // without needing to communicate.
+    //
+    // TODO Should we do the communication in debug mode to support
+    // having an assertion?
+    //
+    // Note that these variables describe only their own node.
+    bool useGpuForNonbonded = false;
+    bool useGpuForPme       = false;
+    try
+    {
+        useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment,
+                                                                emulateGpuNonbonded, inputrec->cutoff_scheme == ecutsVERLET,
+                                                                gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, doRerun));
+        auto inputSystemHasPme = EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype);
+        auto canUseGpuForPme   = inputSystemHasPme && pme_gpu_supports_input(inputrec, nullptr);
+        useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, canUseGpuForPme, cr->nnodes);
     }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
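+
+    // A sketch of the debug-mode consistency check contemplated in the TODO
+    // above: broadcast the master's decision and assert that every rank
+    // reached the same one. Illustrative only; assumes a parallel run.
+    //
+    //   if (PAR(cr))
+    //   {
+    //       bool masterDecision = useGpuForNonbonded;
+    //       gmx_bcast_sim(sizeof(masterDecision), &masterDecision, cr);
+    //       GMX_ASSERT(masterDecision == useGpuForNonbonded,
+    //                  "All ranks should reach the same GPU task decisions");
+    //   }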
+
     // TODO: Error handling
     mdModules.assignOptionsToModules(*inputrec->params, nullptr);
 
@@ -687,7 +734,7 @@ int Mdrunner::mdrunner()
         domdecOptions.numPmeRanks = 0;
     }
 
-    if (nonbondedOnGpu && domdecOptions.numPmeRanks < 0)
+    if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
     {
         /* With GPUs we don't automatically use PME-only ranks. PME ranks can
          * improve performance with many threads per GPU, since our OpenMP
@@ -790,7 +837,7 @@ int Mdrunner::mdrunner()
     if (inputrec->cutoff_scheme == ecutsVERLET)
     {
         prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, mtop, box,
-                              nonbondedOnGpu || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
+                              useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo);
     }
 
     if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
@@ -871,44 +918,72 @@ int Mdrunner::mdrunner()
     }
 #endif
 
-    // Contains the ID of the GPU used by each PP rank on this node,
-    // indexed by that rank. Empty if no GPUs are selected for use on
-    // this node.
-    std::vector<int> gpuTaskAssignment;
-    if (nonbondedOnGpu)
-    {
-        // Currently the DD code assigns duty to ranks that can
-        // include PP work that currently can be executed on a single
-        // GPU, if present and compatible.  This has to be coordinated
-        // across PP ranks on a node, with possible multiple devices
-        // or sharing devices on a node, either from the user
-        // selection, or automatically.
-        //
-        // GPU ID assignment strings, if provided, cover all the ranks on
-        // a node. If nodes or the process placement on them are
-        // heterogeneous, then the GMX_GPU_ID environment variable must be
-        // set by a user who also wishes to direct GPU ID assignment.
-        // Thus the implementation of task assignment can assume it has a
-        // GPU ID assignment appropriate for the node upon which its
-        // process is running.
-        //
-        // Valid GPU ID assignments are an ordered set of digits that
-        // identify GPU device IDs (e.g. as understood by the GPU runtime,
-        // and subject to environment modification such as with
-        // CUDA_VISIBLE_DEVICES) that will be used for the GPU-suitable
-        // tasks on all of the ranks of that node.
-        bool rankCanUseGpu = thisRankHasDuty(cr, DUTY_PP);
-        gpuTaskAssignment = mapPpRanksToGpus(rankCanUseGpu, cr, hwinfo->gpu_info, hwinfo->compatibleGpus, userGpuIds);
-    }
-
-    reportGpuUsage(mdlog, hwinfo->gpu_info, !userGpuIds.empty(),
-                   gpuTaskAssignment, cr->nrank_pp_intranode, cr->nnodes > 1);
-
-    if (!gpuTaskAssignment.empty())
-    {
-        GMX_RELEASE_ASSERT(cr->nrank_pp_intranode == static_cast<int>(gpuTaskAssignment.size()),
-                           "The number of PP ranks on each node must equal the number of GPU tasks used on each node");
+    // Build a data structure that expresses which kinds of non-bonded
+    // task are handled by this rank.
+    //
+    // TODO Later, this might become a loop over all registered modules
+    // relevant to the mdp inputs, to find those that have such tasks.
+    //
+    // TODO This could move before init_domain_decomposition() as part
+    // of refactoring that separates the responsibility for duty
+    // assignment from setup for communication between tasks, and
+    // setup for tasks handled within a domain (i.e. including short-ranged
+    // tasks, bonded tasks, etc.).
+    //
+    // Note that in general useGpuForNonbonded, etc. can have a value
+    // that is inconsistent with the presence of actual GPUs on any
+    // rank, and that is not known to be a problem until the
+    // duties of the ranks on a node become known.
+    //
+    // TODO Later we might need the concept of computeTasksOnThisRank,
+    // from which we construct gpuTasksOnThisRank.
+    //
+    // Currently the DD code assigns duty to ranks that can
+    // include PP work that can be executed on a single
+    // GPU, if present and compatible.  This has to be coordinated
+    // across PP ranks on a node, with possible multiple devices
+    // or sharing devices on a node, either from the user
+    // selection, or automatically.
+    auto                 haveGpus = !gpuIdsToUse.empty();
+    std::vector<GpuTask> gpuTasksOnThisRank;
+    if (thisRankHasDuty(cr, DUTY_PP))
+    {
+        if (useGpuForNonbonded)
+        {
+            if (haveGpus)
+            {
+                gpuTasksOnThisRank.push_back(GpuTask::Nonbonded);
+            }
+            else if (nonbondedTarget == TaskTarget::Gpu)
+            {
+                gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected.");
+            }
+        }
+    }
+    // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not.
+    if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
+    {
+        if (useGpuForPme)
+        {
+            if (haveGpus)
+            {
+                gpuTasksOnThisRank.push_back(GpuTask::Pme);
+            }
+            else if (pmeTarget == TaskTarget::Gpu)
+            {
+                gmx_fatal(FARGS, "Cannot run PME on a GPU because there is none detected.");
+            }
+        }
+    }
+
+    GpuTaskAssignment gpuTaskAssignment;
+    try
+    {
+        // Produce the task assignment for this rank.
+        gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
+                                              mdlog, cr, gpuTasksOnThisRank);
     }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 
     /* Prevent other ranks from continuing after an issue was found
      * and reported as a fatal error.
@@ -923,27 +998,33 @@ int Mdrunner::mdrunner()
     {
         MPI_Barrier(cr->mpi_comm_mysim);
     }
+    if (MULTISIM(cr))
+    {
+        MPI_Barrier(cr->ms->mpi_comm_masters);
+    }
 #endif
 
     /* Now that we know the setup is consistent, check for efficiency */
     check_resource_division_efficiency(hwinfo, hw_opt.nthreads_tot, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
                                        cr, mdlog);
 
-    gmx_device_info_t *shortRangedDeviceInfo = nullptr;
-    int                shortRangedDeviceId   = -1;
+    gmx_device_info_t *nonbondedDeviceInfo = nullptr;
+    int                nonbondedDeviceId   = -1;
     if (thisRankHasDuty(cr, DUTY_PP))
     {
         if (!gpuTaskAssignment.empty())
         {
-            shortRangedDeviceId   = gpuTaskAssignment[cr->rank_pp_intranode];
-            shortRangedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, shortRangedDeviceId);
+            GMX_RELEASE_ASSERT(gpuTaskAssignment.size() == 1, "A valid GPU assignment can only have one task per rank");
+            GMX_RELEASE_ASSERT(gpuTaskAssignment[0].task_ == gmx::GpuTask::Nonbonded, "A valid GPU assignment can only include short-ranged tasks");
+            nonbondedDeviceId   = gpuTaskAssignment[0].deviceId_;
+            nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
         }
     }
 
     if (DOMAINDECOMP(cr))
     {
         /* When we share GPUs over ranks, we need to know this for the DLB */
-        dd_setup_dlb_resource_sharing(cr, shortRangedDeviceId);
+        dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
     }
 
     /* getting number of PP/PME threads
@@ -989,7 +1070,7 @@ int Mdrunner::mdrunner()
                       opt2fn("-table", nfile, fnm),
                       opt2fn("-tablep", nfile, fnm),
                       getFilenm("-tableb", nfile, fnm),
-                      shortRangedDeviceInfo,
+                      nonbondedDeviceInfo,
                       FALSE,
                       pforce);
 
@@ -1226,7 +1307,7 @@ int Mdrunner::mdrunner()
     }
 
     /* Free GPU memory and context */
-    free_gpu_resources(fr, cr, shortRangedDeviceInfo);
+    free_gpu_resources(fr, cr, nonbondedDeviceInfo);
 
     if (doMembed)
     {