Separate management of GPU contexts from modules
[alexxy/gromacs.git] / src / gromacs / mdlib / forcerec.cpp
index 05c59c2f2937752d83b48a44dfdab5bb5191519e..0d9cfb49c7412291e6695655c50b1d10d95697fe 100644 (file)
@@ -2134,19 +2134,15 @@ init_interaction_const(FILE                       *fp,
     *interaction_const = ic;
 }
 
-/* TODO deviceInfo should be logically const, but currently
- * init_gpu modifies it to set up NVML support. This could
- * happen during the detection phase, and deviceInfo could
- * the become const. */
-static void init_nb_verlet(const gmx::MDLogger &mdlog,
-                           nonbonded_verlet_t **nb_verlet,
-                           gmx_bool             bFEP_NonBonded,
-                           const t_inputrec    *ir,
-                           const t_forcerec    *fr,
-                           const t_commrec     *cr,
-                           gmx_device_info_t   *deviceInfo,
-                           const gmx_mtop_t    *mtop,
-                           matrix               box)
+static void init_nb_verlet(const gmx::MDLogger     &mdlog,
+                           nonbonded_verlet_t     **nb_verlet,
+                           gmx_bool                 bFEP_NonBonded,
+                           const t_inputrec        *ir,
+                           const t_forcerec        *fr,
+                           const t_commrec         *cr,
+                           const gmx_device_info_t *deviceInfo,
+                           const gmx_mtop_t        *mtop,
+                           matrix                   box)
 {
     nonbonded_verlet_t *nbv;
     char               *env;
@@ -2161,12 +2157,6 @@ static void init_nb_verlet(const gmx::MDLogger &mdlog,
 
     GMX_RELEASE_ASSERT(!(nbv->emulateGpu == EmulateGpuNonbonded::Yes && nbv->bUseGPU), "When GPU emulation is active, there cannot be a GPU assignment");
 
-    if (nbv->bUseGPU)
-    {
-        /* Use the assigned GPU. */
-        init_gpu(mdlog, cr->nodeid, deviceInfo);
-    }
-
     nbv->nbs             = nullptr;
     nbv->min_ci_balanced = 0;
 
@@ -2319,20 +2309,20 @@ gmx_bool usingGpu(nonbonded_verlet_t *nbv)
     return nbv != nullptr && nbv->bUseGPU;
 }
 
-void init_forcerec(FILE                *fp,
-                   const gmx::MDLogger &mdlog,
-                   t_forcerec          *fr,
-                   t_fcdata            *fcd,
-                   const t_inputrec    *ir,
-                   const gmx_mtop_t    *mtop,
-                   const t_commrec     *cr,
-                   matrix               box,
-                   const char          *tabfn,
-                   const char          *tabpfn,
-                   const t_filenm      *tabbfnm,
-                   gmx_device_info_t   *deviceInfo,
-                   gmx_bool             bNoSolvOpt,
-                   real                 print_force)
+void init_forcerec(FILE                    *fp,
+                   const gmx::MDLogger     &mdlog,
+                   t_forcerec              *fr,
+                   t_fcdata                *fcd,
+                   const t_inputrec        *ir,
+                   const gmx_mtop_t        *mtop,
+                   const t_commrec         *cr,
+                   matrix                   box,
+                   const char              *tabfn,
+                   const char              *tabpfn,
+                   const t_filenm          *tabbfnm,
+                   const gmx_device_info_t *deviceInfo,
+                   gmx_bool                 bNoSolvOpt,
+                   real                     print_force)
 {
     int            i, m, negp_pp, negptable, egi, egj;
     real           rtab;
@@ -3156,27 +3146,26 @@ void init_forcerec(FILE                *fp,
     }
 }
 
-/* Frees GPU memory and destroys the GPU context.
+/* Frees GPU memory and then enters a tMPI physical-node barrier.
  *
  * Note that this function needs to be called even if GPUs are not used
  * in this run because the PME ranks have no knowledge of whether GPUs
  * are used or not, but all ranks need to enter the barrier below.
+ * \todo Remove physical node barrier from this function after making sure
+ * that it's not needed anymore (with a shared GPU run).
  */
 void free_gpu_resources(const t_forcerec        *fr,
-                        const t_commrec         *cr,
-                        const gmx_device_info_t *deviceInfo)
+                        const t_commrec         *cr)
 {
-    gmx_bool bIsPPrankUsingGPU;
-    char     gpu_err_str[STRLEN];
+    bool isPPrankUsingGPU = fr && fr->nbv && fr->nbv->bUseGPU;
 
-    bIsPPrankUsingGPU = thisRankHasDuty(cr, DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
+    /* stop the GPU profiler (only CUDA) */
+    stopGpuProfiler();
 
-    if (bIsPPrankUsingGPU)
+    if (isPPrankUsingGPU)
     {
         /* free nbnxn data in GPU memory */
         nbnxn_gpu_free(fr->nbv->gpu_nbv);
-        /* stop the GPU profiler (only CUDA) */
-        stopGpuProfiler();
     }
 
     /* With tMPI we need to wait for all ranks to finish deallocation before
@@ -3189,20 +3178,8 @@ void free_gpu_resources(const t_forcerec        *fr,
      * Note: it is safe to not call the barrier on the ranks which do not use GPU,
      * but it is easier and more futureproof to call it on the whole node.
      */
-#if GMX_THREAD_MPI
-    if (PAR(cr) || MULTISIM(cr))
+    if (GMX_THREAD_MPI && (PAR(cr) || MULTISIM(cr)))
     {
         gmx_barrier_physical_node(cr);
     }
-#endif  /* GMX_THREAD_MPI */
-
-    if (bIsPPrankUsingGPU)
-    {
-        /* uninitialize GPU (by destroying the context) */
-        if (!free_cuda_gpu(deviceInfo, gpu_err_str))
-        {
-            gmx_warning("On rank %d failed to free GPU #%d: %s",
-                        cr->nodeid, get_current_cuda_gpu_device_id(), gpu_err_str);
-        }
-    }
 }