Use unique_ptr in nonbonded_verlet_t
authorBerk Hess <hess@kth.se>
Thu, 21 Feb 2019 19:56:04 +0000 (20:56 +0100)
committerBerk Hess <hess@kth.se>
Thu, 28 Feb 2019 08:45:55 +0000 (09:45 +0100)
Changed nonbonded_verlet_t and all its pointer members to unique_ptr,
except for the GPU struct, because its contents is currently only
visible in the cuda/opencl part of the code.

Change-Id: I710b4db7a08ebf8d892b2dd9417ab82fbccf1ed2

18 files changed:
src/gromacs/domdec/partition.cpp
src/gromacs/ewald/pme_load_balancing.cpp
src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/sim_util.cpp
src/gromacs/mdrun/md.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/mdtypes/forcerec.h
src/gromacs/nbnxm/atomdata.cpp
src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu
src/gromacs/nbnxm/gpu_data_mgmt.h
src/gromacs/nbnxm/gpu_types.h
src/gromacs/nbnxm/grid.cpp
src/gromacs/nbnxm/kerneldispatch.cpp
src/gromacs/nbnxm/nbnxm.h
src/gromacs/nbnxm/nbnxm_setup.cpp
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
src/gromacs/nbnxm/pairlist.cpp
src/gromacs/nbnxm/prunekerneldispatch.cpp

index 02b0d214f5bb10885bb4608692e58af482475066..7df2f9cf2bc9e4b551c3137dd5ea0284f5c9589d 100644 (file)
@@ -3415,7 +3415,7 @@ void dd_partition_system(FILE                    *fplog,
             case ecutsVERLET:
                 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
 
-                nbnxn_put_on_grid(fr->nbv, state_local->box,
+                nbnxn_put_on_grid(fr->nbv.get(), state_local->box,
                                   0,
                                   comm->zones.size[0].bb_x0,
                                   comm->zones.size[0].bb_x1,
index 49765554f86195204835767f41c00d4868feeb4d..ba3c90c12ef064dfb4398b8723abd27e3a6578dd 100644 (file)
@@ -1007,7 +1007,7 @@ void pme_loadbal_do(pme_load_balancing_t *pme_lb,
         pme_load_balance(pme_lb, cr,
                          fp_err, fp_log, mdlog,
                          ir, state, pme_lb->cycles_c - cycles_prev,
-                         fr->ic, fr->nbv, &fr->pmedata,
+                         fr->ic, fr->nbv.get(), &fr->pmedata,
                          step);
 
         /* Update deprecated rlist in forcerec to stay in sync with fr->nbv */
index 9cd9d0891a81f05175be413b09232b3dbed3eb2a..e852525ebd7201a6bcd5ef4f3ce4f2a254f90804 100644 (file)
@@ -2356,9 +2356,9 @@ void init_forcerec(FILE                             *fp,
             GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, "With Verlet lists and no PME rcoulomb and rvdw should be identical");
         }
 
-        Nbnxm::init_nb_verlet(mdlog, &fr->nbv, bFEP_NonBonded, ir, fr,
-                              cr, hardwareInfo, deviceInfo,
-                              mtop, box);
+        fr->nbv = Nbnxm::init_nb_verlet(mdlog, bFEP_NonBonded, ir, fr,
+                                        cr, hardwareInfo, deviceInfo,
+                                        mtop, box);
 
         if (useGpuForBonded)
         {
@@ -2405,8 +2405,9 @@ void free_gpu_resources(t_forcerec                          *fr,
 
     if (isPPrankUsingGPU)
     {
-        /* free nbnxn data in GPU memory */
-        Nbnxm::gpu_free(fr->nbv->gpu_nbv);
+        /* Free data in GPU memory and pinned memory before destroying the GPU context */
+        fr->nbv.reset();
+
         delete fr->gpuBonded;
         fr->gpuBonded = nullptr;
     }
index e2946c1561295af033d2dbde3c5c2652a316aad2..d386490a943300d55f5fac19d9d75ffeba41c4eb 100644 (file)
@@ -318,7 +318,7 @@ static void do_nb_verlet(t_forcerec                       *fr,
         return;
     }
 
-    nonbonded_verlet_t *nbv  = fr->nbv;
+    nonbonded_verlet_t *nbv  = fr->nbv.get();
 
     /* GPU kernel launch overhead is already timed separately */
     if (fr->cutoff_scheme != ecutsVERLET)
@@ -717,7 +717,7 @@ static void do_force_cutsVERLET(FILE *fplog,
     gmx_bool            bDoForces, bUseGPU, bUseOrEmulGPU;
     rvec                vzero, box_diag;
     float               cycles_pme, cycles_wait_gpu;
-    nonbonded_verlet_t *nbv = fr->nbv;
+    nonbonded_verlet_t *nbv = fr->nbv.get();
 
     bStateChanged = ((flags & GMX_FORCE_STATECHANGED) != 0);
     bNS           = ((flags & GMX_FORCE_NS) != 0);
@@ -802,7 +802,7 @@ static void do_force_cutsVERLET(FILE *fplog,
     }
 
     nbnxn_atomdata_copy_shiftvec((flags & GMX_FORCE_DYNAMICBOX) != 0,
-                                 fr->shift_vec, nbv->nbat);
+                                 fr->shift_vec, nbv->nbat.get());
 
 #if GMX_MPI
     if (!thisRankHasDuty(cr, DUTY_PME))
@@ -857,7 +857,7 @@ static void do_force_cutsVERLET(FILE *fplog,
             wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL);
         }
 
-        nbnxn_atomdata_set(nbv->nbat, nbv->nbs.get(), mdatoms, fr->cginfo);
+        nbnxn_atomdata_set(nbv->nbat.get(), nbv->nbs.get(), mdatoms, fr->cginfo);
 
         wallcycle_stop(wcycle, ewcNS);
     }
@@ -870,10 +870,10 @@ static void do_force_cutsVERLET(FILE *fplog,
 
         if (bNS)
         {
-            Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat);
+            Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get());
         }
 
-        Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat);
+        Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
 
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
 
@@ -913,7 +913,7 @@ static void do_force_cutsVERLET(FILE *fplog,
     {
         nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), Nbnxm::AtomLocality::Local,
                                         FALSE, as_rvec_array(x.unpaddedArrayRef().data()),
-                                        nbv->nbat, wcycle);
+                                        nbv->nbat.get(), wcycle);
     }
 
     if (bUseGPU)
@@ -923,7 +923,9 @@ static void do_force_cutsVERLET(FILE *fplog,
         wallcycle_start(wcycle, ewcLAUNCH_GPU);
 
         wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-        Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork);
+        Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
+                                  Nbnxm::AtomLocality::Local,
+                                  ppForceWorkload->haveGpuBondedWork);
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
 
         // bonded work not split into separate local and non-local, so with DD
@@ -972,7 +974,7 @@ static void do_force_cutsVERLET(FILE *fplog,
 
             nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal,
                                             FALSE, as_rvec_array(x.unpaddedArrayRef().data()),
-                                            nbv->nbat, wcycle);
+                                            nbv->nbat.get(), wcycle);
         }
 
         if (bUseGPU)
@@ -981,7 +983,9 @@ static void do_force_cutsVERLET(FILE *fplog,
 
             /* launch non-local nonbonded tasks on GPU */
             wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
-            Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, Nbnxm::AtomLocality::NonLocal, ppForceWorkload->haveGpuBondedWork);
+            Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(),
+                                      Nbnxm::AtomLocality::NonLocal,
+                                      ppForceWorkload->haveGpuBondedWork);
             wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
 
             if (ppForceWorkload->haveGpuBondedWork)
@@ -1007,10 +1011,10 @@ static void do_force_cutsVERLET(FILE *fplog,
         wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED);
         if (havePPDomainDecomposition(cr))
         {
-            Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
+            Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
                                       flags, Nbnxm::AtomLocality::NonLocal, ppForceWorkload->haveGpuBondedWork);
         }
-        Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat,
+        Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(),
                                   flags, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork);
         wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED);
 
@@ -1167,7 +1171,7 @@ static void do_force_cutsVERLET(FILE *fplog,
         {
             /* This is not in a subcounter because it takes a
                negligible and constant-sized amount of time */
-            nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat,
+            nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat.get(),
                                                      fr->fshift);
         }
     }
@@ -1242,7 +1246,8 @@ static void do_force_cutsVERLET(FILE *fplog,
     bool alternateGpuWait = (!c_disableAlternatingWait && useGpuPme && bUseGPU && !DOMAINDECOMP(cr));
     if (alternateGpuWait)
     {
-        alternatePmeNbGpuWaitReduce(fr->nbv, fr->pmedata, &force, &forceWithVirial, fr->fshift, enerd, flags, pmeFlags, ppForceWorkload->haveGpuBondedWork, wcycle);
+        alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &force, &forceWithVirial, fr->fshift, enerd,
+                                    flags, pmeFlags, ppForceWorkload->haveGpuBondedWork, wcycle);
     }
 
     if (!alternateGpuWait && useGpuPme)
index 681a741ab65366c3cc066496e8ac8da8ffe3de52..d853a25d75bc5cc5ae310d64c41c9b5aa3aff97a 100644 (file)
@@ -413,7 +413,7 @@ void gmx::Integrator::do_md()
     if (bPMETune)
     {
         pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box,
-                         *fr->ic, fr->nbv->pairlistSets().params(), fr->pmedata, use_GPU(fr->nbv),
+                         *fr->ic, fr->nbv->pairlistSets().params(), fr->pmedata, use_GPU(fr->nbv.get()),
                          &bPMETunePrinting);
     }
 
@@ -1475,7 +1475,7 @@ void gmx::Integrator::do_md()
         step_rel++;
 
         resetHandler->resetCounters(
-                step, step_rel, mdlog, fplog, cr, (use_GPU(fr->nbv) ? fr->nbv : nullptr),
+                step, step_rel, mdlog, fplog, cr, (use_GPU(fr->nbv.get()) ? fr->nbv.get() : nullptr),
                 nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
 
         /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
@@ -1510,7 +1510,7 @@ void gmx::Integrator::do_md()
 
     if (bPMETune)
     {
-        pme_loadbal_done(pme_loadbal, fplog, mdlog, use_GPU(fr->nbv));
+        pme_loadbal_done(pme_loadbal, fplog, mdlog, use_GPU(fr->nbv.get()));
     }
 
     done_shellfc(fplog, shellfc, step_rel);
index 3239a01f0b0e169b42c7f488a1ae96cf3723b53c..df8da62702851906590509c380fb0a288c0e24ee 100644 (file)
@@ -1564,7 +1564,7 @@ int Mdrunner::mdrunner()
      */
     finish_run(fplog, mdlog, cr,
                inputrec, nrnb, wcycle, walltime_accounting,
-               fr ? fr->nbv : nullptr,
+               fr ? fr->nbv.get() : nullptr,
                pmedata,
                EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
 
index 915381691dda82b739e999b2f9b46852b1ca3714..e90470a62a8145907ce1b1b00a0995db9340c8a4 100644 (file)
@@ -38,6 +38,7 @@
 #define GMX_MDTYPES_TYPES_FORCEREC_H
 
 #include <array>
+#include <memory>
 #include <vector>
 
 #include "gromacs/math/vectypes.h"
@@ -229,7 +230,9 @@ struct t_forcerec { // NOLINT (clang-analyzer-optin.performance.Padding)
 
     int                        cutoff_scheme = 0;     /* group- or Verlet-style cutoff */
     gmx_bool                   bNonbonded    = FALSE; /* true if nonbonded calculations are *not* turned off */
-    struct nonbonded_verlet_t *nbv           = nullptr;
+
+    /* The Nbnxm Verlet non-bonded machinery */
+    std::unique_ptr<nonbonded_verlet_t> nbv;
 
     /* The wall tables (if used) */
     int                    nwall    = 0;
index 1de1af8f114bc236af7bfe55a5fa75592d75118e..db26637b051d34dd4308a0290fc382989a9250c5 100644 (file)
@@ -1516,11 +1516,11 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality  locality
          */
         if (nbat->bUseTreeReduce)
         {
-            nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbat, nth);
+            nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbat.get(), nth);
         }
         else
         {
-            nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbat, nth);
+            nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbat.get(), nth);
         }
     }
 #pragma omp parallel for num_threads(nth) schedule(static)
@@ -1528,7 +1528,7 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality  locality
     {
         try
         {
-            nbnxn_atomdata_add_nbat_f_to_f_part(nbs.get(), nbat,
+            nbnxn_atomdata_add_nbat_f_to_f_part(nbs.get(), nbat.get(),
                                                 nbat->out,
                                                 1,
                                                 a0+((th+0)*na)/nth,
index c711136b470a9577da0c7e8ee460dc73ec399f51..61104dd57b8de098f7fdfb683646d3494d67665c 100644 (file)
@@ -345,14 +345,13 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t    *nbv,
     {
         return;
     }
-    gmx_nbnxn_cuda_t *nb    = nbv->gpu_nbv;
-    cu_nbparam_t     *nbp   = nb->nbparam;
+    cu_nbparam_t *nbp   = nbv->gpu_nbv->nbparam;
 
     set_cutoff_parameters(nbp, ic, listParams);
 
     nbp->eeltype        = pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw);
 
-    init_ewald_coulomb_force_table(ic, nb->nbparam);
+    init_ewald_coulomb_force_table(ic, nbp);
 }
 
 /*! Initializes the pair list data structure. */
@@ -415,22 +414,17 @@ static void cuda_init_const(gmx_nbnxn_cuda_t               *nb,
     nbnxn_cuda_clear_e_fshift(nb);
 }
 
-void gpu_init(gmx_nbnxn_cuda_t         **p_nb,
-              const gmx_device_info_t   *deviceInfo,
-              const interaction_const_t *ic,
-              const NbnxnListParameters *listParams,
-              const nbnxn_atomdata_t    *nbat,
-              int                        /*rank*/,
-              gmx_bool                   bLocalAndNonlocal)
+gmx_nbnxn_cuda_t *
+gpu_init(const gmx_device_info_t   *deviceInfo,
+         const interaction_const_t *ic,
+         const NbnxnListParameters *listParams,
+         const nbnxn_atomdata_t    *nbat,
+         int                        /*rank*/,
+         gmx_bool                   bLocalAndNonlocal)
 {
     cudaError_t       stat;
-    gmx_nbnxn_cuda_t *nb;
-
-    if (p_nb == nullptr)
-    {
-        return;
-    }
 
+    gmx_nbnxn_cuda_t *nb;
     snew(nb, 1);
     snew(nb->atdat, 1);
     snew(nb->nbparam, 1);
@@ -499,12 +493,12 @@ void gpu_init(gmx_nbnxn_cuda_t         **p_nb,
 
     cuda_init_const(nb, ic, listParams, nbat->params());
 
-    *p_nb = nb;
-
     if (debug)
     {
         fprintf(debug, "Initialized CUDA data structures.\n");
     }
+
+    return nb;
 }
 
 void gpu_init_pairlist(gmx_nbnxn_cuda_t          *nb,
index f578cf5cb99f12da5084357b8f6ca33c035440d0..c1a19b7f635d554a9a67d65ade04fd560e94ea2f 100644 (file)
@@ -44,6 +44,8 @@
 #ifndef GMX_NBNXN_GPU_DATA_MGMT_H
 #define GMX_NBNXN_GPU_DATA_MGMT_H
 
+#include <memory>
+
 #include "gromacs/gpu_utils/gpu_macros.h"
 #include "gromacs/mdtypes/interaction_const.h"
 
@@ -63,14 +65,14 @@ namespace Nbnxm
 
 /** Initializes the data structures related to GPU nonbonded calculations. */
 GPU_FUNC_QUALIFIER
-void gpu_init(gmx_nbnxn_gpu_t gmx_unused            **p_nb,
-              const gmx_device_info_t gmx_unused     *deviceInfo,
-              const interaction_const_t gmx_unused   *ic,
-              const NbnxnListParameters gmx_unused   *listParams,
-              const nbnxn_atomdata_t gmx_unused      *nbat,
-              int gmx_unused                          rank,
-              /* true if both local and non-local are done on GPU */
-              gmx_bool gmx_unused                     bLocalAndNonlocal) GPU_FUNC_TERM
+gmx_nbnxn_gpu_t *
+gpu_init(const gmx_device_info_t gmx_unused     *deviceInfo,
+         const interaction_const_t gmx_unused   *ic,
+         const NbnxnListParameters gmx_unused   *listParams,
+         const nbnxn_atomdata_t gmx_unused      *nbat,
+         int gmx_unused                          rank,
+         /* true if both local and non-local are done on GPU */
+         gmx_bool gmx_unused                     bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr)
 
 /** Initializes pair-list data for GPU, called at every pair search step. */
 GPU_FUNC_QUALIFIER
@@ -115,7 +117,7 @@ void gpu_reset_timings(struct nonbonded_verlet_t gmx_unused *nbv) GPU_FUNC_TERM
 
 /** Calculates the minimum size of proximity lists to improve SM load balance
  *  with GPU non-bonded kernels. */
-     GPU_FUNC_QUALIFIER
+    GPU_FUNC_QUALIFIER
 int gpu_min_ci_balanced(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(-1)
 
 /** Returns if analytical Ewald GPU kernels are used. */
@@ -145,7 +147,7 @@ void *gpu_get_f(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullp
  *  Note: CUDA only.
  */
 CUDA_FUNC_QUALIFIER
-     rvec *gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr)
+    rvec *gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr)
 
 } // namespace Nbnxm
 
index a18ca0b56f9f3fc07a31da5516da303db3138ccd..f02e456ad975bcd54aab2a94b743f46359cc900c 100644 (file)
 
 #if GMX_GPU == GMX_GPU_OPENCL
 struct gmx_nbnxn_ocl_t;
-typedef struct gmx_nbnxn_ocl_t gmx_nbnxn_gpu_t;
+using gmx_nbnxn_gpu_t = gmx_nbnxn_ocl_t;
 #endif
 
 #if GMX_GPU == GMX_GPU_CUDA
 struct gmx_nbnxn_cuda_t;
-typedef struct gmx_nbnxn_cuda_t gmx_nbnxn_gpu_t;
+using gmx_nbnxn_gpu_t = gmx_nbnxn_cuda_t;
 #endif
 
 #if GMX_GPU == GMX_GPU_NONE
index 19ed3d1021e9d01e09534e7086bee727e2c841da..3e91161f2ccb8533dc3ab7a019241b571c25d747 100644 (file)
@@ -1495,7 +1495,7 @@ void nbnxn_put_on_grid(nonbonded_verlet_t             *nbv,
                      lowerCorner, upperCorner,
                      nbs->grid[0].atom_density);
 
-    nbnxn_atomdata_t *nbat = nbv->nbat;
+    nbnxn_atomdata_t *nbat = nbv->nbat.get();
 
     calc_cell_indices(nbs, ddZone, grid, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat);
 
index b4b79b0b11fabed001234dbe33fc8d10fc19c9d0..31cf946e201a7f84a5fbde6650f2a1f3c4b6a8dc 100644 (file)
@@ -498,7 +498,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
         case Nbnxm::KernelType::Cpu4xN_Simd_2xNN:
             nbnxn_kernel_cpu(pairlistSet,
                              kernelSetup(),
-                             nbat,
+                             nbat.get(),
                              ic,
                              fr->shift_vec,
                              forceFlags,
@@ -516,7 +516,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality
 
         case Nbnxm::KernelType::Cpu8x8x8_PlainC:
             nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0],
-                                 nbat, &ic,
+                                 nbat.get(), &ic,
                                  fr->shift_vec,
                                  forceFlags,
                                  clearF,
index a3fce4a01d573389c1082b2432847763d18e6e5b..dbdb9fba51b295c2a02c510295c5fbb8e3132270 100644 (file)
@@ -346,6 +346,15 @@ struct nonbonded_verlet_t
                 int64_t                               outerListCreationStep_;
         };
 
+        //! Constructs an object from its components
+        nonbonded_verlet_t(std::unique_ptr<PairlistSets>      pairlistSets_,
+                           std::unique_ptr<nbnxn_search>      nbs,
+                           std::unique_ptr<nbnxn_atomdata_t>  nbat,
+                           const Nbnxm::KernelSetup          &kernelSetup,
+                           gmx_nbnxn_gpu_t                   *gpu_nbv);
+
+        ~nonbonded_verlet_t();
+
         //! Returns whether a GPU is use for the non-bonded calculations
         bool useGpu() const
         {
@@ -425,43 +434,36 @@ struct nonbonded_verlet_t
             return kernelSetup_;
         }
 
-        //! Sets the kernel setup, TODO: make private
-        void setKernelSetup(const Nbnxm::KernelSetup &kernelSetup)
-        {
-            kernelSetup_ = kernelSetup;
-        }
-
         // TODO: Make all data members private
     public:
         //! All data related to the pair lists
-        std::unique_ptr<PairlistSets>         pairlistSets_;
+        std::unique_ptr<PairlistSets>     pairlistSets_;
         //! Working data for constructing the pairlists
-        std::unique_ptr<nbnxn_search>         nbs;
+        std::unique_ptr<nbnxn_search>     nbs;
         //! Atom data
-        nbnxn_atomdata_t                     *nbat;
-
+        std::unique_ptr<nbnxn_atomdata_t> nbat;
     private:
         //! The non-bonded setup, also affects the pairlist construction kernel
-        Nbnxm::KernelSetup   kernelSetup_;
+        Nbnxm::KernelSetup                kernelSetup_;
     public:
-
-        gmx_nbnxn_gpu_t     *gpu_nbv;         /**< pointer to GPU nb verlet data     */
+        //! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr)
+        gmx_nbnxn_gpu_t                  *gpu_nbv;
 };
 
 namespace Nbnxm
 {
 
-/*! \brief Initializes the nbnxn module */
-void init_nb_verlet(const gmx::MDLogger     &mdlog,
-                    nonbonded_verlet_t     **nb_verlet,
-                    gmx_bool                 bFEP_NonBonded,
-                    const t_inputrec        *ir,
-                    const t_forcerec        *fr,
-                    const t_commrec         *cr,
-                    const gmx_hw_info_t     &hardwareInfo,
-                    const gmx_device_info_t *deviceInfo,
-                    const gmx_mtop_t        *mtop,
-                    matrix                   box);
+/*! \brief Creates an Nbnxm object */
+std::unique_ptr<nonbonded_verlet_t>
+init_nb_verlet(const gmx::MDLogger     &mdlog,
+               gmx_bool                 bFEP_NonBonded,
+               const t_inputrec        *ir,
+               const t_forcerec        *fr,
+               const t_commrec         *cr,
+               const gmx_hw_info_t     &hardwareInfo,
+               const gmx_device_info_t *deviceInfo,
+               const gmx_mtop_t        *mtop,
+               matrix                   box);
 
 } // namespace Nbnxm
 
index da474ae8ba0b15474d6f1757649e32e2b21db02c..2e43db5fc661148a191f501bcac6ce7d9d68a4bd 100644 (file)
@@ -61,6 +61,7 @@
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/logger.h"
 
+#include "gpu_types.h"
 #include "grid.h"
 #include "internal.h"
 
@@ -343,19 +344,17 @@ static int getMinimumIlistCountForGpuBalancing(gmx_nbnxn_gpu_t *nbnxmGpu)
     return minimumIlistCount;
 }
 
-void init_nb_verlet(const gmx::MDLogger     &mdlog,
-                    nonbonded_verlet_t     **nb_verlet,
-                    gmx_bool                 bFEP_NonBonded,
-                    const t_inputrec        *ir,
-                    const t_forcerec        *fr,
-                    const t_commrec         *cr,
-                    const gmx_hw_info_t     &hardwareInfo,
-                    const gmx_device_info_t *deviceInfo,
-                    const gmx_mtop_t        *mtop,
-                    matrix                   box)
+std::unique_ptr<nonbonded_verlet_t>
+init_nb_verlet(const gmx::MDLogger     &mdlog,
+               gmx_bool                 bFEP_NonBonded,
+               const t_inputrec        *ir,
+               const t_forcerec        *fr,
+               const t_commrec         *cr,
+               const gmx_hw_info_t     &hardwareInfo,
+               const gmx_device_info_t *deviceInfo,
+               const gmx_mtop_t        *mtop,
+               matrix                   box)
 {
-    nonbonded_verlet_t *nbv        = new nonbonded_verlet_t();
-
     const bool          emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
     const bool          useGpu     = deviceInfo != nullptr;
 
@@ -375,15 +374,14 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
         nonbondedResource = NonbondedResource::Cpu;
     }
 
-    nbv->nbs             = nullptr;
-
-    nbv->setKernelSetup(pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
-                                          nonbondedResource, ir,
-                                          fr->bNonbonded));
+    Nbnxm::KernelSetup kernelSetup =
+        pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
+                          nonbondedResource, ir,
+                          fr->bNonbonded);
 
     const bool          haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
 
-    NbnxnListParameters listParams(nbv->kernelSetup().kernelType,
+    NbnxnListParameters listParams(kernelSetup.kernelType,
                                    ir->rlist,
                                    havePPDomainDecomposition(cr));
 
@@ -417,7 +415,9 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
         enbnxninitcombrule = enbnxninitcombruleNONE;
     }
 
-    nbv->nbat = new nbnxn_atomdata_t(useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
+    std::unique_ptr<nbnxn_atomdata_t> nbat =
+        std::make_unique<nbnxn_atomdata_t>(useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
+
     int mimimumNumEnergyGroupNonbonded = ir->opts.ngener;
     if (ir->opts.ngener - ir->nwall == 1)
     {
@@ -428,41 +428,67 @@ void init_nb_verlet(const gmx::MDLogger     &mdlog,
         mimimumNumEnergyGroupNonbonded = 1;
     }
     nbnxn_atomdata_init(mdlog,
-                        nbv->nbat,
-                        nbv->kernelSetup().kernelType,
+                        nbat.get(),
+                        kernelSetup.kernelType,
                         enbnxninitcombrule,
                         fr->ntype, fr->nbfp,
                         mimimumNumEnergyGroupNonbonded,
-                        nbv->pairlistIsSimple() ? gmx_omp_nthreads_get(emntNonbonded) : 1);
+                        (useGpu || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded));
 
-    int minimumIlistCountForGpuBalancing = 0;
+    gmx_nbnxn_gpu_t *gpu_nbv = nullptr;
+    int              minimumIlistCountForGpuBalancing = 0;
     if (useGpu)
     {
         /* init the NxN GPU data; the last argument tells whether we'll have
          * both local and non-local NB calculation on GPU */
-        gpu_init(&nbv->gpu_nbv,
-                 deviceInfo,
-                 fr->ic,
-                 &listParams,
-                 nbv->nbat,
-                 cr->nodeid,
-                 haveMultipleDomains);
-
-        minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(nbv->gpu_nbv);
+        gpu_nbv = gpu_init(deviceInfo,
+                           fr->ic,
+                           &listParams,
+                           nbat.get(),
+                           cr->nodeid,
+                           haveMultipleDomains);
+
+        minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
     }
 
-    nbv->pairlistSets_ =
+    std::unique_ptr<nonbonded_verlet_t::PairlistSets> pairlistSets =
         std::make_unique<nonbonded_verlet_t::PairlistSets>(listParams,
                                                            haveMultipleDomains,
                                                            minimumIlistCountForGpuBalancing);
 
-    nbv->nbs = std::make_unique<nbnxn_search>(ir->ePBC,
-                                              DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
-                                              DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
-                                              bFEP_NonBonded,
-                                              gmx_omp_nthreads_get(emntPairsearch));
-
-    *nb_verlet = nbv;
+    std::unique_ptr<nbnxn_search> nbs =
+        std::make_unique<nbnxn_search>(ir->ePBC,
+                                       DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
+                                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
+                                       bFEP_NonBonded,
+                                       gmx_omp_nthreads_get(emntPairsearch));
+
+    return std::make_unique<nonbonded_verlet_t>(std::move(pairlistSets),
+                                                std::move(nbs),
+                                                std::move(nbat),
+                                                kernelSetup,
+                                                gpu_nbv);
 }
 
 } // namespace Nbnxm
+
+nonbonded_verlet_t::nonbonded_verlet_t(std::unique_ptr<PairlistSets>      pairlistSets,
+                                       std::unique_ptr<nbnxn_search>      nbs_in,
+                                       std::unique_ptr<nbnxn_atomdata_t>  nbat_in,
+                                       const Nbnxm::KernelSetup          &kernelSetup,
+                                       gmx_nbnxn_gpu_t                   *gpu_nbv_ptr) :
+    pairlistSets_(std::move(pairlistSets)),
+    nbs(std::move(nbs_in)),
+    nbat(std::move(nbat_in)),
+    kernelSetup_(kernelSetup),
+    gpu_nbv(gpu_nbv_ptr)
+{
+    GMX_RELEASE_ASSERT(pairlistSets_, "Need valid pairlistSets");
+    GMX_RELEASE_ASSERT(nbs, "Need valid search object");
+    GMX_RELEASE_ASSERT(nbat, "Need valid atomdata object");
+}
+
+nonbonded_verlet_t::~nonbonded_verlet_t()
+{
+    Nbnxm::gpu_free(gpu_nbv);
+}
index d040b67c221a8f6c3089f2f137423067f84632eb..85de6a2746de2933692dbaa4d9136290b3e9e328 100644 (file)
@@ -429,7 +429,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t    *nbv,
 
     nbp->eeltype = gpu_pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw);
 
-    init_ewald_coulomb_force_table(ic, nb->nbparam, nb->dev_rundata);
+    init_ewald_coulomb_force_table(ic, nbp, nb->dev_rundata);
 }
 
 /*! \brief Initializes the pair list data structure.
@@ -633,13 +633,13 @@ static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t                *nb,
 
 
 //! This function is documented in the header file
-void gpu_init(gmx_nbnxn_ocl_t          **p_nb,
-              const gmx_device_info_t   *deviceInfo,
-              const interaction_const_t *ic,
-              const NbnxnListParameters *listParams,
-              const nbnxn_atomdata_t    *nbat,
-              const int                  rank,
-              const gmx_bool             bLocalAndNonlocal)
+gmx_nbnxn_ocl_t *
+gpu_init(const gmx_device_info_t   *deviceInfo,
+         const interaction_const_t *ic,
+         const NbnxnListParameters *listParams,
+         const nbnxn_atomdata_t    *nbat,
+         const int                  rank,
+         const gmx_bool             bLocalAndNonlocal)
 {
     gmx_nbnxn_ocl_t            *nb;
     cl_int                      cl_error;
@@ -647,11 +647,6 @@ void gpu_init(gmx_nbnxn_ocl_t          **p_nb,
 
     assert(ic);
 
-    if (p_nb == nullptr)
-    {
-        return;
-    }
-
     snew(nb, 1);
     snew(nb->atdat, 1);
     snew(nb->nbparam, 1);
@@ -743,12 +738,12 @@ void gpu_init(gmx_nbnxn_ocl_t          **p_nb,
     /* clear energy and shift force outputs */
     nbnxn_ocl_clear_e_fshift(nb);
 
-    *p_nb = nb;
-
     if (debug)
     {
         fprintf(debug, "Initialized OpenCL data structures.\n");
     }
+
+    return nb;
 }
 
 /*! \brief Clears the first natoms_clear elements of the GPU nonbonded force output array.
index d2327f94bd46a65b72cff93b5c289d65c85352bf..26c64db34fe5f52935bfc73a28c8d82bfb788258 100644 (file)
@@ -4368,7 +4368,7 @@ nonbonded_verlet_t::constructPairlist(const Nbnxm::InteractionLocality  iLocalit
                                       int64_t                           step,
                                       t_nrnb                           *nrnb)
 {
-    pairlistSets_->construct(iLocality, nbs.get(), nbat, excl,
+    pairlistSets_->construct(iLocality, nbs.get(), nbat.get(), excl,
                              kernelSetup_.kernelType,
                              step, nrnb);
 
index 1b89ebbf835f0884bad51b35f5918e6008cbd2b8..15f84465b20a5bd2732307f4ec07865fa3d226f4 100644 (file)
@@ -85,5 +85,5 @@ void
 nonbonded_verlet_t::dispatchPruneKernelCpu(const Nbnxm::InteractionLocality  iLocality,
                                            const rvec                       *shift_vec)
 {
-    pairlistSets_->dispatchPruneKernel(iLocality, nbat, shift_vec, kernelSetup_.kernelType);
+    pairlistSets_->dispatchPruneKernel(iLocality, nbat.get(), shift_vec, kernelSetup_.kernelType);
 }