From af6a1d6daa7473ca1174681e3c89e5c3c04a168c Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Thu, 21 Feb 2019 20:56:04 +0100 Subject: [PATCH] Use unique_ptr in nonbonded_verlet_t Changed nonbonded_verlet_t and all its pointer members to unique_ptr, except for the GPU struct, because its contents is currently only visible in the cuda/opencl part of the code. Change-Id: I710b4db7a08ebf8d892b2dd9417ab82fbccf1ed2 --- src/gromacs/domdec/partition.cpp | 2 +- src/gromacs/ewald/pme_load_balancing.cpp | 2 +- src/gromacs/mdlib/forcerec.cpp | 11 +- src/gromacs/mdlib/sim_util.cpp | 33 +++--- src/gromacs/mdrun/md.cpp | 6 +- src/gromacs/mdrun/runner.cpp | 2 +- src/gromacs/mdtypes/forcerec.h | 5 +- src/gromacs/nbnxm/atomdata.cpp | 6 +- .../nbnxm/cuda/nbnxm_cuda_data_mgmt.cu | 30 ++--- src/gromacs/nbnxm/gpu_data_mgmt.h | 22 ++-- src/gromacs/nbnxm/gpu_types.h | 4 +- src/gromacs/nbnxm/grid.cpp | 2 +- src/gromacs/nbnxm/kerneldispatch.cpp | 4 +- src/gromacs/nbnxm/nbnxm.h | 50 +++++---- src/gromacs/nbnxm/nbnxm_setup.cpp | 106 +++++++++++------- .../nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp | 25 ++--- src/gromacs/nbnxm/pairlist.cpp | 2 +- src/gromacs/nbnxm/prunekerneldispatch.cpp | 2 +- 18 files changed, 171 insertions(+), 143 deletions(-) diff --git a/src/gromacs/domdec/partition.cpp b/src/gromacs/domdec/partition.cpp index 02b0d214f5..7df2f9cf2b 100644 --- a/src/gromacs/domdec/partition.cpp +++ b/src/gromacs/domdec/partition.cpp @@ -3415,7 +3415,7 @@ void dd_partition_system(FILE *fplog, case ecutsVERLET: set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved); - nbnxn_put_on_grid(fr->nbv, state_local->box, + nbnxn_put_on_grid(fr->nbv.get(), state_local->box, 0, comm->zones.size[0].bb_x0, comm->zones.size[0].bb_x1, diff --git a/src/gromacs/ewald/pme_load_balancing.cpp b/src/gromacs/ewald/pme_load_balancing.cpp index 49765554f8..ba3c90c12e 100644 --- a/src/gromacs/ewald/pme_load_balancing.cpp +++ b/src/gromacs/ewald/pme_load_balancing.cpp @@ -1007,7 +1007,7 @@ void pme_loadbal_do(pme_load_balancing_t *pme_lb, pme_load_balance(pme_lb, cr, fp_err, fp_log, mdlog, ir, state, pme_lb->cycles_c - cycles_prev, - fr->ic, fr->nbv, &fr->pmedata, + fr->ic, fr->nbv.get(), &fr->pmedata, step); /* Update deprecated rlist in forcerec to stay in sync with fr->nbv */ diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 9cd9d0891a..e852525ebd 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -2356,9 +2356,9 @@ void init_forcerec(FILE *fp, GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, "With Verlet lists and no PME rcoulomb and rvdw should be identical"); } - Nbnxm::init_nb_verlet(mdlog, &fr->nbv, bFEP_NonBonded, ir, fr, - cr, hardwareInfo, deviceInfo, - mtop, box); + fr->nbv = Nbnxm::init_nb_verlet(mdlog, bFEP_NonBonded, ir, fr, + cr, hardwareInfo, deviceInfo, + mtop, box); if (useGpuForBonded) { @@ -2405,8 +2405,9 @@ void free_gpu_resources(t_forcerec *fr, if (isPPrankUsingGPU) { - /* free nbnxn data in GPU memory */ - Nbnxm::gpu_free(fr->nbv->gpu_nbv); + /* Free data in GPU memory and pinned memory before destroying the GPU context */ + fr->nbv.reset(); + delete fr->gpuBonded; fr->gpuBonded = nullptr; } diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index e2946c1561..d386490a94 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -318,7 +318,7 @@ static void do_nb_verlet(t_forcerec *fr, return; } - nonbonded_verlet_t *nbv = fr->nbv; + nonbonded_verlet_t *nbv = fr->nbv.get(); /* GPU kernel launch overhead is already timed separately */ if (fr->cutoff_scheme != ecutsVERLET) @@ -717,7 +717,7 @@ static void do_force_cutsVERLET(FILE *fplog, gmx_bool bDoForces, bUseGPU, bUseOrEmulGPU; rvec vzero, box_diag; float cycles_pme, cycles_wait_gpu; - nonbonded_verlet_t *nbv = fr->nbv; + nonbonded_verlet_t *nbv = fr->nbv.get(); bStateChanged = ((flags & GMX_FORCE_STATECHANGED) != 0); bNS = ((flags & GMX_FORCE_NS) != 0); @@ -802,7 +802,7 @@ static void do_force_cutsVERLET(FILE *fplog, } nbnxn_atomdata_copy_shiftvec((flags & GMX_FORCE_DYNAMICBOX) != 0, - fr->shift_vec, nbv->nbat); + fr->shift_vec, nbv->nbat.get()); #if GMX_MPI if (!thisRankHasDuty(cr, DUTY_PME)) @@ -857,7 +857,7 @@ static void do_force_cutsVERLET(FILE *fplog, wallcycle_sub_stop(wcycle, ewcsNBS_GRID_NONLOCAL); } - nbnxn_atomdata_set(nbv->nbat, nbv->nbs.get(), mdatoms, fr->cginfo); + nbnxn_atomdata_set(nbv->nbat.get(), nbv->nbs.get(), mdatoms, fr->cginfo); wallcycle_stop(wcycle, ewcNS); } @@ -870,10 +870,10 @@ static void do_force_cutsVERLET(FILE *fplog, if (bNS) { - Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat); + Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get()); } - Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat); + Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get()); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); @@ -913,7 +913,7 @@ static void do_force_cutsVERLET(FILE *fplog, { nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), Nbnxm::AtomLocality::Local, FALSE, as_rvec_array(x.unpaddedArrayRef().data()), - nbv->nbat, wcycle); + nbv->nbat.get(), wcycle); } if (bUseGPU) @@ -923,7 +923,9 @@ static void do_force_cutsVERLET(FILE *fplog, wallcycle_start(wcycle, ewcLAUNCH_GPU); wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork); + Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), + Nbnxm::AtomLocality::Local, + ppForceWorkload->haveGpuBondedWork); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); // bonded work not split into separate local and non-local, so with DD @@ -972,7 +974,7 @@ static void do_force_cutsVERLET(FILE *fplog, nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs.get(), Nbnxm::AtomLocality::NonLocal, FALSE, as_rvec_array(x.unpaddedArrayRef().data()), - nbv->nbat, wcycle); + nbv->nbat.get(), wcycle); } if (bUseGPU) @@ -981,7 +983,9 @@ static void do_force_cutsVERLET(FILE *fplog, /* launch non-local nonbonded tasks on GPU */ wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); - Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat, Nbnxm::AtomLocality::NonLocal, ppForceWorkload->haveGpuBondedWork); + Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), + Nbnxm::AtomLocality::NonLocal, + ppForceWorkload->haveGpuBondedWork); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); if (ppForceWorkload->haveGpuBondedWork) @@ -1007,10 +1011,10 @@ static void do_force_cutsVERLET(FILE *fplog, wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_NONBONDED); if (havePPDomainDecomposition(cr)) { - Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat, + Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), flags, Nbnxm::AtomLocality::NonLocal, ppForceWorkload->haveGpuBondedWork); } - Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat, + Nbnxm::gpu_launch_cpyback(nbv->gpu_nbv, nbv->nbat.get(), flags, Nbnxm::AtomLocality::Local, ppForceWorkload->haveGpuBondedWork); wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_NONBONDED); @@ -1167,7 +1171,7 @@ static void do_force_cutsVERLET(FILE *fplog, { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ - nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat, + nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat.get(), fr->fshift); } } @@ -1242,7 +1246,8 @@ static void do_force_cutsVERLET(FILE *fplog, bool alternateGpuWait = (!c_disableAlternatingWait && useGpuPme && bUseGPU && !DOMAINDECOMP(cr)); if (alternateGpuWait) { - alternatePmeNbGpuWaitReduce(fr->nbv, fr->pmedata, &force, &forceWithVirial, fr->fshift, enerd, flags, pmeFlags, ppForceWorkload->haveGpuBondedWork, wcycle); + alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &force, &forceWithVirial, fr->fshift, enerd, + flags, pmeFlags, ppForceWorkload->haveGpuBondedWork, wcycle); } if (!alternateGpuWait && useGpuPme) diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index 681a741ab6..d853a25d75 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -413,7 +413,7 @@ void gmx::Integrator::do_md() if (bPMETune) { pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, - *fr->ic, fr->nbv->pairlistSets().params(), fr->pmedata, use_GPU(fr->nbv), + *fr->ic, fr->nbv->pairlistSets().params(), fr->pmedata, use_GPU(fr->nbv.get()), &bPMETunePrinting); } @@ -1475,7 +1475,7 @@ void gmx::Integrator::do_md() step_rel++; resetHandler->resetCounters( - step, step_rel, mdlog, fplog, cr, (use_GPU(fr->nbv) ? fr->nbv : nullptr), + step, step_rel, mdlog, fplog, cr, (use_GPU(fr->nbv.get()) ? fr->nbv.get() : nullptr), nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting); /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ @@ -1510,7 +1510,7 @@ void gmx::Integrator::do_md() if (bPMETune) { - pme_loadbal_done(pme_loadbal, fplog, mdlog, use_GPU(fr->nbv)); + pme_loadbal_done(pme_loadbal, fplog, mdlog, use_GPU(fr->nbv.get())); } done_shellfc(fplog, shellfc, step_rel); diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 3239a01f0b..df8da62702 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1564,7 +1564,7 @@ int Mdrunner::mdrunner() */ finish_run(fplog, mdlog, cr, inputrec, nrnb, wcycle, walltime_accounting, - fr ? fr->nbv : nullptr, + fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms)); diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h index 915381691d..e90470a62a 100644 --- a/src/gromacs/mdtypes/forcerec.h +++ b/src/gromacs/mdtypes/forcerec.h @@ -38,6 +38,7 @@ #define GMX_MDTYPES_TYPES_FORCEREC_H #include +#include #include #include "gromacs/math/vectypes.h" @@ -229,7 +230,9 @@ struct t_forcerec { // NOLINT (clang-analyzer-optin.performance.Padding) int cutoff_scheme = 0; /* group- or Verlet-style cutoff */ gmx_bool bNonbonded = FALSE; /* true if nonbonded calculations are *not* turned off */ - struct nonbonded_verlet_t *nbv = nullptr; + + /* The Nbnxm Verlet non-bonded machinery */ + std::unique_ptr nbv; /* The wall tables (if used) */ int nwall = 0; diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index 1de1af8f11..db26637b05 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -1516,11 +1516,11 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality */ if (nbat->bUseTreeReduce) { - nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbat, nth); + nbnxn_atomdata_add_nbat_f_to_f_treereduce(nbat.get(), nth); } else { - nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbat, nth); + nbnxn_atomdata_add_nbat_f_to_f_stdreduce(nbat.get(), nth); } } #pragma omp parallel for num_threads(nth) schedule(static) @@ -1528,7 +1528,7 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality { try { - nbnxn_atomdata_add_nbat_f_to_f_part(nbs.get(), nbat, + nbnxn_atomdata_add_nbat_f_to_f_part(nbs.get(), nbat.get(), nbat->out, 1, a0+((th+0)*na)/nth, diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu index c711136b47..61104dd57b 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda_data_mgmt.cu @@ -345,14 +345,13 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t *nbv, { return; } - gmx_nbnxn_cuda_t *nb = nbv->gpu_nbv; - cu_nbparam_t *nbp = nb->nbparam; + cu_nbparam_t *nbp = nbv->gpu_nbv->nbparam; set_cutoff_parameters(nbp, ic, listParams); nbp->eeltype = pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw); - init_ewald_coulomb_force_table(ic, nb->nbparam); + init_ewald_coulomb_force_table(ic, nbp); } /*! Initializes the pair list data structure. */ @@ -415,22 +414,17 @@ static void cuda_init_const(gmx_nbnxn_cuda_t *nb, nbnxn_cuda_clear_e_fshift(nb); } -void gpu_init(gmx_nbnxn_cuda_t **p_nb, - const gmx_device_info_t *deviceInfo, - const interaction_const_t *ic, - const NbnxnListParameters *listParams, - const nbnxn_atomdata_t *nbat, - int /*rank*/, - gmx_bool bLocalAndNonlocal) +gmx_nbnxn_cuda_t * +gpu_init(const gmx_device_info_t *deviceInfo, + const interaction_const_t *ic, + const NbnxnListParameters *listParams, + const nbnxn_atomdata_t *nbat, + int /*rank*/, + gmx_bool bLocalAndNonlocal) { cudaError_t stat; - gmx_nbnxn_cuda_t *nb; - - if (p_nb == nullptr) - { - return; - } + gmx_nbnxn_cuda_t *nb; snew(nb, 1); snew(nb->atdat, 1); snew(nb->nbparam, 1); @@ -499,12 +493,12 @@ void gpu_init(gmx_nbnxn_cuda_t **p_nb, cuda_init_const(nb, ic, listParams, nbat->params()); - *p_nb = nb; - if (debug) { fprintf(debug, "Initialized CUDA data structures.\n"); } + + return nb; } void gpu_init_pairlist(gmx_nbnxn_cuda_t *nb, diff --git a/src/gromacs/nbnxm/gpu_data_mgmt.h b/src/gromacs/nbnxm/gpu_data_mgmt.h index f578cf5cb9..c1a19b7f63 100644 --- a/src/gromacs/nbnxm/gpu_data_mgmt.h +++ b/src/gromacs/nbnxm/gpu_data_mgmt.h @@ -44,6 +44,8 @@ #ifndef GMX_NBNXN_GPU_DATA_MGMT_H #define GMX_NBNXN_GPU_DATA_MGMT_H +#include + #include "gromacs/gpu_utils/gpu_macros.h" #include "gromacs/mdtypes/interaction_const.h" @@ -63,14 +65,14 @@ namespace Nbnxm /** Initializes the data structures related to GPU nonbonded calculations. */ GPU_FUNC_QUALIFIER -void gpu_init(gmx_nbnxn_gpu_t gmx_unused **p_nb, - const gmx_device_info_t gmx_unused *deviceInfo, - const interaction_const_t gmx_unused *ic, - const NbnxnListParameters gmx_unused *listParams, - const nbnxn_atomdata_t gmx_unused *nbat, - int gmx_unused rank, - /* true if both local and non-local are done on GPU */ - gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM +gmx_nbnxn_gpu_t * +gpu_init(const gmx_device_info_t gmx_unused *deviceInfo, + const interaction_const_t gmx_unused *ic, + const NbnxnListParameters gmx_unused *listParams, + const nbnxn_atomdata_t gmx_unused *nbat, + int gmx_unused rank, + /* true if both local and non-local are done on GPU */ + gmx_bool gmx_unused bLocalAndNonlocal) GPU_FUNC_TERM_WITH_RETURN(nullptr) /** Initializes pair-list data for GPU, called at every pair search step. */ GPU_FUNC_QUALIFIER @@ -115,7 +117,7 @@ void gpu_reset_timings(struct nonbonded_verlet_t gmx_unused *nbv) GPU_FUNC_TERM /** Calculates the minimum size of proximity lists to improve SM load balance * with GPU non-bonded kernels. */ - GPU_FUNC_QUALIFIER + GPU_FUNC_QUALIFIER int gpu_min_ci_balanced(gmx_nbnxn_gpu_t gmx_unused *nb) GPU_FUNC_TERM_WITH_RETURN(-1) /** Returns if analytical Ewald GPU kernels are used. */ @@ -145,7 +147,7 @@ void *gpu_get_f(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullp * Note: CUDA only. */ CUDA_FUNC_QUALIFIER - rvec *gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr) + rvec *gpu_get_fshift(gmx_nbnxn_gpu_t gmx_unused *nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr) } // namespace Nbnxm diff --git a/src/gromacs/nbnxm/gpu_types.h b/src/gromacs/nbnxm/gpu_types.h index a18ca0b56f..f02e456ad9 100644 --- a/src/gromacs/nbnxm/gpu_types.h +++ b/src/gromacs/nbnxm/gpu_types.h @@ -47,12 +47,12 @@ #if GMX_GPU == GMX_GPU_OPENCL struct gmx_nbnxn_ocl_t; -typedef struct gmx_nbnxn_ocl_t gmx_nbnxn_gpu_t; +using gmx_nbnxn_gpu_t = gmx_nbnxn_ocl_t; #endif #if GMX_GPU == GMX_GPU_CUDA struct gmx_nbnxn_cuda_t; -typedef struct gmx_nbnxn_cuda_t gmx_nbnxn_gpu_t; +using gmx_nbnxn_gpu_t = gmx_nbnxn_cuda_t; #endif #if GMX_GPU == GMX_GPU_NONE diff --git a/src/gromacs/nbnxm/grid.cpp b/src/gromacs/nbnxm/grid.cpp index 19ed3d1021..3e91161f2c 100644 --- a/src/gromacs/nbnxm/grid.cpp +++ b/src/gromacs/nbnxm/grid.cpp @@ -1495,7 +1495,7 @@ void nbnxn_put_on_grid(nonbonded_verlet_t *nbv, lowerCorner, upperCorner, nbs->grid[0].atom_density); - nbnxn_atomdata_t *nbat = nbv->nbat; + nbnxn_atomdata_t *nbat = nbv->nbat.get(); calc_cell_indices(nbs, ddZone, grid, updateGroupsCog, atomStart, atomEnd, atinfo, x, numAtomsMoved, move, nbat); diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp index b4b79b0b11..31cf946e20 100644 --- a/src/gromacs/nbnxm/kerneldispatch.cpp +++ b/src/gromacs/nbnxm/kerneldispatch.cpp @@ -498,7 +498,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality case Nbnxm::KernelType::Cpu4xN_Simd_2xNN: nbnxn_kernel_cpu(pairlistSet, kernelSetup(), - nbat, + nbat.get(), ic, fr->shift_vec, forceFlags, @@ -516,7 +516,7 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality case Nbnxm::KernelType::Cpu8x8x8_PlainC: nbnxn_kernel_gpu_ref(pairlistSet.nblGpu[0], - nbat, &ic, + nbat.get(), &ic, fr->shift_vec, forceFlags, clearF, diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index a3fce4a01d..dbdb9fba51 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -346,6 +346,15 @@ struct nonbonded_verlet_t int64_t outerListCreationStep_; }; + //! Constructs an object from its components + nonbonded_verlet_t(std::unique_ptr pairlistSets_, + std::unique_ptr nbs, + std::unique_ptr nbat, + const Nbnxm::KernelSetup &kernelSetup, + gmx_nbnxn_gpu_t *gpu_nbv); + + ~nonbonded_verlet_t(); + //! Returns whether a GPU is use for the non-bonded calculations bool useGpu() const { @@ -425,43 +434,36 @@ struct nonbonded_verlet_t return kernelSetup_; } - //! Sets the kernel setup, TODO: make private - void setKernelSetup(const Nbnxm::KernelSetup &kernelSetup) - { - kernelSetup_ = kernelSetup; - } - // TODO: Make all data members private public: //! All data related to the pair lists - std::unique_ptr pairlistSets_; + std::unique_ptr pairlistSets_; //! Working data for constructing the pairlists - std::unique_ptr nbs; + std::unique_ptr nbs; //! Atom data - nbnxn_atomdata_t *nbat; - + std::unique_ptr nbat; private: //! The non-bonded setup, also affects the pairlist construction kernel - Nbnxm::KernelSetup kernelSetup_; + Nbnxm::KernelSetup kernelSetup_; public: - - gmx_nbnxn_gpu_t *gpu_nbv; /**< pointer to GPU nb verlet data */ + //! GPU Nbnxm data, only used with a physical GPU (TODO: use unique_ptr) + gmx_nbnxn_gpu_t *gpu_nbv; }; namespace Nbnxm { -/*! \brief Initializes the nbnxn module */ -void init_nb_verlet(const gmx::MDLogger &mdlog, - nonbonded_verlet_t **nb_verlet, - gmx_bool bFEP_NonBonded, - const t_inputrec *ir, - const t_forcerec *fr, - const t_commrec *cr, - const gmx_hw_info_t &hardwareInfo, - const gmx_device_info_t *deviceInfo, - const gmx_mtop_t *mtop, - matrix box); +/*! \brief Creates an Nbnxm object */ +std::unique_ptr +init_nb_verlet(const gmx::MDLogger &mdlog, + gmx_bool bFEP_NonBonded, + const t_inputrec *ir, + const t_forcerec *fr, + const t_commrec *cr, + const gmx_hw_info_t &hardwareInfo, + const gmx_device_info_t *deviceInfo, + const gmx_mtop_t *mtop, + matrix box); } // namespace Nbnxm diff --git a/src/gromacs/nbnxm/nbnxm_setup.cpp b/src/gromacs/nbnxm/nbnxm_setup.cpp index da474ae8ba..2e43db5fc6 100644 --- a/src/gromacs/nbnxm/nbnxm_setup.cpp +++ b/src/gromacs/nbnxm/nbnxm_setup.cpp @@ -61,6 +61,7 @@ #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/logger.h" +#include "gpu_types.h" #include "grid.h" #include "internal.h" @@ -343,19 +344,17 @@ static int getMinimumIlistCountForGpuBalancing(gmx_nbnxn_gpu_t *nbnxmGpu) return minimumIlistCount; } -void init_nb_verlet(const gmx::MDLogger &mdlog, - nonbonded_verlet_t **nb_verlet, - gmx_bool bFEP_NonBonded, - const t_inputrec *ir, - const t_forcerec *fr, - const t_commrec *cr, - const gmx_hw_info_t &hardwareInfo, - const gmx_device_info_t *deviceInfo, - const gmx_mtop_t *mtop, - matrix box) +std::unique_ptr +init_nb_verlet(const gmx::MDLogger &mdlog, + gmx_bool bFEP_NonBonded, + const t_inputrec *ir, + const t_forcerec *fr, + const t_commrec *cr, + const gmx_hw_info_t &hardwareInfo, + const gmx_device_info_t *deviceInfo, + const gmx_mtop_t *mtop, + matrix box) { - nonbonded_verlet_t *nbv = new nonbonded_verlet_t(); - const bool emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr); const bool useGpu = deviceInfo != nullptr; @@ -375,15 +374,14 @@ void init_nb_verlet(const gmx::MDLogger &mdlog, nonbondedResource = NonbondedResource::Cpu; } - nbv->nbs = nullptr; - - nbv->setKernelSetup(pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo, - nonbondedResource, ir, - fr->bNonbonded)); + Nbnxm::KernelSetup kernelSetup = + pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo, + nonbondedResource, ir, + fr->bNonbonded); const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1); - NbnxnListParameters listParams(nbv->kernelSetup().kernelType, + NbnxnListParameters listParams(kernelSetup.kernelType, ir->rlist, havePPDomainDecomposition(cr)); @@ -417,7 +415,9 @@ void init_nb_verlet(const gmx::MDLogger &mdlog, enbnxninitcombrule = enbnxninitcombruleNONE; } - nbv->nbat = new nbnxn_atomdata_t(useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned); + std::unique_ptr nbat = + std::make_unique(useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned); + int mimimumNumEnergyGroupNonbonded = ir->opts.ngener; if (ir->opts.ngener - ir->nwall == 1) { @@ -428,41 +428,67 @@ void init_nb_verlet(const gmx::MDLogger &mdlog, mimimumNumEnergyGroupNonbonded = 1; } nbnxn_atomdata_init(mdlog, - nbv->nbat, - nbv->kernelSetup().kernelType, + nbat.get(), + kernelSetup.kernelType, enbnxninitcombrule, fr->ntype, fr->nbfp, mimimumNumEnergyGroupNonbonded, - nbv->pairlistIsSimple() ? gmx_omp_nthreads_get(emntNonbonded) : 1); + (useGpu || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded)); - int minimumIlistCountForGpuBalancing = 0; + gmx_nbnxn_gpu_t *gpu_nbv = nullptr; + int minimumIlistCountForGpuBalancing = 0; if (useGpu) { /* init the NxN GPU data; the last argument tells whether we'll have * both local and non-local NB calculation on GPU */ - gpu_init(&nbv->gpu_nbv, - deviceInfo, - fr->ic, - &listParams, - nbv->nbat, - cr->nodeid, - haveMultipleDomains); - - minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(nbv->gpu_nbv); + gpu_nbv = gpu_init(deviceInfo, + fr->ic, + &listParams, + nbat.get(), + cr->nodeid, + haveMultipleDomains); + + minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv); } - nbv->pairlistSets_ = + std::unique_ptr pairlistSets = std::make_unique(listParams, haveMultipleDomains, minimumIlistCountForGpuBalancing); - nbv->nbs = std::make_unique(ir->ePBC, - DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr, - DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr, - bFEP_NonBonded, - gmx_omp_nthreads_get(emntPairsearch)); - - *nb_verlet = nbv; + std::unique_ptr nbs = + std::make_unique(ir->ePBC, + DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr, + DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr, + bFEP_NonBonded, + gmx_omp_nthreads_get(emntPairsearch)); + + return std::make_unique(std::move(pairlistSets), + std::move(nbs), + std::move(nbat), + kernelSetup, + gpu_nbv); } } // namespace Nbnxm + +nonbonded_verlet_t::nonbonded_verlet_t(std::unique_ptr pairlistSets, + std::unique_ptr nbs_in, + std::unique_ptr nbat_in, + const Nbnxm::KernelSetup &kernelSetup, + gmx_nbnxn_gpu_t *gpu_nbv_ptr) : + pairlistSets_(std::move(pairlistSets)), + nbs(std::move(nbs_in)), + nbat(std::move(nbat_in)), + kernelSetup_(kernelSetup), + gpu_nbv(gpu_nbv_ptr) +{ + GMX_RELEASE_ASSERT(pairlistSets_, "Need valid pairlistSets"); + GMX_RELEASE_ASSERT(nbs, "Need valid search object"); + GMX_RELEASE_ASSERT(nbat, "Need valid atomdata object"); +} + +nonbonded_verlet_t::~nonbonded_verlet_t() +{ + Nbnxm::gpu_free(gpu_nbv); +} diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp index d040b67c22..85de6a2746 100644 --- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp +++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp @@ -429,7 +429,7 @@ void gpu_pme_loadbal_update_param(const nonbonded_verlet_t *nbv, nbp->eeltype = gpu_pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw); - init_ewald_coulomb_force_table(ic, nb->nbparam, nb->dev_rundata); + init_ewald_coulomb_force_table(ic, nbp, nb->dev_rundata); } /*! \brief Initializes the pair list data structure. @@ -633,13 +633,13 @@ static void nbnxn_ocl_init_const(gmx_nbnxn_ocl_t *nb, //! This function is documented in the header file -void gpu_init(gmx_nbnxn_ocl_t **p_nb, - const gmx_device_info_t *deviceInfo, - const interaction_const_t *ic, - const NbnxnListParameters *listParams, - const nbnxn_atomdata_t *nbat, - const int rank, - const gmx_bool bLocalAndNonlocal) +gmx_nbnxn_ocl_t * +gpu_init(const gmx_device_info_t *deviceInfo, + const interaction_const_t *ic, + const NbnxnListParameters *listParams, + const nbnxn_atomdata_t *nbat, + const int rank, + const gmx_bool bLocalAndNonlocal) { gmx_nbnxn_ocl_t *nb; cl_int cl_error; @@ -647,11 +647,6 @@ void gpu_init(gmx_nbnxn_ocl_t **p_nb, assert(ic); - if (p_nb == nullptr) - { - return; - } - snew(nb, 1); snew(nb->atdat, 1); snew(nb->nbparam, 1); @@ -743,12 +738,12 @@ void gpu_init(gmx_nbnxn_ocl_t **p_nb, /* clear energy and shift force outputs */ nbnxn_ocl_clear_e_fshift(nb); - *p_nb = nb; - if (debug) { fprintf(debug, "Initialized OpenCL data structures.\n"); } + + return nb; } /*! \brief Clears the first natoms_clear elements of the GPU nonbonded force output array. diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp index d2327f94bd..26c64db34f 100644 --- a/src/gromacs/nbnxm/pairlist.cpp +++ b/src/gromacs/nbnxm/pairlist.cpp @@ -4368,7 +4368,7 @@ nonbonded_verlet_t::constructPairlist(const Nbnxm::InteractionLocality iLocalit int64_t step, t_nrnb *nrnb) { - pairlistSets_->construct(iLocality, nbs.get(), nbat, excl, + pairlistSets_->construct(iLocality, nbs.get(), nbat.get(), excl, kernelSetup_.kernelType, step, nrnb); diff --git a/src/gromacs/nbnxm/prunekerneldispatch.cpp b/src/gromacs/nbnxm/prunekerneldispatch.cpp index 1b89ebbf83..15f84465b2 100644 --- a/src/gromacs/nbnxm/prunekerneldispatch.cpp +++ b/src/gromacs/nbnxm/prunekerneldispatch.cpp @@ -85,5 +85,5 @@ void nonbonded_verlet_t::dispatchPruneKernelCpu(const Nbnxm::InteractionLocality iLocality, const rvec *shift_vec) { - pairlistSets_->dispatchPruneKernel(iLocality, nbat, shift_vec, kernelSetup_.kernelType); + pairlistSets_->dispatchPruneKernel(iLocality, nbat.get(), shift_vec, kernelSetup_.kernelType); } -- 2.22.0