From e44b37157f16c21f248da21f2604fb87f6c2ceb3 Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Thu, 14 Nov 2019 09:07:22 +0100 Subject: [PATCH] Simplify t_forcerec destruction Destruction of t_forcerec was, without need, done in two steps with complicated dependencies using the functions free_gpu_resources() and done_forcerec(). Now only the destructor is used. To make this work, cginfo_mb and nbfp in t_forcerec needed to be converted to std::vector. Change-Id: I836bcdbd52b03834d4f2661475247fc56b907518 --- src/gromacs/domdec/domdec.cpp | 16 +-- src/gromacs/domdec/domdec.h | 16 +-- src/gromacs/domdec/domdec_topology.cpp | 24 ++-- src/gromacs/domdec/partition.cpp | 11 +- src/gromacs/domdec/redistribute.cpp | 2 +- src/gromacs/domdec/utility.h | 11 +- .../gmxlib/nonbonded/nb_free_energy.cpp | 2 +- src/gromacs/mdlib/forcerec.cpp | 135 +++++------------- src/gromacs/mdlib/forcerec.h | 7 - src/gromacs/mdlib/wall.cpp | 2 +- src/gromacs/mdrun/runner.cpp | 37 ++++- src/gromacs/mdtypes/forcerec.h | 18 +-- src/gromacs/nbnxm/atomdata.cpp | 4 +- src/gromacs/nbnxm/atomdata.h | 16 +-- src/gromacs/nbnxm/benchmark/bench_setup.cpp | 2 +- src/gromacs/nbnxm/benchmark/bench_system.cpp | 2 +- 16 files changed, 129 insertions(+), 176 deletions(-) diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp index 2f931240e4..83bdef4803 100644 --- a/src/gromacs/domdec/domdec.cpp +++ b/src/gromacs/domdec/domdec.cpp @@ -2507,17 +2507,17 @@ static void set_dd_limits(const gmx::MDLogger& mdlog, } } -void dd_init_bondeds(FILE* fplog, - gmx_domdec_t* dd, - const gmx_mtop_t* mtop, - const gmx_vsite_t* vsite, - const t_inputrec* ir, - gmx_bool bBCheck, - cginfo_mb_t* cginfo_mb) +void dd_init_bondeds(FILE* fplog, + gmx_domdec_t* dd, + const gmx_mtop_t& mtop, + const gmx_vsite_t* vsite, + const t_inputrec* ir, + gmx_bool bBCheck, + gmx::ArrayRef cginfo_mb) { gmx_domdec_comm_t* comm; - dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck); + dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck); comm = dd->comm; diff --git a/src/gromacs/domdec/domdec.h b/src/gromacs/domdec/domdec.h index 61691d63e3..78c321d624 100644 --- a/src/gromacs/domdec/domdec.h +++ b/src/gromacs/domdec/domdec.h @@ -155,13 +155,13 @@ bool ddHaveSplitConstraints(const gmx_domdec_t& dd); bool is1DAnd1PulseDD(const gmx_domdec_t& dd); /*! \brief Initialize data structures for bonded interactions */ -void dd_init_bondeds(FILE* fplog, - gmx_domdec_t* dd, - const gmx_mtop_t* mtop, - const gmx_vsite_t* vsite, - const t_inputrec* ir, - gmx_bool bBCheck, - cginfo_mb_t* cginfo_mb); +void dd_init_bondeds(FILE* fplog, + gmx_domdec_t* dd, + const gmx_mtop_t& mtop, + const gmx_vsite_t* vsite, + const t_inputrec* ir, + gmx_bool bBCheck, + gmx::ArrayRef cginfo_mb); /*! \brief Returns whether molecules are always whole, i.e. not broken by PBC */ bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd); @@ -295,7 +295,7 @@ void dd_init_local_state(struct gmx_domdec_t* dd, const t_state* state_global, t * * Also stores whether atoms are linked in \p cginfo_mb. */ -t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb); +t_blocka* makeBondedLinks(const gmx_mtop_t& mtop, gmx::ArrayRef cginfo_mb); /*! \brief Calculate the maximum distance involved in 2-body and multi-body bonded interactions */ void dd_bonded_cg_distance(const gmx::MDLogger& mdlog, diff --git a/src/gromacs/domdec/domdec_topology.cpp b/src/gromacs/domdec/domdec_topology.cpp index a426057672..ee5da3d11a 100644 --- a/src/gromacs/domdec/domdec_topology.cpp +++ b/src/gromacs/domdec/domdec_topology.cpp @@ -1857,7 +1857,7 @@ static void check_link(t_blocka* link, int cg_gl, int cg_gl_j) } } -t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb) +t_blocka* makeBondedLinks(const gmx_mtop_t& mtop, gmx::ArrayRef cginfo_mb) { t_blocka* link; cginfo_mb_t* cgi_mb; @@ -1868,35 +1868,35 @@ t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb) */ reverse_ilist_t ril_intermol; - if (mtop->bIntermolecularInteractions) + if (mtop.bIntermolecularInteractions) { t_atoms atoms; - atoms.nr = mtop->natoms; + atoms.nr = mtop.natoms; atoms.atom = nullptr; - GMX_RELEASE_ASSERT(mtop->intermolecular_ilist, + GMX_RELEASE_ASSERT(mtop.intermolecular_ilist, "We should have an ilist when intermolecular interactions are on"); - make_reverse_ilist(*mtop->intermolecular_ilist, &atoms, FALSE, FALSE, FALSE, TRUE, &ril_intermol); + make_reverse_ilist(*mtop.intermolecular_ilist, &atoms, FALSE, FALSE, FALSE, TRUE, &ril_intermol); } snew(link, 1); - snew(link->index, mtop->natoms + 1); + snew(link->index, mtop.natoms + 1); link->nalloc_a = 0; link->a = nullptr; link->index[0] = 0; int cg_offset = 0; int ncgi = 0; - for (size_t mb = 0; mb < mtop->molblock.size(); mb++) + for (size_t mb = 0; mb < mtop.molblock.size(); mb++) { - const gmx_molblock_t& molb = mtop->molblock[mb]; + const gmx_molblock_t& molb = mtop.molblock[mb]; if (molb.nmol == 0) { continue; } - const gmx_moltype_t& molt = mtop->moltype[molb.type]; + const gmx_moltype_t& molt = mtop.moltype[molb.type]; /* Make a reverse ilist in which the interactions are linked * to all atoms, not only the first atom as in gmx_reverse_top. * The constraints are discarded here. @@ -1907,7 +1907,7 @@ t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb) cgi_mb = &cginfo_mb[mb]; int mol; - for (mol = 0; mol < (mtop->bIntermolecularInteractions ? molb.nmol : 1); mol++) + for (mol = 0; mol < (mtop.bIntermolecularInteractions ? molb.nmol : 1); mol++) { for (int a = 0; a < molt.atoms.nr; a++) { @@ -1931,7 +1931,7 @@ t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb) i += nral_rt(ftype); } - if (mtop->bIntermolecularInteractions) + if (mtop.bIntermolecularInteractions) { int i = ril_intermol.index[a]; while (i < ril_intermol.index[a + 1]) @@ -1997,7 +1997,7 @@ t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb) if (debug) { - fprintf(debug, "Of the %d atoms %d are linked via bonded interactions\n", mtop->natoms, ncgi); + fprintf(debug, "Of the %d atoms %d are linked via bonded interactions\n", mtop.natoms, ncgi); } return link; diff --git a/src/gromacs/domdec/partition.cpp b/src/gromacs/domdec/partition.cpp index dbd52b2049..d9428e3975 100644 --- a/src/gromacs/domdec/partition.cpp +++ b/src/gromacs/domdec/partition.cpp @@ -462,8 +462,8 @@ static void dd_set_cginfo(gmx::ArrayRef index_gl, int cg0, int cg1, t { if (fr != nullptr) { - const cginfo_mb_t* cginfo_mb = fr->cginfo_mb; - gmx::ArrayRef cginfo = fr->cginfo; + gmx::ArrayRef cginfo_mb = fr->cginfo_mb; + gmx::ArrayRef cginfo = fr->cginfo; for (int cg = cg0; cg < cg1; cg++) { @@ -1327,7 +1327,7 @@ static void merge_cg_buffers(int ncell, const int* recv_i, gmx::ArrayRef x, gmx::ArrayRef recv_vr, - cginfo_mb_t* cginfo_mb, + gmx::ArrayRef cginfo_mb, gmx::ArrayRef cginfo) { gmx_domdec_ind_t *ind, *ind_p; @@ -1820,7 +1820,6 @@ static void setup_dd_communication(gmx_domdec_t* dd, gmx_domdec_comm_t* comm; gmx_domdec_zones_t* zones; gmx_domdec_comm_dim_t* cd; - cginfo_mb_t* cginfo_mb; gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded; dd_corners_t corners; rvec * normal, *v_d, *v_0 = nullptr, *v_1 = nullptr; @@ -1894,8 +1893,8 @@ static void setup_dd_communication(gmx_domdec_t* dd, v_1 = ddbox->v[dim1]; } - zone_cg_range = zones->cg_range; - cginfo_mb = fr->cginfo_mb; + zone_cg_range = zones->cg_range; + gmx::ArrayRef cginfo_mb = fr->cginfo_mb; zone_cg_range[0] = 0; zone_cg_range[1] = dd->ncg_home; diff --git a/src/gromacs/domdec/redistribute.cpp b/src/gromacs/domdec/redistribute.cpp index eb130157c6..a747a26a1c 100644 --- a/src/gromacs/domdec/redistribute.cpp +++ b/src/gromacs/domdec/redistribute.cpp @@ -736,7 +736,7 @@ void dd_redistribute_cg(FILE* fplog, /* We reuse the intBuffer without reacquiring since we are in the same scope */ DDBufferAccess& flagBuffer = moveBuffer; - const cginfo_mb_t* cginfo_mb = fr->cginfo_mb; + gmx::ArrayRef cginfo_mb = fr->cginfo_mb; /* Temporarily store atoms passed to our rank at the end of the range */ int home_pos_cg = dd->ncg_home; diff --git a/src/gromacs/domdec/utility.h b/src/gromacs/domdec/utility.h index 9d7db1f112..353dc327b0 100644 --- a/src/gromacs/domdec/utility.h +++ b/src/gromacs/domdec/utility.h @@ -44,6 +44,7 @@ #include "gromacs/gpu_utils/hostallocator.h" #include "gromacs/mdtypes/forcerec.h" +#include "gromacs/utility/arrayref.h" #include "domdec_internal.h" @@ -77,14 +78,16 @@ void make_tric_corr_matrix(int npbcdim, const matrix box, matrix tcm); void check_screw_box(const matrix box); /*! \brief Return the charge group information flags for charge group cg */ -static inline int ddcginfo(const cginfo_mb_t* cginfo_mb, int cg) +static inline int ddcginfo(gmx::ArrayRef cginfo_mb, int cg) { - while (cg >= cginfo_mb->cg_end) + size_t index = 0; + while (cg >= cginfo_mb[index].cg_end) { - cginfo_mb++; + index++; } + const cginfo_mb_t& cgimb = cginfo_mb[index]; - return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod]; + return cgimb.cginfo[(cg - cgimb.cg_start) % cgimb.cg_mod]; }; /*! \brief Returns the number of MD steps for which load has been recorded */ diff --git a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp index 8395379dad..e440a146f6 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp +++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp @@ -265,7 +265,7 @@ static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist, const int* typeA = mdatoms->typeA; const int* typeB = mdatoms->typeB; const int ntype = fr->ntype; - const real* nbfp = fr->nbfp; + const real* nbfp = fr->nbfp.data(); const real* nbfp_grid = fr->ljpme_c6grid; real* Vv = kernel_data->energygrp_vdw; const real lambda_coul = kernel_data->lambda[efptCOUL]; diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index b702a4a255..66c4c33b56 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -102,18 +102,19 @@ static const bool c_enableGpuPmePpComms = (getenv("GMX_GPU_PME_PP_COMMS") != nullptr) && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA); -static real* mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM) +static std::vector mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM) { - real* nbfp; - int i, j, k, atnr; + std::vector nbfp; + int atnr; atnr = idef->atnr; if (bBHAM) { - snew(nbfp, 3 * atnr * atnr); - for (i = k = 0; (i < atnr); i++) + nbfp.resize(3 * atnr * atnr); + int k = 0; + for (int i = 0; (i < atnr); i++) { - for (j = 0; (j < atnr); j++, k++) + for (int j = 0; (j < atnr); j++, k++) { BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a; BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b; @@ -124,10 +125,11 @@ static real* mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM) } else { - snew(nbfp, 2 * atnr * atnr); - for (i = k = 0; (i < atnr); i++) + nbfp.resize(2 * atnr * atnr); + int k = 0; + for (int i = 0; (i < atnr); i++) { - for (j = 0; (j < atnr); j++, k++) + for (int j = 0; (j < atnr); j++, k++) { /* nbfp now includes the 6.0/12.0 derivative prefactors */ C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6 * 6.0; @@ -186,14 +188,10 @@ enum acSETTLE }; -static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, gmx_bool* bFEP_NonBonded) +static std::vector init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, gmx_bool* bFEP_NonBonded) { - cginfo_mb_t* cginfo_mb; - gmx_bool* type_VDW; - int* cginfo; - int* a_con; - - snew(cginfo_mb, mtop->molblock.size()); + gmx_bool* type_VDW; + int* a_con; snew(type_VDW, fr->ntype); for (int ai = 0; ai < fr->ntype; ai++) @@ -208,7 +206,8 @@ static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, *bFEP_NonBonded = FALSE; - int a_offset = 0; + std::vector cginfoPerMolblock; + int a_offset = 0; for (size_t mb = 0; mb < mtop->molblock.size(); mb++) { const gmx_molblock_t& molb = mtop->molblock[mb]; @@ -241,11 +240,12 @@ static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, } } - cginfo_mb[mb].cg_start = a_offset; - cginfo_mb[mb].cg_end = a_offset + molb.nmol * molt.atoms.nr; - cginfo_mb[mb].cg_mod = (bId ? 1 : molb.nmol) * molt.atoms.nr; - snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod); - cginfo = cginfo_mb[mb].cginfo; + cginfo_mb_t cginfo_mb; + cginfo_mb.cg_start = a_offset; + cginfo_mb.cg_end = a_offset + molb.nmol * molt.atoms.nr; + cginfo_mb.cg_mod = (bId ? 1 : molb.nmol) * molt.atoms.nr; + cginfo_mb.cginfo.resize(cginfo_mb.cg_mod); + gmx::ArrayRef cginfo = cginfo_mb.cginfo; /* Set constraints flags for constrained atoms */ snew(a_con, molt.atoms.nr); @@ -322,14 +322,16 @@ static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, sfree(a_con); + cginfoPerMolblock.push_back(cginfo_mb); + a_offset += molb.nmol * molt.atoms.nr; } sfree(type_VDW); - return cginfo_mb; + return cginfoPerMolblock; } -static std::vector cginfo_expand(const int nmb, const cginfo_mb_t* cgi_mb) +static std::vector cginfo_expand(const int nmb, gmx::ArrayRef cgi_mb) { const int ncg = cgi_mb[nmb - 1].cg_end; @@ -348,19 +350,6 @@ static std::vector cginfo_expand(const int nmb, const cginfo_mb_t* cgi_mb) return cginfo; } -static void done_cginfo_mb(cginfo_mb_t* cginfo_mb, int numMolBlocks) -{ - if (cginfo_mb == nullptr) - { - return; - } - for (int mb = 0; mb < numMolBlocks; ++mb) - { - sfree(cginfo_mb[mb].cginfo); - } - sfree(cginfo_mb); -} - /* Sets the sum of charges (squared) and C6 in the system in fr. * Returns whether the system has a net charge. */ @@ -1271,7 +1260,7 @@ void init_forcerec(FILE* fp, fr->shiftForces.resize(SHIFTS); - if (fr->nbfp == nullptr) + if (fr->nbfp.empty()) { fr->ntype = mtop->ffparams.atnr; fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM); @@ -1452,8 +1441,7 @@ void init_forcerec(FILE* fp, if (ir->eDispCorr != edispcNO) { fr->dispersionCorrection = std::make_unique( - *mtop, *ir, fr->bBHAM, fr->ntype, - gmx::arrayRefFromArray(fr->nbfp, fr->ntype * fr->ntype * 2), *fr->ic, tabfn); + *mtop, *ir, fr->bBHAM, fr->ntype, fr->nbfp, *fr->ic, tabfn); fr->dispersionCorrection->print(mdlog); } @@ -1474,67 +1462,10 @@ void init_forcerec(FILE* fp, t_forcerec::t_forcerec() = default; -t_forcerec::~t_forcerec() = default; - -/* Frees GPU memory and sets a tMPI node barrier. - * - * Note that this function needs to be called even if GPUs are not used - * in this run because the PME ranks have no knowledge of whether GPUs - * are used or not, but all ranks need to enter the barrier below. - * \todo Remove physical node barrier from this function after making sure - * that it's not needed anymore (with a shared GPU run). - */ -void free_gpu_resources(t_forcerec* fr, - const gmx::PhysicalNodeCommunicator& physicalNodeCommunicator, - const gmx_gpu_info_t& gpu_info) +t_forcerec::~t_forcerec() { - bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->useGpu(); - - /* stop the GPU profiler (only CUDA) */ - if (gpu_info.n_dev > 0) - { - stopGpuProfiler(); - } - - if (isPPrankUsingGPU) - { - /* Free data in GPU memory and pinned memory before destroying the GPU context */ - fr->nbv.reset(); - - delete fr->gpuBonded; - fr->gpuBonded = nullptr; - } - - /* With tMPI we need to wait for all ranks to finish deallocation before - * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing - * GPU and context. - * - * This is not a concern in OpenCL where we use one context per rank which - * is freed in nbnxn_gpu_free(). - * - * Note: it is safe to not call the barrier on the ranks which do not use GPU, - * but it is easier and more futureproof to call it on the whole node. - */ - if (GMX_THREAD_MPI) - { - physicalNodeCommunicator.barrier(); - } -} - -void done_forcerec(t_forcerec* fr, int numMolBlocks) -{ - if (fr == nullptr) - { - // PME-only ranks don't have a forcerec - return; - } - done_cginfo_mb(fr->cginfo_mb, numMolBlocks); - sfree(fr->nbfp); - delete fr->ic; - sfree(fr->shift_vec); - sfree(fr->ewc_t); - tear_down_bonded_threading(fr->bondedThreading); - GMX_RELEASE_ASSERT(fr->gpuBonded == nullptr, "Should have been deleted earlier, when used"); - fr->bondedThreading = nullptr; - delete fr; + /* Note: This code will disappear when types are converted to C++ */ + sfree(shift_vec); + sfree(ewc_t); + tear_down_bonded_threading(bondedThreading); } diff --git a/src/gromacs/mdlib/forcerec.h b/src/gromacs/mdlib/forcerec.h index e28e717b50..0c9033f84e 100644 --- a/src/gromacs/mdlib/forcerec.h +++ b/src/gromacs/mdlib/forcerec.h @@ -59,9 +59,6 @@ class MDLogger; class PhysicalNodeCommunicator; } // namespace gmx -//! Destroy a forcerec. -void done_forcerec(t_forcerec* fr, int numMolBlocks); - /*! \brief Print the contents of the forcerec to a file * * \param[in] fplog The log file to print to @@ -135,8 +132,4 @@ void init_forcerec(FILE* fplog, */ void forcerec_set_excl_load(t_forcerec* fr, const gmx_localtop_t* top); -void free_gpu_resources(t_forcerec* fr, - const gmx::PhysicalNodeCommunicator& physicalNodeCommunicator, - const gmx_gpu_info_t& gpu_info); - #endif diff --git a/src/gromacs/mdlib/wall.cpp b/src/gromacs/mdlib/wall.cpp index d03f55344c..aab096a6f4 100644 --- a/src/gromacs/mdlib/wall.cpp +++ b/src/gromacs/mdlib/wall.cpp @@ -176,7 +176,7 @@ real do_walls(const t_inputrec& ir, const int nwall = ir.nwall; const int ngid = ir.opts.ngener; const int ntype = fr.ntype; - const real* nbfp = fr.nbfp; + const real* nbfp = fr.nbfp.data(); const int* egp_flags = fr.egp_flags; for (int w = 0; w < nwall; w++) diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 59abe3fcaa..a54046f4a5 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1533,7 +1533,7 @@ int Mdrunner::mdrunner() /* This call is not included in init_domain_decomposition mainly * because fr->cginfo_mb is set later. */ - dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec, + dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec, domdecOptions.checkBondedInteractions, fr->cginfo_mb); } @@ -1627,18 +1627,45 @@ int Mdrunner::mdrunner() } // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x, - // before we destroy the GPU context(s) in free_gpu_resources(). + // before we destroy the GPU context(s) in free_gpu(). // Pinned buffers are associated with contexts in CUDA. // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go. mdAtoms.reset(nullptr); globalState.reset(nullptr); mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU + /* Free pinned buffers in *fr */ + delete fr; + fr = nullptr; + + if (hwinfo->gpu_info.n_dev > 0) + { + /* stop the GPU profiler (only CUDA) */ + stopGpuProfiler(); + } + + /* With tMPI we need to wait for all ranks to finish deallocation before + * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing + * GPU and context. + * + * This is not a concern in OpenCL where we use one context per rank which + * is freed in nbnxn_gpu_free(). + * + * Note: it is safe to not call the barrier on the ranks which do not use GPU, + * but it is easier and more futureproof to call it on the whole node. + * + * Note that this function needs to be called even if GPUs are not used + * in this run because the PME ranks have no knowledge of whether GPUs + * are used or not, but all ranks need to enter the barrier below. + * \todo Remove this physical node barrier after making sure + * that it's not needed anymore (with a shared GPU run). + */ + if (GMX_THREAD_MPI) + { + physicalNodeComm.barrier(); + } - /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */ - free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info); free_gpu(nonbondedDeviceInfo); free_gpu(pmeDeviceInfo); - done_forcerec(fr, mtop.molblock.size()); sfree(fcd); if (doMembed) diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h index 2b0832a80a..05bcab152e 100644 --- a/src/gromacs/mdtypes/forcerec.h +++ b/src/gromacs/mdtypes/forcerec.h @@ -110,10 +110,10 @@ enum struct cginfo_mb_t { - int cg_start; - int cg_end; - int cg_mod; - int* cginfo; + int cg_start = 0; + int cg_end = 0; + int cg_mod = 0; + std::vector cginfo; }; @@ -192,7 +192,7 @@ struct t_forcerec real sc_sigma6_min = 0; /* Information about atom properties for the molecule blocks in the system */ - struct cginfo_mb_t* cginfo_mb = nullptr; + std::vector cginfo_mb; /* Information about atom properties for local and non-local atoms */ std::vector cginfo; @@ -234,10 +234,10 @@ struct t_forcerec std::vector shiftForces; /* Non bonded Parameter lists */ - int ntype = 0; /* Number of atom types */ - gmx_bool bBHAM = FALSE; - real* nbfp = nullptr; - real* ljpme_c6grid = nullptr; /* C6-values used on grid in LJPME */ + int ntype = 0; /* Number of atom types */ + gmx_bool bBHAM = FALSE; + std::vector nbfp; + real* ljpme_c6grid = nullptr; /* C6-values used on grid in LJPME */ /* Energy group pair flags */ int* egp_flags = nullptr; diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index c4a15ba974..07684c9009 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -442,7 +442,7 @@ static void nbnxn_atomdata_params_init(const gmx::MDLogger& mdlog, const Nbnxm::KernelType kernelType, int enbnxninitcombrule, int ntype, - const real* nbfp, + ArrayRef nbfp, int n_energygroups) { real c6, c12, tol; @@ -625,7 +625,7 @@ void nbnxn_atomdata_init(const gmx::MDLogger& mdlog, const Nbnxm::KernelType kernelType, int enbnxninitcombrule, int ntype, - const real* nbfp, + ArrayRef nbfp, int n_energygroups, int nout) { diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h index 1b1e3b763a..6f4e11561c 100644 --- a/src/gromacs/nbnxm/atomdata.h +++ b/src/gromacs/nbnxm/atomdata.h @@ -288,14 +288,14 @@ enum * to the atom data structure. * enbnxninitcombrule sets what combination rule data gets stored in nbat. */ -void nbnxn_atomdata_init(const gmx::MDLogger& mdlog, - nbnxn_atomdata_t* nbat, - Nbnxm::KernelType kernelType, - int enbnxninitcombrule, - int ntype, - const real* nbfp, - int n_energygroups, - int nout); +void nbnxn_atomdata_init(const gmx::MDLogger& mdlog, + nbnxn_atomdata_t* nbat, + Nbnxm::KernelType kernelType, + int enbnxninitcombrule, + int ntype, + gmx::ArrayRef nbfp, + int n_energygroups, + int nout); void nbnxn_atomdata_set(nbnxn_atomdata_t* nbat, const Nbnxm::GridSet& gridSet, diff --git a/src/gromacs/nbnxm/benchmark/bench_setup.cpp b/src/gromacs/nbnxm/benchmark/bench_setup.cpp index ecfe6af14d..f3d6b27526 100644 --- a/src/gromacs/nbnxm/benchmark/bench_setup.cpp +++ b/src/gromacs/nbnxm/benchmark/bench_setup.cpp @@ -199,7 +199,7 @@ static std::unique_ptr setupNbnxmForBenchInstance(const Kern std::move(atomData), kernelSetup, nullptr, nullptr); nbnxn_atomdata_init(gmx::MDLogger(), nbv->nbat.get(), kernelSetup.kernelType, combinationRule, - system.numAtomTypes, system.nonbondedParameters.data(), 1, numThreads); + system.numAtomTypes, system.nonbondedParameters, 1, numThreads); t_nrnb nrnb; diff --git a/src/gromacs/nbnxm/benchmark/bench_system.cpp b/src/gromacs/nbnxm/benchmark/bench_system.cpp index ffe0a899ff..fba71d2b9a 100644 --- a/src/gromacs/nbnxm/benchmark/bench_system.cpp +++ b/src/gromacs/nbnxm/benchmark/bench_system.cpp @@ -200,7 +200,7 @@ BenchmarkSystem::BenchmarkSystem(const int multiplicationFactor) } forceRec.ntype = numAtomTypes; - forceRec.nbfp = nonbondedParameters.data(); + forceRec.nbfp = nonbondedParameters; snew(forceRec.shift_vec, SHIFTS); calc_shifts(box, forceRec.shift_vec); } -- 2.22.0