From 1f2be3d076837ec992907662dd714f6c011c388c Mon Sep 17 00:00:00 2001 From: Berk Hess Date: Fri, 2 Aug 2019 14:56:23 +0200 Subject: [PATCH] Use shiftForces in ForceOutputs The shift force buffer in t_forcerec is changed to std::vector and all access now happens through ForceWithShiftForces. Also removed the deprecated rvec getters from ForceOutputs. Change-Id: I1353e6497d651a8f24e819a8826af78656889953 --- src/gromacs/domdec/domdec.cpp | 61 +++++------- src/gromacs/domdec/domdec.h | 8 +- .../gmxlib/nonbonded/nb_free_energy.cpp | 30 +++--- src/gromacs/gmxlib/nonbonded/nb_free_energy.h | 10 +- src/gromacs/listed_forces/listed_forces.cpp | 38 ++++--- src/gromacs/listed_forces/listed_forces.h | 8 +- src/gromacs/mdlib/force.cpp | 16 +-- src/gromacs/mdlib/forcerec.cpp | 6 +- src/gromacs/mdlib/qmmm.cpp | 25 ++--- src/gromacs/mdlib/qmmm.h | 11 ++- src/gromacs/mdlib/sim_util.cpp | 98 +++++++++++-------- src/gromacs/mdtypes/forceoutput.h | 24 +++-- src/gromacs/mdtypes/forcerec.h | 4 +- src/gromacs/nbnxm/atomdata.cpp | 8 +- src/gromacs/nbnxm/atomdata.h | 4 +- src/gromacs/nbnxm/gpu_common.h | 33 ++++--- src/gromacs/nbnxm/kerneldispatch.cpp | 11 ++- src/gromacs/nbnxm/nbnxm.cpp | 8 +- src/gromacs/nbnxm/nbnxm.h | 13 +-- src/gromacs/nbnxm/nbnxm_gpu.h | 30 +++--- 20 files changed, 241 insertions(+), 205 deletions(-) diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp index f9d642e16c..3dc77780ef 100644 --- a/src/gromacs/domdec/domdec.cpp +++ b/src/gromacs/domdec/domdec.cpp @@ -70,6 +70,7 @@ #include "gromacs/mdlib/updategroups.h" #include "gromacs/mdlib/vsite.h" #include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/forceoutput.h" #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/mdrunoptions.h" #include "gromacs/mdtypes/state.h" @@ -406,52 +407,43 @@ void dd_move_x(gmx_domdec_t *dd, wallcycle_stop(wcycle, ewcMOVEX); } -void dd_move_f(gmx_domdec_t *dd, - gmx::ArrayRef f, - rvec *fshift, - gmx_wallcycle *wcycle) +void dd_move_f(gmx_domdec_t *dd, + gmx::ForceWithShiftForces *forceWithShiftForces, + gmx_wallcycle *wcycle) { wallcycle_start(wcycle, ewcMOVEF); - int nzone, nat_tot; - gmx_domdec_comm_t *comm; - gmx_domdec_comm_dim_t *cd; - ivec vis; - int is; - gmx_bool bShiftForcesNeedPbc, bScrew; + gmx::ArrayRef f = forceWithShiftForces->force(); + gmx::ArrayRef fshift = forceWithShiftForces->shiftForces(); - comm = dd->comm; - - nzone = comm->zones.n/2; - nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones); + gmx_domdec_comm_t &comm = *dd->comm; + int nzone = comm.zones.n/2; + int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones); for (int d = dd->ndim-1; d >= 0; d--) { /* Only forces in domains near the PBC boundaries need to consider PBC in the treatment of fshift */ - bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0); - bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX); - if (fshift == nullptr && !bScrew) - { - bShiftForcesNeedPbc = FALSE; - } + const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0); + const bool applyScrewPbc = (shiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX); /* Determine which shift vector we need */ - clear_ivec(vis); - vis[dd->dim[d]] = 1; - is = IVEC2IS(vis); + ivec vis = { 0, 0, 0 }; + vis[dd->dim[d]] = 1; + const int is = IVEC2IS(vis); - cd = &comm->cd[d]; - for (int p = cd->numPulses() - 1; p >= 0; p--) + /* Loop over the pulses */ + const gmx_domdec_comm_dim_t &cd = comm.cd[d]; + for (int p = cd.numPulses() - 1; p >= 0; p--) { - const gmx_domdec_ind_t &ind
= cd->ind[p]; - DDBufferAccess receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]); + const gmx_domdec_ind_t &ind = cd.ind[p]; + DDBufferAccess receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]); gmx::ArrayRef &receiveBuffer = receiveBufferAccess.buffer; - nat_tot -= ind.nrecv[nzone+1]; + nat_tot -= ind.nrecv[nzone+1]; - DDBufferAccess sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]); + DDBufferAccess sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]); gmx::ArrayRef sendBuffer; - if (cd->receiveInPlace) + if (cd.receiveInPlace) { sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]); } @@ -472,7 +464,7 @@ void dd_move_f(gmx_domdec_t *dd, sendBuffer, receiveBuffer); /* Add the received forces */ int n = 0; - if (!bShiftForcesNeedPbc) + if (!shiftForcesNeedPbc) { for (int j : ind.index) { @@ -483,11 +475,8 @@ void dd_move_f(gmx_domdec_t *dd, n++; } } - else if (!bScrew) + else if (!applyScrewPbc) { - /* fshift should always be defined if this function is - * called when bShiftForcesNeedPbc is true */ - assert(nullptr != fshift); for (int j : ind.index) { for (int d = 0; d < DIM; d++) @@ -510,7 +499,7 @@ void dd_move_f(gmx_domdec_t *dd, f[j][XX] += receiveBuffer[n][XX]; f[j][YY] -= receiveBuffer[n][YY]; f[j][ZZ] -= receiveBuffer[n][ZZ]; - if (fshift) + if (shiftForcesNeedPbc) { /* Add this force to the shift force */ for (int d = 0; d < DIM; d++) diff --git a/src/gromacs/domdec/domdec.h b/src/gromacs/domdec/domdec.h index d015ab31c2..08a27291fc 100644 --- a/src/gromacs/domdec/domdec.h +++ b/src/gromacs/domdec/domdec.h @@ -85,6 +85,7 @@ class t_state; namespace gmx { +class ForceWithShiftForces; class MDLogger; class LocalAtomSetManager; class RangePartitioning; @@ -217,10 +218,9 @@ void dd_move_x(struct gmx_domdec_t *dd, * When fshift!=NULL the shift forces are updated to obtain * the correct virial from the single sum including f. */ -void dd_move_f(struct gmx_domdec_t *dd, - gmx::ArrayRef f, - rvec *fshift, - gmx_wallcycle *wcycle); +void dd_move_f(struct gmx_domdec_t *dd, + gmx::ForceWithShiftForces *forceWithShiftForces, + gmx_wallcycle *wcycle); /*! \brief Communicate a real for each atom to the neighboring cells. 
*/ void dd_atom_spread_real(struct gmx_domdec_t *dd, real v[]); diff --git a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp index dc6464a4fa..a9c5b2433d 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp +++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp @@ -47,6 +47,7 @@ #include "gromacs/gmxlib/nonbonded/nonbonded.h" #include "gromacs/math/functions.h" #include "gromacs/math/vec.h" +#include "gromacs/mdtypes/forceoutput.h" #include "gromacs/mdtypes/forcerec.h" #include "gromacs/mdtypes/md_enums.h" #include "gromacs/utility/fatalerror.h" @@ -114,8 +115,8 @@ template static void nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - t_forcerec * gmx_restrict fr, + gmx::ForceWithShiftForces * forceWithShiftForces, + const t_forcerec * gmx_restrict fr, const t_mdatoms * gmx_restrict mdatoms, nb_kernel_data_t * gmx_restrict kernel_data, t_nrnb * gmx_restrict nrnb) @@ -158,9 +159,6 @@ nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, const int * typeB; int ntype; const real * shiftvec; - real * fshift; - const real * x; - real * f; const real * chargeA; const real * chargeB; real sigma6_min, sigma6_def, lam_power; @@ -192,11 +190,13 @@ nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, /* Extract pointer to non-bonded interaction constants */ const interaction_const_t *ic = fr->ic; - x = xx[0]; - f = ff[0]; + // TODO: We should get rid of using pointers to real + const real *x = xx[0]; + real * gmx_restrict f = &(forceWithShiftForces->force()[0][0]); - fshift = fr->fshift[0]; + real * gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]); + // Extract pair list data nri = nlist->nri; iinr = nlist->iinr; jindex = nlist->jindex; @@ -814,13 +814,13 @@ nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150); } -void gmx_nb_free_energy_kernel(const t_nblist *nlist, - rvec *xx, - rvec *ff, - t_forcerec *fr, - const t_mdatoms *mdatoms, - nb_kernel_data_t *kernel_data, - t_nrnb *nrnb) +void gmx_nb_free_energy_kernel(const t_nblist *nlist, + rvec *xx, + gmx::ForceWithShiftForces *ff, + const t_forcerec *fr, + const t_mdatoms *mdatoms, + nb_kernel_data_t *kernel_data, + t_nrnb *nrnb) { if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0) { diff --git a/src/gromacs/gmxlib/nonbonded/nb_free_energy.h b/src/gromacs/gmxlib/nonbonded/nb_free_energy.h index 7e12ebdaea..cf4394fee4 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.h +++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013,2014,2015,2018, by the GROMACS development team, led by + * Copyright (c) 2013,2014,2015,2018,2019, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -45,12 +45,16 @@ #include "gromacs/mdtypes/nblist.h" struct t_forcerec; +namespace gmx +{ +class ForceWithShiftForces; +} void gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - t_forcerec * gmx_restrict fr, + gmx::ForceWithShiftForces * forceWithShiftForces, + const t_forcerec * gmx_restrict fr, const t_mdatoms * gmx_restrict mdatoms, nb_kernel_data_t * gmx_restrict kernel_data, t_nrnb * gmx_restrict nrnb); diff --git a/src/gromacs/listed_forces/listed_forces.cpp b/src/gromacs/listed_forces/listed_forces.cpp index 2766c89edb..c6246a5a93 100644 --- a/src/gromacs/listed_forces/listed_forces.cpp +++ b/src/gromacs/listed_forces/listed_forces.cpp @@ -136,9 +136,10 @@ zero_thread_output(f_thread_t *f_t) /*! \brief Reduce thread-local force buffers */ void -reduce_thread_forces(int n, rvec *f, - const bonded_threading_t *bt, - int nthreads) +reduce_thread_forces(int n, + gmx::ArrayRef force, + const bonded_threading_t *bt, + int nthreads) { if (nthreads > MAX_BONDED_THREADS) { @@ -146,6 +147,8 @@ reduce_thread_forces(int n, rvec *f, MAX_BONDED_THREADS); } + rvec * gmx_restrict f = as_rvec_array(force.data()); + /* This reduction can run on any number of threads, * independently of bt->nthreads. * But if nthreads matches bt->nthreads (which it currently does) @@ -193,7 +196,7 @@ reduce_thread_forces(int n, rvec *f, /*! \brief Reduce thread-local forces, shift forces and energies */ void -reduce_thread_output(int n, rvec *f, rvec *fshift, +reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces, real *ener, gmx_grppairener_t *grpp, real *dvdl, const bonded_threading_t *bt, gmx_bool bCalcEnerVir, @@ -204,9 +207,11 @@ reduce_thread_output(int n, rvec *f, rvec *fshift, if (bt->nblock_used > 0) { /* Reduce the bonded force buffer */ - reduce_thread_forces(n, f, bt, bt->nthreads); + reduce_thread_forces(n, forceWithShiftForces->force(), bt, bt->nthreads); } + rvec * gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data()); + /* When necessary, reduce energy and virial using one thread only */ if (bCalcEnerVir && bt->nthreads > 1) { @@ -361,6 +366,7 @@ calcBondedForces(const t_idef *idef, const t_forcerec *fr, const t_pbc *pbc_null, const t_graph *g, + rvec *fshiftMasterBuffer, gmx_enerdata_t *enerd, t_nrnb *nrnb, const real *lambda, @@ -394,7 +400,7 @@ calcBondedForces(const t_idef *idef, */ if (thread == 0) { - fshift = fr->fshift; + fshift = fshiftMasterBuffer; epot = enerd->term; grpp = &enerd->grpp; dvdlt = dvdl; @@ -452,8 +458,7 @@ void calc_listed(const t_commrec *cr, struct gmx_wallcycle *wcycle, const t_idef *idef, const rvec x[], history_t *hist, - rvec f[], - gmx::ForceWithVirial *forceWithVirial, + gmx::ForceOutputs *forceOutputs, const t_forcerec *fr, const struct t_pbc *pbc, const struct t_pbc *pbc_full, @@ -491,13 +496,13 @@ void calc_listed(const t_commrec *cr, if (idef->il[F_POSRES].nr > 0) { posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr, - forceWithVirial); + &forceOutputs->forceWithVirial()); } if (idef->il[F_FBPOSRES].nr > 0) { fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr, - forceWithVirial); + &forceOutputs->forceWithVirial()); } /* Do pre force calculation stuff which might require communication */ @@ -530,16 +535,20 @@ void calc_listed(const t_commrec *cr, if (haveCpuBondeds(*fr)) { + gmx::ForceWithShiftForces &forceWithShiftForces = forceOutputs->forceWithShiftForces(); + wallcycle_sub_start(wcycle, ewcsLISTED); /* The dummy array is to have a place to 
store the dhdl at other values of lambda, which will be thrown away in the end */ real dvdl[efptNR] = {0}; - calcBondedForces(idef, x, fr, pbc_null, g, enerd, nrnb, lambda, dvdl, md, + calcBondedForces(idef, x, fr, pbc_null, g, + as_rvec_array(forceWithShiftForces.shiftForces().data()), + enerd, nrnb, lambda, dvdl, md, fcd, bCalcEnerVir, global_atom_index); wallcycle_sub_stop(wcycle, ewcsLISTED); wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS); - reduce_thread_output(fr->natoms_force, f, fr->fshift, + reduce_thread_output(fr->natoms_force, &forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, bCalcEnerVir, @@ -637,8 +646,7 @@ do_force_listed(struct gmx_wallcycle *wcycle, const t_idef *idef, const rvec x[], history_t *hist, - rvec *forceForUseWithShiftForces, - gmx::ForceWithVirial *forceWithVirial, + gmx::ForceOutputs *forceOutputs, const t_forcerec *fr, const struct t_pbc *pbc, const struct t_graph *graph, @@ -664,7 +672,7 @@ do_force_listed(struct gmx_wallcycle *wcycle, set_pbc(&pbc_full, fr->ePBC, box); } calc_listed(cr, ms, wcycle, idef, x, hist, - forceForUseWithShiftForces, forceWithVirial, + forceOutputs, fr, pbc, &pbc_full, graph, enerd, nrnb, lambda, md, fcd, global_atom_index, flags); diff --git a/src/gromacs/listed_forces/listed_forces.h b/src/gromacs/listed_forces/listed_forces.h index f5bb76fd95..52a2699f3f 100644 --- a/src/gromacs/listed_forces/listed_forces.h +++ b/src/gromacs/listed_forces/listed_forces.h @@ -86,7 +86,7 @@ class t_state; namespace gmx { -class ForceWithVirial; +class ForceOutputs; } //! Type of CPU function to compute a bonded interaction. @@ -110,8 +110,7 @@ void calc_listed(const t_commrec *cr, struct gmx_wallcycle *wcycle, const t_idef *idef, const rvec x[], history_t *hist, - rvec f[], - gmx::ForceWithVirial *forceWithVirial, + gmx::ForceOutputs *forceOutputs, const t_forcerec *fr, const struct t_pbc *pbc, const struct t_pbc *pbc_full, const struct t_graph *g, @@ -144,8 +143,7 @@ do_force_listed(struct gmx_wallcycle *wcycle, const t_idef *idef, const rvec x[], history_t *hist, - rvec *forceForUseWithShiftForces, - gmx::ForceWithVirial *forceWithVirial, + gmx::ForceOutputs *forceOutputs, const t_forcerec *fr, const struct t_pbc *pbc, const struct t_graph *graph, diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp index 0796b08076..05f5aed659 100644 --- a/src/gromacs/mdlib/force.cpp +++ b/src/gromacs/mdlib/force.cpp @@ -120,16 +120,14 @@ do_force_lowlevel(t_forcerec *fr, const DDBalanceRegionHandler &ddBalanceRegionHandler) { // TODO: Replace all uses of x by const coordinates - rvec *x = as_rvec_array(coordinates.paddedArrayRef().data()); + rvec *x = as_rvec_array(coordinates.paddedArrayRef().data()); - // TODO: Add the shift forces to forceOutputs - rvec *forceForUseWithShiftForces = forceOutputs->f(); - auto &forceWithVirial = forceOutputs->forceWithVirial(); + auto &forceWithVirial = forceOutputs->forceWithVirial(); /* do QMMM first if requested */ if (fr->bQMMM) { - enerd->term[F_EQM] = calculate_QMMM(cr, forceForUseWithShiftForces, fr); + enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr); } /* Call the short range functions all in one go. */ @@ -186,7 +184,7 @@ do_force_lowlevel(t_forcerec *fr, do_force_listed(wcycle, box, ir->fepvals, cr, ms, idef, x, hist, - forceForUseWithShiftForces, &forceWithVirial, + forceOutputs, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, DOMAINDECOMP(cr) ? 
cr->dd->globalAtomIndices.data() : nullptr, flags); @@ -378,7 +376,8 @@ do_force_lowlevel(t_forcerec *fr, fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]); pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM); - pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + rvec *fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data()); + pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS); fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]); pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM); @@ -392,7 +391,8 @@ do_force_lowlevel(t_forcerec *fr, if (debug) { - pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + rvec *fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data()); + pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS); } } diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 19cd25e8cf..efc756dc87 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -1804,10 +1804,7 @@ void init_forcerec(FILE *fp, snew(fr->shift_vec, SHIFTS); } - if (fr->fshift == nullptr) - { - snew(fr->fshift, SHIFTS); - } + fr->shiftForces.resize(SHIFTS); if (fr->nbfp == nullptr) { @@ -2066,7 +2063,6 @@ void done_forcerec(t_forcerec *fr, int numMolBlocks) sfree(fr->nbfp); delete fr->ic; sfree(fr->shift_vec); - sfree(fr->fshift); sfree(fr->ewc_t); tear_down_bonded_threading(fr->bondedThreading); GMX_RELEASE_ASSERT(fr->gpuBonded == nullptr, "Should have been deleted earlier, when used"); diff --git a/src/gromacs/mdlib/qmmm.cpp b/src/gromacs/mdlib/qmmm.cpp index 200dacac7d..f2552fc671 100644 --- a/src/gromacs/mdlib/qmmm.cpp +++ b/src/gromacs/mdlib/qmmm.cpp @@ -59,6 +59,7 @@ #include "gromacs/mdlib/qm_mopac.h" #include "gromacs/mdlib/qm_orca.h" #include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/forceoutput.h" #include "gromacs/mdtypes/forcerec.h" #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/md_enums.h" @@ -848,9 +849,9 @@ void update_QMMMrec(const t_commrec *cr, } } /* update_QMMM_rec */ -real calculate_QMMM(const t_commrec *cr, - rvec f[], - const t_forcerec *fr) +real calculate_QMMM(const t_commrec *cr, + gmx::ForceWithShiftForces *forceWithShiftForces, + const t_forcerec *fr) { real QMener = 0.0; @@ -883,6 +884,8 @@ real calculate_QMMM(const t_commrec *cr, /* now different procedures are carried out for one layer ONION and * normal QMMM on one hand and multilayer oniom on the other */ + gmx::ArrayRef fMM = forceWithShiftForces->force(); + gmx::ArrayRef fshiftMM = forceWithShiftForces->shiftForces(); if (qr->QMMMscheme == eQMMMschemenormal || qr->nrQMlayers == 1) { qm = qr->qm[0]; @@ -893,16 +896,16 @@ real calculate_QMMM(const t_commrec *cr, { for (j = 0; j < DIM; j++) { - f[qm->indexQM[i]][j] -= forces[i][j]; - fr->fshift[qm->shiftQM[i]][j] += fshift[i][j]; + fMM[qm->indexQM[i]][j] -= forces[i][j]; + fshiftMM[qm->shiftQM[i]][j] += fshift[i][j]; } } for (i = 0; i < mm->nrMMatoms; i++) { for (j = 0; j < DIM; j++) { - f[mm->indexMM[i]][j] -= forces[qm->nrQMatoms+i][j]; - fr->fshift[mm->shiftMM[i]][j] += fshift[qm->nrQMatoms+i][j]; + fMM[mm->indexMM[i]][j] -= forces[qm->nrQMatoms+i][j]; + fshiftMM[mm->shiftMM[i]][j] += fshift[qm->nrQMatoms+i][j]; } } @@ -950,8 +953,8 @@ real calculate_QMMM(const t_commrec *cr, { for (j = 0; j < DIM; j++) { - 
f[qm->indexQM[i]][j] -= (forces[i][j]-forces2[i][j]); - fr->fshift[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]); + fMM[qm->indexQM[i]][j] -= (forces[i][j]-forces2[i][j]); + fshiftMM[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]); } } free(qm2); @@ -966,8 +969,8 @@ real calculate_QMMM(const t_commrec *cr, { for (j = 0; j < DIM; j++) { - f[qm->indexQM[i]][j] -= forces[i][j]; - fr->fshift[qm->shiftQM[i]][j] += fshift[i][j]; + fMM[qm->indexQM[i]][j] -= forces[i][j]; + fshiftMM[qm->shiftQM[i]][j] += fshift[i][j]; } } free(forces); diff --git a/src/gromacs/mdlib/qmmm.h b/src/gromacs/mdlib/qmmm.h index 78fb3b66b1..ffa98d352b 100644 --- a/src/gromacs/mdlib/qmmm.h +++ b/src/gromacs/mdlib/qmmm.h @@ -55,6 +55,11 @@ struct t_inputrec; struct t_mdatoms; struct t_QMMMrec; +namespace gmx +{ +class ForceWithShiftForces; +} + typedef struct { int nrQMatoms; /* total nr of QM atoms */ rvec *xQM; /* shifted to center of box */ @@ -138,9 +143,9 @@ void update_QMMMrec(const t_commrec *cr, * routine should be called at every step, since it updates the MM * elements of the t_QMMMrec struct. */ -real calculate_QMMM(const t_commrec *cr, - rvec f[], - const t_forcerec *fr); +real calculate_QMMM(const t_commrec *cr, + gmx::ForceWithShiftForces *forceWithShiftForces, + const t_forcerec *fr); /* QMMM computes the QM forces. This routine makes either function * calls to gmx QM routines (derived from MOPAC7 (semi-emp.) and MPQC diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index fece45af89..63eb82767f 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -139,17 +139,20 @@ static void sum_forces(rvec f[], gmx::ArrayRef forceToAdd) } } -static void calc_virial(int start, int homenr, const rvec x[], const rvec f[], +static void calc_virial(int start, int homenr, const rvec x[], + const gmx::ForceWithShiftForces &forceWithShiftForces, tensor vir_part, const t_graph *graph, const matrix box, t_nrnb *nrnb, const t_forcerec *fr, int ePBC) { /* The short-range virial from surrounding boxes */ - calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box); + const rvec *fshift = as_rvec_array(forceWithShiftForces.shiftForces().data()); + calc_vir(SHIFTS, fr->shift_vec, fshift, vir_part, ePBC == epbcSCREW, box); inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS); /* Calculate partial virial, for local atoms only, based on short range. * Total virial is computed in global_stat, called from do_md */ + const rvec *f = as_rvec_array(forceWithShiftForces.force().data()); f_calc_vir(start, start+homenr, x, f, vir_part, graph, box); inc_nrnb(nrnb, eNR_VIRIAL, homenr); @@ -267,7 +270,7 @@ static void post_process_forces(const t_commrec *cr, const gmx_vsite_t *vsite, int flags) { - rvec *f = forceOutputs->f(); + rvec *f = as_rvec_array(forceOutputs->forceWithShiftForces().force().data()); if (fr->haveDirectVirialContributions) { @@ -644,9 +647,7 @@ static void launchPmeGpuFftAndGather(gmx_pme_t *pmedata, * * \param[in] nbv Nonbonded verlet structure * \param[in,out] pmedata PME module data - * \param[in,out] force Force array to reduce task outputs into. 
- * \param[in,out] forceWithVirial Force and virial buffers - * \param[in,out] fshift Shift force output vector results are reduced into + * \param[in,out] forceOutputs Output buffer for the forces and virial * \param[in,out] enerd Energy data structure results are reduced into * \param[in] flags Force flags * \param[in] pmeFlags PME flags @@ -654,9 +655,7 @@ static void launchPmeGpuFftAndGather(gmx_pme_t *pmedata, */ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv, gmx_pme_t *pmedata, - gmx::ArrayRefWithPadding *force, - gmx::ForceWithVirial *forceWithVirial, - rvec fshift[], + gmx::ForceOutputs *forceOutputs, gmx_enerdata_t *enerd, int flags, int pmeFlags, @@ -666,14 +665,18 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv bool isNbGpuDone = false; - gmx::ArrayRef pmeGpuForces; + + gmx::ForceWithShiftForces &forceWithShiftForces = forceOutputs->forceWithShiftForces(); + gmx::ForceWithVirial &forceWithVirial = forceOutputs->forceWithVirial(); + + gmx::ArrayRef pmeGpuForces; while (!isPmeGpuDone || !isNbGpuDone) { if (!isPmeGpuDone) { GpuTaskCompletion completionType = (isNbGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check; - isPmeGpuDone = pme_gpu_try_finish_task(pmedata, pmeFlags, wcycle, forceWithVirial, enerd, completionType); + isPmeGpuDone = pme_gpu_try_finish_task(pmedata, pmeFlags, wcycle, &forceWithVirial, enerd, completionType); } if (!isNbGpuDone) @@ -685,7 +688,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv Nbnxm::AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), - fshift, completionType); + forceWithShiftForces.shiftForces(), completionType); wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L); // To get the call count right, when the task finished we // issue a start/stop. @@ -697,7 +700,7 @@ static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L); nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local, - as_rvec_array(force->unpaddedArrayRef().data())); + forceWithShiftForces.force()); } } } @@ -726,13 +729,14 @@ setupForceOutputs(t_forcerec *fr, { wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER); - // TODO: Add the shift force buffer and use it - gmx::ForceWithShiftForces forceWithShiftForces(force, doVirial, gmx::ArrayRef()); + /* NOTE: We assume fr->shiftForces is all zeros here */ + gmx::ForceWithShiftForces forceWithShiftForces(force, doVirial, fr->shiftForces); if (bDoForces) { /* Clear the short- and long-range forces */ - clear_rvecs_omp(fr->natoms_force_constr, forceWithShiftForces.f()); + clear_rvecs_omp(fr->natoms_force_constr, + as_rvec_array(forceWithShiftForces.force().data())); } /* If we need to compute the virial, we might need a separate @@ -1263,7 +1267,12 @@ void do_force(FILE *fplog, /* Reset energies */ reset_enerdata(enerd); - clear_rvecs(SHIFTS, fr->fshift); + /* Clear the shift forces */ + // TODO: This should be linked to the shift force buffer in use, or cleared before use instead + for (gmx::RVec &elem : fr->shiftForces) + { + elem = { 0.0_real, 0.0_real, 0.0_real }; + } if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME)) { @@ -1283,8 +1292,9 @@ void do_force(FILE *fplog, */ wallcycle_start(wcycle, ewcFORCE); - // set up and clear force outputs - ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, force, bDoForces, + // Set up and clear force outputs. + // We use std::move to keep the compiler happy, it has no effect. 
+ ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, std::move(force), bDoForces, ((flags & GMX_FORCE_VIRIAL) != 0), wcycle); /* We calculate the non-bonded forces, when done on the CPU, here. @@ -1307,14 +1317,14 @@ void do_force(FILE *fplog, * Happens here on the CPU both with and without GPU. */ nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local, - fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(), *mdatoms, + fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), enerd, flags, nrnb); if (havePPDomainDecomposition(cr)) { nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal, - fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(), *mdatoms, + fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms, inputrec->fepvals, lambda.data(), enerd, flags, nrnb); } @@ -1333,8 +1343,7 @@ void do_force(FILE *fplog, * communication with calculation with domain decomposition. */ wallcycle_stop(wcycle, ewcFORCE); - nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.f()); - + nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force()); wallcycle_start_nocount(wcycle, ewcFORCE); /* If there are multiple fshift output buffers we need to reduce them */ @@ -1342,8 +1351,8 @@ void do_force(FILE *fplog, { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ - nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat.get(), - fr->fshift); + nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat, + forceOut.forceWithShiftForces().shiftForces()); } } @@ -1377,6 +1386,9 @@ void do_force(FILE *fplog, float cycles_wait_gpu = 0; if (bUseOrEmulGPU) { + auto &forceWithShiftForces = forceOut.forceWithShiftForces(); + rvec *f = as_rvec_array(forceWithShiftForces.force().data()); + /* wait for non-local forces (or calculate in emulation mode) */ if (havePPDomainDecomposition(cr)) { @@ -1387,7 +1399,7 @@ void do_force(FILE *fplog, flags, Nbnxm::AtomLocality::NonLocal, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), - fr->fshift); + forceWithShiftForces.shiftForces()); cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL); } else @@ -1400,26 +1412,25 @@ void do_force(FILE *fplog, if (useGpuFBufOps == BufferOpsUseGpu::True && haveCpuForces) { - nbv->launch_copy_f_to_gpu(forceOut.f(), Nbnxm::AtomLocality::NonLocal); + nbv->launch_copy_f_to_gpu(f, Nbnxm::AtomLocality::NonLocal); } // flag to specify if forces should be accumulated in force buffer // ops. For non-local part, this just depends on whether CPU forces are present. 
bool accumulateForce = (useGpuFBufOps == BufferOpsUseGpu::True) && haveCpuForces; nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal, - forceOut.f(), pme_gpu_get_device_f(fr->pmedata), + forceWithShiftForces.force(), pme_gpu_get_device_f(fr->pmedata), pme_gpu_get_f_ready_synchronizer(fr->pmedata), useGpuFBufOps, useGpuFPmeReduction, accumulateForce); - if (useGpuFBufOps == BufferOpsUseGpu::True) { - nbv->launch_copy_f_from_gpu(forceOut.f(), Nbnxm::AtomLocality::NonLocal); + nbv->launch_copy_f_from_gpu(f, Nbnxm::AtomLocality::NonLocal); } if (fr->nbv->emulateGpu() && (flags & GMX_FORCE_VIRIAL)) { - nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat.get(), - fr->fshift); + nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat, + forceWithShiftForces.shiftForces()); } } } @@ -1439,7 +1450,7 @@ void do_force(FILE *fplog, { nbv->wait_for_gpu_force_reduction(Nbnxm::AtomLocality::NonLocal); } - dd_move_f(cr->dd, force.unpaddedArrayRef(), fr->fshift, wcycle); + dd_move_f(cr->dd, &forceOut.forceWithShiftForces(), wcycle); } } @@ -1449,7 +1460,7 @@ void do_force(FILE *fplog, (useGpuFBufOps == BufferOpsUseGpu::False)); if (alternateGpuWait) { - alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &force, &forceOut.forceWithVirial(), fr->fshift, enerd, + alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &forceOut, enerd, flags, pmeFlags, wcycle); } @@ -1473,7 +1484,7 @@ void do_force(FILE *fplog, flags, Nbnxm::AtomLocality::Local, enerd->grpp.ener[egLJSR].data(), enerd->grpp.ener[egCOULSR].data(), - fr->fshift); + forceOut.forceWithShiftForces().shiftForces()); float cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L); if (ddBalanceRegionHandler.useBalancingRegion()) @@ -1508,6 +1519,8 @@ void do_force(FILE *fplog, * on the non-alternating path. */ if (bUseOrEmulGPU && !alternateGpuWait) { + gmx::ArrayRef force = forceOut.forceWithShiftForces().force(); + rvec *f = as_rvec_array(force.data()); // TODO: move these steps as early as possible: // - CPU f H2D should be as soon as all CPU-side forces are done @@ -1516,7 +1529,7 @@ void do_force(FILE *fplog, // if (useGpuFBufOps == BufferOpsUseGpu::True && (haveCpuForces || DOMAINDECOMP(cr))) { - nbv->launch_copy_f_to_gpu(forceOut.f(), Nbnxm::AtomLocality::Local); + nbv->launch_copy_f_to_gpu(f, Nbnxm::AtomLocality::Local); } // flag to specify if forces should be accumulated in force // buffer ops. For local part, this depends on whether CPU @@ -1526,13 +1539,12 @@ void do_force(FILE *fplog, bool accumulateForce = (useGpuFBufOps == BufferOpsUseGpu::True) && (haveCpuForces || DOMAINDECOMP(cr)); nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local, - forceOut.f(), pme_gpu_get_device_f(fr->pmedata), + force, pme_gpu_get_device_f(fr->pmedata), pme_gpu_get_f_ready_synchronizer(fr->pmedata), useGpuFBufOps, useGpuFPmeReduction, accumulateForce); - if (useGpuFBufOps == BufferOpsUseGpu::True) { - nbv->launch_copy_f_from_gpu(forceOut.f(), Nbnxm::AtomLocality::Local); + nbv->launch_copy_f_from_gpu(f, Nbnxm::AtomLocality::Local); nbv->wait_for_gpu_force_reduction(Nbnxm::AtomLocality::Local); } } @@ -1551,19 +1563,23 @@ void do_force(FILE *fplog, if (bDoForces) { + rvec *f = as_rvec_array(forceOut.forceWithShiftForces().force().data()); + /* If we have NoVirSum forces, but we do not calculate the virial, * we sum fr->f_novirsum=forceOut.f later. 
*/ if (vsite && !(fr->haveDirectVirialContributions && !(flags & GMX_FORCE_VIRIAL))) { - spread_vsite_f(vsite, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(), fr->fshift, FALSE, nullptr, nrnb, + rvec *fshift = as_rvec_array(forceOut.forceWithShiftForces().shiftForces().data()); + spread_vsite_f(vsite, as_rvec_array(x.unpaddedArrayRef().data()), f, fshift, FALSE, nullptr, nrnb, &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle); } if (flags & GMX_FORCE_VIRIAL) { /* Calculation of the virial must be done after vsites! */ - calc_virial(0, mdatoms->homenr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(), + calc_virial(0, mdatoms->homenr, as_rvec_array(x.unpaddedArrayRef().data()), + forceOut.forceWithShiftForces(), vir_force, graph, box, nrnb, fr, inputrec->ePBC); } } diff --git a/src/gromacs/mdtypes/forceoutput.h b/src/gromacs/mdtypes/forceoutput.h index d43cb69fa6..c81319bfb8 100644 --- a/src/gromacs/mdtypes/forceoutput.h +++ b/src/gromacs/mdtypes/forceoutput.h @@ -83,10 +83,16 @@ class ForceWithShiftForces computeVirial_(computeVirial), shiftForces_(shiftForces) {} - //! Returns a, deprecated, rvec pointer to the force buffer - rvec *f() + //! Returns an arrayref to the force buffer without padding + gmx::ArrayRef force() { - return as_rvec_array(force_.paddedArrayRef().data()); + return force_.unpaddedArrayRef(); + } + + //! Returns a const arrayref to the force buffer without padding + gmx::ArrayRef force() const + { + return force_.unpaddedConstArrayRef(); } //! Returns whether the virial needs to be computed @@ -101,6 +107,12 @@ class ForceWithShiftForces return shiftForces_; } + //! Returns a const shift forces buffer + gmx::ArrayRef shiftForces() const + { + return shiftForces_; + } + private: //! The force buffer gmx::ArrayRefWithPadding force_; @@ -203,10 +215,10 @@ class ForceOutputs forceWithShiftForces_(forceWithShiftForces), forceWithVirial_(forceWithVirial) {} - //! Returns a, deprecated, rvec pointer to the force buffer for use with shift forces - rvec *f() + //! Returns a reference to the force with shift forces object + ForceWithShiftForces &forceWithShiftForces() { - return forceWithShiftForces_.f(); + return forceWithShiftForces_; } //! 
Returns a reference to the force with virial object diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h index ef706b94b1..9849889571 100644 --- a/src/gromacs/mdtypes/forcerec.h +++ b/src/gromacs/mdtypes/forcerec.h @@ -231,8 +231,8 @@ struct t_forcerec { // NOLINT (clang-analyzer-optin.performance.Padding) /* PME/Ewald stuff */ struct gmx_ewald_tab_t *ewald_table = nullptr; - /* Shift force array for computing the virial */ - rvec *fshift = nullptr; + /* Shift force array for computing the virial, size SHIFTS */ + std::vector shiftForces; /* Non bonded Parameter lists */ int ntype = 0; /* Number of atom types */ diff --git a/src/gromacs/nbnxm/atomdata.cpp b/src/gromacs/nbnxm/atomdata.cpp index dd0ccb2ce7..41caa8d4b2 100644 --- a/src/gromacs/nbnxm/atomdata.cpp +++ b/src/gromacs/nbnxm/atomdata.cpp @@ -1532,10 +1532,10 @@ void reduceForces(nbnxn_atomdata_t *nbat, bool useGpuFPmeReduction, bool accumulateForce); -void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, - rvec *fshift) +void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat, + gmx::ArrayRef fshift) { - gmx::ArrayRef outputBuffers = nbat->out; + gmx::ArrayRef outputBuffers = nbat.out; for (int s = 0; s < SHIFTS; s++) { @@ -1547,7 +1547,7 @@ void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, sum[YY] += out.fshift[s*DIM+YY]; sum[ZZ] += out.fshift[s*DIM+ZZ]; } - rvec_inc(fshift[s], sum); + fshift[s] += sum; } } diff --git a/src/gromacs/nbnxm/atomdata.h b/src/gromacs/nbnxm/atomdata.h index e34c8bd558..6bbe082c0b 100644 --- a/src/gromacs/nbnxm/atomdata.h +++ b/src/gromacs/nbnxm/atomdata.h @@ -374,8 +374,8 @@ void reduceForces(nbnxn_atomdata_t *nbat, bool accumulateForce); /* Add the fshift force stored in nbat to fshift */ -void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, - rvec *fshift); +void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat, + gmx::ArrayRef fshift); /* Get the atom start index and number of atoms for a given locality */ void nbnxn_get_atom_range(Nbnxm::AtomLocality atomLocality, diff --git a/src/gromacs/nbnxm/gpu_common.h b/src/gromacs/nbnxm/gpu_common.h index 4f4edddbeb..fc5977c7c3 100644 --- a/src/gromacs/nbnxm/gpu_common.h +++ b/src/gromacs/nbnxm/gpu_common.h @@ -365,13 +365,13 @@ gpu_accumulate_timings(gmx_wallclock_gpu_nbnxn_t *timings, //TODO: move into shared source file with gmx_compile_cpp_as_cuda //NOLINTNEXTLINE(misc-definitions-in-headers) -bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, - const int flags, - const AtomLocality aloc, - real *e_lj, - real *e_el, - rvec *fshift, - GpuTaskCompletion completionKind) +bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, + const int flags, + const AtomLocality aloc, + real *e_lj, + real *e_el, + gmx::ArrayRef shiftForces, + GpuTaskCompletion completionKind) { GMX_ASSERT(nb, "Need a valid nbnxn_gpu object"); @@ -404,7 +404,8 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, calcEner, nb->bDoTime != 0); - gpu_reduce_staged_outputs(nb->nbst, iLocality, calcEner, calcFshift, e_lj, e_el, fshift); + gpu_reduce_staged_outputs(nb->nbst, iLocality, calcEner, calcFshift, + e_lj, e_el, as_rvec_array(shiftForces.data())); } /* Always reset both pruning flags (doesn't hurt doing it even when timing is off). 
*/ @@ -429,17 +430,17 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb, * \param[in] aloc Atom locality identifier * \param[out] e_lj Pointer to the LJ energy output to accumulate into * \param[out] e_el Pointer to the electrostatics energy output to accumulate into - * \param[out] fshift Pointer to the shift force buffer to accumulate into + * \param[out] shiftForces Shift forces buffer to accumulate into */ //NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file -void gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb, - int flags, - AtomLocality aloc, - real *e_lj, - real *e_el, - rvec *fshift) +void gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb, + int flags, + AtomLocality aloc, + real *e_lj, + real *e_el, + gmx::ArrayRef shiftForces) { - gpu_try_finish_task(nb, flags, aloc, e_lj, e_el, fshift, + gpu_try_finish_task(nb, flags, aloc, e_lj, e_el, shiftForces, GpuTaskCompletion::Wait); } diff --git a/src/gromacs/nbnxm/kerneldispatch.cpp b/src/gromacs/nbnxm/kerneldispatch.cpp index a600029a2b..e02b649739 100644 --- a/src/gromacs/nbnxm/kerneldispatch.cpp +++ b/src/gromacs/nbnxm/kerneldispatch.cpp @@ -45,6 +45,7 @@ #include "gromacs/mdlib/force_flags.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdtypes/enerdata.h" +#include "gromacs/mdtypes/forceoutput.h" #include "gromacs/mdtypes/inputrec.h" #include "gromacs/mdtypes/interaction_const.h" #include "gromacs/mdtypes/md_enums.h" @@ -518,9 +519,9 @@ nonbonded_verlet_t::dispatchNonbondedKernel(Nbnxm::InteractionLocality iLocality void nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality, - t_forcerec *fr, + const t_forcerec *fr, rvec x[], - rvec f[], + gmx::ForceWithShiftForces *forceWithShiftForces, const t_mdatoms &mdatoms, t_lambda *fepvals, real *lambda, @@ -572,7 +573,8 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali try { gmx_nb_free_energy_kernel(nbl_fep[th].get(), - x, f, fr, &mdatoms, &kernel_data, nrnb); + x, forceWithShiftForces, + fr, &mdatoms, &kernel_data, nrnb); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } @@ -613,7 +615,8 @@ nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocali try { gmx_nb_free_energy_kernel(nbl_fep[th].get(), - x, f, fr, &mdatoms, &kernel_data, nrnb); + x, forceWithShiftForces, + fr, &mdatoms, &kernel_data, nrnb); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp index dfe8c4e936..3e4990a6ac 100644 --- a/src/gromacs/nbnxm/nbnxm.cpp +++ b/src/gromacs/nbnxm/nbnxm.cpp @@ -160,7 +160,7 @@ gmx::ArrayRef nonbonded_verlet_t::getGridIndices() const void nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality, - rvec *f) + gmx::ArrayRef force) { /* Skip the reduction if there was no short-range GPU work to do @@ -173,7 +173,7 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS); wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS); - reduceForces(nbat.get(), locality, pairSearch_->gridSet(), f, nullptr, nullptr, gpu_nbv, false, false); + reduceForces(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()), nullptr, nullptr, gpu_nbv, false, false); wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS); wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS); @@ -181,7 +181,7 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality void nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality, - rvec *f, + gmx::ArrayRef force, void 
*fPmeDeviceBuffer, GpuEventSynchronizer *pmeForcesReady, BufferOpsUseGpu useGpu, @@ -206,7 +206,7 @@ nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS); auto fn = useGpu == BufferOpsUseGpu::True ? reduceForces : reduceForces; - fn(nbat.get(), locality, pairSearch_->gridSet(), f, fPmeDeviceBuffer, pmeForcesReady, gpu_nbv, useGpuFPmeReduction, accumulateForce); + fn(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()), fPmeDeviceBuffer, pmeForcesReady, gpu_nbv, useGpuFPmeReduction, accumulateForce); wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS); wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS); diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h index cf0b2f62ab..86394f02ae 100644 --- a/src/gromacs/nbnxm/nbnxm.h +++ b/src/gromacs/nbnxm/nbnxm.h @@ -139,6 +139,7 @@ class GpuEventSynchronizer; namespace gmx { +class ForceWithShiftForces; class MDLogger; class UpdateGroupsCog; } @@ -293,9 +294,9 @@ struct nonbonded_verlet_t //! Executes the non-bonded free-energy kernel, always runs on the CPU void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality, - t_forcerec *fr, + const t_forcerec *fr, rvec x[], - rvec f[], + gmx::ForceWithShiftForces *forceWithShiftForces, const t_mdatoms &mdatoms, t_lambda *fepvals, real *lambda, @@ -305,14 +306,14 @@ struct nonbonded_verlet_t /*! \brief Add the forces stored in nbat to f, zeros the forces in nbat * \param [in] locality Local or non-local - * \param [inout] f Force to be added to + * \param [inout] force Force to be added to */ void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality, - rvec *f); + gmx::ArrayRef force); /*! \brief Add the forces stored in nbat to f, allowing for possibility that GPU buffer ops are active * \param [in] locality Local or non-local - * \param [inout] f Force to be added to + * \param [inout] force Force to be added to * \param [in] fPme Force from PME calculation * \param [in] pmeForcesReady Event triggered when PME force calculation has completed * \param [in] useGpu Whether GPU buffer ops are active @@ -320,7 +321,7 @@ struct nonbonded_verlet_t * \param [in] accumulateForce Whether force should be accumulated or stored */ void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality, - rvec *f, + gmx::ArrayRef force, void *fPme, GpuEventSynchronizer *pmeForcesReady, BufferOpsUseGpu useGpu, diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h index 0f0e8de218..46b1dc4c50 100644 --- a/src/gromacs/nbnxm/nbnxm_gpu.h +++ b/src/gromacs/nbnxm/nbnxm_gpu.h @@ -174,18 +174,18 @@ void gpu_launch_cpyback(gmx_nbnxn_gpu_t gmx_unused *nb, * \param[in] aloc Atom locality identifier * \param[out] e_lj Pointer to the LJ energy output to accumulate into * \param[out] e_el Pointer to the electrostatics energy output to accumulate into - * \param[out] fshift Pointer to the shift force buffer to accumulate into + * \param[out] shiftForces Shift forces buffer to accumulate into * \param[in] completionKind Indicates whether nnbonded task completion should only be checked rather than waited for * \returns True if the nonbonded tasks associated with \p aloc locality have completed */ GPU_FUNC_QUALIFIER -bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, - int gmx_unused flags, - AtomLocality gmx_unused aloc, - real gmx_unused *e_lj, - real gmx_unused *e_el, - rvec gmx_unused *fshift, - GpuTaskCompletion gmx_unused completionKind) GPU_FUNC_TERM_WITH_RETURN(false); +bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused 
*nb, + int gmx_unused flags, + AtomLocality gmx_unused aloc, + real gmx_unused *e_lj, + real gmx_unused *e_el, + gmx::ArrayRef gmx_unused shiftForces, + GpuTaskCompletion gmx_unused completionKind) GPU_FUNC_TERM_WITH_RETURN(false); /*! \brief Completes the nonbonded GPU task blocking until GPU tasks and data * transfers to finish. @@ -199,15 +199,15 @@ bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, * \param[in] aloc Atom locality identifier * \param[out] e_lj Pointer to the LJ energy output to accumulate into * \param[out] e_el Pointer to the electrostatics energy output to accumulate into - * \param[out] fshift Pointer to the shift force buffer to accumulate into + * \param[out] shiftForces Shift forces buffer to accumulate into */ GPU_FUNC_QUALIFIER -void gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, - int gmx_unused flags, - AtomLocality gmx_unused aloc, - real gmx_unused *e_lj, - real gmx_unused *e_el, - rvec gmx_unused *fshift) GPU_FUNC_TERM; +void gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb, + int gmx_unused flags, + AtomLocality gmx_unused aloc, + real gmx_unused *e_lj, + real gmx_unused *e_el, + gmx::ArrayRef gmx_unused shiftForces) GPU_FUNC_TERM; /*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */ GPU_FUNC_QUALIFIER -- 2.22.0
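
For readers who want the new access pattern in isolation, the following is a minimal, self-contained C++ sketch of the idea behind this change; it is not the real GROMACS code. The real classes work on gmx::ArrayRefWithPadding/gmx::ArrayRef views of gmx::RVec buffers and a shift-force buffer of SHIFTS entries; here only the class name ForceWithShiftForces and its force(), shiftForces() and computeVirial() accessors mirror the patch, while Rvec, c_numShiftVectors, addPairForce and main are hypothetical stand-ins. The point illustrated: a kernel receives one output object and obtains both the force buffer and the shift-force buffer through it, instead of taking a raw rvec* force pointer plus the separate fr->fshift array that this patch removes.

#include <array>
#include <cstdio>
#include <vector>

// Hypothetical stand-ins: gmx::RVec is a real-valued 3-vector and SHIFTS is
// the fixed number of periodic shift vectors; the names and value here are
// illustrative only.
using Rvec = std::array<double, 3>;
constexpr int c_numShiftVectors = 45;

// Simplified analogue of gmx::ForceWithShiftForces: it owns no storage, it
// only bundles views of the force buffer and the shift-force buffer so that
// a kernel no longer needs a raw force pointer plus a separate fr->fshift.
class ForceWithShiftForces
{
    public:
        ForceWithShiftForces(std::vector<Rvec> *force,
                             bool               computeVirial,
                             std::vector<Rvec> *shiftForces) :
            force_(force), computeVirial_(computeVirial), shiftForces_(shiftForces)
        {
        }

        //! The force buffer
        std::vector<Rvec> &force() { return *force_; }
        //! Whether shift forces (and thus the virial) are requested
        bool computeVirial() const { return computeVirial_; }
        //! The shift-force buffer, one entry per periodic shift vector
        std::vector<Rvec> &shiftForces() { return *shiftForces_; }

    private:
        std::vector<Rvec> *force_;
        bool               computeVirial_;
        std::vector<Rvec> *shiftForces_;
};

// A toy "kernel": adds a pair force to both atoms and, when the virial is
// requested, accumulates the same force into the shift-force slot of the
// periodic image in which atom j was found.
static void addPairForce(ForceWithShiftForces *out,
                         int i, int j, int shiftIndex, const Rvec &fij)
{
    std::vector<Rvec> &f = out->force();
    for (int d = 0; d < 3; d++)
    {
        f[i][d] += fij[d];
        f[j][d] -= fij[d];
    }
    if (out->computeVirial())
    {
        std::vector<Rvec> &fshift = out->shiftForces();
        for (int d = 0; d < 3; d++)
        {
            fshift[shiftIndex][d] += fij[d];
        }
    }
}

int main()
{
    std::vector<Rvec> force(4, Rvec{ 0, 0, 0 });
    // Cleared before use, as do_force() does for fr->shiftForces in the patch
    std::vector<Rvec> shiftForces(c_numShiftVectors, Rvec{ 0, 0, 0 });

    ForceWithShiftForces forceOut(&force, /*computeVirial=*/true, &shiftForces);
    addPairForce(&forceOut, 0, 1, /*shiftIndex=*/3, Rvec{ 0.5, -0.2, 0.1 });

    std::printf("f[0]      = %g %g %g\n", force[0][0], force[0][1], force[0][2]);
    std::printf("fshift[3] = %g %g %g\n",
                shiftForces[3][0], shiftForces[3][1], shiftForces[3][2]);
    return 0;
}

Bundling the two buffers in one object is also what lets dd_move_f and the GPU wait/reduce functions in the patch drop their separate fshift parameters and decide from computeVirial() whether shift forces need to be accumulated at all.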