#include "gromacs/mdlib/updategroups.h"
#include "gromacs/mdlib/vsite.h"
#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/mdrunoptions.h"
#include "gromacs/mdtypes/state.h"
wallcycle_stop(wcycle, ewcMOVEX);
}
-void dd_move_f(gmx_domdec_t *dd,
- gmx::ArrayRef<gmx::RVec> f,
- rvec *fshift,
- gmx_wallcycle *wcycle)
+void dd_move_f(gmx_domdec_t *dd,
+ gmx::ForceWithShiftForces *forceWithShiftForces,
+ gmx_wallcycle *wcycle)
{
wallcycle_start(wcycle, ewcMOVEF);
- int nzone, nat_tot;
- gmx_domdec_comm_t *comm;
- gmx_domdec_comm_dim_t *cd;
- ivec vis;
- int is;
- gmx_bool bShiftForcesNeedPbc, bScrew;
+ gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
+ gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
- comm = dd->comm;
-
- nzone = comm->zones.n/2;
- nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
+ gmx_domdec_comm_t &comm = *dd->comm;
+ int nzone = comm.zones.n/2;
+ int nat_tot = comm.atomRanges.end(DDAtomRanges::Type::Zones);
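+ // Forces are communicated in the reverse order of the coordinate
+ // communication: dimensions and pulses run from the outermost
+ // zones back towards the home zone.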
for (int d = dd->ndim-1; d >= 0; d--)
{
/* Only forces in domains near the PBC boundaries need to
consider PBC in the treatment of fshift */
- bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
- bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
- if (fshift == nullptr && !bScrew)
- {
- bShiftForcesNeedPbc = FALSE;
- }
+ const bool shiftForcesNeedPbc = (forceWithShiftForces->computeVirial() && dd->ci[dd->dim[d]] == 0);
+ const bool applyScrewPbc = (shiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
/* Determine which shift vector we need */
- clear_ivec(vis);
- vis[dd->dim[d]] = 1;
- is = IVEC2IS(vis);
+ ivec vis = { 0, 0, 0 };
+ vis[dd->dim[d]] = 1;
+ const int is = IVEC2IS(vis);
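+ // IVEC2IS maps the unit shift vector to the matching index into the shift-vector (and shift-force) tables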
- cd = &comm->cd[d];
- for (int p = cd->numPulses() - 1; p >= 0; p--)
+ /* Loop over the pulses */
+ const gmx_domdec_comm_dim_t &cd = comm.cd[d];
+ for (int p = cd.numPulses() - 1; p >= 0; p--)
{
- const gmx_domdec_ind_t &ind = cd->ind[p];
- DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
+ const gmx_domdec_ind_t &ind = cd.ind[p];
+ DDBufferAccess<gmx::RVec> receiveBufferAccess(comm.rvecBuffer, ind.nsend[nzone + 1]);
gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
- nat_tot -= ind.nrecv[nzone+1];
+ nat_tot -= ind.nrecv[nzone+1];
- DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
+ DDBufferAccess<gmx::RVec> sendBufferAccess(comm.rvecBuffer2, cd.receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
gmx::ArrayRef<gmx::RVec> sendBuffer;
- if (cd->receiveInPlace)
+ if (cd.receiveInPlace)
{
sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
}
sendBuffer, receiveBuffer);
/* Add the received forces */
int n = 0;
- if (!bShiftForcesNeedPbc)
+ if (!shiftForcesNeedPbc)
{
for (int j : ind.index)
{
n++;
}
}
- else if (!bScrew)
+ else if (!applyScrewPbc)
{
- /* fshift should always be defined if this function is
- * called when bShiftForcesNeedPbc is true */
- assert(nullptr != fshift);
for (int j : ind.index)
{
for (int d = 0; d < DIM; d++)
{
f[j][d] += receiveBuffer[n][d];
}
/* Rotate the force */
f[j][XX] += receiveBuffer[n][XX];
f[j][YY] -= receiveBuffer[n][YY];
f[j][ZZ] -= receiveBuffer[n][ZZ];
- if (fshift)
+ if (shiftForcesNeedPbc)
{
/* Add this force to the shift force */
for (int d = 0; d < DIM; d++)
namespace gmx
{
+class ForceWithShiftForces;
class MDLogger;
class LocalAtomSetManager;
class RangePartitioning;
-* When fshift!=NULL the shift forces are updated to obtain
-* the correct virial from the single sum including f.
+* When the virial is computed, the shift forces are updated to obtain
+* the correct virial from the single sum including f.
*/
-void dd_move_f(struct gmx_domdec_t *dd,
- gmx::ArrayRef<gmx::RVec> f,
- rvec *fshift,
- gmx_wallcycle *wcycle);
+void dd_move_f(struct gmx_domdec_t *dd,
+ gmx::ForceWithShiftForces *forceWithShiftForces,
+ gmx_wallcycle *wcycle);
/*! \brief Communicate a real for each atom to the neighboring cells. */
void dd_atom_spread_real(struct gmx_domdec_t *dd, real v[]);
#include "gromacs/gmxlib/nonbonded/nonbonded.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
+#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/fatalerror.h"
static void
nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- t_forcerec * gmx_restrict fr,
+ gmx::ForceWithShiftForces * forceWithShiftForces,
+ const t_forcerec * gmx_restrict fr,
const t_mdatoms * gmx_restrict mdatoms,
nb_kernel_data_t * gmx_restrict kernel_data,
t_nrnb * gmx_restrict nrnb)
const int * typeB;
int ntype;
const real * shiftvec;
- real * fshift;
- const real * x;
- real * f;
const real * chargeA;
const real * chargeB;
real sigma6_min, sigma6_def, lam_power;
/* Extract pointer to non-bonded interaction constants */
const interaction_const_t *ic = fr->ic;
- x = xx[0];
- f = ff[0];
+ // TODO: We should get rid of using pointers to real
+ const real *x = xx[0];
+ real * gmx_restrict f = &(forceWithShiftForces->force()[0][0]);
- fshift = fr->fshift[0];
+ real * gmx_restrict fshift = &(forceWithShiftForces->shiftForces()[0][0]);
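+ // Indexing [0][0] yields flat real* views of the RVec buffers; the kernel addresses them as 3*atom + dimension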
+ // Extract pair list data
nri = nlist->nri;
iinr = nlist->iinr;
jindex = nlist->jindex;
inc_nrnb(nrnb, eNR_NBKERNEL_FREE_ENERGY, nlist->nri*12 + nlist->jindex[n]*150);
}
-void gmx_nb_free_energy_kernel(const t_nblist *nlist,
- rvec *xx,
- rvec *ff,
- t_forcerec *fr,
- const t_mdatoms *mdatoms,
- nb_kernel_data_t *kernel_data,
- t_nrnb *nrnb)
+void gmx_nb_free_energy_kernel(const t_nblist *nlist,
+ rvec *xx,
+ gmx::ForceWithShiftForces *ff,
+ const t_forcerec *fr,
+ const t_mdatoms *mdatoms,
+ nb_kernel_data_t *kernel_data,
+ t_nrnb *nrnb)
{
if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
{
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2008, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2018, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/mdtypes/nblist.h"
struct t_forcerec;
+namespace gmx
+{
+class ForceWithShiftForces;
+}
void
gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist,
rvec * gmx_restrict xx,
- rvec * gmx_restrict ff,
- t_forcerec * gmx_restrict fr,
+ gmx::ForceWithShiftForces * forceWithShiftForces,
+ const t_forcerec * gmx_restrict fr,
const t_mdatoms * gmx_restrict mdatoms,
nb_kernel_data_t * gmx_restrict kernel_data,
t_nrnb * gmx_restrict nrnb);
/*! \brief Reduce thread-local force buffers */
void
-reduce_thread_forces(int n, rvec *f,
- const bonded_threading_t *bt,
- int nthreads)
+reduce_thread_forces(int n,
+ gmx::ArrayRef<gmx::RVec> force,
+ const bonded_threading_t *bt,
+ int nthreads)
{
if (nthreads > MAX_BONDED_THREADS)
{
MAX_BONDED_THREADS);
}
+ rvec * gmx_restrict f = as_rvec_array(force.data());
+
/* This reduction can run on any number of threads,
* independently of bt->nthreads.
* But if nthreads matches bt->nthreads (which it currently does)
/*! \brief Reduce thread-local forces, shift forces and energies */
void
-reduce_thread_output(int n, rvec *f, rvec *fshift,
+reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
real *ener, gmx_grppairener_t *grpp, real *dvdl,
const bonded_threading_t *bt,
gmx_bool bCalcEnerVir,
if (bt->nblock_used > 0)
{
/* Reduce the bonded force buffer */
- reduce_thread_forces(n, f, bt, bt->nthreads);
+ reduce_thread_forces(n, forceWithShiftForces->force(), bt, bt->nthreads);
}
+ rvec * gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
+
/* When necessary, reduce energy and virial using one thread only */
if (bCalcEnerVir && bt->nthreads > 1)
{
const t_forcerec *fr,
const t_pbc *pbc_null,
const t_graph *g,
+ rvec *fshiftMasterBuffer,
gmx_enerdata_t *enerd,
t_nrnb *nrnb,
const real *lambda,
*/
if (thread == 0)
{
- fshift = fr->fshift;
+ fshift = fshiftMasterBuffer;
epot = enerd->term;
grpp = &enerd->grpp;
dvdlt = dvdl;
struct gmx_wallcycle *wcycle,
const t_idef *idef,
const rvec x[], history_t *hist,
- rvec f[],
- gmx::ForceWithVirial *forceWithVirial,
+ gmx::ForceOutputs *forceOutputs,
const t_forcerec *fr,
const struct t_pbc *pbc,
const struct t_pbc *pbc_full,
if (idef->il[F_POSRES].nr > 0)
{
posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr,
- forceWithVirial);
+ &forceOutputs->forceWithVirial());
}
if (idef->il[F_FBPOSRES].nr > 0)
{
fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr,
- forceWithVirial);
+ &forceOutputs->forceWithVirial());
}
/* Do pre force calculation stuff which might require communication */
if (haveCpuBondeds(*fr))
{
+ gmx::ForceWithShiftForces &forceWithShiftForces = forceOutputs->forceWithShiftForces();
+
wallcycle_sub_start(wcycle, ewcsLISTED);
/* The dummy array is to have a place to store the dhdl at other values
of lambda, which will be thrown away in the end */
real dvdl[efptNR] = {0};
- calcBondedForces(idef, x, fr, pbc_null, g, enerd, nrnb, lambda, dvdl, md,
+ calcBondedForces(idef, x, fr, pbc_null, g,
+ as_rvec_array(forceWithShiftForces.shiftForces().data()),
+ enerd, nrnb, lambda, dvdl, md,
fcd, bCalcEnerVir, global_atom_index);
wallcycle_sub_stop(wcycle, ewcsLISTED);
wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
- reduce_thread_output(fr->natoms_force, f, fr->fshift,
+ reduce_thread_output(fr->natoms_force, &forceWithShiftForces,
enerd->term, &enerd->grpp, dvdl,
bt,
bCalcEnerVir,
const t_idef *idef,
const rvec x[],
history_t *hist,
- rvec *forceForUseWithShiftForces,
- gmx::ForceWithVirial *forceWithVirial,
+ gmx::ForceOutputs *forceOutputs,
const t_forcerec *fr,
const struct t_pbc *pbc,
const struct t_graph *graph,
set_pbc(&pbc_full, fr->ePBC, box);
}
calc_listed(cr, ms, wcycle, idef, x, hist,
- forceForUseWithShiftForces, forceWithVirial,
+ forceOutputs,
fr, pbc, &pbc_full,
graph, enerd, nrnb, lambda, md, fcd,
global_atom_index, flags);
namespace gmx
{
-class ForceWithVirial;
+class ForceOutputs;
}
//! Type of CPU function to compute a bonded interaction.
struct gmx_wallcycle *wcycle,
const t_idef *idef,
const rvec x[], history_t *hist,
- rvec f[],
- gmx::ForceWithVirial *forceWithVirial,
+ gmx::ForceOutputs *forceOutputs,
const t_forcerec *fr,
const struct t_pbc *pbc, const struct t_pbc *pbc_full,
const struct t_graph *g,
const t_idef *idef,
const rvec x[],
history_t *hist,
- rvec *forceForUseWithShiftForces,
- gmx::ForceWithVirial *forceWithVirial,
+ gmx::ForceOutputs *forceOutputs,
const t_forcerec *fr,
const struct t_pbc *pbc,
const struct t_graph *graph,
const DDBalanceRegionHandler &ddBalanceRegionHandler)
{
// TODO: Replace all uses of x by const coordinates
- rvec *x = as_rvec_array(coordinates.paddedArrayRef().data());
+ rvec *x = as_rvec_array(coordinates.paddedArrayRef().data());
- // TODO: Add the shift forces to forceOutputs
- rvec *forceForUseWithShiftForces = forceOutputs->f();
- auto &forceWithVirial = forceOutputs->forceWithVirial();
+ auto &forceWithVirial = forceOutputs->forceWithVirial();
/* do QMMM first if requested */
if (fr->bQMMM)
{
- enerd->term[F_EQM] = calculate_QMMM(cr, forceForUseWithShiftForces, fr);
+ enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr);
}
/* Call the short range functions all in one go. */
do_force_listed(wcycle, box, ir->fepvals, cr, ms,
idef, x, hist,
- forceForUseWithShiftForces, &forceWithVirial,
+ forceOutputs,
fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr,
flags);
fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
- pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
+ rvec *fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
+ pr_rvecs(debug, 0, "fshift after LR Corrections", fshift, SHIFTS);
fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
if (debug)
{
- pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
+ rvec *fshift = as_rvec_array(forceOutputs->forceWithShiftForces().shiftForces().data());
+ pr_rvecs(debug, 0, "fshift after bondeds", fshift, SHIFTS);
}
}
snew(fr->shift_vec, SHIFTS);
}
- if (fr->fshift == nullptr)
- {
- snew(fr->fshift, SHIFTS);
- }
+ fr->shiftForces.resize(SHIFTS);
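+ // One shift-force accumulator per periodic shift vector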
if (fr->nbfp == nullptr)
{
sfree(fr->nbfp);
delete fr->ic;
sfree(fr->shift_vec);
- sfree(fr->fshift);
sfree(fr->ewc_t);
tear_down_bonded_threading(fr->bondedThreading);
GMX_RELEASE_ASSERT(fr->gpuBonded == nullptr, "Should have been deleted earlier, when used");
#include "gromacs/mdlib/qm_mopac.h"
#include "gromacs/mdlib/qm_orca.h"
#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
}
} /* update_QMMM_rec */
-real calculate_QMMM(const t_commrec *cr,
- rvec f[],
- const t_forcerec *fr)
+real calculate_QMMM(const t_commrec *cr,
+ gmx::ForceWithShiftForces *forceWithShiftForces,
+ const t_forcerec *fr)
{
real
QMener = 0.0;
/* now different procedures are carried out for one layer ONION and
* normal QMMM on one hand and multilayer oniom on the other
*/
+ gmx::ArrayRef<gmx::RVec> fMM = forceWithShiftForces->force();
+ gmx::ArrayRef<gmx::RVec> fshiftMM = forceWithShiftForces->shiftForces();
if (qr->QMMMscheme == eQMMMschemenormal || qr->nrQMlayers == 1)
{
qm = qr->qm[0];
{
for (j = 0; j < DIM; j++)
{
- f[qm->indexQM[i]][j] -= forces[i][j];
- fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
+ fMM[qm->indexQM[i]][j] -= forces[i][j];
+ fshiftMM[qm->shiftQM[i]][j] += fshift[i][j];
}
}
for (i = 0; i < mm->nrMMatoms; i++)
{
for (j = 0; j < DIM; j++)
{
- f[mm->indexMM[i]][j] -= forces[qm->nrQMatoms+i][j];
- fr->fshift[mm->shiftMM[i]][j] += fshift[qm->nrQMatoms+i][j];
+ fMM[mm->indexMM[i]][j] -= forces[qm->nrQMatoms+i][j];
+ fshiftMM[mm->shiftMM[i]][j] += fshift[qm->nrQMatoms+i][j];
}
}
{
for (j = 0; j < DIM; j++)
{
- f[qm->indexQM[i]][j] -= (forces[i][j]-forces2[i][j]);
- fr->fshift[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]);
+ fMM[qm->indexQM[i]][j] -= (forces[i][j]-forces2[i][j]);
+ fshiftMM[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]);
}
}
free(qm2);
{
for (j = 0; j < DIM; j++)
{
- f[qm->indexQM[i]][j] -= forces[i][j];
- fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
+ fMM[qm->indexQM[i]][j] -= forces[i][j];
+ fshiftMM[qm->shiftQM[i]][j] += fshift[i][j];
}
}
free(forces);
struct t_mdatoms;
struct t_QMMMrec;
+namespace gmx
+{
+class ForceWithShiftForces;
+}
+
typedef struct {
int nrQMatoms; /* total nr of QM atoms */
rvec *xQM; /* shifted to center of box */
* routine should be called at every step, since it updates the MM
* elements of the t_QMMMrec struct.
*/
-real calculate_QMMM(const t_commrec *cr,
- rvec f[],
- const t_forcerec *fr);
+real calculate_QMMM(const t_commrec *cr,
+ gmx::ForceWithShiftForces *forceWithShiftForces,
+ const t_forcerec *fr);
/* QMMM computes the QM forces. This routine makes either function
* calls to gmx QM routines (derived from MOPAC7 (semi-emp.) and MPQC
}
}
-static void calc_virial(int start, int homenr, const rvec x[], const rvec f[],
+static void calc_virial(int start, int homenr, const rvec x[],
+ const gmx::ForceWithShiftForces &forceWithShiftForces,
tensor vir_part, const t_graph *graph, const matrix box,
t_nrnb *nrnb, const t_forcerec *fr, int ePBC)
{
/* The short-range virial from surrounding boxes */
- calc_vir(SHIFTS, fr->shift_vec, fr->fshift, vir_part, ePBC == epbcSCREW, box);
+ const rvec *fshift = as_rvec_array(forceWithShiftForces.shiftForces().data());
+ calc_vir(SHIFTS, fr->shift_vec, fshift, vir_part, ePBC == epbcSCREW, box);
inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);
/* Calculate partial virial, for local atoms only, based on short range.
* Total virial is computed in global_stat, called from do_md
*/
+ const rvec *f = as_rvec_array(forceWithShiftForces.force().data());
f_calc_vir(start, start+homenr, x, f, vir_part, graph, box);
inc_nrnb(nrnb, eNR_VIRIAL, homenr);
const gmx_vsite_t *vsite,
int flags)
{
- rvec *f = forceOutputs->f();
+ rvec *f = as_rvec_array(forceOutputs->forceWithShiftForces().force().data());
if (fr->haveDirectVirialContributions)
{
*
* \param[in] nbv Nonbonded verlet structure
* \param[in,out] pmedata PME module data
- * \param[in,out] force Force array to reduce task outputs into.
- * \param[in,out] forceWithVirial Force and virial buffers
- * \param[in,out] fshift Shift force output vector results are reduced into
+ * \param[in,out] forceOutputs Output buffer for the forces and virial
* \param[in,out] enerd Energy data structure results are reduced into
* \param[in] flags Force flags
* \param[in] pmeFlags PME flags
*/
static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t *nbv,
gmx_pme_t *pmedata,
- gmx::ArrayRefWithPadding<gmx::RVec> *force,
- gmx::ForceWithVirial *forceWithVirial,
- rvec fshift[],
+ gmx::ForceOutputs *forceOutputs,
gmx_enerdata_t *enerd,
int flags,
int pmeFlags,
bool isNbGpuDone = false;
- gmx::ArrayRef<const gmx::RVec> pmeGpuForces;
+
+ gmx::ForceWithShiftForces &forceWithShiftForces = forceOutputs->forceWithShiftForces();
+ gmx::ForceWithVirial &forceWithVirial = forceOutputs->forceWithVirial();
+
+ gmx::ArrayRef<const gmx::RVec> pmeGpuForces;
while (!isPmeGpuDone || !isNbGpuDone)
{
if (!isPmeGpuDone)
{
GpuTaskCompletion completionType = (isNbGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check;
- isPmeGpuDone = pme_gpu_try_finish_task(pmedata, pmeFlags, wcycle, forceWithVirial, enerd, completionType);
+ isPmeGpuDone = pme_gpu_try_finish_task(pmedata, pmeFlags, wcycle, &forceWithVirial, enerd, completionType);
}
if (!isNbGpuDone)
Nbnxm::AtomLocality::Local,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
- fshift, completionType);
+ forceWithShiftForces.shiftForces(), completionType);
wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
// To get the call count right, when the task finished we
// issue a start/stop.
wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
- as_rvec_array(force->unpaddedArrayRef().data()));
+ forceWithShiftForces.force());
}
}
}
{
wallcycle_sub_start(wcycle, ewcsCLEAR_FORCE_BUFFER);
- // TODO: Add the shift force buffer and use it
- gmx::ForceWithShiftForces forceWithShiftForces(force, doVirial, gmx::ArrayRef<gmx::RVec>());
+ /* NOTE: We assume fr->shiftForces is all zeros here */
+ gmx::ForceWithShiftForces forceWithShiftForces(force, doVirial, fr->shiftForces);
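+ // computeVirial() on this object gates the shift-force accumulation, e.g. in dd_move_f()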
if (bDoForces)
{
/* Clear the short- and long-range forces */
- clear_rvecs_omp(fr->natoms_force_constr, forceWithShiftForces.f());
+ clear_rvecs_omp(fr->natoms_force_constr,
+ as_rvec_array(forceWithShiftForces.force().data()));
}
/* If we need to compute the virial, we might need a separate
/* Reset energies */
reset_enerdata(enerd);
- clear_rvecs(SHIFTS, fr->fshift);
+ /* Clear the shift forces */
+ // TODO: This should be linked to the shift force buffer in use, or cleared before use instead
+ for (gmx::RVec &elem : fr->shiftForces)
+ {
+ elem = { 0.0_real, 0.0_real, 0.0_real };
+ }
if (DOMAINDECOMP(cr) && !thisRankHasDuty(cr, DUTY_PME))
{
*/
wallcycle_start(wcycle, ewcFORCE);
- // set up and clear force outputs
- ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, force, bDoForces,
+ // Set up and clear force outputs.
+ // We use std::move to keep the compiler happy; it has no effect.
+ ForceOutputs forceOut = setupForceOutputs(fr, pull_work, *inputrec, std::move(force), bDoForces,
((flags & GMX_FORCE_VIRIAL) != 0), wcycle);
/* We calculate the non-bonded forces, when done on the CPU, here.
* Happens here on the CPU both with and without GPU.
*/
nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::Local,
- fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(), *mdatoms,
+ fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
inputrec->fepvals, lambda.data(),
enerd, flags, nrnb);
if (havePPDomainDecomposition(cr))
{
nbv->dispatchFreeEnergyKernel(Nbnxm::InteractionLocality::NonLocal,
- fr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(), *mdatoms,
+ fr, as_rvec_array(x.unpaddedArrayRef().data()), &forceOut.forceWithShiftForces(), *mdatoms,
inputrec->fepvals, lambda.data(),
enerd, flags, nrnb);
}
* communication with calculation with domain decomposition.
*/
wallcycle_stop(wcycle, ewcFORCE);
- nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.f());
-
+ nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::All, forceOut.forceWithShiftForces().force());
wallcycle_start_nocount(wcycle, ewcFORCE);
/* If there are multiple fshift output buffers we need to reduce them */
{
/* This is not in a subcounter because it takes a
negligible and constant-sized amount of time */
- nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat.get(),
- fr->fshift);
+ nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat,
+ forceOut.forceWithShiftForces().shiftForces());
}
}
float cycles_wait_gpu = 0;
if (bUseOrEmulGPU)
{
+ auto &forceWithShiftForces = forceOut.forceWithShiftForces();
+ rvec *f = as_rvec_array(forceWithShiftForces.force().data());
+
/* wait for non-local forces (or calculate in emulation mode) */
if (havePPDomainDecomposition(cr))
{
flags, Nbnxm::AtomLocality::NonLocal,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
- fr->fshift);
+ forceWithShiftForces.shiftForces());
cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_NL);
}
else
if (useGpuFBufOps == BufferOpsUseGpu::True && haveCpuForces)
{
- nbv->launch_copy_f_to_gpu(forceOut.f(), Nbnxm::AtomLocality::NonLocal);
+ nbv->launch_copy_f_to_gpu(f, Nbnxm::AtomLocality::NonLocal);
}
// flag to specify if forces should be accumulated in force buffer
// ops. For non-local part, this just depends on whether CPU forces are present.
bool accumulateForce = (useGpuFBufOps == BufferOpsUseGpu::True) && haveCpuForces;
nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::NonLocal,
- forceOut.f(), pme_gpu_get_device_f(fr->pmedata),
+ forceWithShiftForces.force(), pme_gpu_get_device_f(fr->pmedata),
pme_gpu_get_f_ready_synchronizer(fr->pmedata),
useGpuFBufOps, useGpuFPmeReduction, accumulateForce);
-
if (useGpuFBufOps == BufferOpsUseGpu::True)
{
- nbv->launch_copy_f_from_gpu(forceOut.f(), Nbnxm::AtomLocality::NonLocal);
+ nbv->launch_copy_f_from_gpu(f, Nbnxm::AtomLocality::NonLocal);
}
if (fr->nbv->emulateGpu() && (flags & GMX_FORCE_VIRIAL))
{
- nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->nbat.get(),
- fr->fshift);
+ nbnxn_atomdata_add_nbat_fshift_to_fshift(*nbv->nbat,
+ forceWithShiftForces.shiftForces());
}
}
}
{
nbv->wait_for_gpu_force_reduction(Nbnxm::AtomLocality::NonLocal);
}
- dd_move_f(cr->dd, force.unpaddedArrayRef(), fr->fshift, wcycle);
+ dd_move_f(cr->dd, &forceOut.forceWithShiftForces(), wcycle);
}
}
(useGpuFBufOps == BufferOpsUseGpu::False));
if (alternateGpuWait)
{
- alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &force, &forceOut.forceWithVirial(), fr->fshift, enerd,
+ alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &forceOut, enerd,
flags, pmeFlags, wcycle);
}
flags, Nbnxm::AtomLocality::Local,
enerd->grpp.ener[egLJSR].data(),
enerd->grpp.ener[egCOULSR].data(),
- fr->fshift);
+ forceOut.forceWithShiftForces().shiftForces());
float cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
if (ddBalanceRegionHandler.useBalancingRegion())
* on the non-alternating path. */
if (bUseOrEmulGPU && !alternateGpuWait)
{
+ gmx::ArrayRef<gmx::RVec> force = forceOut.forceWithShiftForces().force();
+ rvec *f = as_rvec_array(force.data());
// TODO: move these steps as early as possible:
// - CPU f H2D should be as soon as all CPU-side forces are done
//
if (useGpuFBufOps == BufferOpsUseGpu::True && (haveCpuForces || DOMAINDECOMP(cr)))
{
- nbv->launch_copy_f_to_gpu(forceOut.f(), Nbnxm::AtomLocality::Local);
+ nbv->launch_copy_f_to_gpu(f, Nbnxm::AtomLocality::Local);
}
// flag to specify if forces should be accumulated in force
// buffer ops. For local part, this depends on whether CPU
bool accumulateForce = (useGpuFBufOps == BufferOpsUseGpu::True) &&
(haveCpuForces || DOMAINDECOMP(cr));
nbv->atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality::Local,
- forceOut.f(), pme_gpu_get_device_f(fr->pmedata),
+ force, pme_gpu_get_device_f(fr->pmedata),
pme_gpu_get_f_ready_synchronizer(fr->pmedata),
useGpuFBufOps, useGpuFPmeReduction, accumulateForce);
-
if (useGpuFBufOps == BufferOpsUseGpu::True)
{
- nbv->launch_copy_f_from_gpu(forceOut.f(), Nbnxm::AtomLocality::Local);
+ nbv->launch_copy_f_from_gpu(f, Nbnxm::AtomLocality::Local);
nbv->wait_for_gpu_force_reduction(Nbnxm::AtomLocality::Local);
}
}
if (bDoForces)
{
+ rvec *f = as_rvec_array(forceOut.forceWithShiftForces().force().data());
+
/* If we have NoVirSum forces, but we do not calculate the virial,
* we sum fr->f_novirsum=forceOut.f later.
*/
if (vsite && !(fr->haveDirectVirialContributions && !(flags & GMX_FORCE_VIRIAL)))
{
- spread_vsite_f(vsite, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(), fr->fshift, FALSE, nullptr, nrnb,
+ rvec *fshift = as_rvec_array(forceOut.forceWithShiftForces().shiftForces().data());
+ spread_vsite_f(vsite, as_rvec_array(x.unpaddedArrayRef().data()), f, fshift, FALSE, nullptr, nrnb,
&top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
}
if (flags & GMX_FORCE_VIRIAL)
{
/* Calculation of the virial must be done after vsites! */
- calc_virial(0, mdatoms->homenr, as_rvec_array(x.unpaddedArrayRef().data()), forceOut.f(),
+ calc_virial(0, mdatoms->homenr, as_rvec_array(x.unpaddedArrayRef().data()),
+ forceOut.forceWithShiftForces(),
vir_force, graph, box, nrnb, fr, inputrec->ePBC);
}
}
computeVirial_(computeVirial),
shiftForces_(shiftForces) {}
- //! Returns a, deprecated, rvec pointer to the force buffer
- rvec *f()
+ //! Returns an arrayref to the force buffer without padding
+ gmx::ArrayRef<gmx::RVec> force()
{
- return as_rvec_array(force_.paddedArrayRef().data());
+ return force_.unpaddedArrayRef();
+ }
+
+ //! Returns a const arrayref to the force buffer without padding
+ gmx::ArrayRef<const gmx::RVec> force() const
+ {
+ return force_.unpaddedConstArrayRef();
}
//! Returns whether the virial needs to be computed
return shiftForces_;
}
+ //! Returns a const arrayref to the shift forces buffer
+ gmx::ArrayRef<const gmx::RVec> shiftForces() const
+ {
+ return shiftForces_;
+ }
+
private:
//! The force buffer
gmx::ArrayRefWithPadding<gmx::RVec> force_;
forceWithShiftForces_(forceWithShiftForces),
forceWithVirial_(forceWithVirial) {}
- //! Returns a, deprecated, rvec pointer to the force buffer for use with shift forces
- rvec *f()
+ //! Returns a reference to the force with shift forces object
+ ForceWithShiftForces &forceWithShiftForces()
{
- return forceWithShiftForces_.f();
+ return forceWithShiftForces_;
}
//! Returns a reference to the force with virial object
/* PME/Ewald stuff */
struct gmx_ewald_tab_t *ewald_table = nullptr;
- /* Shift force array for computing the virial */
- rvec *fshift = nullptr;
+ /* Shift force array for computing the virial, size SHIFTS */
+ std::vector<gmx::RVec> shiftForces;
/* Non bonded Parameter lists */
int ntype = 0; /* Number of atom types */
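For orientation, a minimal sketch of the call pattern these types enable; 'force', 'doVirial' and 'fr' are assumed to be set up as in the do_force() hunks above:

    gmx::ForceWithShiftForces forceWithShiftForces(force, doVirial, fr->shiftForces);

    gmx::ArrayRef<gmx::RVec> f      = forceWithShiftForces.force();
    gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces.shiftForces();

    if (forceWithShiftForces.computeVirial())
    {
        // fshift is accumulated during force evaluation and later folded
        // into the virial via calc_vir(SHIFTS, fr->shift_vec, ...)
    }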
bool useGpuFPmeReduction,
bool accumulateForce);
-void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
- rvec *fshift)
+void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat,
+ gmx::ArrayRef<gmx::RVec> fshift)
{
- gmx::ArrayRef<const nbnxn_atomdata_output_t> outputBuffers = nbat->out;
+ gmx::ArrayRef<const nbnxn_atomdata_output_t> outputBuffers = nbat.out;
for (int s = 0; s < SHIFTS; s++)
{
sum[YY] += out.fshift[s*DIM+YY];
sum[ZZ] += out.fshift[s*DIM+ZZ];
}
- rvec_inc(fshift[s], sum);
+ fshift[s] += sum;
}
}
bool accumulateForce);
/* Add the shift forces stored in nbat to fshift */
-void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat,
- rvec *fshift);
+void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t &nbat,
+ gmx::ArrayRef<gmx::RVec> fshift);
/* Get the atom start index and number of atoms for a given locality */
void nbnxn_get_atom_range(Nbnxm::AtomLocality atomLocality,
//TODO: move into shared source file with gmx_compile_cpp_as_cuda
//NOLINTNEXTLINE(misc-definitions-in-headers)
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb,
- const int flags,
- const AtomLocality aloc,
- real *e_lj,
- real *e_el,
- rvec *fshift,
- GpuTaskCompletion completionKind)
+bool gpu_try_finish_task(gmx_nbnxn_gpu_t *nb,
+ const int flags,
+ const AtomLocality aloc,
+ real *e_lj,
+ real *e_el,
+ gmx::ArrayRef<gmx::RVec> shiftForces,
+ GpuTaskCompletion completionKind)
{
GMX_ASSERT(nb, "Need a valid nbnxn_gpu object");
gpu_accumulate_timings(nb->timings, nb->timers, nb->plist[iLocality], aloc, calcEner,
nb->bDoTime != 0);
- gpu_reduce_staged_outputs(nb->nbst, iLocality, calcEner, calcFshift, e_lj, e_el, fshift);
+ gpu_reduce_staged_outputs(nb->nbst, iLocality, calcEner, calcFshift,
+ e_lj, e_el, as_rvec_array(shiftForces.data()));
}
/* Always reset both pruning flags (doesn't hurt doing it even when timing is off). */
* \param[in] aloc Atom locality identifier
* \param[out] e_lj Pointer to the LJ energy output to accumulate into
* \param[out] e_el Pointer to the electrostatics energy output to accumulate into
- * \param[out] fshift Pointer to the shift force buffer to accumulate into
+ * \param[out] shiftForces Shift forces buffer to accumulate into
*/
//NOLINTNEXTLINE(misc-definitions-in-headers) TODO: move into source file
-void gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb,
- int flags,
- AtomLocality aloc,
- real *e_lj,
- real *e_el,
- rvec *fshift)
+void gpu_wait_finish_task(gmx_nbnxn_gpu_t *nb,
+ int flags,
+ AtomLocality aloc,
+ real *e_lj,
+ real *e_el,
+ gmx::ArrayRef<gmx::RVec> shiftForces)
{
- gpu_try_finish_task(nb, flags, aloc, e_lj, e_el, fshift,
+ gpu_try_finish_task(nb, flags, aloc, e_lj, e_el, shiftForces,
GpuTaskCompletion::Wait);
}
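A matching call-site sketch for the ArrayRef overload, assuming the Nbnxm namespace and variables as in the do_force() hunks (nb, flags, enerd, forceOut):

    Nbnxm::gpu_wait_finish_task(nb, flags, Nbnxm::AtomLocality::Local,
                                enerd->grpp.ener[egLJSR].data(),
                                enerd->grpp.ener[egCOULSR].data(),
                                forceOut.forceWithShiftForces().shiftForces());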
#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/enerdata.h"
+#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
void
nonbonded_verlet_t::dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality,
- t_forcerec *fr,
+ const t_forcerec *fr,
rvec x[],
- rvec f[],
+ gmx::ForceWithShiftForces *forceWithShiftForces,
const t_mdatoms &mdatoms,
t_lambda *fepvals,
real *lambda,
try
{
gmx_nb_free_energy_kernel(nbl_fep[th].get(),
- x, f, fr, &mdatoms, &kernel_data, nrnb);
+ x, forceWithShiftForces,
+ fr, &mdatoms, &kernel_data, nrnb);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
try
{
gmx_nb_free_energy_kernel(nbl_fep[th].get(),
- x, f, fr, &mdatoms, &kernel_data, nrnb);
+ x, forceWithShiftForces,
+ fr, &mdatoms, &kernel_data, nrnb);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
void
nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality,
- rvec *f)
+ gmx::ArrayRef<gmx::RVec> force)
{
/* Skip the reduction if there was no short-range GPU work to do
wallcycle_start(wcycle_, ewcNB_XF_BUF_OPS);
wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS);
- reduceForces<false>(nbat.get(), locality, pairSearch_->gridSet(), f, nullptr, nullptr, gpu_nbv, false, false);
+ reduceForces<false>(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()), nullptr, nullptr, gpu_nbv, false, false);
wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS);
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
void
nonbonded_verlet_t::atomdata_add_nbat_f_to_f(const Nbnxm::AtomLocality locality,
- rvec *f,
+ gmx::ArrayRef<gmx::RVec> force,
void *fPmeDeviceBuffer,
GpuEventSynchronizer *pmeForcesReady,
BufferOpsUseGpu useGpu,
wallcycle_sub_start(wcycle_, ewcsNB_F_BUF_OPS);
auto fn = useGpu == BufferOpsUseGpu::True ? reduceForces<true> : reduceForces<false>;
- fn(nbat.get(), locality, pairSearch_->gridSet(), f, fPmeDeviceBuffer, pmeForcesReady, gpu_nbv, useGpuFPmeReduction, accumulateForce);
+ fn(nbat.get(), locality, pairSearch_->gridSet(), as_rvec_array(force.data()), fPmeDeviceBuffer, pmeForcesReady, gpu_nbv, useGpuFPmeReduction, accumulateForce);
wallcycle_sub_stop(wcycle_, ewcsNB_F_BUF_OPS);
wallcycle_stop(wcycle_, ewcNB_XF_BUF_OPS);
namespace gmx
{
+class ForceWithShiftForces;
class MDLogger;
class UpdateGroupsCog;
}
//! Executes the non-bonded free-energy kernel, always runs on the CPU
void dispatchFreeEnergyKernel(Nbnxm::InteractionLocality iLocality,
- t_forcerec *fr,
+ const t_forcerec *fr,
rvec x[],
- rvec f[],
+ gmx::ForceWithShiftForces *forceWithShiftForces,
const t_mdatoms &mdatoms,
t_lambda *fepvals,
real *lambda,
/*! \brief Add the forces stored in nbat to f, zeros the forces in nbat
* \param [in] locality Local or non-local
- * \param [inout] f Force to be added to
+ * \param [inout] force Force to be added to
*/
void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality,
- rvec *f);
+ gmx::ArrayRef<gmx::RVec> force);
/*! \brief Add the forces stored in nbat to f, allowing for possibility that GPU buffer ops are active
* \param [in] locality Local or non-local
- * \param [inout] f Force to be added to
+ * \param [inout] force Force to be added to
* \param [in] fPme Force from PME calculation
* \param [in] pmeForcesReady Event triggered when PME force calculation has completed
* \param [in] useGpu Whether GPU buffer ops are active
* \param [in] accumulateForce Whether force should be accumulated or stored
*/
void atomdata_add_nbat_f_to_f(Nbnxm::AtomLocality locality,
- rvec *f,
+ gmx::ArrayRef<gmx::RVec> force,
void *fPme,
GpuEventSynchronizer *pmeForcesReady,
BufferOpsUseGpu useGpu,
* \param[in] aloc Atom locality identifier
* \param[out] e_lj Pointer to the LJ energy output to accumulate into
* \param[out] e_el Pointer to the electrostatics energy output to accumulate into
- * \param[out] fshift Pointer to the shift force buffer to accumulate into
+ * \param[out] shiftForces Shift forces buffer to accumulate into
* \param[in] completionKind Indicates whether nonbonded task completion should only be checked rather than waited for
* \returns True if the nonbonded tasks associated with \p aloc locality have completed
*/
GPU_FUNC_QUALIFIER
-bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
- int gmx_unused flags,
- AtomLocality gmx_unused aloc,
- real gmx_unused *e_lj,
- real gmx_unused *e_el,
- rvec gmx_unused *fshift,
- GpuTaskCompletion gmx_unused completionKind) GPU_FUNC_TERM_WITH_RETURN(false);
+bool gpu_try_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
+ int gmx_unused flags,
+ AtomLocality gmx_unused aloc,
+ real gmx_unused *e_lj,
+ real gmx_unused *e_el,
+ gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces,
+ GpuTaskCompletion gmx_unused completionKind) GPU_FUNC_TERM_WITH_RETURN(false);
/*! \brief Completes the nonbonded GPU task blocking until GPU tasks and data
* transfers to finish.
* \param[in] aloc Atom locality identifier
* \param[out] e_lj Pointer to the LJ energy output to accumulate into
* \param[out] e_el Pointer to the electrostatics energy output to accumulate into
- * \param[out] fshift Pointer to the shift force buffer to accumulate into
+ * \param[out] shiftForces Shift forces buffer to accumulate into
*/
GPU_FUNC_QUALIFIER
-void gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
- int gmx_unused flags,
- AtomLocality gmx_unused aloc,
- real gmx_unused *e_lj,
- real gmx_unused *e_el,
- rvec gmx_unused *fshift) GPU_FUNC_TERM;
+void gpu_wait_finish_task(gmx_nbnxn_gpu_t gmx_unused *nb,
+ int gmx_unused flags,
+ AtomLocality gmx_unused aloc,
+ real gmx_unused *e_lj,
+ real gmx_unused *e_el,
+ gmx::ArrayRef<gmx::RVec> gmx_unused shiftForces) GPU_FUNC_TERM;
/*! \brief Selects the Ewald kernel type, analytical or tabulated, single or twin cut-off. */
GPU_FUNC_QUALIFIER