Converted pme_atomcomm_t to PmeAtomComm class.
The PME atom count on CPU without separate PME ranks is now set by
gmx_pme_reinit_atoms(), as for PME on GPU.
Made all coordinates in PME const and use arrayref where possible.
Change-Id: Iaeb59f8cee910e4c52f2f59016af85662b2109fb
This source code file is part of thread_mpi.
Written by Sander Pronk, Erik Lindahl, and possibly others.
- Copyright (c) 2009,2016,2018, Sander Pronk, Erik Lindahl.
+ Copyright (c) 2009,2016,2018,2019, Sander Pronk, Erik Lindahl.
All rights reserved.
Redistribution and use in source and binary forms, with or without
/** A pre-defined NULL communicator to compare against, to check comm
validity */
-#define TMPI_COMM_NULL NULL
+#define TMPI_COMM_NULL nullptr
/** A pre-defined NULL group to compare against, to check group
validity */
-#define TMPI_GROUP_NULL NULL
+#define TMPI_GROUP_NULL nullptr
/** the empty group */
extern tMPI_Group TMPI_GROUP_EMPTY;
fr->gpuBonded != nullptr,
top->idef);
- gmx_pme_reinit_atoms(fr->pmedata, numHomeAtoms, mdatoms->chargeA);
- /* This handles the PP+PME rank case where fr->pmedata is valid.
- * For PME-only ranks, gmx_pmeonly() has its own call to gmx_pme_reinit_atoms().
- * TODO: this only handles the GPU logic so far, should handle CPU as well.
- * TODO: this also does not account for TPI.
- */
+ if (EEL_PME(fr->ic->eeltype) && (cr->duty & DUTY_PME))
+ {
+ /* This handles the PP+PME rank case where fr->pmedata is valid.
+ * For PME-only ranks, gmx_pmeonly() has its own call to gmx_pme_reinit_atoms().
+ */
+ const int numPmeAtoms = numHomeAtoms - fr->n_tpi;
+ gmx_pme_reinit_atoms(fr->pmedata, numPmeAtoms, mdatoms->chargeA);
+ }
if (constr)
{
const int gmxCacheLineSize = 64;
//! Set up coordinate communication
-static void setup_coordinate_communication(pme_atomcomm_t *atc)
+static void setup_coordinate_communication(PmeAtomComm *atc)
{
int nslab, n, i;
int fw, bw;
bw = (atc->nodeid - i + nslab) % nslab;
if (n < nslab - 1)
{
- atc->node_dest[n] = fw;
- atc->node_src[n] = bw;
+ atc->slabCommSetup[n].node_dest = fw;
+ atc->slabCommSetup[n].node_src = bw;
n++;
}
if (n < nslab - 1)
{
- atc->node_dest[n] = bw;
- atc->node_src[n] = fw;
+ atc->slabCommSetup[n].node_dest = bw;
+ atc->slabCommSetup[n].node_src = fw;
n++;
}
}
return (n1 + n2 + 3*n3)/static_cast<double>(6*pme->nkx*pme->nky*pme->nkz);
}
-/*! \brief Initialize atom communication data structure */
-static void init_atomcomm(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
- int dimind, gmx_bool bSpread)
+#ifndef DOXYGEN
+
+PmeAtomComm::PmeAtomComm(MPI_Comm PmeMpiCommunicator,
+ const int numThreads,
+ const int pmeOrder,
+ const int dimIndex,
+ const bool doSpread) :
+ dimind(dimIndex),
+ bSpread(doSpread),
+ pme_order(pmeOrder),
+ nthread(numThreads),
+ spline(nthread)
{
- int thread;
-
- atc->dimind = dimind;
- atc->nslab = 1;
- atc->nodeid = 0;
- atc->pd_nalloc = 0;
-#if GMX_MPI
- if (pme->nnodes > 1)
+ if (PmeMpiCommunicator != MPI_COMM_NULL)
{
- atc->mpi_comm = pme->mpi_comm_d[dimind];
- MPI_Comm_size(atc->mpi_comm, &atc->nslab);
- MPI_Comm_rank(atc->mpi_comm, &atc->nodeid);
+ mpi_comm = PmeMpiCommunicator;
+#if GMX_MPI
+ MPI_Comm_size(mpi_comm, &nslab);
+ MPI_Comm_rank(mpi_comm, &nodeid);
+#endif
}
if (debug)
{
- fprintf(debug, "For PME atom communication in dimind %d: nslab %d rank %d\n", atc->dimind, atc->nslab, atc->nodeid);
+ fprintf(debug, "For PME atom communication in dimind %d: nslab %d rank %d\n", dimind, nslab, nodeid);
}
-#endif
- atc->bSpread = bSpread;
- atc->pme_order = pme->pme_order;
-
- if (atc->nslab > 1)
+ if (nslab > 1)
{
- snew(atc->node_dest, atc->nslab);
- snew(atc->node_src, atc->nslab);
- setup_coordinate_communication(atc);
-
- snew(atc->count_thread, pme->nthread);
- for (thread = 0; thread < pme->nthread; thread++)
- {
- snew(atc->count_thread[thread], atc->nslab);
- }
- atc->count = atc->count_thread[0];
- snew(atc->rcount, atc->nslab);
- snew(atc->buf_index, atc->nslab);
- }
+ slabCommSetup.resize(nslab);
+ setup_coordinate_communication(this);
- atc->nthread = pme->nthread;
- if (atc->nthread > 1)
- {
- snew(atc->thread_plist, atc->nthread);
- }
- snew(atc->spline, atc->nthread);
- for (thread = 0; thread < atc->nthread; thread++)
- {
- if (atc->nthread > 1)
+ count_thread.resize(nthread);
+ for (auto &countThread : count_thread)
{
- snew(atc->thread_plist[thread].n, atc->nthread+2*gmxCacheLineSize);
- atc->thread_plist[thread].n += gmxCacheLineSize;
+ countThread.resize(nslab);
}
}
-}
-/*! \brief Destroy an atom communication data structure and its child structs */
-static void destroy_atomcomm(pme_atomcomm_t *atc)
-{
- sfree(atc->pd);
- if (atc->nslab > 1)
+ if (nthread > 1)
{
- sfree(atc->node_dest);
- sfree(atc->node_src);
- for (int i = 0; i < atc->nthread; i++)
- {
- sfree(atc->count_thread[i]);
- }
- sfree(atc->count_thread);
- sfree(atc->rcount);
- sfree(atc->buf_index);
-
- sfree(atc->x);
- sfree(atc->coefficient);
- sfree(atc->f);
- }
- sfree(atc->idx);
- sfree(atc->fractx);
+ threadMap.resize(nthread);
- sfree(atc->thread_idx);
- for (int i = 0; i < atc->nthread; i++)
- {
- if (atc->nthread > 1)
- {
- int *n_ptr = atc->thread_plist[i].n - gmxCacheLineSize;
- sfree(n_ptr);
- sfree(atc->thread_plist[i].i);
- }
- sfree(atc->spline[i].ind);
- for (int d = 0; d < ZZ; d++)
+#pragma omp parallel for num_threads(nthread) schedule(static)
+ for (int thread = 0; thread < nthread; thread++)
{
- sfree(atc->spline[i].theta[d]);
- sfree(atc->spline[i].dtheta[d]);
+ try
+ {
+ /* Allocate buffer with padding to avoid cache polution */
+ threadMap[thread].nBuffer.resize(nthread + 2*gmxCacheLineSize);
+ threadMap[thread].n = threadMap[thread].nBuffer.data() + gmxCacheLineSize;
+ }
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
- sfree_aligned(atc->spline[i].ptr_dtheta_z);
- sfree_aligned(atc->spline[i].ptr_theta_z);
- }
- if (atc->nthread > 1)
- {
- sfree(atc->thread_plist);
}
- sfree(atc->spline);
}
+#endif // !DOXYGEN
+
/*! \brief Initialize data structure for communication */
static void
init_overlap_comm(pme_overlap_t * ol,
gmx_pme_t *gmx_pme_init(const t_commrec *cr,
const NumPmeDomains &numPmeDomains,
const t_inputrec *ir,
- int homenr,
gmx_bool bFreeEnergy_q,
gmx_bool bFreeEnergy_lj,
gmx_bool bReproducible,
pme->ndecompdim = 0;
pme->nodeid_major = 0;
pme->nodeid_minor = 0;
-#if GMX_MPI
- pme->mpi_comm_d[0] = pme->mpi_comm_d[1] = MPI_COMM_NULL;
-#endif
}
else
{
}
/* Use atc[0] for spreading */
- init_atomcomm(pme.get(), &pme->atc[0], numPmeDomains.x > 1 ? 0 : 1, TRUE);
+ const int firstDimIndex = (numPmeDomains.x > 1 ? 0 : 1);
+ MPI_Comm mpiCommFirstDim = (pme->nnodes > 1 ? pme->mpi_comm_d[firstDimIndex] : MPI_COMM_NULL);
+ bool doSpread = true;
+ pme->atc.emplace_back(mpiCommFirstDim, pme->nthread,
+ pme->pme_order,
+ firstDimIndex, doSpread);
if (pme->ndecompdim >= 2)
{
- init_atomcomm(pme.get(), &pme->atc[1], 1, FALSE);
+ const int secondDimIndex = 1;
+ doSpread = false;
+ pme->atc.emplace_back(pme->mpi_comm_d[1], pme->nthread,
+ pme->pme_order,
+ secondDimIndex, doSpread);
}
- if (pme->nnodes == 1)
- {
- pme->atc[0].n = homenr;
- pme_realloc_atomcomm_things(&pme->atc[0]);
- }
-
- pme->lb_buf1 = nullptr;
- pme->lb_buf2 = nullptr;
- pme->lb_buf_nalloc = 0;
-
if (pme_gpu_active(pme.get()))
{
if (!pme->gpu)
real ewaldcoeff_q,
real ewaldcoeff_lj)
{
- int homenr;
-
// Create a copy of t_inputrec fields that are used in gmx_pme_init().
// TODO: This would be better as just copying a sub-structure that contains
// all the PME parameters and nothing else.
irc.nky = grid_size[YY];
irc.nkz = grid_size[ZZ];
- if (pme_src->nnodes == 1)
- {
- homenr = pme_src->atc[0].n;
- }
- else
- {
- homenr = -1;
- }
-
try
{
const gmx::MDLogger dummyLogger;
GMX_ASSERT(pmedata, "Invalid PME pointer");
NumPmeDomains numPmeDomains = { pme_src->nnodes_major, pme_src->nnodes_minor };
*pmedata = gmx_pme_init(cr, numPmeDomains,
- &irc, homenr, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE, ewaldcoeff_q, ewaldcoeff_lj,
+ &irc, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE, ewaldcoeff_q, ewaldcoeff_lj,
pme_src->nthread, pme_src->runMode, pme_src->gpu, nullptr, nullptr, dummyLogger);
+ /* When running PME on the CPU not using domain decomposition,
+ * the atom data is allocated once only in gmx_pme_(re)init().
+ */
+ if (!pme_src->gpu && pme_src->nnodes == 1)
+ {
+ gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), nullptr);
+ }
//TODO this is mostly passing around current values
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
/* We would like to reuse the fft grids, but that's harder */
}
-void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V)
+void gmx_pme_calc_energy(gmx_pme_t *pme,
+ gmx::ArrayRef<const gmx::RVec> x,
+ gmx::ArrayRef<const real> q,
+ real *V)
{
- pme_atomcomm_t *atc;
pmegrids_t *grid;
if (pme->nnodes > 1)
gmx_incons("gmx_pme_calc_energy with free energy");
}
- atc = &pme->atc_energy;
- atc->nthread = 1;
- if (atc->spline == nullptr)
+ if (!pme->atc_energy)
{
- snew(atc->spline, atc->nthread);
+ pme->atc_energy = std::make_unique<PmeAtomComm>(MPI_COMM_NULL, 1, pme->pme_order,
+ 0, true);
}
- atc->nslab = 1;
- atc->bSpread = TRUE;
- atc->pme_order = pme->pme_order;
- atc->n = n;
- pme_realloc_atomcomm_things(atc);
+ PmeAtomComm *atc = pme->atc_energy.get();
+ atc->setNumAtoms(x.ssize());
atc->x = x;
atc->coefficient = q;
/*! \brief Calculate initial Lorentz-Berthelot coefficients for LJ-PME */
static void
-calc_initial_lb_coeffs(struct gmx_pme_t *pme, const real *local_c6, const real *local_sigma)
+calc_initial_lb_coeffs(gmx::ArrayRef<real> coefficient,
+ const real *local_c6,
+ const real *local_sigma)
{
- int i;
- for (i = 0; i < pme->atc[0].n; ++i)
+ for (gmx::index i = 0; i < coefficient.ssize(); ++i)
{
- real sigma4;
- sigma4 = local_sigma[i];
- sigma4 = sigma4*sigma4;
- sigma4 = sigma4*sigma4;
- pme->atc[0].coefficient[i] = local_c6[i] / sigma4;
+ real sigma4 = local_sigma[i];
+ sigma4 = sigma4*sigma4;
+ sigma4 = sigma4*sigma4;
+ coefficient[i] = local_c6[i] / sigma4;
}
}
/*! \brief Calculate next Lorentz-Berthelot coefficients for LJ-PME */
static void
-calc_next_lb_coeffs(struct gmx_pme_t *pme, const real *local_sigma)
+calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient,
+ const real *local_sigma)
{
- int i;
-
- for (i = 0; i < pme->atc[0].n; ++i)
+ for (gmx::index i = 0; i < coefficient.ssize(); ++i)
{
- pme->atc[0].coefficient[i] *= local_sigma[i];
+ coefficient[i] *= local_sigma[i];
}
}
int gmx_pme_do(struct gmx_pme_t *pme,
- int start, int homenr,
- rvec x[], rvec f[],
+ gmx::ArrayRef<const gmx::RVec> coordinates,
+ gmx::ArrayRef<gmx::RVec> forces,
real chargeA[], real chargeB[],
real c6A[], real c6B[],
real sigmaA[], real sigmaB[],
{
GMX_ASSERT(pme->runMode == PmeRunMode::CPU, "gmx_pme_do should not be called on the GPU PME run.");
- int d, i, j, npme, grid_index, max_grid_index;
- int n_d;
- pme_atomcomm_t *atc = nullptr;
- pmegrids_t *pmegrid = nullptr;
- real *grid = nullptr;
- rvec *f_d;
+ int d, npme, grid_index, max_grid_index;
+ PmeAtomComm &atc = pme->atc[0];
+ pmegrids_t *pmegrid = nullptr;
+ real *grid = nullptr;
real *coefficient = nullptr;
PmeOutput output[2]; // The second is used for the B state with FEP
real scale, lambda;
if (pme->nnodes > 1)
{
- atc = &pme->atc[0];
- atc->npd = homenr;
- if (atc->npd > atc->pd_nalloc)
+ atc.pd.resize(coordinates.ssize());
+ for (int d = pme->ndecompdim-1; d >= 0; d--)
{
- atc->pd_nalloc = over_alloc_dd(atc->npd);
- srenew(atc->pd, atc->pd_nalloc);
- }
- for (d = pme->ndecompdim-1; d >= 0; d--)
- {
- atc = &pme->atc[d];
- atc->maxshift = (atc->dimind == 0 ? maxshift_x : maxshift_y);
+ PmeAtomComm &atc = pme->atc[d];
+ atc.maxshift = (atc.dimind == 0 ? maxshift_x : maxshift_y);
}
}
else
{
- atc = &pme->atc[0];
- /* This could be necessary for TPI */
- pme->atc[0].n = homenr;
- if (DOMAINDECOMP(cr))
- {
- pme_realloc_atomcomm_things(atc);
- }
- atc->x = x;
- atc->f = f;
+ GMX_ASSERT(coordinates.ssize() == atc.numAtoms(), "We expect atc.numAtoms() coordinates");
+ GMX_ASSERT(forces.ssize() >= atc.numAtoms(), "We need a force buffer with at least atc.numAtoms() elements");
+
+ atc.x = coordinates;
+ atc.f = forces;
}
matrix scaledBox;
pfft_setup = pme->pfft_setup[grid_index];
switch (grid_index)
{
- case 0: coefficient = chargeA + start; break;
- case 1: coefficient = chargeB + start; break;
- case 2: coefficient = c6A + start; break;
- case 3: coefficient = c6B + start; break;
+ case 0: coefficient = chargeA; break;
+ case 1: coefficient = chargeB; break;
+ case 2: coefficient = c6A; break;
+ case 3: coefficient = c6B; break;
}
grid = pmegrid->grid.grid;
if (pme->nnodes == 1)
{
- atc->coefficient = coefficient;
+ atc.coefficient = gmx::arrayRefFromArray(coefficient, coordinates.size());
}
else
{
wallcycle_start(wcycle, ewcPME_REDISTXF);
- do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, coefficient);
+ do_redist_pos_coeffs(pme, cr, bFirst, coordinates, coefficient);
wallcycle_stop(wcycle, ewcPME_REDISTXF);
}
if (debug)
{
fprintf(debug, "Rank= %6d, pme local particles=%6d\n",
- cr->nodeid, atc->n);
+ cr->nodeid, atc.numAtoms());
}
if (flags & GMX_PME_SPREAD)
wallcycle_start(wcycle, ewcPME_SPREAD);
/* Spread the coefficients on a grid */
- spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+ spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
if (bFirst)
{
- inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+ inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc.numAtoms());
}
inc_nrnb(nrnb, eNR_SPREADBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+ pme->pme_order*pme->pme_order*pme->pme_order*atc.numAtoms());
if (!pme->bUseThreads)
{
{
try
{
- gather_f_bsplines(pme, grid, bClearF, atc,
- &atc->spline[thread],
+ gather_f_bsplines(pme, grid, bClearF, &atc,
+ &atc.spline[thread],
pme->bFEP ? (grid_index % 2 == 0 ? 1.0-lambda : lambda) : 1.0);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
inc_nrnb(nrnb, eNR_GATHERFBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+ pme->pme_order*pme->pme_order*pme->pme_order*atc.numAtoms());
/* Note: this wallcycle region is opened above inside an OpenMP
region, so take care if refactoring code here. */
wallcycle_stop(wcycle, ewcPME_GATHER);
/* Loop over A- and B-state if we are doing FEP */
for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
{
- real *local_c6 = nullptr, *local_sigma = nullptr, *RedistC6 = nullptr, *RedistSigma = nullptr;
+ real *local_c6 = nullptr, *local_sigma = nullptr, *RedistC6 = nullptr, *RedistSigma = nullptr;
+ gmx::ArrayRef<real> coefficientBuffer;
if (pme->nnodes == 1)
{
- if (pme->lb_buf1 == nullptr)
- {
- pme->lb_buf_nalloc = pme->atc[0].n;
- snew(pme->lb_buf1, pme->lb_buf_nalloc);
- }
- pme->atc[0].coefficient = pme->lb_buf1;
+ pme->lb_buf1.resize(atc.numAtoms());
+ coefficientBuffer = pme->lb_buf1;
switch (fep_state)
{
case 0:
}
else
{
- atc = &pme->atc[0];
+ coefficientBuffer = atc.coefficientBuffer;
switch (fep_state)
{
case 0:
}
wallcycle_start(wcycle, ewcPME_REDISTXF);
- do_redist_pos_coeffs(pme, cr, start, homenr, bFirst, x, RedistC6);
- if (pme->lb_buf_nalloc < atc->n)
- {
- pme->lb_buf_nalloc = atc->nalloc;
- srenew(pme->lb_buf1, pme->lb_buf_nalloc);
- srenew(pme->lb_buf2, pme->lb_buf_nalloc);
- }
- local_c6 = pme->lb_buf1;
- for (i = 0; i < atc->n; ++i)
+ do_redist_pos_coeffs(pme, cr, bFirst, coordinates, RedistC6);
+ pme->lb_buf1.resize(atc.numAtoms());
+ pme->lb_buf2.resize(atc.numAtoms());
+ local_c6 = pme->lb_buf1.data();
+ for (int i = 0; i < atc.numAtoms(); ++i)
{
- local_c6[i] = atc->coefficient[i];
+ local_c6[i] = atc.coefficient[i];
}
- do_redist_pos_coeffs(pme, cr, start, homenr, FALSE, x, RedistSigma);
- local_sigma = pme->lb_buf2;
- for (i = 0; i < atc->n; ++i)
+ do_redist_pos_coeffs(pme, cr, FALSE, coordinates, RedistSigma);
+ local_sigma = pme->lb_buf2.data();
+ for (int i = 0; i < atc.numAtoms(); ++i)
{
- local_sigma[i] = atc->coefficient[i];
+ local_sigma[i] = atc.coefficient[i];
}
wallcycle_stop(wcycle, ewcPME_REDISTXF);
}
- calc_initial_lb_coeffs(pme, local_c6, local_sigma);
+ atc.coefficient = coefficientBuffer;
+ calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
/*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
for (grid_index = 2; grid_index < 9; ++grid_index)
pmegrid = &pme->pmegrid[grid_index];
fftgrid = pme->fftgrid[grid_index];
pfft_setup = pme->pfft_setup[grid_index];
- calc_next_lb_coeffs(pme, local_sigma);
+ calc_next_lb_coeffs(coefficientBuffer, local_sigma);
grid = pmegrid->grid.grid;
if (flags & GMX_PME_SPREAD)
{
wallcycle_start(wcycle, ewcPME_SPREAD);
/* Spread the c6 on a grid */
- spread_on_grid(pme, &pme->atc[0], pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
+ spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
if (bFirst)
{
- inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc->n);
+ inc_nrnb(nrnb, eNR_WEIGHTS, DIM*atc.numAtoms());
}
inc_nrnb(nrnb, eNR_SPREADBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+ pme->pme_order*pme->pme_order*pme->pme_order*atc.numAtoms());
if (pme->nthread == 1)
{
wrap_periodic_pmegrid(pme, grid);
if (bBackFFT)
{
bFirst = !pme->doCoulomb;
- calc_initial_lb_coeffs(pme, local_c6, local_sigma);
+ calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
for (grid_index = 8; grid_index >= 2; --grid_index)
{
/* Unpack structure */
fftgrid = pme->fftgrid[grid_index];
pfft_setup = pme->pfft_setup[grid_index];
grid = pmegrid->grid.grid;
- calc_next_lb_coeffs(pme, local_sigma);
+ calc_next_lb_coeffs(coefficientBuffer, local_sigma);
#pragma omp parallel num_threads(pme->nthread) private(thread)
{
try
inc_nrnb(nrnb, eNR_GATHERFBSP,
- pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+ pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].numAtoms());
}
wallcycle_stop(wcycle, ewcPME_GATHER);
wallcycle_start(wcycle, ewcPME_REDISTXF);
for (d = 0; d < pme->ndecompdim; d++)
{
- atc = &pme->atc[d];
+ gmx::ArrayRef<gmx::RVec> forcesRef;
if (d == pme->ndecompdim - 1)
{
- n_d = homenr;
- f_d = f + start;
+ const size_t numAtoms = coordinates.size();
+ GMX_ASSERT(forces.size() >= numAtoms, "Need at least numAtoms forces");
+ forcesRef = forces.subArray(0, numAtoms);
}
else
{
- n_d = pme->atc[d+1].n;
- f_d = pme->atc[d+1].f;
+ forcesRef = pme->atc[d + 1].f;
}
if (DOMAINDECOMP(cr))
{
- dd_pmeredist_f(pme, atc, n_d, f_d,
+ dd_pmeredist_f(pme, &pme->atc[d], forcesRef,
d == pme->ndecompdim-1 && pme->bPPnode);
}
}
{
*energy_q = (1.0-lambda_q)*output[0].coulombEnergy_ + lambda_q*output[1].coulombEnergy_;
*dvdlambda_q += output[1].coulombEnergy_ - output[0].coulombEnergy_;
- for (i = 0; i < DIM; i++)
+ for (int i = 0; i < DIM; i++)
{
- for (j = 0; j < DIM; j++)
+ for (int j = 0; j < DIM; j++)
{
vir_q[i][j] += (1.0-lambda_q)*output[0].coulombVirial_[i][j] +
lambda_q*output[1].coulombVirial_[i][j];
{
*energy_lj = (1.0-lambda_lj)*output[0].lennardJonesEnergy_ + lambda_lj*output[1].lennardJonesEnergy_;
*dvdlambda_lj += output[1].lennardJonesEnergy_ - output[0].lennardJonesEnergy_;
- for (i = 0; i < DIM; i++)
+ for (int i = 0; i < DIM; i++)
{
- for (j = 0; j < DIM; j++)
+ for (int j = 0; j < DIM; j++)
{
vir_lj[i][j] += (1.0-lambda_lj)*output[0].lennardJonesVirial_[i][j] + lambda_lj*output[1].lennardJonesVirial_[i][j];
}
sfree(pme->cfftgrid);
sfree(pme->pfft_setup);
- for (int i = 0; i < std::max(1, pme->ndecompdim); i++) //pme->atc[0] is always allocated
- {
- destroy_atomcomm(&pme->atc[i]);
- }
-
for (int i = 0; i < DIM; i++)
{
sfree(pme->bsp_mod[i]);
}
- sfree(pme->lb_buf1);
- sfree(pme->lb_buf2);
-
sfree(pme->bufv);
sfree(pme->bufr);
delete pme;
}
-void gmx_pme_reinit_atoms(const gmx_pme_t *pme, const int nAtoms, const real *charges)
+void gmx_pme_reinit_atoms(gmx_pme_t *pme,
+ const int numAtoms,
+ const real *charges)
{
if (pme_gpu_active(pme))
{
- pme_gpu_reinit_atoms(pme->gpu, nAtoms, charges);
+ pme_gpu_reinit_atoms(pme->gpu, numAtoms, charges);
+ }
+ else
+ {
+ pme->atc[0].setNumAtoms(numAtoms);
+ // TODO: set the charges here as well
}
- // TODO: handle the CPU case here; handle the whole t_mdatoms
}
*/
gmx_pme_t *gmx_pme_init(const t_commrec *cr,
const NumPmeDomains &numPmeDomains,
- const t_inputrec *ir, int homenr,
+ const t_inputrec *ir,
gmx_bool bFreeEnergy_q, gmx_bool bFreeEnergy_lj,
gmx_bool bReproducible,
real ewaldcoeff_q, real ewaldcoeff_lj,
/*! \brief Do a PME calculation on a CPU for the long range electrostatics and/or LJ.
*
+ * Computes the PME forces and the energy and viral, when requested,
+ * for all atoms in \p coordinates. Forces, when requested, are added
+ * to the buffer \p forces, which is allowed to contain more elements
+ * than the number of elements in \p coordinates.
* The meaning of \p flags is defined above, and determines which
* parts of the calculation are performed.
*
* \return 0 indicates all well, non zero is an error code.
*/
int gmx_pme_do(struct gmx_pme_t *pme,
- int start, int homenr,
- rvec x[], rvec f[],
+ gmx::ArrayRef<const gmx::RVec> coordinates,
+ gmx::ArrayRef<gmx::RVec> forces,
real chargeA[], real chargeB[],
real c6A[], real c6B[],
real sigmaA[], real sigmaB[],
* pme struct. Currently does not work in parallel or with free
* energy.
*/
-void gmx_pme_calc_energy(struct gmx_pme_t *pme, int n, rvec *x, real *q, real *V);
+void gmx_pme_calc_energy(gmx_pme_t *pme,
+ gmx::ArrayRef<const gmx::RVec> x,
+ gmx::ArrayRef<const real> q,
+ real *V);
/*! \brief Send the charges and maxshift to out PME-only node. */
void gmx_pme_send_parameters(const t_commrec *cr,
* TODO: it should update the PME CPU atom data as well.
* (currently PME CPU call gmx_pme_do() gets passed the input pointers for each computation).
*
- * \param[in] pme The PME structure.
- * \param[in] nAtoms The number of particles.
- * \param[in] charges The pointer to the array of particle charges.
+ * \param[in,out] pme The PME structure.
+ * \param[in] numAtoms The number of particles.
+ * \param[in] charges The pointer to the array of particle charges.
*/
-void gmx_pme_reinit_atoms(const gmx_pme_t *pme, int nAtoms, const real *charges);
+void gmx_pme_reinit_atoms(gmx_pme_t *pme,
+ int numAtoms,
+ const real *charges);
/* A block of PME GPU functions */
do_fspline (
const gmx_pme_t * pme,
const real * gmx_restrict grid,
- const pme_atomcomm_t * gmx_restrict atc,
+ const PmeAtomComm * gmx_restrict atc,
const splinedata_t * gmx_restrict spline,
int nn)
: pme(pme), grid(grid), atc(atc), spline(spline), nn(nn) {}
const int norder = nn*order;
/* Pointer arithmetic alert, next six statements */
- const real *const gmx_restrict thx = spline->theta[XX] + norder;
- const real *const gmx_restrict thy = spline->theta[YY] + norder;
- const real *const gmx_restrict thz = spline->theta[ZZ] + norder;
- const real *const gmx_restrict dthx = spline->dtheta[XX] + norder;
- const real *const gmx_restrict dthy = spline->dtheta[YY] + norder;
- const real *const gmx_restrict dthz = spline->dtheta[ZZ] + norder;
+ const real *const gmx_restrict thx = spline->theta.coefficients[XX] + norder;
+ const real *const gmx_restrict thy = spline->theta.coefficients[YY] + norder;
+ const real *const gmx_restrict thz = spline->theta.coefficients[ZZ] + norder;
+ const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+ const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+ const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
RVec f(0, 0, 0);
{
const int norder = nn*4;
/* Pointer arithmetic alert, next six statements */
- const real *const gmx_restrict thx = spline->theta[XX] + norder;
- const real *const gmx_restrict thy = spline->theta[YY] + norder;
- const real *const gmx_restrict thz = spline->theta[ZZ] + norder;
- const real *const gmx_restrict dthx = spline->dtheta[XX] + norder;
- const real *const gmx_restrict dthy = spline->dtheta[YY] + norder;
- const real *const gmx_restrict dthz = spline->dtheta[ZZ] + norder;
+ const real *const gmx_restrict thx = spline->theta.coefficients[XX] + norder;
+ const real *const gmx_restrict thy = spline->theta.coefficients[YY] + norder;
+ const real *const gmx_restrict thz = spline->theta.coefficients[ZZ] + norder;
+ const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+ const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+ const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
Simd4NReal fx_S = setZero();
Simd4NReal fy_S = setZero();
const int norder = nn*order;
GMX_ASSERT(gridNZ % 4 == 0, "For aligned SIMD4 operations the grid size has to be padded up to a multiple of 4");
/* Pointer arithmetic alert, next six statements */
- const real *const gmx_restrict thx = spline->theta[XX] + norder;
- const real *const gmx_restrict thy = spline->theta[YY] + norder;
- const real *const gmx_restrict thz = spline->theta[ZZ] + norder;
- const real *const gmx_restrict dthx = spline->dtheta[XX] + norder;
- const real *const gmx_restrict dthy = spline->dtheta[YY] + norder;
- const real *const gmx_restrict dthz = spline->dtheta[ZZ] + norder;
+ const real *const gmx_restrict thx = spline->theta.coefficients[XX] + norder;
+ const real *const gmx_restrict thy = spline->theta.coefficients[YY] + norder;
+ const real *const gmx_restrict thz = spline->theta.coefficients[ZZ] + norder;
+ const real *const gmx_restrict dthx = spline->dtheta.coefficients[XX] + norder;
+ const real *const gmx_restrict dthy = spline->dtheta.coefficients[YY] + norder;
+ const real *const gmx_restrict dthz = spline->dtheta.coefficients[ZZ] + norder;
struct pme_spline_work *const work = pme->spline_work;
private:
const gmx_pme_t *const pme;
const real *const gmx_restrict grid;
- const pme_atomcomm_t *const gmx_restrict atc;
+ const PmeAtomComm *const gmx_restrict atc;
const splinedata_t *const gmx_restrict spline;
const int nn;
void gather_f_bsplines(const gmx_pme_t *pme, const real *grid,
- gmx_bool bClearF, const pme_atomcomm_t *atc,
+ gmx_bool bClearF, const PmeAtomComm *atc,
const splinedata_t *spline,
real scale)
{
const real rzz = pme->recipbox[ZZ][ZZ];
/* Extract the buffer for force output */
- rvec * gmx_restrict force = atc->f;
+ rvec * gmx_restrict force = as_rvec_array(atc->f.data());
/* Note that unrolling this loop by templating this function on order
* deteriorates performance significantly with gcc5/6/7.
real gather_energy_bsplines(gmx_pme_t *pme, const real *grid,
- pme_atomcomm_t *atc)
+ PmeAtomComm *atc)
{
splinedata_t *spline;
- int n, ithx, ithy, ithz, i0, j0, k0;
+ int ithx, ithy, ithz, i0, j0, k0;
int index_x, index_xy;
int *idxptr;
real energy, pot, tx, ty, coefficient, gval;
order = pme->pme_order;
energy = 0;
- for (n = 0; (n < atc->n); n++)
+ for (int n = 0; n < atc->numAtoms(); n++)
{
coefficient = atc->coefficient[n];
k0 = idxptr[ZZ];
/* Pointer arithmetic alert, next three statements */
- thx = spline->theta[XX] + norder;
- thy = spline->theta[YY] + norder;
- thz = spline->theta[ZZ] + norder;
+ thx = spline->theta.coefficients[XX] + norder;
+ thy = spline->theta.coefficients[YY] + norder;
+ thz = spline->theta.coefficients[ZZ] + norder;
pot = 0;
for (ithx = 0; (ithx < order); ithx++)
void
gather_f_bsplines(const struct gmx_pme_t *pme, const real *grid,
- gmx_bool bClearF, const pme_atomcomm_t *atc,
+ gmx_bool bClearF, const PmeAtomComm *atc,
const splinedata_t *spline,
real scale);
real
gather_energy_bsplines(struct gmx_pme_t *pme, const real *grid,
- pme_atomcomm_t *atc);
+ PmeAtomComm *atc);
#endif
kernelParamsPtr->constants.elFactor = ONE_4PI_EPS0 / pmeGpu->common->epsilon_r;
}
-void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGpu, const pme_atomcomm_t *atc,
+void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGpu, const PmeAtomComm *atc,
PmeSplineDataType type, int dimIndex, PmeLayoutTransform transform)
{
// The GPU atom spline data is laid out in a different way currently than the CPU one.
switch (type)
{
case PmeSplineDataType::Values:
- cpuSplineBuffer = atc->spline[threadIndex].theta[dimIndex];
+ cpuSplineBuffer = atc->spline[threadIndex].theta.coefficients[dimIndex];
h_splineBuffer = pmeGpu->staging.h_theta;
break;
case PmeSplineDataType::Derivatives:
- cpuSplineBuffer = atc->spline[threadIndex].dtheta[dimIndex];
+ cpuSplineBuffer = atc->spline[threadIndex].dtheta.coefficients[dimIndex];
h_splineBuffer = pmeGpu->staging.h_dtheta;
break;
struct gmx_gpu_opt_t;
struct gmx_pme_t; // only used in pme_gpu_reinit
struct gmx_wallclock_gpu_pme_t;
-struct pme_atomcomm_t;
+class PmeAtomComm;
struct t_complex;
namespace gmx
* \param[in] transform Layout transform type
*/
GPU_FUNC_QUALIFIER void pme_gpu_transform_spline_atom_data(const PmeGpu *GPU_FUNC_ARGUMENT(pmeGpu),
- const pme_atomcomm_t *GPU_FUNC_ARGUMENT(atc),
+ const PmeAtomComm *GPU_FUNC_ARGUMENT(atc),
PmeSplineDataType GPU_FUNC_ARGUMENT(type),
int GPU_FUNC_ARGUMENT(dimIndex),
PmeLayoutTransform GPU_FUNC_ARGUMENT(transform)) GPU_FUNC_TERM
#include "gromacs/math/gmxcomplex.h"
#include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/defaultinitializationallocator.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
#include "pme_gpu_types_host.h"
std::vector<real> recvbuf; //!< Shared buffer for receiving
};
+template<typename T>
+using AlignedVector = std::vector < T, gmx::AlignedAllocator < T>>;
+
+template<typename T>
+using FastVector = std::vector < T, gmx::DefaultInitializationAllocator < T>>;
+
/*! \brief Data structure for organizing particle allocation to threads */
-typedef struct {
- int *n; /* Cumulative counts of the number of particles per thread */
- int nalloc; /* Allocation size of i */
- int *i; /* Particle indices ordered on thread index (n) */
-} thread_plist_t;
+struct AtomToThreadMap
+{
+ //! Cumulative counts of the number of particles per thread
+ int *n = nullptr;
+ //! Storage buffer for n
+ std::vector<int> nBuffer;
+ //! Particle indices ordered on thread index (n)
+ FastVector<int> i;
+};
/*! \brief Helper typedef for spline vectors */
typedef real *splinevec[DIM];
+/*! \internal
+ * \brief Coefficients for theta or dtheta
+ */
+class SplineCoefficients
+{
+ public:
+ //! Reallocate for use with up to nalloc coefficients
+ void realloc(int nalloc);
+
+ //! Pointers to the coefficient buffer for x, y, z
+ splinevec coefficients = { nullptr };
+ private:
+ //! Storage for x coefficients
+ std::vector<real> bufferX_;
+ //! Storage for y coefficients
+ std::vector<real> bufferY_;
+ //! Storage for z coefficients, aligned for SIMD load
+ AlignedVector<real> bufferZ_;
+};
+
/*! \brief Data structure for beta-spline interpolation */
-typedef struct {
- int n;
- int *ind;
- splinevec theta;
- real *ptr_theta_z;
- splinevec dtheta;
- real *ptr_dtheta_z;
-} splinedata_t;
-
-/*! \brief Data structure for coordinating transfer between PP and PME ranks*/
-struct pme_atomcomm_t{
- int dimind; /* The index of the dimension, 0=x, 1=y */
- int nslab;
- int nodeid;
-#if GMX_MPI
- MPI_Comm mpi_comm;
-#endif
+struct splinedata_t
+{
+ int n = 0;
+ FastVector<int> ind;
+ SplineCoefficients theta;
+ SplineCoefficients dtheta;
+ int nalloc = 0;
+};
- int *node_dest; /* The nodes to send x and q to with DD */
- int *node_src; /* The nodes to receive x and q from with DD */
- int *buf_index; /* Index for commnode into the buffers */
-
- int maxshift;
-
- int npd;
- int pd_nalloc;
- int *pd;
- int *count; /* The number of atoms to send to each node */
- int **count_thread;
- int *rcount; /* The number of atoms to receive */
-
- int n;
- int nalloc;
- rvec *x;
- real *coefficient;
- rvec *f;
- gmx_bool bSpread; /* These coordinates are used for spreading */
- int pme_order;
- ivec *idx;
- rvec *fractx; /* Fractional coordinate relative to
- * the lower cell boundary
- */
- int nthread;
- int *thread_idx; /* Which thread should spread which coefficient */
- thread_plist_t *thread_plist;
- splinedata_t *spline;
+/*! \brief PME slab MPI communication setup */
+struct SlabCommSetup
+{
+ //! The nodes to send x and q to with DD
+ int node_dest;
+ //! The nodes to receive x and q from with DD
+ int node_src;
+ //! Index for commnode into the buffers
+ int buf_index;
+ //! The number of atoms to receive
+ int rcount;
+};
+
+/*! \internal
+ * \brief Data structure for coordinating transfers between PME ranks along one dimension
+ *
+ * Also used for passing coordinates, coefficients and forces to and from PME routines.
+ */
+class PmeAtomComm
+{
+ public:
+ //! Constructor, \p PmeMpiCommunicator is the communicator for this dimension
+ PmeAtomComm(MPI_Comm PmeMpiCommunicator,
+ int numThreads,
+ int pmeOrder,
+ int dimIndex,
+ bool doSpread);
+
+ //! Set the atom count and when necessary resizes atom buffers
+ void setNumAtoms(int numAtoms);
+
+ //! Returns the atom count
+ int numAtoms() const
+ {
+ return numAtoms_;
+ }
+
+ //! Returns the number of atoms to send to each rank
+ gmx::ArrayRef<int> sendCount()
+ {
+ GMX_ASSERT(!count_thread.empty(), "Need at least one thread_count");
+ return count_thread[0];
+ }
+
+ //! The index of the dimension, 0=x, 1=y
+ int dimind = 0;
+ //! The number of slabs and ranks this dimension is decomposed over
+ int nslab = 1;
+ //! Our MPI rank index
+ int nodeid = 0;
+ //! Communicator for this dimension
+ MPI_Comm mpi_comm;
+
+ //! Communication setup for each slab, only present with nslab > 1
+ std::vector<SlabCommSetup> slabCommSetup;
+ //! The maximum communication distance counted in MPI ranks
+ int maxshift = 0;
+
+ //! The target slab index for each particle
+ FastVector<int> pd;
+ //! Target particle counts for each slab, for each thread
+ std::vector < std::vector < int>> count_thread;
+
+ private:
+ //! The number of atoms
+ int numAtoms_ = 0;
+ public:
+ //! The coordinates
+ gmx::ArrayRef<const gmx::RVec> x;
+ //! The coefficient, charges or LJ C6
+ gmx::ArrayRef<const real> coefficient;
+ //! The forces
+ gmx::ArrayRef<gmx::RVec> f;
+ //! Coordinate buffer, used only with nslab > 1
+ FastVector<gmx::RVec> xBuffer;
+ //! Coefficient buffer, used only with nslab > 1
+ FastVector<real> coefficientBuffer;
+ //! Force buffer, used only with nslab > 1
+ FastVector<gmx::RVec> fBuffer;
+ //! Tells whether these coordinates are used for spreading
+ bool bSpread;
+ //! The PME order
+ int pme_order;
+ //! The grid index per atom
+ FastVector<gmx::IVec> idx;
+ //! Fractional atom coordinates relative to the lower cell boundary
+ FastVector<gmx::RVec> fractx;
+
+ //! The number of threads to use in PME
+ int nthread;
+ //! Thread index for each atom
+ FastVector<int> thread_idx;
+ std::vector<AtomToThreadMap> threadMap;
+ std::vector<splinedata_t> spline;
};
/*! \brief Data structure for a single PME grid */
int *nnx, *nny, *nnz;
real *fshx, *fshy, *fshz;
- pme_atomcomm_t atc[2]; /* Indexed on decomposition index */
+ std::vector<PmeAtomComm> atc; /* Indexed on decomposition index */
matrix recipbox;
real boxVolume;
splinevec bsp_mod;
* for spreading/gathering (in serial), or the C6 coefficient for
* local atoms (in parallel). lb_buf2 is only used in parallel,
* and stores the sigma values for local atoms. */
- real *lb_buf1, *lb_buf2;
- int lb_buf_nalloc; /* Allocation size for the above buffers. */
+ FastVector<real> lb_buf1;
+ FastVector<real> lb_buf2;
- pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
+ pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
- pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
+ /* Atom step for energy only calculation in gmx_pme_calc_energy() */
+ std::unique_ptr<PmeAtomComm> atc_energy;
+ /* Communication buffers */
rvec *bufv; /* Communication buffer */
real *bufr; /* Communication buffer */
int buf_nalloc; /* The communication buffer size */
}
else
{
- gmx_pme_do(pme, 0, natoms, as_rvec_array(pme_pp->x.data()), as_rvec_array(pme_pp->f.data()),
+ GMX_ASSERT(pme_pp->x.size() == static_cast<size_t>(natoms), "The coordinate buffer should have size natoms");
+
+ gmx_pme_do(pme, pme_pp->x, pme_pp->f,
pme_pp->chargeA.data(), pme_pp->chargeB.data(),
pme_pp->sqrt_c6A.data(), pme_pp->sqrt_c6B.data(),
pme_pp->sigmaA.data(), pme_pp->sigmaB.data(), box,
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+
+/*! \internal \file
+ *
+ * \brief This file contains function definitions for redistributing
+ * atoms over the PME domains
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
+
#include "gmxpre.h"
#include "pme_redistribute.h"
#include "gromacs/utility/smalloc.h"
#include "pme_internal.h"
-#include "pme_simd.h"
+//! Calculate the slab indices and store in \p atc, store counts in \p count
static void pme_calc_pidx(int start, int end,
- matrix recipbox, rvec x[],
- pme_atomcomm_t *atc, int *count)
+ const matrix recipbox, const rvec x[],
+ PmeAtomComm *atc, int *count)
{
- int nslab, i;
- int si;
- real *xptr, s;
- real rxx, ryx, rzx, ryy, rzy;
- int *pd;
+ int nslab, i;
+ int si;
+ const real *xptr;
+ real s;
+ real rxx, ryx, rzx, ryy, rzy;
+ int *pd;
/* Calculate PME task index (pidx) for each grid index.
* Here we always assign equally sized slabs to each node
*/
nslab = atc->nslab;
- pd = atc->pd;
+ pd = atc->pd.data();
/* Reset the count */
for (i = 0; i < nslab; i++)
}
}
-static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
- pme_atomcomm_t *atc)
+//! Wrapper function for calculating slab indices, stored in \p atc
+static void pme_calc_pidx_wrapper(gmx::ArrayRef<const gmx::RVec> x,
+ const matrix recipbox,
+ PmeAtomComm *atc)
{
int nthread, thread, slab;
{
try
{
+ const int natoms = x.ssize();
pme_calc_pidx(natoms* thread /nthread,
natoms*(thread+1)/nthread,
- recipbox, x, atc, atc->count_thread[thread]);
+ recipbox, as_rvec_array(x.data()),
+ atc, atc->count_thread[thread].data());
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
}
}
-static void realloc_splinevec(splinevec th, real **ptr_z, int nalloc)
+#ifndef DOXYGEN
+
+void SplineCoefficients::realloc(const int nalloc)
{
const int padding = 4;
- int i;
-
- srenew(th[XX], nalloc);
- srenew(th[YY], nalloc);
- /* In z we add padding, this is only required for the aligned SIMD code */
- sfree_aligned(*ptr_z);
- snew_aligned(*ptr_z, nalloc+2*padding, SIMD4_ALIGNMENT);
- th[ZZ] = *ptr_z + padding;
- for (i = 0; i < padding; i++)
- {
- (*ptr_z)[ i] = 0;
- (*ptr_z)[padding+nalloc+i] = 0;
- }
+ bufferX_.resize(nalloc);
+ coefficients[XX] = bufferX_.data();
+ bufferY_.resize(nalloc);
+ coefficients[YY] = bufferY_.data();
+ /* In z we add padding, this is only required for the aligned 4-wide SIMD code */
+ bufferZ_.resize(nalloc + 2*padding);
+ coefficients[ZZ] = bufferZ_.data() + padding;
}
-static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
+#endif // !DOXYGEN
+
+//! Reallocates all buffers in \p spline to fit atoms in \p atc
+static void pme_realloc_splinedata(splinedata_t *spline,
+ const PmeAtomComm *atc)
{
- int i;
+ if (spline->nalloc >= atc->x.ssize() &&
+ spline->nalloc >= atc->numAtoms())
+ {
+ return;
+ }
- srenew(spline->ind, atc->nalloc);
+ spline->nalloc = std::max(atc->x.capacity(), static_cast<size_t>(atc->numAtoms()));
+ spline->ind.resize(spline->nalloc);
/* Initialize the index to identity so it works without threads */
- for (i = 0; i < atc->nalloc; i++)
+ for (int i = 0; i < spline->nalloc; i++)
{
spline->ind[i] = i;
}
- realloc_splinevec(spline->theta, &spline->ptr_theta_z,
- atc->pme_order*atc->nalloc);
- realloc_splinevec(spline->dtheta, &spline->ptr_dtheta_z,
- atc->pme_order*atc->nalloc);
+ spline->theta.realloc(atc->pme_order*spline->nalloc);
+ spline->dtheta.realloc(atc->pme_order*spline->nalloc);
}
-void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
+#ifndef DOXYGEN
+
+void PmeAtomComm::setNumAtoms(const int numAtoms)
{
- int nalloc_old, i;
+ numAtoms_ = numAtoms;
- /* We have to avoid a NULL pointer for atc->x to avoid
- * possible fatal errors in MPI routines.
- */
- if (atc->n > atc->nalloc || atc->nalloc == 0)
+ if (nslab > 1)
{
- nalloc_old = atc->nalloc;
- atc->nalloc = over_alloc_dd(std::max(atc->n, 1));
-
- if (atc->nslab > 1)
+ /* We have to avoid a NULL pointer for atc->x to avoid
+ * possible fatal errors in MPI routines.
+ */
+ xBuffer.resize(numAtoms_);
+ if (xBuffer.capacity() == 0)
{
- srenew(atc->x, atc->nalloc);
- srenew(atc->coefficient, atc->nalloc);
- srenew(atc->f, atc->nalloc);
- for (i = nalloc_old; i < atc->nalloc; i++)
- {
- clear_rvec(atc->f[i]);
- }
+ xBuffer.reserve(1);
+ }
+ x = xBuffer;
+ coefficientBuffer.resize(numAtoms_);
+ if (coefficientBuffer.capacity() == 0)
+ {
+ coefficientBuffer.reserve(1);
}
- if (atc->bSpread)
+ coefficient = coefficientBuffer;
+ const int nalloc_old = fBuffer.size();
+ fBuffer.resize(numAtoms_);
+ for (int i = nalloc_old; i < numAtoms_; i++)
{
- srenew(atc->fractx, atc->nalloc);
- srenew(atc->idx, atc->nalloc);
+ clear_rvec(fBuffer[i]);
+ }
+ f = fBuffer;
+ }
+ if (bSpread)
+ {
+ fractx.resize(numAtoms_);
+ idx.resize(numAtoms_);
- if (atc->nthread > 1)
- {
- srenew(atc->thread_idx, atc->nalloc);
- }
+ if (nthread > 1)
+ {
+ thread_idx.resize(numAtoms_);
+ }
- for (i = 0; i < atc->nthread; i++)
- {
- pme_realloc_splinedata(&atc->spline[i], atc);
- }
+ for (int i = 0; i < nthread; i++)
+ {
+ pme_realloc_splinedata(&spline[i], this);
}
}
}
-static void pme_dd_sendrecv(pme_atomcomm_t gmx_unused *atc,
+#endif // !DOXYGEN
+
+//! Communicates buffers between rank separated by \p shift slabs
+static void pme_dd_sendrecv(PmeAtomComm gmx_unused *atc,
gmx_bool gmx_unused bBackward, int gmx_unused shift,
void gmx_unused *buf_s, int gmx_unused nbyte_s,
void gmx_unused *buf_r, int gmx_unused nbyte_r)
if (!bBackward)
{
- dest = atc->node_dest[shift];
- src = atc->node_src[shift];
+ dest = atc->slabCommSetup[shift].node_dest;
+ src = atc->slabCommSetup[shift].node_src;
}
else
{
- dest = atc->node_src[shift];
- src = atc->node_dest[shift];
+ dest = atc->slabCommSetup[shift].node_src;
+ src = atc->slabCommSetup[shift].node_dest;
}
if (nbyte_s > 0 && nbyte_r > 0)
#endif
}
-static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
- int n, gmx_bool bX, rvec *x, const real *data,
- pme_atomcomm_t *atc)
+//! Redistristributes \p data and optionally coordinates between MPI ranks
+static void dd_pmeredist_pos_coeffs(gmx_pme_t *pme,
+ const gmx_bool bX,
+ gmx::ArrayRef<const gmx::RVec> x,
+ const real *data,
+ PmeAtomComm *atc)
{
- int *commnode, *buf_index;
- int nnodes_comm, i, nsend, local_pos, buf_pos, node, scount, rcount;
-
- commnode = atc->node_dest;
- buf_index = atc->buf_index;
+ int nnodes_comm, i, local_pos, buf_pos, node;
nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
- nsend = 0;
+ auto sendCount = atc->sendCount();
+ int nsend = 0;
for (i = 0; i < nnodes_comm; i++)
{
- buf_index[commnode[i]] = nsend;
- nsend += atc->count[commnode[i]];
+ const int commnode = atc->slabCommSetup[i].node_dest;
+ atc->slabCommSetup[commnode].buf_index = nsend;
+ nsend += sendCount[commnode];
}
if (bX)
{
- if (atc->count[atc->nodeid] + nsend != n)
+ if (sendCount[atc->nodeid] + nsend != x.ssize())
{
- gmx_fatal(FARGS, "%d particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
+ gmx_fatal(FARGS, "%zd particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
"This usually means that your system is not well equilibrated.",
- n - (atc->count[atc->nodeid] + nsend),
+ x.ssize() - (sendCount[atc->nodeid] + nsend),
pme->nodeid, 'x'+atc->dimind);
}
srenew(pme->bufr, pme->buf_nalloc);
}
- atc->n = atc->count[atc->nodeid];
+ int numAtoms = sendCount[atc->nodeid];
for (i = 0; i < nnodes_comm; i++)
{
- scount = atc->count[commnode[i]];
+ const int commnode = atc->slabCommSetup[i].node_dest;
+ int scount = sendCount[commnode];
/* Communicate the count */
if (debug)
{
fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n",
- atc->dimind, atc->nodeid, commnode[i], scount);
+ atc->dimind, atc->nodeid, commnode, scount);
}
pme_dd_sendrecv(atc, FALSE, i,
&scount, sizeof(int),
- &atc->rcount[i], sizeof(int));
- atc->n += atc->rcount[i];
+ &atc->slabCommSetup[i].rcount, sizeof(int));
+ numAtoms += atc->slabCommSetup[i].rcount;
}
- pme_realloc_atomcomm_things(atc);
+ atc->setNumAtoms(numAtoms);
}
local_pos = 0;
- for (i = 0; i < n; i++)
+ for (gmx::index i = 0; i < x.ssize(); i++)
{
node = atc->pd[i];
if (node == atc->nodeid)
/* Copy direct to the receive buffer */
if (bX)
{
- copy_rvec(x[i], atc->x[local_pos]);
+ copy_rvec(x[i], atc->xBuffer[local_pos]);
}
- atc->coefficient[local_pos] = data[i];
+ atc->coefficientBuffer[local_pos] = data[i];
local_pos++;
}
else
{
/* Copy to the send buffer */
+ int &buf_index = atc->slabCommSetup[node].buf_index;
if (bX)
{
- copy_rvec(x[i], pme->bufv[buf_index[node]]);
+ copy_rvec(x[i], pme->bufv[buf_index]);
}
- pme->bufr[buf_index[node]] = data[i];
- buf_index[node]++;
+ pme->bufr[buf_index] = data[i];
+ buf_index++;
}
}
buf_pos = 0;
for (i = 0; i < nnodes_comm; i++)
{
- scount = atc->count[commnode[i]];
- rcount = atc->rcount[i];
+ const int scount = atc->sendCount()[atc->slabCommSetup[i].node_dest];
+ const int rcount = atc->slabCommSetup[i].rcount;
if (scount > 0 || rcount > 0)
{
if (bX)
/* Communicate the coordinates */
pme_dd_sendrecv(atc, FALSE, i,
pme->bufv[buf_pos], scount*sizeof(rvec),
- atc->x[local_pos], rcount*sizeof(rvec));
+ atc->xBuffer[local_pos], rcount*sizeof(rvec));
}
/* Communicate the coefficients */
pme_dd_sendrecv(atc, FALSE, i,
pme->bufr+buf_pos, scount*sizeof(real),
- atc->coefficient+local_pos, rcount*sizeof(real));
+ atc->coefficientBuffer.data() + local_pos, rcount*sizeof(real));
buf_pos += scount;
- local_pos += atc->rcount[i];
+ local_pos += atc->slabCommSetup[i].rcount;
}
}
}
-void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
- int n, rvec *f,
+void dd_pmeredist_f(struct gmx_pme_t *pme, PmeAtomComm *atc,
+ gmx::ArrayRef<gmx::RVec> f,
gmx_bool bAddF)
{
- int *commnode, *buf_index;
- int nnodes_comm, local_pos, buf_pos, i, scount, rcount, node;
-
- commnode = atc->node_dest;
- buf_index = atc->buf_index;
+ int nnodes_comm, local_pos, buf_pos, i, node;
nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
- local_pos = atc->count[atc->nodeid];
+ local_pos = atc->sendCount()[atc->nodeid];
buf_pos = 0;
for (i = 0; i < nnodes_comm; i++)
{
- scount = atc->rcount[i];
- rcount = atc->count[commnode[i]];
+ const int commnode = atc->slabCommSetup[i].node_dest;
+ const int scount = atc->slabCommSetup[i].rcount;
+ const int rcount = atc->sendCount()[commnode];
if (scount > 0 || rcount > 0)
{
/* Communicate the forces */
pme->bufv[buf_pos], rcount*sizeof(rvec));
local_pos += scount;
}
- buf_index[commnode[i]] = buf_pos;
- buf_pos += rcount;
+ atc->slabCommSetup[commnode].buf_index = buf_pos;
+ buf_pos += rcount;
}
local_pos = 0;
if (bAddF)
{
- for (i = 0; i < n; i++)
+ for (gmx::index i = 0; i < f.ssize(); i++)
{
node = atc->pd[i];
if (node == atc->nodeid)
else
{
/* Add from the receive buffer */
- rvec_inc(f[i], pme->bufv[buf_index[node]]);
- buf_index[node]++;
+ rvec_inc(f[i], pme->bufv[atc->slabCommSetup[node].buf_index]);
+ atc->slabCommSetup[node].buf_index++;
}
}
}
else
{
- for (i = 0; i < n; i++)
+ for (gmx::index i = 0; i < f.ssize(); i++)
{
node = atc->pd[i];
if (node == atc->nodeid)
else
{
/* Copy from the receive buffer */
- copy_rvec(pme->bufv[buf_index[node]], f[i]);
- buf_index[node]++;
+ copy_rvec(pme->bufv[atc->slabCommSetup[node].buf_index], f[i]);
+ atc->slabCommSetup[node].buf_index++;
}
}
}
}
void
-do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr, int start, int homenr,
- gmx_bool bFirst, rvec x[], real *data)
+do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr,
+ gmx_bool bFirst, gmx::ArrayRef<const gmx::RVec> x, const real *data)
{
- int d;
- pme_atomcomm_t *atc;
- atc = &pme->atc[0];
-
- for (d = pme->ndecompdim - 1; d >= 0; d--)
+ for (int d = pme->ndecompdim - 1; d >= 0; d--)
{
- int n_d;
- rvec *x_d;
- real *param_d;
-
+ gmx::ArrayRef<const gmx::RVec> xRef;
+ const real *param_d;
if (d == pme->ndecompdim - 1)
{
- n_d = homenr;
- x_d = x + start;
+ /* Start out with the local coordinates and charges */
+ xRef = x;
param_d = data;
}
else
{
- n_d = pme->atc[d + 1].n;
- x_d = atc->x;
- param_d = atc->coefficient;
- }
- atc = &pme->atc[d];
- atc->npd = n_d;
- if (atc->npd > atc->pd_nalloc)
- {
- atc->pd_nalloc = over_alloc_dd(atc->npd);
- srenew(atc->pd, atc->pd_nalloc);
+ /* Redistribute the data collected along the previous dimension */
+ const PmeAtomComm &atc = pme->atc[d + 1];
+ xRef = atc.x;
+ param_d = atc.coefficient.data();
}
- pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
+ PmeAtomComm &atc = pme->atc[d];
+ atc.pd.resize(xRef.size());
+ pme_calc_pidx_wrapper(xRef, pme->recipbox, &atc);
/* Redistribute x (only once) and qA/c6A or qB/c6B */
if (DOMAINDECOMP(cr))
{
- dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
+ dd_pmeredist_pos_coeffs(pme, bFirst, xRef, param_d, &atc);
}
}
}
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+
+/*! \internal \file
+ *
+ * \brief Declares functions for redistributing atoms over the PME domains
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \ingroup module_ewald
+ */
+
#ifndef GMX_EWALD_PME_REDISTRIBUTE_H
#define GMX_EWALD_PME_REDISTRIBUTE_H
#include "pme_internal.h"
+//! Redistributes forces along the dimension gives by \p atc
void
-pme_realloc_atomcomm_things(pme_atomcomm_t *atc);
-
-void
-dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
- int n, rvec *f,
+dd_pmeredist_f(struct gmx_pme_t *pme, PmeAtomComm *atc,
+ gmx::ArrayRef<gmx::RVec> f,
gmx_bool bAddF);
+//! Redistributes coefficients and when \p bFirst=true coordinates over MPI ranks
void
-do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr, int start, int homenr,
- gmx_bool bFirst, rvec x[], real *data);
+do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr,
+ gmx_bool bFirst, gmx::ArrayRef<const gmx::RVec> x, const real *data);
#endif
/* TODO consider split of pme-spline from this file */
-static void calc_interpolation_idx(const gmx_pme_t *pme, const pme_atomcomm_t *atc,
+static void calc_interpolation_idx(const gmx_pme_t *pme, PmeAtomComm *atc,
int start, int grid_index, int end, int thread)
{
int i;
int *idxptr, tix, tiy, tiz;
- real *xptr, *fptr, tx, ty, tz;
+ const real *xptr;
+ real *fptr, tx, ty, tz;
real rxx, ryx, ryy, rzx, rzy, rzz;
int nx, ny, nz;
int *g2tx, *g2ty, *g2tz;
gmx_bool bThreads;
int *thread_idx = nullptr;
- thread_plist_t *tpl = nullptr;
int *tpl_n = nullptr;
int thread_i;
bThreads = (atc->nthread > 1);
if (bThreads)
{
- thread_idx = atc->thread_idx;
+ thread_idx = atc->thread_idx.data();
- tpl = &atc->thread_plist[thread];
- tpl_n = tpl->n;
+ tpl_n = atc->threadMap[thread].n;
for (i = 0; i < atc->nthread; i++)
{
tpl_n[i] = 0;
* over the threads, so we could actually allocate for that
* in pme_realloc_atomcomm_things.
*/
- if (tpl_n[atc->nthread-1] > tpl->nalloc)
- {
- tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
- srenew(tpl->i, tpl->nalloc);
- }
+ AtomToThreadMap &threadMap = atc->threadMap[thread];
+ threadMap.i.resize(tpl_n[atc->nthread - 1]);
/* Set tpl_n to the cumulative start */
for (i = atc->nthread-1; i >= 1; i--)
{
/* Fill our thread local array with indices sorted on thread */
for (i = start; i < end; i++)
{
- tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
+ threadMap.i[tpl_n[atc->thread_idx[i]]++] = i;
}
/* Now tpl_n contains the cummulative count again */
}
}
-static void make_thread_local_ind(const pme_atomcomm_t *atc,
+static void make_thread_local_ind(const PmeAtomComm *atc,
int thread, splinedata_t *spline)
{
int n, t, i, start, end;
- thread_plist_t *tpl;
/* Combine the indices made by each thread into one index */
start = 0;
for (t = 0; t < atc->nthread; t++)
{
- tpl = &atc->thread_plist[t];
+ const AtomToThreadMap &threadMap = atc->threadMap[t];
/* Copy our part (start - end) from the list of thread t */
if (thread > 0)
{
- start = tpl->n[thread-1];
+ start = threadMap.n[thread-1];
}
- end = tpl->n[thread];
+ end = threadMap.n[thread];
for (i = start; i < end; i++)
{
- spline->ind[n++] = tpl->i[i];
+ spline->ind[n++] = threadMap.i[i];
}
}
static void spread_coefficients_bsplines_thread(const pmegrid_t *pmegrid,
- const pme_atomcomm_t *atc,
+ const PmeAtomComm *atc,
splinedata_t *spline,
struct pme_spline_work gmx_unused *work)
{
/* spread coefficients from home atoms to local grid */
real *grid;
int i, nn, n, ithx, ithy, ithz, i0, j0, k0;
- int * idxptr;
+ const int *idxptr;
int order, norder, index_x, index_xy, index_xyz;
real valx, valxy, coefficient;
real *thx, *thy, *thz;
j0 = idxptr[YY] - offy;
k0 = idxptr[ZZ] - offz;
- thx = spline->theta[XX] + norder;
- thy = spline->theta[YY] + norder;
- thz = spline->theta[ZZ] + norder;
+ thx = spline->theta.coefficients[XX] + norder;
+ thy = spline->theta.coefficients[YY] + norder;
+ thz = spline->theta.coefficients[ZZ] + norder;
switch (order)
{
}
void spread_on_grid(const gmx_pme_t *pme,
- const pme_atomcomm_t *atc, const pmegrids_t *grids,
+ PmeAtomComm *atc, const pmegrids_t *grids,
gmx_bool bCalcSplines, gmx_bool bSpread,
real *fftgrid, gmx_bool bDoSplines, int grid_index)
{
{
int start, end;
- start = atc->n* thread /nthread;
- end = atc->n*(thread+1)/nthread;
+ start = atc->numAtoms()* thread /nthread;
+ end = atc->numAtoms()*(thread+1)/nthread;
/* Compute fftgrid index for all atoms,
* with help of some extra variables.
{
spline = &atc->spline[0];
- spline->n = atc->n;
+ spline->n = atc->numAtoms();
}
else
{
if (grids->nthread == 1)
{
/* One thread, we operate on all coefficients */
- spline->n = atc->n;
+ spline->n = atc->numAtoms();
}
else
{
if (bCalcSplines)
{
- make_bsplines(spline->theta, spline->dtheta, pme->pme_order,
- atc->fractx, spline->n, spline->ind, atc->coefficient, bDoSplines);
+ make_bsplines(spline->theta.coefficients,
+ spline->dtheta.coefficients,
+ pme->pme_order,
+ as_rvec_array(atc->fractx.data()), spline->n, spline->ind.data(),
+ atc->coefficient.data(), bDoSplines);
}
if (bSpread)
void
spread_on_grid(const gmx_pme_t *pme,
- const pme_atomcomm_t *atc, const pmegrids_t *grids,
+ PmeAtomComm *atc, const pmegrids_t *grids,
gmx_bool bCalcSplines, gmx_bool bSpread,
real *fftgrid, gmx_bool bDoSplines, int grid_index);
#include "gromacs/ewald/pme_gpu_internal.h"
#include "gromacs/ewald/pme_grid.h"
#include "gromacs/ewald/pme_internal.h"
+#include "gromacs/ewald/pme_redistribute.h"
#include "gromacs/ewald/pme_solve.h"
#include "gromacs/ewald/pme_spread.h"
#include "gromacs/fft/parallel_3dfft.h"
CodePath mode,
const gmx_device_info_t *gpuInfo,
PmeGpuProgramHandle pmeGpuProgram,
- size_t atomCount,
const Matrix3x3 &box,
real ewaldCoeff_q = 1.0f,
real ewaldCoeff_lj = 1.0f
const auto runMode = (mode == CodePath::CPU) ? PmeRunMode::CPU : PmeRunMode::Mixed;
t_commrec dummyCommrec = {0};
NumPmeDomains numPmeDomains = { 1, 1 };
- gmx_pme_t *pmeDataRaw = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, atomCount, false, false, true,
+ gmx_pme_t *pmeDataRaw = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true,
ewaldCoeff_q, ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, pmeGpuProgram, dummyLogger);
PmeSafePointer pme(pmeDataRaw); // taking ownership
real ewaldCoeff_lj
)
{
- return pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, 0, box, ewaldCoeff_q, ewaldCoeff_lj);
+ return pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
// hiding the fact that PME actually needs to know the number of atoms in advance
}
{
const index atomCount = coordinates.size();
GMX_RELEASE_ASSERT(atomCount == charges.ssize(), "Mismatch in atom data");
- PmeSafePointer pmeSafe = pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, atomCount, box);
- pme_atomcomm_t *atc = nullptr;
+ PmeSafePointer pmeSafe = pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, box);
+ PmeAtomComm *atc = nullptr;
switch (mode)
{
case CodePath::CPU:
atc = &(pmeSafe->atc[0]);
- atc->x = const_cast<rvec *>(as_rvec_array(coordinates.data()));
- atc->coefficient = const_cast<real *>(charges.data());
+ atc->x = coordinates;
+ atc->coefficient = charges;
+ gmx_pme_reinit_atoms(pmeSafe.get(), atomCount, charges.data());
/* With decomposition there would be more boilerplate atc code here, e.g. do_redist_pos_coeffs */
break;
case CodePath::GPU:
+ // TODO: Avoid use of atc in the GPU code path
+ atc = &(pmeSafe->atc[0]);
+ // We need to set atc->n for passing the size in the tests
+ atc->setNumAtoms(atomCount);
gmx_pme_reinit_atoms(pmeSafe.get(), atomCount, charges.data());
pme_gpu_copy_input_coordinates(pmeSafe->gpu, as_rvec_array(coordinates.data()));
break;
bool computeSplines, bool spreadCharges)
{
GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
- pme_atomcomm_t *atc = &(pme->atc[0]);
+ PmeAtomComm *atc = &(pme->atc[0]);
const size_t gridIndex = 0;
const bool computeSplinesForZeroCharges = true;
real *fftgrid = spreadCharges ? pme->fftgrid[gridIndex] : nullptr;
static real *pmeGetSplineDataInternal(const gmx_pme_t *pme, PmeSplineDataType type, int dimIndex)
{
GMX_ASSERT((0 <= dimIndex) && (dimIndex < DIM), "Invalid dimension index");
- const pme_atomcomm_t *atc = &(pme->atc[0]);
+ const PmeAtomComm *atc = &(pme->atc[0]);
const size_t threadIndex = 0;
real *splineBuffer = nullptr;
switch (type)
{
case PmeSplineDataType::Values:
- splineBuffer = atc->spline[threadIndex].theta[dimIndex];
+ splineBuffer = atc->spline[threadIndex].theta.coefficients[dimIndex];
break;
case PmeSplineDataType::Derivatives:
- splineBuffer = atc->spline[threadIndex].dtheta[dimIndex];
+ splineBuffer = atc->spline[threadIndex].dtheta.coefficients[dimIndex];
break;
default:
void pmePerformGather(gmx_pme_t *pme, CodePath mode,
PmeForceOutputHandling inputTreatment, ForcesVector &forces)
{
- pme_atomcomm_t *atc = &(pme->atc[0]);
- const index atomCount = atc->n;
+ PmeAtomComm *atc = &(pme->atc[0]);
+ const index atomCount = atc->numAtoms();
GMX_RELEASE_ASSERT(forces.ssize() == atomCount, "Invalid force buffer size");
const bool forceReductionWithInput = (inputTreatment == PmeForceOutputHandling::ReduceWithInput);
const real scale = 1.0;
switch (mode)
{
case CodePath::CPU:
- atc->f = as_rvec_array(forces.begin());
+ atc->f = forces;
if (atc->nthread == 1)
{
// something which is normally done in serial spline computation (make_thread_local_ind())
void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
const SplineParamsDimVector &splineValues, PmeSplineDataType type, int dimIndex)
{
- const pme_atomcomm_t *atc = &(pme->atc[0]);
- const index atomCount = atc->n;
+ const PmeAtomComm *atc = &(pme->atc[0]);
+ const index atomCount = atc->numAtoms();
const index pmeOrder = pme->pme_order;
const index dimSize = pmeOrder * atomCount;
GMX_RELEASE_ASSERT(dimSize == splineValues.ssize(), "Mismatch in spline data");
}
//! Setting gridline indices to be used in spread/gather
-void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
+void pmeSetGridLineIndices(gmx_pme_t *pme, CodePath mode,
const GridLineIndicesVector &gridLineIndices)
{
- const pme_atomcomm_t *atc = &(pme->atc[0]);
- const index atomCount = atc->n;
+ PmeAtomComm *atc = &(pme->atc[0]);
+ const index atomCount = atc->numAtoms();
GMX_RELEASE_ASSERT(atomCount == gridLineIndices.ssize(), "Mismatch in gridline indices size");
IVec paddedGridSizeUnused, gridSize(0, 0, 0);
break;
case CodePath::CPU:
- // incompatible IVec and ivec assignment?
- //std::copy(gridLineIndices.begin(), gridLineIndices.end(), atc->idx);
- memcpy(atc->idx, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0]));
+ atc->idx.resize(gridLineIndices.size());
+ std::copy(gridLineIndices.begin(), gridLineIndices.end(), atc->idx.begin());
break;
-
default:
GMX_THROW(InternalError("Test not implemented for this mode"));
}
PmeSplineDataType type, int dimIndex)
{
GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
- const pme_atomcomm_t *atc = &(pme->atc[0]);
- const size_t atomCount = atc->n;
+ const PmeAtomComm *atc = &(pme->atc[0]);
+ const size_t atomCount = atc->numAtoms();
const size_t pmeOrder = pme->pme_order;
const size_t dimSize = pmeOrder * atomCount;
GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t *pme, CodePath mode)
{
GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
- const pme_atomcomm_t *atc = &(pme->atc[0]);
- const size_t atomCount = atc->n;
+ const PmeAtomComm *atc = &(pme->atc[0]);
+ const size_t atomCount = atc->numAtoms();
GridLineIndicesVector gridLineIndices;
switch (mode)
break;
case CodePath::CPU:
- gridLineIndices = arrayRefFromArray(reinterpret_cast<IVec *>(atc->idx), atomCount);
+ gridLineIndices = atc->idx;
break;
default:
void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
const SplineParamsDimVector &splineValues, PmeSplineDataType type, int dimIndex);
//! Setting gridline indices be used in spread/gather
-void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
+void pmeSetGridLineIndices(gmx_pme_t *pme, CodePath mode,
const GridLineIndicesVector &gridLineIndices);
//! Setting real grid to be used in gather
void pmeSetRealGrid(const gmx_pme_t *pme, CodePath mode,
}
}
-void do_force_lowlevel(t_forcerec *fr,
- const t_inputrec *ir,
- const t_idef *idef,
- const t_commrec *cr,
- const gmx_multisim_t *ms,
- t_nrnb *nrnb,
- gmx_wallcycle_t wcycle,
- const t_mdatoms *md,
- rvec *x,
- history_t *hist,
- rvec *forceForUseWithShiftForces,
- gmx::ForceWithVirial *forceWithVirial,
- gmx_enerdata_t *enerd,
- t_fcdata *fcd,
- const matrix box,
- const real *lambda,
- const t_graph *graph,
- const rvec *mu_tot,
- const int flags,
- const DDBalanceRegionHandler &ddBalanceRegionHandler)
+void
+do_force_lowlevel(t_forcerec *fr,
+ const t_inputrec *ir,
+ const t_idef *idef,
+ const t_commrec *cr,
+ const gmx_multisim_t *ms,
+ t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle,
+ const t_mdatoms *md,
+ gmx::ArrayRefWithPadding<gmx::RVec> coordinates,
+ history_t *hist,
+ rvec *forceForUseWithShiftForces,
+ gmx::ForceWithVirial *forceWithVirial,
+ gmx_enerdata_t *enerd,
+ t_fcdata *fcd,
+ const matrix box,
+ const real *lambda,
+ const t_graph *graph,
+ const rvec *mu_tot,
+ const int flags,
+ const DDBalanceRegionHandler &ddBalanceRegionHandler)
{
+ // TODO: Replace all uses of x by const coordinates
+ rvec *x = as_rvec_array(coordinates.paddedArrayRef().data());
+
/* do QMMM first if requested */
if (fr->bQMMM)
{
wallcycle_start(wcycle, ewcPMEMESH);
status = gmx_pme_do(fr->pmedata,
- 0, md->homenr - fr->n_tpi,
- x,
- as_rvec_array(forceWithVirial->force_.data()),
+ gmx::constArrayRefFromArray(coordinates.unpaddedConstArrayRef().data(), md->homenr - fr->n_tpi),
+ forceWithVirial->force_,
md->chargeA, md->chargeB,
md->sqrt_c6A, md->sqrt_c6B,
md->sigmaA, md->sigmaB,
/* Determine the PME grid energy of the test molecule
* with the PME grid potential of the other charges.
*/
- gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
- x + md->homenr - fr->n_tpi,
- md->chargeA + md->homenr - fr->n_tpi,
+ gmx_pme_calc_energy(fr->pmedata,
+ coordinates.unpaddedConstArrayRef().subArray(md->homenr - fr->n_tpi, fr->n_tpi),
+ gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
&Vlr_q);
}
}
void
-do_force_lowlevel(t_forcerec *fr,
- const t_inputrec *ir,
- const t_idef *idef,
- const t_commrec *cr,
- const gmx_multisim_t *ms,
- t_nrnb *nrnb,
- gmx_wallcycle *wcycle,
- const t_mdatoms *md,
- rvec *x,
- history_t *hist,
- rvec *f_shortrange,
- gmx::ForceWithVirial *forceWithVirial,
- gmx_enerdata_t *enerd,
- t_fcdata *fcd,
- const matrix box,
- const real *lambda,
- const t_graph *graph,
- const rvec *mu_tot,
- int flags,
- const DDBalanceRegionHandler &ddBalanceRegionHandler);
+do_force_lowlevel(t_forcerec *fr,
+ const t_inputrec *ir,
+ const t_idef *idef,
+ const t_commrec *cr,
+ const gmx_multisim_t *ms,
+ t_nrnb *nrnb,
+ gmx_wallcycle *wcycle,
+ const t_mdatoms *md,
+ gmx::ArrayRefWithPadding<gmx::RVec> coordinates,
+ history_t *hist,
+ rvec *f_shortrange,
+ gmx::ForceWithVirial *forceWithVirial,
+ gmx_enerdata_t *enerd,
+ t_fcdata *fcd,
+ const matrix box,
+ const real *lambda,
+ const t_graph *graph,
+ const rvec *mu_tot,
+ int flags,
+ const DDBalanceRegionHandler &ddBalanceRegionHandler);
/* Call all the force routines */
#endif
/* Compute the bonded and non-bonded energies and optionally forces */
do_force_lowlevel(fr, inputrec, &(top->idef),
cr, ms, nrnb, wcycle, mdatoms,
- as_rvec_array(x.unpaddedArrayRef().data()), hist, forceOut.f, &forceOut.forceWithVirial, enerd, fcd,
+ x, hist, forceOut.f, &forceOut.forceWithVirial, enerd, fcd,
box, lambda.data(), graph, fr->mu_tot,
flags,
ddBalanceRegionHandler);
pmedata = gmx_pme_init(cr,
getNumPmeDomains(cr->dd),
inputrec,
- mtop.natoms, nChargePerturbed != 0, nTypePerturbed != 0,
+ nChargePerturbed != 0, nTypePerturbed != 0,
mdrunOptions.reproducible,
ewaldcoeff_q, ewaldcoeff_lj,
gmx_omp_nthreads_get(emntPME),