#include "config.h"
-#include <assert.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
+#include <cassert>
+#include <climits>
#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
#include <algorithm>
+#include "gromacs/compat/make_unique.h"
+#include "gromacs/domdec/collect.h"
+#include "gromacs/domdec/dlbtiming.h"
#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/ga2la.h"
+#include "gromacs/domdec/localatomsetmanager.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/pdbio.h"
#include "gromacs/math/vectypes.h"
#include "gromacs/mdlib/calc_verletbuf.h"
#include "gromacs/mdlib/constr.h"
-#include "gromacs/mdlib/force.h"
+#include "gromacs/mdlib/constraintrange.h"
#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/genborn.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/lincs.h"
#include "gromacs/mdlib/mdatoms.h"
#include "gromacs/mdlib/mdrun.h"
#include "gromacs/mdlib/mdsetup.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
-#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/swap/swapcoords.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/block.h"
#include "gromacs/topology/idef.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/qsort_threadsafe.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/strconvert.h"
#include "gromacs/utility/stringutil.h"
+#include "atomdistribution.h"
+#include "cellsizes.h"
+#include "distribute.h"
#include "domdec_constraints.h"
#include "domdec_internal.h"
#include "domdec_vsite.h"
-
-#define DDRANK(dd, rank) (rank)
-#define DDMASTERRANK(dd) (dd->masterrank)
-
-struct gmx_domdec_master_t
-{
- /* The cell boundaries */
- real **cell_x;
- /* The global charge group division */
- int *ncg; /* Number of home charge groups for each node */
- int *index; /* Index of nnodes+1 into cg */
- int *cg; /* Global charge group index */
- int *nat; /* Number of home atoms for each node. */
- int *ibuf; /* Buffer for communication */
- rvec *vbuf; /* Buffer for state scattering and gathering */
-};
+#include "redistribute.h"
+#include "utility.h"
#define DD_NLOAD_MAX 9
-const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
+static const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2
{2, 5, 6},
{3, 5, 7}};
-/* Factors used to avoid problems due to rounding issues */
-#define DD_CELL_MARGIN 1.0001
-#define DD_CELL_MARGIN2 1.00005
-/* Factor to account for pressure scaling during nstlist steps */
-#define DD_PRES_SCALE_MARGIN 1.02
-
/* Turn on DLB when the load imbalance causes this amount of total loss.
* There is a bit of overhead with DLB and it's difficult to achieve
* a load imbalance of less than 2% with DLB.
/* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
#define DD_PERF_LOSS_WARN 0.05
-#define DD_CELL_F_SIZE(dd, di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
-
-/* Use separate MPI send and receive commands
- * when nnodes <= GMX_DD_NNODES_SENDRECV.
- * This saves memory (and some copying for small nnodes).
- * For high parallelization scatter and gather calls are used.
- */
-#define GMX_DD_NNODES_SENDRECV 4
-
/* We check if to turn on DLB at the first and every 100 DD partitionings.
* With large imbalance DLB will turn on at the first step, so we can
}
*/
-/* This order is required to minimize the coordinate communication in PME
- * which uses decomposition in the x direction.
- */
-#define dd_index(n, i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
-
-static void ddindex2xyz(ivec nc, int ind, ivec xyz)
+static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
{
xyz[XX] = ind / (nc[YY]*nc[ZZ]);
xyz[YY] = (ind / nc[ZZ]) % nc[YY];
}
else
{
- if (i >= dd->comm->nat[ddnatNR-1])
+ if (i >= dd->comm->atomRanges.numAtomsTotal())
{
- gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->nat[ddnatNR-1]);
+ gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
}
- atnr = dd->gatindex[i] + 1;
+ atnr = dd->globalAtomIndices[i] + 1;
}
return atnr;
return &dd->comm->cgs_gl;
}
-/*! \brief Returns true if the DLB state indicates that the balancer is on. */
-static bool isDlbOn(const gmx_domdec_comm_t *comm)
-{
- return (comm->dlbState == edlbsOnCanTurnOff ||
- comm->dlbState == edlbsOnUser);
-}
-/*! \brief Returns true if the DLB state indicates that the balancer is off/disabled.
- */
-static bool isDlbDisabled(const gmx_domdec_comm_t *comm)
-{
- return (comm->dlbState == edlbsOffUser ||
- comm->dlbState == edlbsOffForever);
-}
-
-static void vec_rvec_init(vec_rvec_t *v)
-{
- v->nalloc = 0;
- v->v = nullptr;
-}
-
-static void vec_rvec_check_alloc(vec_rvec_t *v, int n)
-{
- if (n > v->nalloc)
- {
- v->nalloc = over_alloc_dd(n);
- srenew(v->v, v->nalloc);
- }
-}
-
void dd_store_state(gmx_domdec_t *dd, t_state *state)
{
int i;
state->cg_gl.resize(dd->ncg_home);
for (i = 0; i < dd->ncg_home; i++)
{
- state->cg_gl[i] = dd->index_gl[i];
+ state->cg_gl[i] = dd->globalAtomGroupIndices[i];
}
state->ddp_count_cg_gl = dd->ddp_count;
}
}
+int dd_numHomeAtoms(const gmx_domdec_t &dd)
+{
+ return dd.comm->atomRanges.numHomeAtoms();
+}
+
int dd_natoms_mdatoms(const gmx_domdec_t *dd)
{
/* We currently set mdatoms entries for all atoms:
* local + non-local + communicated for vsite + constraints
*/
- return dd->comm->nat[ddnatNR - 1];
+ return dd->comm->atomRanges.numAtomsTotal();
}
int dd_natoms_vsite(const gmx_domdec_t *dd)
{
- return dd->comm->nat[ddnatVSITE];
+ return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
}
void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
{
- *at_start = dd->comm->nat[ddnatCON-1];
- *at_end = dd->comm->nat[ddnatCON];
+ *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
+ *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
}
-void dd_move_x(gmx_domdec_t *dd, matrix box, rvec x[])
+void dd_move_x(gmx_domdec_t *dd,
+ matrix box,
+ gmx::ArrayRef<gmx::RVec> x,
+ gmx_wallcycle *wcycle)
{
- int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
- int *index, *cgindex;
+ wallcycle_start(wcycle, ewcMOVEX);
+
+ int nzone, nat_tot;
gmx_domdec_comm_t *comm;
gmx_domdec_comm_dim_t *cd;
- gmx_domdec_ind_t *ind;
- rvec shift = {0, 0, 0}, *buf, *rbuf;
+ rvec shift = {0, 0, 0};
gmx_bool bPBC, bScrew;
comm = dd->comm;
- cgindex = dd->cgindex;
-
- buf = comm->vbuf.v;
+ const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
nzone = 1;
- nat_tot = dd->nat_home;
- for (d = 0; d < dd->ndim; d++)
+ nat_tot = comm->atomRanges.numHomeAtoms();
+ for (int d = 0; d < dd->ndim; d++)
{
bPBC = (dd->ci[dd->dim[d]] == 0);
bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
copy_rvec(box[dd->dim[d]], shift);
}
cd = &comm->cd[d];
- for (p = 0; p < cd->np; p++)
+ for (const gmx_domdec_ind_t &ind : cd->ind)
{
- ind = &cd->ind[p];
- index = ind->index;
- n = 0;
+ DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
+ gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
+ int n = 0;
if (!bPBC)
{
- for (i = 0; i < ind->nsend[nzone]; i++)
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
- copy_rvec(x[j], buf[n]);
+ sendBuffer[n] = x[j];
n++;
}
}
}
else if (!bScrew)
{
- for (i = 0; i < ind->nsend[nzone]; i++)
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
/* We need to shift the coordinates */
- rvec_add(x[j], shift, buf[n]);
+ for (int d = 0; d < DIM; d++)
+ {
+ sendBuffer[n][d] = x[j][d] + shift[d];
+ }
n++;
}
}
}
else
{
- for (i = 0; i < ind->nsend[nzone]; i++)
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
/* Shift x */
- buf[n][XX] = x[j][XX] + shift[XX];
+ sendBuffer[n][XX] = x[j][XX] + shift[XX];
/* Rotate y and z.
* This operation requires a special shift force
* treatment, which is performed in calc_vir.
*/
- buf[n][YY] = box[YY][YY] - x[j][YY];
- buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
+ sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
+ sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
n++;
}
}
}
- if (cd->bInPlace)
+ DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
+
+ gmx::ArrayRef<gmx::RVec> receiveBuffer;
+ if (cd->receiveInPlace)
{
- rbuf = x + nat_tot;
+ receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
}
else
{
- rbuf = comm->vbuf2.v;
+ receiveBuffer = receiveBufferAccess.buffer;
}
/* Send and receive the coordinates */
- dd_sendrecv_rvec(dd, d, dddirBackward,
- buf, ind->nsend[nzone+1],
- rbuf, ind->nrecv[nzone+1]);
- if (!cd->bInPlace)
+ ddSendrecv(dd, d, dddirBackward,
+ sendBuffer, receiveBuffer);
+
+ if (!cd->receiveInPlace)
{
- j = 0;
- for (zone = 0; zone < nzone; zone++)
+ int j = 0;
+ for (int zone = 0; zone < nzone; zone++)
{
- for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
+ for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
{
- copy_rvec(rbuf[j], x[i]);
- j++;
+ x[i] = receiveBuffer[j++];
}
}
}
- nat_tot += ind->nrecv[nzone+1];
+ nat_tot += ind.nrecv[nzone+1];
}
nzone += nzone;
}
+
+ wallcycle_stop(wcycle, ewcMOVEX);
}
-void dd_move_f(gmx_domdec_t *dd, rvec f[], rvec *fshift)
+void dd_move_f(gmx_domdec_t *dd,
+ gmx::ArrayRef<gmx::RVec> f,
+ rvec *fshift,
+ gmx_wallcycle *wcycle)
{
- int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
- int *index, *cgindex;
+ wallcycle_start(wcycle, ewcMOVEF);
+
+ int nzone, nat_tot;
gmx_domdec_comm_t *comm;
gmx_domdec_comm_dim_t *cd;
- gmx_domdec_ind_t *ind;
- rvec *buf, *sbuf;
ivec vis;
int is;
gmx_bool bShiftForcesNeedPbc, bScrew;
comm = dd->comm;
- cgindex = dd->cgindex;
-
- buf = comm->vbuf.v;
+ const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
nzone = comm->zones.n/2;
- nat_tot = dd->nat_tot;
- for (d = dd->ndim-1; d >= 0; d--)
+ nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
+ for (int d = dd->ndim-1; d >= 0; d--)
{
/* Only forces in domains near the PBC boundaries need to
consider PBC in the treatment of fshift */
is = IVEC2IS(vis);
cd = &comm->cd[d];
- for (p = cd->np-1; p >= 0; p--)
+ for (int p = cd->numPulses() - 1; p >= 0; p--)
{
- ind = &cd->ind[p];
- nat_tot -= ind->nrecv[nzone+1];
- if (cd->bInPlace)
+ const gmx_domdec_ind_t &ind = cd->ind[p];
+ DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
+ gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
+
+ nat_tot -= ind.nrecv[nzone+1];
+
+ DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
+
+ gmx::ArrayRef<gmx::RVec> sendBuffer;
+ if (cd->receiveInPlace)
{
- sbuf = f + nat_tot;
+ sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
}
else
{
- sbuf = comm->vbuf2.v;
- j = 0;
- for (zone = 0; zone < nzone; zone++)
+ sendBuffer = sendBufferAccess.buffer;
+ int j = 0;
+ for (int zone = 0; zone < nzone; zone++)
{
- for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
+ for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
{
- copy_rvec(f[i], sbuf[j]);
- j++;
+ sendBuffer[j++] = f[i];
}
}
}
/* Communicate the forces */
- dd_sendrecv_rvec(dd, d, dddirForward,
- sbuf, ind->nrecv[nzone+1],
- buf, ind->nsend[nzone+1]);
- index = ind->index;
+ ddSendrecv(dd, d, dddirForward,
+ sendBuffer, receiveBuffer);
/* Add the received forces */
- n = 0;
+ int n = 0;
if (!bShiftForcesNeedPbc)
{
- for (i = 0; i < ind->nsend[nzone]; i++)
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
- rvec_inc(f[j], buf[n]);
+ for (int d = 0; d < DIM; d++)
+ {
+ f[j][d] += receiveBuffer[n][d];
+ }
n++;
}
}
{
/* fshift should always be defined if this function is
* called when bShiftForcesNeedPbc is true */
- assert(NULL != fshift);
- for (i = 0; i < ind->nsend[nzone]; i++)
+ assert(nullptr != fshift);
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
- rvec_inc(f[j], buf[n]);
+ for (int d = 0; d < DIM; d++)
+ {
+ f[j][d] += receiveBuffer[n][d];
+ }
/* Add this force to the shift force */
- rvec_inc(fshift[is], buf[n]);
+ for (int d = 0; d < DIM; d++)
+ {
+ fshift[is][d] += receiveBuffer[n][d];
+ }
n++;
}
}
}
else
{
- for (i = 0; i < ind->nsend[nzone]; i++)
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
/* Rotate the force */
- f[j][XX] += buf[n][XX];
- f[j][YY] -= buf[n][YY];
- f[j][ZZ] -= buf[n][ZZ];
+ f[j][XX] += receiveBuffer[n][XX];
+ f[j][YY] -= receiveBuffer[n][YY];
+ f[j][ZZ] -= receiveBuffer[n][ZZ];
if (fshift)
{
/* Add this force to the shift force */
- rvec_inc(fshift[is], buf[n]);
+ for (int d = 0; d < DIM; d++)
+ {
+ fshift[is][d] += receiveBuffer[n][d];
+ }
}
n++;
}
}
nzone /= 2;
}
+ wallcycle_stop(wcycle, ewcMOVEF);
+}
+
+/* Convenience function for extracting a real buffer from an rvec buffer
+ *
+ * To reduce the number of temporary communication buffers and avoid
+ * cache polution, we reuse gmx::RVec buffers for storing reals.
+ * This functions return a real buffer reference with the same number
+ * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
+ */
+static gmx::ArrayRef<real>
+realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
+{
+ return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
+ arrayRef.size());
}
void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
{
- int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
- int *index, *cgindex;
+ int nzone, nat_tot;
gmx_domdec_comm_t *comm;
gmx_domdec_comm_dim_t *cd;
- gmx_domdec_ind_t *ind;
- real *buf, *rbuf;
comm = dd->comm;
- cgindex = dd->cgindex;
-
- buf = &comm->vbuf.v[0][0];
+ const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
nzone = 1;
- nat_tot = dd->nat_home;
- for (d = 0; d < dd->ndim; d++)
+ nat_tot = comm->atomRanges.numHomeAtoms();
+ for (int d = 0; d < dd->ndim; d++)
{
cd = &comm->cd[d];
- for (p = 0; p < cd->np; p++)
+ for (const gmx_domdec_ind_t &ind : cd->ind)
{
- ind = &cd->ind[p];
- index = ind->index;
- n = 0;
- for (i = 0; i < ind->nsend[nzone]; i++)
+ /* Note: We provision for RVec instead of real, so a factor of 3
+ * more than needed. The buffer actually already has this size
+ * and we pass a plain pointer below, so this does not matter.
+ */
+ DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
+ gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
+ int n = 0;
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
- buf[n] = v[j];
- n++;
+ sendBuffer[n++] = v[j];
}
}
- if (cd->bInPlace)
+ DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
+
+ gmx::ArrayRef<real> receiveBuffer;
+ if (cd->receiveInPlace)
{
- rbuf = v + nat_tot;
+ receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
}
else
{
- rbuf = &comm->vbuf2.v[0][0];
+ receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
}
- /* Send and receive the coordinates */
- dd_sendrecv_real(dd, d, dddirBackward,
- buf, ind->nsend[nzone+1],
- rbuf, ind->nrecv[nzone+1]);
- if (!cd->bInPlace)
+ /* Send and receive the data */
+ ddSendrecv(dd, d, dddirBackward,
+ sendBuffer, receiveBuffer);
+ if (!cd->receiveInPlace)
{
- j = 0;
- for (zone = 0; zone < nzone; zone++)
+ int j = 0;
+ for (int zone = 0; zone < nzone; zone++)
{
- for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
+ for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
{
- v[i] = rbuf[j];
- j++;
+ v[i] = receiveBuffer[j++];
}
}
}
- nat_tot += ind->nrecv[nzone+1];
+ nat_tot += ind.nrecv[nzone+1];
}
nzone += nzone;
}
void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
{
- int nzone, nat_tot, n, d, p, i, j, at0, at1, zone;
- int *index, *cgindex;
+ int nzone, nat_tot;
gmx_domdec_comm_t *comm;
gmx_domdec_comm_dim_t *cd;
- gmx_domdec_ind_t *ind;
- real *buf, *sbuf;
comm = dd->comm;
- cgindex = dd->cgindex;
-
- buf = &comm->vbuf.v[0][0];
+ const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
nzone = comm->zones.n/2;
- nat_tot = dd->nat_tot;
- for (d = dd->ndim-1; d >= 0; d--)
+ nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
+ for (int d = dd->ndim-1; d >= 0; d--)
{
cd = &comm->cd[d];
- for (p = cd->np-1; p >= 0; p--)
+ for (int p = cd->numPulses() - 1; p >= 0; p--)
{
- ind = &cd->ind[p];
- nat_tot -= ind->nrecv[nzone+1];
- if (cd->bInPlace)
+ const gmx_domdec_ind_t &ind = cd->ind[p];
+
+ /* Note: We provision for RVec instead of real, so a factor of 3
+ * more than needed. The buffer actually already has this size
+ * and we typecast, so this works as intended.
+ */
+ DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
+ gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
+ nat_tot -= ind.nrecv[nzone + 1];
+
+ DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
+
+ gmx::ArrayRef<real> sendBuffer;
+ if (cd->receiveInPlace)
{
- sbuf = v + nat_tot;
+ sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
}
else
{
- sbuf = &comm->vbuf2.v[0][0];
- j = 0;
- for (zone = 0; zone < nzone; zone++)
+ sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
+ int j = 0;
+ for (int zone = 0; zone < nzone; zone++)
{
- for (i = ind->cell2at0[zone]; i < ind->cell2at1[zone]; i++)
+ for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
{
- sbuf[j] = v[i];
- j++;
+ sendBuffer[j++] = v[i];
}
}
}
/* Communicate the forces */
- dd_sendrecv_real(dd, d, dddirForward,
- sbuf, ind->nrecv[nzone+1],
- buf, ind->nsend[nzone+1]);
- index = ind->index;
+ ddSendrecv(dd, d, dddirForward,
+ sendBuffer, receiveBuffer);
/* Add the received forces */
- n = 0;
- for (i = 0; i < ind->nsend[nzone]; i++)
+ int n = 0;
+ for (int g : ind.index)
{
- at0 = cgindex[index[i]];
- at1 = cgindex[index[i]+1];
- for (j = at0; j < at1; j++)
+ for (int j : atomGrouping.block(g))
{
- v[j] += buf[n];
+ v[j] += receiveBuffer[n];
n++;
}
}
zone->p1_0, zone->p1_1);
}
+/* Using the home grid size as input in cell_ns_x0 and cell_ns_x1
+ * takes the extremes over all home and remote zones in the halo
+ * and returns the results in cell_ns_x0 and cell_ns_x1.
+ * Note: only used with the group cut-off scheme.
+ */
+static void dd_move_cellx(gmx_domdec_t *dd,
+ const gmx_ddbox_t *ddbox,
+ rvec cell_ns_x0,
+ rvec cell_ns_x1)
+{
+ constexpr int c_ddZoneCommMaxNumZones = 5;
+ gmx_ddzone_t buf_s[c_ddZoneCommMaxNumZones];
+ gmx_ddzone_t buf_r[c_ddZoneCommMaxNumZones];
+ gmx_ddzone_t buf_e[c_ddZoneCommMaxNumZones];
+ gmx_domdec_comm_t *comm = dd->comm;
-#define DDZONECOMM_MAXZONE 5
-#define DDZONECOMM_BUFSIZE 3
-
-static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
- int ddimind, int direction,
- gmx_ddzone_t *buf_s, int n_s,
- gmx_ddzone_t *buf_r, int n_r)
-{
-#define ZBS DDZONECOMM_BUFSIZE
- rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
- rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
- int i;
-
- for (i = 0; i < n_s; i++)
+ rvec extr_s[2];
+ rvec extr_r[2];
+ for (int d = 1; d < dd->ndim; d++)
{
- vbuf_s[i*ZBS ][0] = buf_s[i].min0;
- vbuf_s[i*ZBS ][1] = buf_s[i].max1;
- vbuf_s[i*ZBS ][2] = buf_s[i].min1;
- vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
- vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
- vbuf_s[i*ZBS+1][2] = 0;
- vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
- vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
- vbuf_s[i*ZBS+2][2] = 0;
- }
+ int dim = dd->dim[d];
+ gmx_ddzone_t &zp = (d == 1) ? comm->zone_d1[0] : comm->zone_d2[0][0];
- dd_sendrecv_rvec(dd, ddimind, direction,
- vbuf_s, n_s*ZBS,
- vbuf_r, n_r*ZBS);
-
- for (i = 0; i < n_r; i++)
- {
- buf_r[i].min0 = vbuf_r[i*ZBS ][0];
- buf_r[i].max1 = vbuf_r[i*ZBS ][1];
- buf_r[i].min1 = vbuf_r[i*ZBS ][2];
- buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
- buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
- buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
- buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
+ /* Copy the base sizes of the home zone */
+ zp.min0 = cell_ns_x0[dim];
+ zp.max1 = cell_ns_x1[dim];
+ zp.min1 = cell_ns_x1[dim];
+ zp.mch0 = cell_ns_x0[dim];
+ zp.mch1 = cell_ns_x1[dim];
+ zp.p1_0 = cell_ns_x0[dim];
+ zp.p1_1 = cell_ns_x1[dim];
}
-#undef ZBS
-}
-
-static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
- rvec cell_ns_x0, rvec cell_ns_x1)
-{
- int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
- gmx_ddzone_t *zp;
- gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
- gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
- gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
- rvec extr_s[2], extr_r[2];
- rvec dh;
- real dist_d, c = 0, det;
- gmx_domdec_comm_t *comm;
- gmx_bool bPBC, bUse;
-
- comm = dd->comm;
-
- for (d = 1; d < dd->ndim; d++)
- {
- dim = dd->dim[d];
- zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
- zp->min0 = cell_ns_x0[dim];
- zp->max1 = cell_ns_x1[dim];
- zp->min1 = cell_ns_x1[dim];
- zp->mch0 = cell_ns_x0[dim];
- zp->mch1 = cell_ns_x1[dim];
- zp->p1_0 = cell_ns_x0[dim];
- zp->p1_1 = cell_ns_x1[dim];
- }
+ gmx::ArrayRef<DDCellsizesWithDlb> cellsizes = comm->cellsizesWithDlb;
- for (d = dd->ndim-2; d >= 0; d--)
+ /* Loop backward over the dimensions and aggregate the extremes
+ * of the cell sizes.
+ */
+ for (int d = dd->ndim - 2; d >= 0; d--)
{
- dim = dd->dim[d];
- bPBC = (dim < ddbox->npbcdim);
+ const int dim = dd->dim[d];
+ const bool applyPbc = (dim < ddbox->npbcdim);
/* Use an rvec to store two reals */
- extr_s[d][0] = comm->cell_f0[d+1];
- extr_s[d][1] = comm->cell_f1[d+1];
- extr_s[d][2] = comm->cell_f1[d+1];
+ extr_s[d][0] = cellsizes[d + 1].fracLower;
+ extr_s[d][1] = cellsizes[d + 1].fracUpper;
+ extr_s[d][2] = cellsizes[d + 1].fracUpper;
- pos = 0;
+ int pos = 0;
+ GMX_ASSERT(pos < c_ddZoneCommMaxNumZones, "The buffers should be sufficiently large");
/* Store the extremes in the backward sending buffer,
- * so the get updated separately from the forward communication.
+ * so they get updated separately from the forward communication.
*/
- for (d1 = d; d1 < dd->ndim-1; d1++)
+ for (int d1 = d; d1 < dd->ndim-1; d1++)
{
/* We invert the order to be able to use the same loop for buf_e */
buf_s[pos].min0 = extr_s[d1][1];
/* We only need to communicate the extremes
* in the forward direction
*/
- npulse = comm->cd[d].np;
- if (bPBC)
+ int numPulses = comm->cd[d].numPulses();
+ int numPulsesMin;
+ if (applyPbc)
{
/* Take the minimum to avoid double communication */
- npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
+ numPulsesMin = std::min(numPulses, dd->nc[dim] - 1 - numPulses);
}
else
{
* the communication setup and therefore we simply
* do all communication, but ignore some data.
*/
- npulse_min = npulse;
+ numPulsesMin = numPulses;
}
- for (p = 0; p < npulse_min; p++)
+ for (int pulse = 0; pulse < numPulsesMin; pulse++)
{
/* Communicate the extremes forward */
- bUse = (bPBC || dd->ci[dim] > 0);
+ bool receiveValidData = (applyPbc || dd->ci[dim] > 0);
- dd_sendrecv_rvec(dd, d, dddirForward,
- extr_s+d, dd->ndim-d-1,
- extr_r+d, dd->ndim-d-1);
+ int numElements = dd->ndim - d - 1;
+ ddSendrecv(dd, d, dddirForward,
+ extr_s + d, numElements,
+ extr_r + d, numElements);
- if (bUse)
+ if (receiveValidData)
{
- for (d1 = d; d1 < dd->ndim-1; d1++)
+ for (int d1 = d; d1 < dd->ndim - 1; d1++)
{
extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
}
}
- buf_size = pos;
- for (p = 0; p < npulse; p++)
+ const int numElementsInBuffer = pos;
+ for (int pulse = 0; pulse < numPulses; pulse++)
{
/* Communicate all the zone information backward */
- bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
+ bool receiveValidData = (applyPbc || dd->ci[dim] < dd->nc[dim] - 1);
- dd_sendrecv_ddzone(dd, d, dddirBackward,
- buf_s, buf_size,
- buf_r, buf_size);
+ static_assert(sizeof(gmx_ddzone_t) == c_ddzoneNumReals*sizeof(real), "Here we expect gmx_ddzone_t to consist of c_ddzoneNumReals reals (only)");
- clear_rvec(dh);
- if (p > 0)
+ int numReals = numElementsInBuffer*c_ddzoneNumReals;
+ ddSendrecv(dd, d, dddirBackward,
+ gmx::arrayRefFromArray(&buf_s[0].min0, numReals),
+ gmx::arrayRefFromArray(&buf_r[0].min0, numReals));
+
+ rvec dh = { 0 };
+ if (pulse > 0)
{
- for (d1 = d+1; d1 < dd->ndim; d1++)
+ for (int d1 = d + 1; d1 < dd->ndim; d1++)
{
/* Determine the decrease of maximum required
* communication height along d1 due to the distance along d,
* this avoids a lot of useless atom communication.
*/
- dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
+ real dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
+ int c;
if (ddbox->tric_dir[dim])
{
/* c is the off-diagonal coupling between the cell planes
{
c = 0;
}
- det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
+ real det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
if (det > 0)
{
dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
}
/* Accumulate the extremes over all pulses */
- for (i = 0; i < buf_size; i++)
+ for (int i = 0; i < numElementsInBuffer; i++)
{
- if (p == 0)
+ if (pulse == 0)
{
buf_e[i] = buf_r[i];
}
else
{
- if (bUse)
+ if (receiveValidData)
{
buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
}
- if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
+ int d1;
+ if (dd->ndim == 3 && d == 0 && i == numElementsInBuffer - 1)
{
d1 = 1;
}
{
d1 = d + 1;
}
- if (bUse && dh[d1] >= 0)
+ if (receiveValidData && dh[d1] >= 0)
{
buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
*/
buf_s[i] = buf_r[i];
}
- if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
- (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
+ if (((applyPbc || dd->ci[dim] + numPulses < dd->nc[dim]) && pulse == numPulses - 1) ||
+ (!applyPbc && dd->ci[dim] + 1 + pulse == dd->nc[dim] - 1))
{
/* Store the extremes */
- pos = 0;
+ int pos = 0;
- for (d1 = d; d1 < dd->ndim-1; d1++)
+ for (int d1 = d; d1 < dd->ndim-1; d1++)
{
extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
if (d == 1 || (d == 0 && dd->ndim == 3))
{
- for (i = d; i < 2; i++)
+ for (int i = d; i < 2; i++)
{
comm->zone_d2[1-d][i] = buf_e[pos];
pos++;
if (dd->ndim >= 2)
{
- dim = dd->dim[1];
- for (i = 0; i < 2; i++)
+ int dim = dd->dim[1];
+ for (int i = 0; i < 2; i++)
{
if (debug)
{
}
if (dd->ndim >= 3)
{
- dim = dd->dim[2];
- for (i = 0; i < 2; i++)
+ int dim = dd->dim[2];
+ for (int i = 0; i < 2; i++)
{
- for (j = 0; j < 2; j++)
+ for (int j = 0; j < 2; j++)
{
if (debug)
{
}
}
}
- for (d = 1; d < dd->ndim; d++)
+ for (int d = 1; d < dd->ndim; d++)
{
- comm->cell_f_max0[d] = extr_s[d-1][0];
- comm->cell_f_min1[d] = extr_s[d-1][1];
+ cellsizes[d].fracLowerMax = extr_s[d-1][0];
+ cellsizes[d].fracUpperMin = extr_s[d-1][1];
if (debug)
{
fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
- d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
+ d, cellsizes[d].fracLowerMax, cellsizes[d].fracUpperMin);
}
}
}
-static void dd_collect_cg(gmx_domdec_t *dd,
- const t_state *state_local)
+static void write_dd_grid_pdb(const char *fn, int64_t step,
+ gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
{
- gmx_domdec_master_t *ma = nullptr;
- int buf2[2], *ibuf, i, ncg_home = 0, nat_home = 0;
-
- if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
- {
- /* The master has the correct distribution */
- return;
- }
-
- const int *cg;
-
- if (state_local->ddp_count == dd->ddp_count)
- {
- /* The local state and DD are in sync, use the DD indices */
- ncg_home = dd->ncg_home;
- cg = dd->index_gl;
- nat_home = dd->nat_home;
- }
- else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
- {
- /* The DD is out of sync with the local state, but we have stored
- * the cg indices with the local state, so we can use those.
- */
- t_block *cgs_gl;
-
- cgs_gl = &dd->comm->cgs_gl;
+ rvec grid_s[2], *grid_r = nullptr, cx, r;
+ char fname[STRLEN], buf[22];
+ FILE *out;
+ int a, i, d, z, y, x;
+ matrix tric;
+ real vol;
- ncg_home = state_local->cg_gl.size();
- cg = state_local->cg_gl.data();
- nat_home = 0;
- for (i = 0; i < ncg_home; i++)
- {
- nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
- }
- }
- else
- {
- gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
- }
+ copy_rvec(dd->comm->cell_x0, grid_s[0]);
+ copy_rvec(dd->comm->cell_x1, grid_s[1]);
- buf2[0] = ncg_home;
- buf2[1] = nat_home;
if (DDMASTER(dd))
{
- ma = dd->ma;
- ibuf = ma->ibuf;
- }
- else
- {
- ibuf = nullptr;
+ snew(grid_r, 2*dd->nnodes);
}
- /* Collect the charge group and atom counts on the master */
- dd_gather(dd, 2*sizeof(int), buf2, ibuf);
+
+ dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
if (DDMASTER(dd))
{
- ma->index[0] = 0;
- for (i = 0; i < dd->nnodes; i++)
- {
- ma->ncg[i] = ma->ibuf[2*i];
- ma->nat[i] = ma->ibuf[2*i+1];
- ma->index[i+1] = ma->index[i] + ma->ncg[i];
-
- }
- /* Make byte counts and indices */
- for (i = 0; i < dd->nnodes; i++)
- {
- ma->ibuf[i] = ma->ncg[i]*sizeof(int);
- ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
- }
- if (debug)
+ for (d = 0; d < DIM; d++)
{
- fprintf(debug, "Initial charge group distribution: ");
- for (i = 0; i < dd->nnodes; i++)
+ for (i = 0; i < DIM; i++)
{
- fprintf(debug, " %d", ma->ncg[i]);
+ if (d == i)
+ {
+ tric[d][i] = 1;
+ }
+ else
+ {
+ if (d < ddbox->npbcdim && dd->nc[d] > 1)
+ {
+ tric[d][i] = box[i][d]/box[i][i];
+ }
+ else
+ {
+ tric[d][i] = 0;
+ }
+ }
}
- fprintf(debug, "\n");
}
- }
-
- /* Collect the charge group indices on the master */
- dd_gatherv(dd,
- ncg_home*sizeof(int), cg,
- DDMASTER(dd) ? ma->ibuf : nullptr,
- DDMASTER(dd) ? ma->ibuf+dd->nnodes : nullptr,
- DDMASTER(dd) ? ma->cg : nullptr);
-
- dd->comm->master_cg_ddp_count = state_local->ddp_count;
-}
-
-static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
- gmx::ArrayRef<const gmx::RVec> lv,
- gmx::ArrayRef<gmx::RVec> v)
-{
- gmx_domdec_master_t *ma;
- int n, i, c, a, nalloc = 0;
- rvec *buf = nullptr;
- t_block *cgs_gl;
-
- ma = dd->ma;
-
- if (!DDMASTER(dd))
- {
-#if GMX_MPI
- MPI_Send(const_cast<void *>(static_cast<const void *>(lv.data())), dd->nat_home*sizeof(rvec), MPI_BYTE,
- DDMASTERRANK(dd), dd->rank, dd->mpi_comm_all);
-#endif
- }
- else
- {
- /* Copy the master coordinates to the global array */
- cgs_gl = &dd->comm->cgs_gl;
-
- n = DDMASTERRANK(dd);
- a = 0;
- for (i = ma->index[n]; i < ma->index[n+1]; i++)
+ sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
+ out = gmx_fio_fopen(fname, "w");
+ gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
+ a = 1;
+ for (i = 0; i < dd->nnodes; i++)
{
- for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
+ vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
+ for (d = 0; d < DIM; d++)
{
- copy_rvec(lv[a++], v[c]);
+ vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
}
- }
-
- for (n = 0; n < dd->nnodes; n++)
- {
- if (n != dd->rank)
+ for (z = 0; z < 2; z++)
{
- if (ma->nat[n] > nalloc)
+ for (y = 0; y < 2; y++)
{
- nalloc = over_alloc_dd(ma->nat[n]);
- srenew(buf, nalloc);
+ for (x = 0; x < 2; x++)
+ {
+ cx[XX] = grid_r[i*2+x][XX];
+ cx[YY] = grid_r[i*2+y][YY];
+ cx[ZZ] = grid_r[i*2+z][ZZ];
+ mvmul(tric, cx, r);
+ gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
+ 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
+ }
}
-#if GMX_MPI
- MPI_Recv(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE, DDRANK(dd, n),
- n, dd->mpi_comm_all, MPI_STATUS_IGNORE);
-#endif
- a = 0;
- for (i = ma->index[n]; i < ma->index[n+1]; i++)
+ }
+ for (d = 0; d < DIM; d++)
+ {
+ for (x = 0; x < 4; x++)
{
- for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
+ switch (d)
{
- copy_rvec(buf[a++], v[c]);
+ case 0: y = 1 + i*8 + 2*x; break;
+ case 1: y = 1 + i*8 + 2*x - (x % 2); break;
+ case 2: y = 1 + i*8 + x; break;
}
+ fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
}
}
}
- sfree(buf);
+ gmx_fio_fclose(out);
+ sfree(grid_r);
}
}
-static void get_commbuffer_counts(gmx_domdec_t *dd,
- int **counts, int **disps)
+void write_dd_pdb(const char *fn, int64_t step, const char *title,
+ const gmx_mtop_t *mtop, const t_commrec *cr,
+ int natoms, const rvec x[], const matrix box)
{
- gmx_domdec_master_t *ma;
- int n;
-
- ma = dd->ma;
+ char fname[STRLEN], buf[22];
+ FILE *out;
+ int resnr;
+ const char *atomname, *resname;
+ gmx_domdec_t *dd;
- /* Make the rvec count and displacment arrays */
- *counts = ma->ibuf;
- *disps = ma->ibuf + dd->nnodes;
- for (n = 0; n < dd->nnodes; n++)
+ dd = cr->dd;
+ if (natoms == -1)
{
- (*counts)[n] = ma->nat[n]*sizeof(rvec);
- (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
+ natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
}
-}
-
-static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
- gmx::ArrayRef<const gmx::RVec> lv,
- gmx::ArrayRef<gmx::RVec> v)
-{
- gmx_domdec_master_t *ma;
- int *rcounts = nullptr, *disps = nullptr;
- int n, i, c, a;
- rvec *buf = nullptr;
- t_block *cgs_gl;
-
- ma = dd->ma;
-
- if (DDMASTER(dd))
- {
- get_commbuffer_counts(dd, &rcounts, &disps);
- buf = ma->vbuf;
- }
+ sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
- dd_gatherv(dd, dd->nat_home*sizeof(rvec), lv.data(), rcounts, disps, buf);
+ out = gmx_fio_fopen(fname, "w");
- if (DDMASTER(dd))
+ fprintf(out, "TITLE %s\n", title);
+ gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
+ int molb = 0;
+ for (int i = 0; i < natoms; i++)
{
- cgs_gl = &dd->comm->cgs_gl;
-
- a = 0;
- for (n = 0; n < dd->nnodes; n++)
+ int ii = dd->globalAtomIndices[i];
+ mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
+ int c;
+ real b;
+ if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
{
- for (i = ma->index[n]; i < ma->index[n+1]; i++)
+ c = 0;
+ while (i >= dd->atomGrouping().subRange(0, dd->comm->zones.cg_range[c + 1]).end())
{
- for (c = cgs_gl->index[ma->cg[i]]; c < cgs_gl->index[ma->cg[i]+1]; c++)
- {
- copy_rvec(buf[a++], v[c]);
- }
+ c++;
}
+ b = c;
}
+ else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
+ {
+ b = dd->comm->zones.n;
+ }
+ else
+ {
+ b = dd->comm->zones.n + 1;
+ }
+ gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
+ 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
}
-}
-
-void dd_collect_vec(gmx_domdec_t *dd,
- const t_state *state_local,
- gmx::ArrayRef<const gmx::RVec> lv,
- gmx::ArrayRef<gmx::RVec> v)
-{
- dd_collect_cg(dd, state_local);
+ fprintf(out, "TER\n");
- if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
- {
- dd_collect_vec_sendrecv(dd, lv, v);
- }
- else
- {
- dd_collect_vec_gatherv(dd, lv, v);
- }
+ gmx_fio_fclose(out);
}
-
-void dd_collect_state(gmx_domdec_t *dd,
- const t_state *state_local, t_state *state)
+real dd_cutoff_multibody(const gmx_domdec_t *dd)
{
- int nh = state_local->nhchainlength;
+ gmx_domdec_comm_t *comm;
+ int di;
+ real r;
- if (DDMASTER(dd))
- {
- GMX_RELEASE_ASSERT(state->nhchainlength == nh, "The global and local Nose-Hoover chain lengths should match");
+ comm = dd->comm;
- for (int i = 0; i < efptNR; i++)
+ r = -1;
+ if (comm->bInterCGBondeds)
+ {
+ if (comm->cutoff_mbody > 0)
{
- state->lambda[i] = state_local->lambda[i];
+ r = comm->cutoff_mbody;
}
- state->fep_state = state_local->fep_state;
- state->veta = state_local->veta;
- state->vol0 = state_local->vol0;
- copy_mat(state_local->box, state->box);
- copy_mat(state_local->boxv, state->boxv);
- copy_mat(state_local->svir_prev, state->svir_prev);
- copy_mat(state_local->fvir_prev, state->fvir_prev);
- copy_mat(state_local->pres_prev, state->pres_prev);
-
- for (int i = 0; i < state_local->ngtc; i++)
+ else
{
- for (int j = 0; j < nh; j++)
+ /* cutoff_mbody=0 means we do not have DLB */
+ r = comm->cellsize_min[dd->dim[0]];
+ for (di = 1; di < dd->ndim; di++)
{
- state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
- state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
+ r = std::min(r, comm->cellsize_min[dd->dim[di]]);
}
- state->therm_integral[i] = state_local->therm_integral[i];
- }
- for (int i = 0; i < state_local->nnhpres; i++)
- {
- for (int j = 0; j < nh; j++)
+ if (comm->bBondComm)
+ {
+ r = std::max(r, comm->cutoff_mbody);
+ }
+ else
{
- state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
- state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
+ r = std::min(r, comm->cutoff);
}
}
- state->baros_integral = state_local->baros_integral;
- }
- if (state_local->flags & (1 << estX))
- {
- gmx::ArrayRef<gmx::RVec> globalXRef = state ? gmx::makeArrayRef(state->x) : gmx::EmptyArrayRef();
- dd_collect_vec(dd, state_local, state_local->x, globalXRef);
- }
- if (state_local->flags & (1 << estV))
- {
- gmx::ArrayRef<gmx::RVec> globalVRef = state ? gmx::makeArrayRef(state->v) : gmx::EmptyArrayRef();
- dd_collect_vec(dd, state_local, state_local->v, globalVRef);
- }
- if (state_local->flags & (1 << estCGP))
- {
- gmx::ArrayRef<gmx::RVec> globalCgpRef = state ? gmx::makeArrayRef(state->cg_p) : gmx::EmptyArrayRef();
- dd_collect_vec(dd, state_local, state_local->cg_p, globalCgpRef);
}
+
+ return r;
}
-static void dd_resize_state(t_state *state, PaddedRVecVector *f, int natoms)
+real dd_cutoff_twobody(const gmx_domdec_t *dd)
{
- if (debug)
- {
- fprintf(debug, "Resizing state: currently %d, required %d\n", state->natoms, natoms);
- }
+ real r_mb;
- state_change_natoms(state, natoms);
+ r_mb = dd_cutoff_multibody(dd);
- if (f != nullptr)
- {
- /* We need to allocate one element extra, since we might use
- * (unaligned) 4-wide SIMD loads to access rvec entries.
- */
- f->resize(paddedRVecVectorSize(natoms));
- }
+ return std::max(dd->comm->cutoff, r_mb);
}
-static void dd_check_alloc_ncg(t_forcerec *fr,
- t_state *state,
- PaddedRVecVector *f,
- int numChargeGroups)
+
+static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
+ ivec coord_pme)
{
- if (numChargeGroups > fr->cg_nalloc)
- {
- if (debug)
- {
- fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, numChargeGroups, over_alloc_dd(numChargeGroups));
- }
- fr->cg_nalloc = over_alloc_dd(numChargeGroups);
- srenew(fr->cginfo, fr->cg_nalloc);
- if (fr->cutoff_scheme == ecutsGROUP)
- {
- srenew(fr->cg_cm, fr->cg_nalloc);
- }
- }
- if (fr->cutoff_scheme == ecutsVERLET)
- {
- /* We don't use charge groups, we use x in state to set up
- * the atom communication.
- */
- dd_resize_state(state, f, numChargeGroups);
- }
+ int nc, ntot;
+
+ nc = dd->nc[dd->comm->cartpmedim];
+ ntot = dd->comm->ntot[dd->comm->cartpmedim];
+ copy_ivec(coord, coord_pme);
+ coord_pme[dd->comm->cartpmedim] =
+ nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
}
-static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd, t_block *cgs,
- const rvec *v, rvec *lv)
+static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
{
- gmx_domdec_master_t *ma;
- int n, i, c, a, nalloc = 0;
- rvec *buf = nullptr;
-
- if (DDMASTER(dd))
- {
- ma = dd->ma;
+ int npp, npme;
- for (n = 0; n < dd->nnodes; n++)
- {
- if (n != dd->rank)
- {
- if (ma->nat[n] > nalloc)
- {
- nalloc = over_alloc_dd(ma->nat[n]);
- srenew(buf, nalloc);
- }
- /* Use lv as a temporary buffer */
- a = 0;
- for (i = ma->index[n]; i < ma->index[n+1]; i++)
- {
- for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
- {
- copy_rvec(v[c], buf[a++]);
- }
- }
- if (a != ma->nat[n])
- {
- gmx_fatal(FARGS, "Internal error a (%d) != nat (%d)",
- a, ma->nat[n]);
- }
+ npp = dd->nnodes;
+ npme = dd->comm->npmenodes;
-#if GMX_MPI
- MPI_Send(buf, ma->nat[n]*sizeof(rvec), MPI_BYTE,
- DDRANK(dd, n), n, dd->mpi_comm_all);
-#endif
- }
- }
- sfree(buf);
- n = DDMASTERRANK(dd);
- a = 0;
- for (i = ma->index[n]; i < ma->index[n+1]; i++)
- {
- for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
- {
- copy_rvec(v[c], lv[a++]);
- }
- }
- }
- else
- {
-#if GMX_MPI
- MPI_Recv(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
- MPI_ANY_TAG, dd->mpi_comm_all, MPI_STATUS_IGNORE);
-#endif
- }
+ /* Here we assign a PME node to communicate with this DD node
+ * by assuming that the major index of both is x.
+ * We add cr->npmenodes/2 to obtain an even distribution.
+ */
+ return (ddindex*npme + npme/2)/npp;
}
-static void dd_distribute_vec_scatterv(gmx_domdec_t *dd, t_block *cgs,
- const rvec *v, rvec *lv)
+static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
{
- gmx_domdec_master_t *ma;
- int *scounts = nullptr, *disps = nullptr;
- int n, i, c, a;
- rvec *buf = nullptr;
+ int *pme_rank;
+ int n, i, p0, p1;
- if (DDMASTER(dd))
+ snew(pme_rank, dd->comm->npmenodes);
+ n = 0;
+ for (i = 0; i < dd->nnodes; i++)
{
- ma = dd->ma;
-
- get_commbuffer_counts(dd, &scounts, &disps);
-
- buf = ma->vbuf;
- a = 0;
- for (n = 0; n < dd->nnodes; n++)
+ p0 = ddindex2pmeindex(dd, i);
+ p1 = ddindex2pmeindex(dd, i+1);
+ if (i+1 == dd->nnodes || p1 > p0)
{
- for (i = ma->index[n]; i < ma->index[n+1]; i++)
+ if (debug)
{
- for (c = cgs->index[ma->cg[i]]; c < cgs->index[ma->cg[i]+1]; c++)
- {
- copy_rvec(v[c], buf[a++]);
- }
+ fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
}
+ pme_rank[n] = i + 1 + n;
+ n++;
}
}
- dd_scatterv(dd, scounts, disps, buf, dd->nat_home*sizeof(rvec), lv);
+ return pme_rank;
}
-static void dd_distribute_vec(gmx_domdec_t *dd, t_block *cgs,
- const rvec *v, rvec *lv)
+static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
{
- if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
- {
- dd_distribute_vec_sendrecv(dd, cgs, v, lv);
- }
- else
- {
- dd_distribute_vec_scatterv(dd, cgs, v, lv);
- }
+ gmx_domdec_t *dd;
+ ivec coords;
+ int slab;
+
+ dd = cr->dd;
+ /*
+ if (dd->comm->bCartesian) {
+ gmx_ddindex2xyz(dd->nc,ddindex,coords);
+ dd_coords2pmecoords(dd,coords,coords_pme);
+ copy_ivec(dd->ntot,nc);
+ nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
+ coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
+
+ slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
+ } else {
+ slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
+ }
+ */
+ coords[XX] = x;
+ coords[YY] = y;
+ coords[ZZ] = z;
+ slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
+
+ return slab;
}
-static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
+static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
{
- if (dfhist == nullptr)
- {
- return;
- }
+ gmx_domdec_comm_t *comm;
+ ivec coords;
+ int ddindex, nodeid = -1;
- dd_bcast(dd, sizeof(int), &dfhist->bEquil);
- dd_bcast(dd, sizeof(int), &dfhist->nlambda);
- dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
+ comm = cr->dd->comm;
- if (dfhist->nlambda > 0)
+ coords[XX] = x;
+ coords[YY] = y;
+ coords[ZZ] = z;
+ if (comm->bCartesianPP_PME)
{
- int nlam = dfhist->nlambda;
- dd_bcast(dd, sizeof(int)*nlam, dfhist->n_at_lam);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->wl_histo);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_weights);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_dg);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
-
- for (int i = 0; i < nlam; i++)
+#if GMX_MPI
+ MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
+#endif
+ }
+ else
+ {
+ ddindex = dd_index(cr->dd->nc, coords);
+ if (comm->bCartesianPP)
+ {
+ nodeid = comm->ddindex2simnodeid[ddindex];
+ }
+ else
{
- dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p2[i]);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m2[i]);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij[i]);
- dd_bcast(dd, sizeof(real)*nlam, dfhist->Tij_empirical[i]);
+ if (comm->pmenodes)
+ {
+ nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
+ }
+ else
+ {
+ nodeid = ddindex;
+ }
}
}
+
+ return nodeid;
}
-static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
- t_state *state, t_state *state_local,
- PaddedRVecVector *f)
+static int dd_simnode2pmenode(const gmx_domdec_t *dd,
+ const t_commrec gmx_unused *cr,
+ int sim_nodeid)
{
- int nh = state_local->nhchainlength;
+ int pmenode = -1;
- if (DDMASTER(dd))
- {
- GMX_RELEASE_ASSERT(state->nhchainlength == nh, "The global and local Nose-Hoover chain lengths should match");
+ const gmx_domdec_comm_t *comm = dd->comm;
- for (int i = 0; i < efptNR; i++)
+ /* This assumes a uniform x domain decomposition grid cell size */
+ if (comm->bCartesianPP_PME)
+ {
+#if GMX_MPI
+ ivec coord, coord_pme;
+ MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
+ if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
{
- state_local->lambda[i] = state->lambda[i];
+ /* This is a PP node */
+ dd_cart_coord2pmecoord(dd, coord, coord_pme);
+ MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
}
- state_local->fep_state = state->fep_state;
- state_local->veta = state->veta;
- state_local->vol0 = state->vol0;
- copy_mat(state->box, state_local->box);
- copy_mat(state->box_rel, state_local->box_rel);
- copy_mat(state->boxv, state_local->boxv);
- copy_mat(state->svir_prev, state_local->svir_prev);
- copy_mat(state->fvir_prev, state_local->fvir_prev);
- if (state->dfhist != nullptr)
+#endif
+ }
+ else if (comm->bCartesianPP)
+ {
+ if (sim_nodeid < dd->nnodes)
{
- copy_df_history(state_local->dfhist, state->dfhist);
+ pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
}
- for (int i = 0; i < state_local->ngtc; i++)
+ }
+ else
+ {
+ /* This assumes DD cells with identical x coordinates
+ * are numbered sequentially.
+ */
+ if (dd->comm->pmenodes == nullptr)
{
- for (int j = 0; j < nh; j++)
+ if (sim_nodeid < dd->nnodes)
{
- state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
- state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
+ /* The DD index equals the nodeid */
+ pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
}
- state_local->therm_integral[i] = state->therm_integral[i];
}
- for (int i = 0; i < state_local->nnhpres; i++)
+ else
{
- for (int j = 0; j < nh; j++)
+ int i = 0;
+ while (sim_nodeid > dd->comm->pmenodes[i])
+ {
+ i++;
+ }
+ if (sim_nodeid < dd->comm->pmenodes[i])
{
- state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
- state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
+ pmenode = dd->comm->pmenodes[i];
}
}
- state_local->baros_integral = state->baros_integral;
}
- dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda.data());
- dd_bcast(dd, sizeof(int), &state_local->fep_state);
- dd_bcast(dd, sizeof(real), &state_local->veta);
- dd_bcast(dd, sizeof(real), &state_local->vol0);
- dd_bcast(dd, sizeof(state_local->box), state_local->box);
- dd_bcast(dd, sizeof(state_local->box_rel), state_local->box_rel);
- dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
- dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
- dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
- dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi.data());
- dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi.data());
- dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral.data());
- dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi.data());
- dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi.data());
-
- /* communicate df_history -- required for restarting from checkpoint */
- dd_distribute_dfhist(dd, state_local->dfhist);
-
- dd_resize_state(state_local, f, dd->nat_home);
- if (state_local->flags & (1 << estX))
- {
- const rvec *xGlobal = (DDMASTER(dd) ? as_rvec_array(state->x.data()) : nullptr);
- dd_distribute_vec(dd, cgs, xGlobal, as_rvec_array(state_local->x.data()));
- }
- if (state_local->flags & (1 << estV))
- {
- const rvec *vGlobal = (DDMASTER(dd) ? as_rvec_array(state->v.data()) : nullptr);
- dd_distribute_vec(dd, cgs, vGlobal, as_rvec_array(state_local->v.data()));
- }
- if (state_local->flags & (1 << estCGP))
- {
- const rvec *cgpGlobal = (DDMASTER(dd) ? as_rvec_array(state->cg_p.data()) : nullptr);
- dd_distribute_vec(dd, cgs, cgpGlobal, as_rvec_array(state_local->cg_p.data()));
- }
+ return pmenode;
}
-static char dim2char(int dim)
+NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
{
- char c = '?';
-
- switch (dim)
+ if (dd != nullptr)
{
- case XX: c = 'X'; break;
- case YY: c = 'Y'; break;
- case ZZ: c = 'Z'; break;
- default: gmx_fatal(FARGS, "Unknown dim %d", dim);
+ return {
+ dd->comm->npmenodes_x, dd->comm->npmenodes_y
+ };
+ }
+ else
+ {
+ return {
+ 1, 1
+ };
}
-
- return c;
}
-static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
- gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
+std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
{
- rvec grid_s[2], *grid_r = nullptr, cx, r;
- char fname[STRLEN], buf[22];
- FILE *out;
- int a, i, d, z, y, x;
- matrix tric;
- real vol;
-
- copy_rvec(dd->comm->cell_x0, grid_s[0]);
- copy_rvec(dd->comm->cell_x1, grid_s[1]);
+ gmx_domdec_t *dd;
+ int x, y, z;
+ ivec coord, coord_pme;
- if (DDMASTER(dd))
- {
- snew(grid_r, 2*dd->nnodes);
- }
+ dd = cr->dd;
- dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
+ std::vector<int> ddranks;
+ ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
- if (DDMASTER(dd))
+ for (x = 0; x < dd->nc[XX]; x++)
{
- for (d = 0; d < DIM; d++)
+ for (y = 0; y < dd->nc[YY]; y++)
{
- for (i = 0; i < DIM; i++)
+ for (z = 0; z < dd->nc[ZZ]; z++)
{
- if (d == i)
+ if (dd->comm->bCartesianPP_PME)
{
- tric[d][i] = 1;
+ coord[XX] = x;
+ coord[YY] = y;
+ coord[ZZ] = z;
+ dd_cart_coord2pmecoord(dd, coord, coord_pme);
+ if (dd->ci[XX] == coord_pme[XX] &&
+ dd->ci[YY] == coord_pme[YY] &&
+ dd->ci[ZZ] == coord_pme[ZZ])
+ {
+ ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
+ }
}
else
{
- if (d < ddbox->npbcdim && dd->nc[d] > 1)
- {
- tric[d][i] = box[i][d]/box[i][i];
- }
- else
+ /* The slab corresponds to the nodeid in the PME group */
+ if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
{
- tric[d][i] = 0;
+ ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
}
}
}
}
- sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
- out = gmx_fio_fopen(fname, "w");
- gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
- a = 1;
- for (i = 0; i < dd->nnodes; i++)
+ }
+ return ddranks;
+}
+
+static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
+{
+ gmx_bool bReceive = TRUE;
+
+ if (cr->npmenodes < dd->nnodes)
+ {
+ gmx_domdec_comm_t *comm = dd->comm;
+ if (comm->bCartesianPP_PME)
{
- vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
- for (d = 0; d < DIM; d++)
- {
- vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
- }
- for (z = 0; z < 2; z++)
+#if GMX_MPI
+ int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
+ ivec coords;
+ MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
+ coords[comm->cartpmedim]++;
+ if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
{
- for (y = 0; y < 2; y++)
+ int rank;
+ MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
+ if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
{
- for (x = 0; x < 2; x++)
- {
- cx[XX] = grid_r[i*2+x][XX];
- cx[YY] = grid_r[i*2+y][YY];
- cx[ZZ] = grid_r[i*2+z][ZZ];
- mvmul(tric, cx, r);
- gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
- 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
- }
+ /* This is not the last PP node for pmenode */
+ bReceive = FALSE;
}
}
- for (d = 0; d < DIM; d++)
+#else
+ GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
+#endif
+ }
+ else
+ {
+ int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
+ if (cr->sim_nodeid+1 < cr->nnodes &&
+ dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
{
- for (x = 0; x < 4; x++)
- {
- switch (d)
- {
- case 0: y = 1 + i*8 + 2*x; break;
- case 1: y = 1 + i*8 + 2*x - (x % 2); break;
- case 2: y = 1 + i*8 + x; break;
- }
- fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
- }
+ /* This is not the last PP node for pmenode */
+ bReceive = FALSE;
}
}
- gmx_fio_fclose(out);
- sfree(grid_r);
}
+
+ return bReceive;
}
-void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
- const gmx_mtop_t *mtop, t_commrec *cr,
- int natoms, rvec x[], matrix box)
+static void set_zones_ncg_home(gmx_domdec_t *dd)
{
- char fname[STRLEN], buf[22];
- FILE *out;
- int i, ii, resnr, c;
- const char *atomname, *resname;
- real b;
- gmx_domdec_t *dd;
+ gmx_domdec_zones_t *zones;
+ int i;
- dd = cr->dd;
- if (natoms == -1)
+ zones = &dd->comm->zones;
+
+ zones->cg_range[0] = 0;
+ for (i = 1; i < zones->n+1; i++)
{
- natoms = dd->comm->nat[ddnatVSITE];
+ zones->cg_range[i] = dd->ncg_home;
}
+ /* zone_ncg1[0] should always be equal to ncg_home */
+ dd->comm->zone_ncg1[0] = dd->ncg_home;
+}
- sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
+static void restoreAtomGroups(gmx_domdec_t *dd,
+ const int *gcgs_index, const t_state *state)
+{
+ gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
- out = gmx_fio_fopen(fname, "w");
+ std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
+ gmx::RangePartitioning &atomGrouping = dd->atomGrouping_;
- fprintf(out, "TITLE %s\n", title);
- gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
- int molb = 0;
- for (i = 0; i < natoms; i++)
+ globalAtomGroupIndices.resize(atomGroupsState.size());
+ atomGrouping.clear();
+
+ /* Copy back the global charge group indices from state
+ * and rebuild the local charge group to atom index.
+ */
+ for (gmx::index i = 0; i < atomGroupsState.size(); i++)
{
- ii = dd->gatindex[i];
- mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
- if (i < dd->comm->nat[ddnatZONE])
- {
- c = 0;
- while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
- {
- c++;
- }
- b = c;
- }
- else if (i < dd->comm->nat[ddnatVSITE])
+ const int atomGroupGlobal = atomGroupsState[i];
+ const int groupSize = gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
+ globalAtomGroupIndices[i] = atomGroupGlobal;
+ atomGrouping.appendBlock(groupSize);
+ }
+
+ dd->ncg_home = atomGroupsState.size();
+ dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomGrouping.fullRange().end());
+
+ set_zones_ncg_home(dd);
+}
+
+static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
+ t_forcerec *fr, char *bLocalCG)
+{
+ cginfo_mb_t *cginfo_mb;
+ int *cginfo;
+ int cg;
+
+ if (fr != nullptr)
+ {
+ cginfo_mb = fr->cginfo_mb;
+ cginfo = fr->cginfo;
+
+ for (cg = cg0; cg < cg1; cg++)
{
- b = dd->comm->zones.n;
+ cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
}
- else
+ }
+
+ if (bLocalCG != nullptr)
+ {
+ for (cg = cg0; cg < cg1; cg++)
{
- b = dd->comm->zones.n + 1;
+ bLocalCG[index_gl[cg]] = TRUE;
}
- gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
- 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
}
- fprintf(out, "TER\n");
-
- gmx_fio_fclose(out);
}
-real dd_cutoff_multibody(const gmx_domdec_t *dd)
+static void make_dd_indices(gmx_domdec_t *dd,
+ const int *gcgs_index, int cg_start)
{
- gmx_domdec_comm_t *comm;
- int di;
- real r;
+ const int numZones = dd->comm->zones.n;
+ const int *zone2cg = dd->comm->zones.cg_range;
+ const int *zone_ncg1 = dd->comm->zone_ncg1;
+ gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
+ const gmx_bool bCGs = dd->comm->bCGs;
- comm = dd->comm;
+ std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
+ gmx_ga2la_t &ga2la = *dd->ga2la;
- r = -1;
- if (comm->bInterCGBondeds)
+ if (zone2cg[1] != dd->ncg_home)
{
- if (comm->cutoff_mbody > 0)
+ gmx_incons("dd->ncg_zone is not up to date");
+ }
+
+ /* Make the local to global and global to local atom index */
+ int a = dd->atomGrouping().subRange(cg_start, cg_start).begin();
+ globalAtomIndices.resize(a);
+ for (int zone = 0; zone < numZones; zone++)
+ {
+ int cg0;
+ if (zone == 0)
{
- r = comm->cutoff_mbody;
+ cg0 = cg_start;
}
else
{
- /* cutoff_mbody=0 means we do not have DLB */
- r = comm->cellsize_min[dd->dim[0]];
- for (di = 1; di < dd->ndim; di++)
+ cg0 = zone2cg[zone];
+ }
+ int cg1 = zone2cg[zone+1];
+ int cg1_p1 = cg0 + zone_ncg1[zone];
+
+ for (int cg = cg0; cg < cg1; cg++)
+ {
+ int zone1 = zone;
+ if (cg >= cg1_p1)
{
- r = std::min(r, comm->cellsize_min[dd->dim[di]]);
+ /* Signal that this cg is from more than one pulse away */
+ zone1 += numZones;
}
- if (comm->bBondComm)
+ int cg_gl = globalAtomGroupIndices[cg];
+ if (bCGs)
{
- r = std::max(r, comm->cutoff_mbody);
+ for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
+ {
+ globalAtomIndices.push_back(a_gl);
+ ga2la.insert(a_gl, {a, zone1});
+ a++;
+ }
}
else
{
- r = std::min(r, comm->cutoff);
+ globalAtomIndices.push_back(cg_gl);
+ ga2la.insert(cg_gl, {a, zone1});
+ a++;
}
}
}
-
- return r;
}
-real dd_cutoff_twobody(const gmx_domdec_t *dd)
+static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
+ const char *where)
{
- real r_mb;
-
- r_mb = dd_cutoff_multibody(dd);
+ int nerr = 0;
+ if (bLocalCG == nullptr)
+ {
+ return nerr;
+ }
+ for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
+ {
+ if (!bLocalCG[dd->globalAtomGroupIndices[i]])
+ {
+ fprintf(stderr,
+ "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
+ nerr++;
+ }
+ }
+ size_t ngl = 0;
+ for (int i = 0; i < ncg_sys; i++)
+ {
+ if (bLocalCG[i])
+ {
+ ngl++;
+ }
+ }
+ if (ngl != dd->globalAtomGroupIndices.size())
+ {
+ fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
+ nerr++;
+ }
- return std::max(dd->comm->cutoff, r_mb);
+ return nerr;
}
-
-static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
- ivec coord_pme)
+static void check_index_consistency(gmx_domdec_t *dd,
+ int natoms_sys, int ncg_sys,
+ const char *where)
{
- int nc, ntot;
+ int nerr = 0;
- nc = dd->nc[dd->comm->cartpmedim];
- ntot = dd->comm->ntot[dd->comm->cartpmedim];
- copy_ivec(coord, coord_pme);
- coord_pme[dd->comm->cartpmedim] =
- nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
-}
+ const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
-static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
-{
- int npp, npme;
+ if (dd->comm->DD_debug > 1)
+ {
+ std::vector<int> have(natoms_sys);
+ for (int a = 0; a < numAtomsInZones; a++)
+ {
+ int globalAtomIndex = dd->globalAtomIndices[a];
+ if (have[globalAtomIndex] > 0)
+ {
+ fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
+ }
+ else
+ {
+ have[globalAtomIndex] = a + 1;
+ }
+ }
+ }
- npp = dd->nnodes;
- npme = dd->comm->npmenodes;
+ std::vector<int> have(numAtomsInZones);
- /* Here we assign a PME node to communicate with this DD node
- * by assuming that the major index of both is x.
- * We add cr->npmenodes/2 to obtain an even distribution.
- */
- return (ddindex*npme + npme/2)/npp;
-}
-
-static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
-{
- int *pme_rank;
- int n, i, p0, p1;
-
- snew(pme_rank, dd->comm->npmenodes);
- n = 0;
- for (i = 0; i < dd->nnodes; i++)
+ int ngl = 0;
+ for (int i = 0; i < natoms_sys; i++)
{
- p0 = ddindex2pmeindex(dd, i);
- p1 = ddindex2pmeindex(dd, i+1);
- if (i+1 == dd->nnodes || p1 > p0)
+ if (const auto entry = dd->ga2la->find(i))
{
- if (debug)
+ const int a = entry->la;
+ if (a >= numAtomsInZones)
{
- fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
+ fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
+ nerr++;
}
- pme_rank[n] = i + 1 + n;
- n++;
+ else
+ {
+ have[a] = 1;
+ if (dd->globalAtomIndices[a] != i)
+ {
+ fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
+ nerr++;
+ }
+ }
+ ngl++;
+ }
+ }
+ if (ngl != numAtomsInZones)
+ {
+ fprintf(stderr,
+ "DD rank %d, %s: %d global atom indices, %d local atoms\n",
+ dd->rank, where, ngl, numAtomsInZones);
+ }
+ for (int a = 0; a < numAtomsInZones; a++)
+ {
+ if (have[a] == 0)
+ {
+ fprintf(stderr,
+ "DD rank %d, %s: local atom %d, global %d has no global index\n",
+ dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
}
}
- return pme_rank;
+ nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
+
+ if (nerr > 0)
+ {
+ gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
+ dd->rank, where, nerr);
+ }
}
-static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
+/* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
+static void clearDDStateIndices(gmx_domdec_t *dd,
+ int atomGroupStart,
+ int atomStart)
{
- gmx_domdec_t *dd;
- ivec coords;
- int slab;
+ gmx_ga2la_t &ga2la = *dd->ga2la;
- dd = cr->dd;
- /*
- if (dd->comm->bCartesian) {
- gmx_ddindex2xyz(dd->nc,ddindex,coords);
- dd_coords2pmecoords(dd,coords,coords_pme);
- copy_ivec(dd->ntot,nc);
- nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
- coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
+ if (atomStart == 0)
+ {
+ /* Clear the whole list without the overhead of searching */
+ ga2la.clear();
+ }
+ else
+ {
+ const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
+ for (int i = 0; i < numAtomsInZones; i++)
+ {
+ ga2la.erase(dd->globalAtomIndices[i]);
+ }
+ }
- slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
- } else {
- slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
- }
- */
- coords[XX] = x;
- coords[YY] = y;
- coords[ZZ] = z;
- slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
+ char *bLocalCG = dd->comm->bLocalCG;
+ if (bLocalCG)
+ {
+ for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
+ {
+ bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
+ }
+ }
- return slab;
+ dd_clear_local_vsite_indices(dd);
+
+ if (dd->constraints)
+ {
+ dd_clear_local_constraint_indices(dd);
+ }
}
-static int ddcoord2simnodeid(t_commrec *cr, int x, int y, int z)
+static bool check_grid_jump(int64_t step,
+ const gmx_domdec_t *dd,
+ real cutoff,
+ const gmx_ddbox_t *ddbox,
+ gmx_bool bFatal)
{
- gmx_domdec_comm_t *comm;
- ivec coords;
- int ddindex, nodeid = -1;
-
- comm = cr->dd->comm;
+ gmx_domdec_comm_t *comm = dd->comm;
+ bool invalid = false;
- coords[XX] = x;
- coords[YY] = y;
- coords[ZZ] = z;
- if (comm->bCartesianPP_PME)
- {
-#if GMX_MPI
- MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
-#endif
- }
- else
+ for (int d = 1; d < dd->ndim; d++)
{
- ddindex = dd_index(cr->dd->nc, coords);
- if (comm->bCartesianPP)
+ const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
+ const int dim = dd->dim[d];
+ const real limit = grid_jump_limit(comm, cutoff, d);
+ real bfac = ddbox->box_size[dim];
+ if (ddbox->tric_dir[dim])
{
- nodeid = comm->ddindex2simnodeid[ddindex];
+ bfac *= ddbox->skew_fac[dim];
}
- else
+ if ((cellsizes.fracUpper - cellsizes.fracLowerMax)*bfac < limit ||
+ (cellsizes.fracLower - cellsizes.fracUpperMin)*bfac > -limit)
{
- if (comm->pmenodes)
- {
- nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
- }
- else
+ invalid = true;
+
+ if (bFatal)
{
- nodeid = ddindex;
+ char buf[22];
+
+ /* This error should never be triggered under normal
+ * circumstances, but you never know ...
+ */
+ gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
+ gmx_step_str(step, buf),
+ dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
}
}
- return nodeid;
+ return invalid;
}
-static int dd_simnode2pmenode(const gmx_domdec_t *dd,
- const t_commrec gmx_unused *cr,
- int sim_nodeid)
+static float dd_force_load(gmx_domdec_comm_t *comm)
{
- int pmenode = -1;
-
- const gmx_domdec_comm_t *comm = dd->comm;
+ float load;
- /* This assumes a uniform x domain decomposition grid cell size */
- if (comm->bCartesianPP_PME)
+ if (comm->eFlop)
{
-#if GMX_MPI
- ivec coord, coord_pme;
- MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
- if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
+ load = comm->flop;
+ if (comm->eFlop > 1)
{
- /* This is a PP node */
- dd_cart_coord2pmecoord(dd, coord, coord_pme);
- MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
+ load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
}
-#endif
}
- else if (comm->bCartesianPP)
+ else
{
- if (sim_nodeid < dd->nnodes)
+ load = comm->cycl[ddCyclF];
+ if (comm->cycl_n[ddCyclF] > 1)
{
- pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
+ /* Subtract the maximum of the last n cycle counts
+ * to get rid of possible high counts due to other sources,
+ * for instance system activity, that would otherwise
+ * affect the dynamic load balancing.
+ */
+ load -= comm->cycl_max[ddCyclF];
}
- }
- else
- {
- /* This assumes DD cells with identical x coordinates
- * are numbered sequentially.
- */
- if (dd->comm->pmenodes == nullptr)
+
+#if GMX_MPI
+ if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
{
- if (sim_nodeid < dd->nnodes)
+ float gpu_wait, gpu_wait_sum;
+
+ gpu_wait = comm->cycl[ddCyclWaitGPU];
+ if (comm->cycl_n[ddCyclF] > 1)
{
- /* The DD index equals the nodeid */
- pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
+ /* We should remove the WaitGPU time of the same MD step
+ * as the one with the maximum F time, since the F time
+ * and the wait time are not independent.
+ * Furthermore, the step for the max F time should be chosen
+ * the same on all ranks that share the same GPU.
+ * But to keep the code simple, we remove the average instead.
+ * The main reason for artificially long times at some steps
+ * is spurious CPU activity or MPI time, so we don't expect
+ * that changes in the GPU wait time matter a lot here.
+ */
+ gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/static_cast<float>(comm->cycl_n[ddCyclF]);
}
+ /* Sum the wait times over the ranks that share the same GPU */
+ MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
+ comm->mpi_comm_gpu_shared);
+ /* Replace the wait time by the average over the ranks */
+ load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
+ }
+#endif
+ }
+
+ return load;
+}
+
+static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
+{
+ gmx_domdec_comm_t *comm;
+ int i;
+
+ comm = dd->comm;
+
+ snew(*dim_f, dd->nc[dim]+1);
+ (*dim_f)[0] = 0;
+ for (i = 1; i < dd->nc[dim]; i++)
+ {
+ if (comm->slb_frac[dim])
+ {
+ (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
}
else
{
- int i = 0;
- while (sim_nodeid > dd->comm->pmenodes[i])
- {
- i++;
- }
- if (sim_nodeid < dd->comm->pmenodes[i])
- {
- pmenode = dd->comm->pmenodes[i];
- }
+ (*dim_f)[i] = static_cast<real>(i)/static_cast<real>(dd->nc[dim]);
}
}
-
- return pmenode;
+ (*dim_f)[dd->nc[dim]] = 1;
}
-void get_pme_nnodes(const gmx_domdec_t *dd,
- int *npmenodes_x, int *npmenodes_y)
+static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
{
- if (dd != nullptr)
+ int pmeindex, slab, nso, i;
+ ivec xyz;
+
+ if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
{
- *npmenodes_x = dd->comm->npmenodes_x;
- *npmenodes_y = dd->comm->npmenodes_y;
+ ddpme->dim = YY;
}
else
{
- *npmenodes_x = 1;
- *npmenodes_y = 1;
+ ddpme->dim = dimind;
}
-}
-
-std::vector<int> get_pme_ddranks(t_commrec *cr, int pmenodeid)
-{
- gmx_domdec_t *dd;
- int x, y, z;
- ivec coord, coord_pme;
+ ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
- dd = cr->dd;
+ ddpme->nslab = (ddpme->dim == 0 ?
+ dd->comm->npmenodes_x :
+ dd->comm->npmenodes_y);
- std::vector<int> ddranks;
- ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
+ if (ddpme->nslab <= 1)
+ {
+ return;
+ }
- for (x = 0; x < dd->nc[XX]; x++)
+ nso = dd->comm->npmenodes/ddpme->nslab;
+ /* Determine for each PME slab the PP location range for dimension dim */
+ snew(ddpme->pp_min, ddpme->nslab);
+ snew(ddpme->pp_max, ddpme->nslab);
+ for (slab = 0; slab < ddpme->nslab; slab++)
{
- for (y = 0; y < dd->nc[YY]; y++)
- {
- for (z = 0; z < dd->nc[ZZ]; z++)
- {
- if (dd->comm->bCartesianPP_PME)
- {
- coord[XX] = x;
- coord[YY] = y;
- coord[ZZ] = z;
- dd_cart_coord2pmecoord(dd, coord, coord_pme);
- if (dd->ci[XX] == coord_pme[XX] &&
- dd->ci[YY] == coord_pme[YY] &&
- dd->ci[ZZ] == coord_pme[ZZ])
- {
- ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
- }
- }
- else
- {
- /* The slab corresponds to the nodeid in the PME group */
- if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
- {
- ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
- }
- }
- }
- }
+ ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
+ ddpme->pp_max[slab] = 0;
}
- return ddranks;
-}
-
-static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
-{
- gmx_bool bReceive = TRUE;
-
- if (cr->npmenodes < dd->nnodes)
+ for (i = 0; i < dd->nnodes; i++)
{
- gmx_domdec_comm_t *comm = dd->comm;
- if (comm->bCartesianPP_PME)
+ ddindex2xyz(dd->nc, i, xyz);
+ /* For y only use our y/z slab.
+ * This assumes that the PME x grid size matches the DD grid size.
+ */
+ if (dimind == 0 || xyz[XX] == dd->ci[XX])
{
-#if GMX_MPI
- int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
- ivec coords;
- MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
- coords[comm->cartpmedim]++;
- if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
+ pmeindex = ddindex2pmeindex(dd, i);
+ if (dimind == 0)
{
- int rank;
- MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
- if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
- {
- /* This is not the last PP node for pmenode */
- bReceive = FALSE;
- }
+ slab = pmeindex/nso;
}
-#else
- GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
-#endif
- }
- else
- {
- int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
- if (cr->sim_nodeid+1 < cr->nnodes &&
- dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
+ else
{
- /* This is not the last PP node for pmenode */
- bReceive = FALSE;
+ slab = pmeindex % ddpme->nslab;
}
+ ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
+ ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
}
}
- return bReceive;
-}
-
-static void set_zones_ncg_home(gmx_domdec_t *dd)
-{
- gmx_domdec_zones_t *zones;
- int i;
-
- zones = &dd->comm->zones;
-
- zones->cg_range[0] = 0;
- for (i = 1; i < zones->n+1; i++)
- {
- zones->cg_range[i] = dd->ncg_home;
- }
- /* zone_ncg1[0] should always be equal to ncg_home */
- dd->comm->zone_ncg1[0] = dd->ncg_home;
-}
-
-static void rebuild_cgindex(gmx_domdec_t *dd,
- const int *gcgs_index, const t_state *state)
-{
- int * gmx_restrict dd_cg_gl = dd->index_gl;
- int * gmx_restrict cgindex = dd->cgindex;
- int nat = 0;
-
- /* Copy back the global charge group indices from state
- * and rebuild the local charge group to atom index.
- */
- cgindex[0] = nat;
- for (unsigned int i = 0; i < state->cg_gl.size(); i++)
- {
- cgindex[i] = nat;
- int cg_gl = state->cg_gl[i];
- dd_cg_gl[i] = cg_gl;
- nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
- }
- cgindex[state->cg_gl.size()] = nat;
-
- dd->ncg_home = state->cg_gl.size();
- dd->nat_home = nat;
-
- set_zones_ncg_home(dd);
-}
-
-static int ddcginfo(const cginfo_mb_t *cginfo_mb, int cg)
-{
- while (cg >= cginfo_mb->cg_end)
- {
- cginfo_mb++;
- }
-
- return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
-}
-
-static void dd_set_cginfo(int *index_gl, int cg0, int cg1,
- t_forcerec *fr, char *bLocalCG)
-{
- cginfo_mb_t *cginfo_mb;
- int *cginfo;
- int cg;
-
- if (fr != nullptr)
- {
- cginfo_mb = fr->cginfo_mb;
- cginfo = fr->cginfo;
-
- for (cg = cg0; cg < cg1; cg++)
- {
- cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
- }
- }
-
- if (bLocalCG != nullptr)
- {
- for (cg = cg0; cg < cg1; cg++)
- {
- bLocalCG[index_gl[cg]] = TRUE;
- }
- }
+ set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
}
-static void make_dd_indices(gmx_domdec_t *dd,
- const int *gcgs_index, int cg_start)
+int dd_pme_maxshift_x(const gmx_domdec_t *dd)
{
- int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
- int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
- gmx_bool bCGs;
-
- if (dd->nat_tot > dd->gatindex_nalloc)
- {
- dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
- srenew(dd->gatindex, dd->gatindex_nalloc);
- }
-
- nzone = dd->comm->zones.n;
- zone2cg = dd->comm->zones.cg_range;
- zone_ncg1 = dd->comm->zone_ncg1;
- index_gl = dd->index_gl;
- gatindex = dd->gatindex;
- bCGs = dd->comm->bCGs;
-
- if (zone2cg[1] != dd->ncg_home)
+ if (dd->comm->ddpme[0].dim == XX)
{
- gmx_incons("dd->ncg_zone is not up to date");
+ return dd->comm->ddpme[0].maxshift;
}
-
- /* Make the local to global and global to local atom index */
- a = dd->cgindex[cg_start];
- for (zone = 0; zone < nzone; zone++)
+ else
{
- if (zone == 0)
- {
- cg0 = cg_start;
- }
- else
- {
- cg0 = zone2cg[zone];
- }
- cg1 = zone2cg[zone+1];
- cg1_p1 = cg0 + zone_ncg1[zone];
-
- for (cg = cg0; cg < cg1; cg++)
- {
- zone1 = zone;
- if (cg >= cg1_p1)
- {
- /* Signal that this cg is from more than one pulse away */
- zone1 += nzone;
- }
- cg_gl = index_gl[cg];
- if (bCGs)
- {
- for (a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
- {
- gatindex[a] = a_gl;
- ga2la_set(dd->ga2la, a_gl, a, zone1);
- a++;
- }
- }
- else
- {
- gatindex[a] = cg_gl;
- ga2la_set(dd->ga2la, cg_gl, a, zone1);
- a++;
- }
- }
+ return 0;
}
}
-static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
- const char *where)
+int dd_pme_maxshift_y(const gmx_domdec_t *dd)
{
- int i, ngl, nerr;
-
- nerr = 0;
- if (bLocalCG == nullptr)
- {
- return nerr;
- }
- for (i = 0; i < dd->ncg_tot; i++)
+ if (dd->comm->ddpme[0].dim == YY)
{
- if (!bLocalCG[dd->index_gl[i]])
- {
- fprintf(stderr,
- "DD rank %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i+1, dd->index_gl[i]+1, dd->ncg_home);
- nerr++;
- }
+ return dd->comm->ddpme[0].maxshift;
}
- ngl = 0;
- for (i = 0; i < ncg_sys; i++)
+ else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
{
- if (bLocalCG[i])
- {
- ngl++;
- }
+ return dd->comm->ddpme[1].maxshift;
}
- if (ngl != dd->ncg_tot)
+ else
{
- fprintf(stderr, "DD rank %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n", dd->rank, where, ngl, dd->ncg_tot);
- nerr++;
+ return 0;
}
-
- return nerr;
}
-static void check_index_consistency(gmx_domdec_t *dd,
- int natoms_sys, int ncg_sys,
- const char *where)
+static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
+ gmx_ddbox_t *ddbox,
+ rvec cell_ns_x0, rvec cell_ns_x1,
+ int64_t step)
{
- int nerr, ngl, i, a, cell;
- int *have;
-
- nerr = 0;
-
- if (dd->comm->DD_debug > 1)
- {
- snew(have, natoms_sys);
- for (a = 0; a < dd->nat_tot; a++)
- {
- if (have[dd->gatindex[a]] > 0)
- {
- fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, dd->gatindex[a]+1, have[dd->gatindex[a]], a+1);
- }
- else
- {
- have[dd->gatindex[a]] = a + 1;
- }
- }
- sfree(have);
- }
-
- snew(have, dd->nat_tot);
-
- ngl = 0;
- for (i = 0; i < natoms_sys; i++)
- {
- if (ga2la_get(dd->ga2la, i, &a, &cell))
- {
- if (a >= dd->nat_tot)
- {
- fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, dd->nat_tot);
- nerr++;
- }
- else
- {
- have[a] = 1;
- if (dd->gatindex[a] != i)
- {
- fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->gatindex[a]+1);
- nerr++;
- }
- }
- ngl++;
- }
- }
- if (ngl != dd->nat_tot)
- {
- fprintf(stderr,
- "DD rank %d, %s: %d global atom indices, %d local atoms\n",
- dd->rank, where, ngl, dd->nat_tot);
- }
- for (a = 0; a < dd->nat_tot; a++)
- {
- if (have[a] == 0)
- {
- fprintf(stderr,
- "DD rank %d, %s: local atom %d, global %d has no global index\n",
- dd->rank, where, a+1, dd->gatindex[a]+1);
- }
- }
- sfree(have);
-
- nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
-
- if (nerr > 0)
- {
- gmx_fatal(FARGS, "DD rank %d, %s: %d atom/cg index inconsistencies",
- dd->rank, where, nerr);
- }
-}
-
-static void clear_dd_indices(gmx_domdec_t *dd, int cg_start, int a_start)
-{
- int i;
- char *bLocalCG;
-
- if (a_start == 0)
- {
- /* Clear the whole list without searching */
- ga2la_clear(dd->ga2la);
- }
- else
- {
- for (i = a_start; i < dd->nat_tot; i++)
- {
- ga2la_del(dd->ga2la, dd->gatindex[i]);
- }
- }
-
- bLocalCG = dd->comm->bLocalCG;
- if (bLocalCG)
- {
- for (i = cg_start; i < dd->ncg_tot; i++)
- {
- bLocalCG[dd->index_gl[i]] = FALSE;
- }
- }
-
- dd_clear_local_vsite_indices(dd);
-
- if (dd->constraints)
- {
- dd_clear_local_constraint_indices(dd);
- }
-}
-
-/* This function should be used for moving the domain boudaries during DLB,
- * for obtaining the minimum cell size. It checks the initially set limit
- * comm->cellsize_min, for bonded and initial non-bonded cut-offs,
- * and, possibly, a longer cut-off limit set for PME load balancing.
- */
-static real cellsize_min_dlb(gmx_domdec_comm_t *comm, int dim_ind, int dim)
-{
- real cellsize_min;
-
- cellsize_min = comm->cellsize_min[dim];
-
- if (!comm->bVacDLBNoLimit)
- {
- /* The cut-off might have changed, e.g. by PME load balacning,
- * from the value used to set comm->cellsize_min, so check it.
- */
- cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
-
- if (comm->bPMELoadBalDLBLimits)
- {
- /* Check for the cut-off limit set by the PME load balancing */
- cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
- }
- }
-
- return cellsize_min;
-}
-
-static real grid_jump_limit(gmx_domdec_comm_t *comm, real cutoff,
- int dim_ind)
-{
- real grid_jump_limit;
-
- /* The distance between the boundaries of cells at distance
- * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
- * and by the fact that cells should not be shifted by more than
- * half their size, such that cg's only shift by one cell
- * at redecomposition.
- */
- grid_jump_limit = comm->cellsize_limit;
- if (!comm->bVacDLBNoLimit)
- {
- if (comm->bPMELoadBalDLBLimits)
- {
- cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
- }
- grid_jump_limit = std::max(grid_jump_limit,
- cutoff/comm->cd[dim_ind].np);
- }
-
- return grid_jump_limit;
-}
-
-static gmx_bool check_grid_jump(gmx_int64_t step,
- gmx_domdec_t *dd,
- real cutoff,
- gmx_ddbox_t *ddbox,
- gmx_bool bFatal)
-{
- gmx_domdec_comm_t *comm;
- int d, dim;
- real limit, bfac;
- gmx_bool bInvalid;
-
- bInvalid = FALSE;
-
- comm = dd->comm;
-
- for (d = 1; d < dd->ndim; d++)
- {
- dim = dd->dim[d];
- limit = grid_jump_limit(comm, cutoff, d);
- bfac = ddbox->box_size[dim];
- if (ddbox->tric_dir[dim])
- {
- bfac *= ddbox->skew_fac[dim];
- }
- if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
- (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
- {
- bInvalid = TRUE;
-
- if (bFatal)
- {
- char buf[22];
-
- /* This error should never be triggered under normal
- * circumstances, but you never know ...
- */
- gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
- gmx_step_str(step, buf),
- dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
- }
- }
- }
-
- return bInvalid;
-}
-
-static int dd_load_count(gmx_domdec_comm_t *comm)
-{
- return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
-}
-
-static float dd_force_load(gmx_domdec_comm_t *comm)
-{
- float load;
-
- if (comm->eFlop)
- {
- load = comm->flop;
- if (comm->eFlop > 1)
- {
- load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
- }
- }
- else
- {
- load = comm->cycl[ddCyclF];
- if (comm->cycl_n[ddCyclF] > 1)
- {
- /* Subtract the maximum of the last n cycle counts
- * to get rid of possible high counts due to other sources,
- * for instance system activity, that would otherwise
- * affect the dynamic load balancing.
- */
- load -= comm->cycl_max[ddCyclF];
- }
-
-#if GMX_MPI
- if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
- {
- float gpu_wait, gpu_wait_sum;
-
- gpu_wait = comm->cycl[ddCyclWaitGPU];
- if (comm->cycl_n[ddCyclF] > 1)
- {
- /* We should remove the WaitGPU time of the same MD step
- * as the one with the maximum F time, since the F time
- * and the wait time are not independent.
- * Furthermore, the step for the max F time should be chosen
- * the same on all ranks that share the same GPU.
- * But to keep the code simple, we remove the average instead.
- * The main reason for artificially long times at some steps
- * is spurious CPU activity or MPI time, so we don't expect
- * that changes in the GPU wait time matter a lot here.
- */
- gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
- }
- /* Sum the wait times over the ranks that share the same GPU */
- MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
- comm->mpi_comm_gpu_shared);
- /* Replace the wait time by the average over the ranks */
- load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
- }
-#endif
- }
-
- return load;
-}
-
-static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
-{
- gmx_domdec_comm_t *comm;
- int i;
-
- comm = dd->comm;
-
- snew(*dim_f, dd->nc[dim]+1);
- (*dim_f)[0] = 0;
- for (i = 1; i < dd->nc[dim]; i++)
- {
- if (comm->slb_frac[dim])
- {
- (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
- }
- else
- {
- (*dim_f)[i] = (real)i/(real)dd->nc[dim];
- }
- }
- (*dim_f)[dd->nc[dim]] = 1;
-}
-
-static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
-{
- int pmeindex, slab, nso, i;
- ivec xyz;
-
- if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
- {
- ddpme->dim = YY;
- }
- else
- {
- ddpme->dim = dimind;
- }
- ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
-
- ddpme->nslab = (ddpme->dim == 0 ?
- dd->comm->npmenodes_x :
- dd->comm->npmenodes_y);
-
- if (ddpme->nslab <= 1)
- {
- return;
- }
-
- nso = dd->comm->npmenodes/ddpme->nslab;
- /* Determine for each PME slab the PP location range for dimension dim */
- snew(ddpme->pp_min, ddpme->nslab);
- snew(ddpme->pp_max, ddpme->nslab);
- for (slab = 0; slab < ddpme->nslab; slab++)
- {
- ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
- ddpme->pp_max[slab] = 0;
- }
- for (i = 0; i < dd->nnodes; i++)
- {
- ddindex2xyz(dd->nc, i, xyz);
- /* For y only use our y/z slab.
- * This assumes that the PME x grid size matches the DD grid size.
- */
- if (dimind == 0 || xyz[XX] == dd->ci[XX])
- {
- pmeindex = ddindex2pmeindex(dd, i);
- if (dimind == 0)
- {
- slab = pmeindex/nso;
- }
- else
- {
- slab = pmeindex % ddpme->nslab;
- }
- ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
- ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
- }
- }
-
- set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
-}
-
-int dd_pme_maxshift_x(const gmx_domdec_t *dd)
-{
- if (dd->comm->ddpme[0].dim == XX)
- {
- return dd->comm->ddpme[0].maxshift;
- }
- else
- {
- return 0;
- }
-}
-
-int dd_pme_maxshift_y(const gmx_domdec_t *dd)
-{
- if (dd->comm->ddpme[0].dim == YY)
- {
- return dd->comm->ddpme[0].maxshift;
- }
- else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
- {
- return dd->comm->ddpme[1].maxshift;
- }
- else
- {
- return 0;
- }
-}
-
-static void set_pme_maxshift(gmx_domdec_t *dd, gmx_ddpme_t *ddpme,
- gmx_bool bUniform, const gmx_ddbox_t *ddbox,
- const real *cell_f)
-{
- gmx_domdec_comm_t *comm;
- int nc, ns, s;
- int *xmin, *xmax;
- real range, pme_boundary;
- int sh;
-
- comm = dd->comm;
- nc = dd->nc[ddpme->dim];
- ns = ddpme->nslab;
-
- if (!ddpme->dim_match)
- {
- /* PP decomposition is not along dim: the worst situation */
- sh = ns/2;
- }
- else if (ns <= 3 || (bUniform && ns == nc))
- {
- /* The optimal situation */
- sh = 1;
- }
- else
- {
- /* We need to check for all pme nodes which nodes they
- * could possibly need to communicate with.
- */
- xmin = ddpme->pp_min;
- xmax = ddpme->pp_max;
- /* Allow for atoms to be maximally 2/3 times the cut-off
- * out of their DD cell. This is a reasonable balance between
- * between performance and support for most charge-group/cut-off
- * combinations.
- */
- range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
- /* Avoid extra communication when we are exactly at a boundary */
- range *= 0.999;
-
- sh = 1;
- for (s = 0; s < ns; s++)
- {
- /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
- pme_boundary = (real)s/ns;
- while (sh+1 < ns &&
- ((s-(sh+1) >= 0 &&
- cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
- (s-(sh+1) < 0 &&
- cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
- {
- sh++;
- }
- pme_boundary = (real)(s+1)/ns;
- while (sh+1 < ns &&
- ((s+(sh+1) < ns &&
- cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
- (s+(sh+1) >= ns &&
- cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
- {
- sh++;
- }
- }
- }
-
- ddpme->maxshift = sh;
-
- if (debug)
- {
- fprintf(debug, "PME slab communication range for dim %d is %d\n",
- ddpme->dim, ddpme->maxshift);
- }
-}
-
-static void check_box_size(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
-{
- int d, dim;
-
- for (d = 0; d < dd->ndim; d++)
- {
- dim = dd->dim[d];
- if (dim < ddbox->nboundeddim &&
- ddbox->box_size[dim]*ddbox->skew_fac[dim] <
- dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
- {
- gmx_fatal(FARGS, "The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
- dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
- dd->nc[dim], dd->comm->cellsize_limit);
- }
- }
-}
-
-enum {
- setcellsizeslbLOCAL, setcellsizeslbMASTER, setcellsizeslbPULSE_ONLY
-};
-
-/* Set the domain boundaries. Use for static (or no) load balancing,
- * and also for the starting state for dynamic load balancing.
- * setmode determine if and where the boundaries are stored, use enum above.
- * Returns the number communication pulses in npulse.
- */
-static void set_dd_cell_sizes_slb(gmx_domdec_t *dd, const gmx_ddbox_t *ddbox,
- int setmode, ivec npulse)
-{
- gmx_domdec_comm_t *comm;
- int d, j;
- rvec cellsize_min;
- real *cell_x, cell_dx, cellsize;
-
- comm = dd->comm;
-
- for (d = 0; d < DIM; d++)
- {
- cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
- npulse[d] = 1;
- if (dd->nc[d] == 1 || comm->slb_frac[d] == nullptr)
- {
- /* Uniform grid */
- cell_dx = ddbox->box_size[d]/dd->nc[d];
- switch (setmode)
- {
- case setcellsizeslbMASTER:
- for (j = 0; j < dd->nc[d]+1; j++)
- {
- dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
- }
- break;
- case setcellsizeslbLOCAL:
- comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
- comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
- break;
- default:
- break;
- }
- cellsize = cell_dx*ddbox->skew_fac[d];
- while (cellsize*npulse[d] < comm->cutoff)
- {
- npulse[d]++;
- }
- cellsize_min[d] = cellsize;
- }
- else
- {
- /* Statically load balanced grid */
- /* Also when we are not doing a master distribution we determine
- * all cell borders in a loop to obtain identical values
- * to the master distribution case and to determine npulse.
- */
- if (setmode == setcellsizeslbMASTER)
- {
- cell_x = dd->ma->cell_x[d];
- }
- else
- {
- snew(cell_x, dd->nc[d]+1);
- }
- cell_x[0] = ddbox->box0[d];
- for (j = 0; j < dd->nc[d]; j++)
- {
- cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
- cell_x[j+1] = cell_x[j] + cell_dx;
- cellsize = cell_dx*ddbox->skew_fac[d];
- while (cellsize*npulse[d] < comm->cutoff &&
- npulse[d] < dd->nc[d]-1)
- {
- npulse[d]++;
- }
- cellsize_min[d] = std::min(cellsize_min[d], cellsize);
- }
- if (setmode == setcellsizeslbLOCAL)
- {
- comm->cell_x0[d] = cell_x[dd->ci[d]];
- comm->cell_x1[d] = cell_x[dd->ci[d]+1];
- }
- if (setmode != setcellsizeslbMASTER)
- {
- sfree(cell_x);
- }
- }
- /* The following limitation is to avoid that a cell would receive
- * some of its own home charge groups back over the periodic boundary.
- * Double charge groups cause trouble with the global indices.
- */
- if (d < ddbox->npbcdim &&
- dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
- {
- char error_string[STRLEN];
-
- sprintf(error_string,
- "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
- dim2char(d), ddbox->box_size[d], ddbox->skew_fac[d],
- comm->cutoff,
- dd->nc[d], dd->nc[d],
- dd->nnodes > dd->nc[d] ? "cells" : "ranks");
-
- if (setmode == setcellsizeslbLOCAL)
- {
- gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
- error_string);
- }
- else
- {
- gmx_fatal(FARGS, error_string);
- }
- }
- }
-
- if (!isDlbOn(comm))
- {
- copy_rvec(cellsize_min, comm->cellsize_min);
- }
-
- for (d = 0; d < comm->npmedecompdim; d++)
- {
- set_pme_maxshift(dd, &comm->ddpme[d],
- comm->slb_frac[dd->dim[d]] == nullptr, ddbox,
- comm->ddpme[d].slb_dim_f);
- }
-}
-
-
-static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
- int d, int dim, domdec_root_t *root,
- const gmx_ddbox_t *ddbox,
- gmx_bool bUniform, gmx_int64_t step, real cellsize_limit_f, int range[])
-{
- gmx_domdec_comm_t *comm;
- int ncd, i, j, nmin, nmin_old;
- gmx_bool bLimLo, bLimHi;
- real *cell_size;
- real fac, halfway, cellsize_limit_f_i, region_size;
- gmx_bool bPBC, bLastHi = FALSE;
- int nrange[] = {range[0], range[1]};
-
- region_size = root->cell_f[range[1]]-root->cell_f[range[0]];
-
- comm = dd->comm;
-
- ncd = dd->nc[dim];
-
- bPBC = (dim < ddbox->npbcdim);
-
- cell_size = root->buf_ncd;
-
- if (debug)
- {
- fprintf(debug, "enforce_limits: %d %d\n", range[0], range[1]);
- }
-
- /* First we need to check if the scaling does not make cells
- * smaller than the smallest allowed size.
- * We need to do this iteratively, since if a cell is too small,
- * it needs to be enlarged, which makes all the other cells smaller,
- * which could in turn make another cell smaller than allowed.
- */
- for (i = range[0]; i < range[1]; i++)
- {
- root->bCellMin[i] = FALSE;
- }
- nmin = 0;
- do
- {
- nmin_old = nmin;
- /* We need the total for normalization */
- fac = 0;
- for (i = range[0]; i < range[1]; i++)
- {
- if (root->bCellMin[i] == FALSE)
- {
- fac += cell_size[i];
- }
- }
- fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
- /* Determine the cell boundaries */
- for (i = range[0]; i < range[1]; i++)
- {
- if (root->bCellMin[i] == FALSE)
- {
- cell_size[i] *= fac;
- if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
- {
- cellsize_limit_f_i = 0;
- }
- else
- {
- cellsize_limit_f_i = cellsize_limit_f;
- }
- if (cell_size[i] < cellsize_limit_f_i)
- {
- root->bCellMin[i] = TRUE;
- cell_size[i] = cellsize_limit_f_i;
- nmin++;
- }
- }
- root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
- }
- }
- while (nmin > nmin_old);
-
- i = range[1]-1;
- cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
- /* For this check we should not use DD_CELL_MARGIN,
- * but a slightly smaller factor,
- * since rounding could get use below the limit.
- */
- if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
- {
- char buf[22];
- gmx_fatal(FARGS, "step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
- gmx_step_str(step, buf),
- dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
- ncd, comm->cellsize_min[dim]);
- }
-
- root->bLimited = (nmin > 0) || (range[0] > 0) || (range[1] < ncd);
-
- if (!bUniform)
- {
- /* Check if the boundary did not displace more than halfway
- * each of the cells it bounds, as this could cause problems,
- * especially when the differences between cell sizes are large.
- * If changes are applied, they will not make cells smaller
- * than the cut-off, as we check all the boundaries which
- * might be affected by a change and if the old state was ok,
- * the cells will at most be shrunk back to their old size.
- */
- for (i = range[0]+1; i < range[1]; i++)
- {
- halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
- if (root->cell_f[i] < halfway)
- {
- root->cell_f[i] = halfway;
- /* Check if the change also causes shifts of the next boundaries */
- for (j = i+1; j < range[1]; j++)
- {
- if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
- {
- root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
- }
- }
- }
- halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
- if (root->cell_f[i] > halfway)
- {
- root->cell_f[i] = halfway;
- /* Check if the change also causes shifts of the next boundaries */
- for (j = i-1; j >= range[0]+1; j--)
- {
- if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
- {
- root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
- }
- }
- }
- }
- }
-
- /* nrange is defined as [lower, upper) range for new call to enforce_limits */
- /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
- * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
- * for a and b nrange is used */
- if (d > 0)
- {
- /* Take care of the staggering of the cell boundaries */
- if (bUniform)
- {
- for (i = range[0]; i < range[1]; i++)
- {
- root->cell_f_max0[i] = root->cell_f[i];
- root->cell_f_min1[i] = root->cell_f[i+1];
- }
- }
- else
- {
- for (i = range[0]+1; i < range[1]; i++)
- {
- bLimLo = (root->cell_f[i] < root->bound_min[i]);
- bLimHi = (root->cell_f[i] > root->bound_max[i]);
- if (bLimLo && bLimHi)
- {
- /* Both limits violated, try the best we can */
- /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
- root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
- nrange[0] = range[0];
- nrange[1] = i;
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
-
- nrange[0] = i;
- nrange[1] = range[1];
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
-
- return;
- }
- else if (bLimLo)
- {
- /* root->cell_f[i] = root->bound_min[i]; */
- nrange[1] = i; /* only store violation location. There could be a LimLo violation following with an higher index */
- bLastHi = FALSE;
- }
- else if (bLimHi && !bLastHi)
- {
- bLastHi = TRUE;
- if (nrange[1] < range[1]) /* found a LimLo before */
- {
- root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
- nrange[0] = nrange[1];
- }
- root->cell_f[i] = root->bound_max[i];
- nrange[1] = i;
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
- nrange[0] = i;
- nrange[1] = range[1];
- }
- }
- if (nrange[1] < range[1]) /* found last a LimLo */
- {
- root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
- nrange[0] = nrange[1];
- nrange[1] = range[1];
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
- }
- else if (nrange[0] > range[0]) /* found at least one LimHi */
- {
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
- }
- }
- }
-}
-
-
-static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
- int d, int dim, domdec_root_t *root,
- const gmx_ddbox_t *ddbox,
- gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_int64_t step)
-{
- gmx_domdec_comm_t *comm;
- int ncd, d1, i, pos;
- real *cell_size;
- real load_aver, load_i, imbalance, change, change_max, sc;
- real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
- real change_limit;
- real relax = 0.5;
- gmx_bool bPBC;
- int range[] = { 0, 0 };
-
- comm = dd->comm;
-
- /* Convert the maximum change from the input percentage to a fraction */
- change_limit = comm->dlb_scale_lim*0.01;
-
- ncd = dd->nc[dim];
-
- bPBC = (dim < ddbox->npbcdim);
-
- cell_size = root->buf_ncd;
-
- /* Store the original boundaries */
- for (i = 0; i < ncd+1; i++)
- {
- root->old_cell_f[i] = root->cell_f[i];
- }
- if (bUniform)
- {
- for (i = 0; i < ncd; i++)
- {
- cell_size[i] = 1.0/ncd;
- }
- }
- else if (dd_load_count(comm) > 0)
- {
- load_aver = comm->load[d].sum_m/ncd;
- change_max = 0;
- for (i = 0; i < ncd; i++)
- {
- /* Determine the relative imbalance of cell i */
- load_i = comm->load[d].load[i*comm->load[d].nload+2];
- imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
- /* Determine the change of the cell size using underrelaxation */
- change = -relax*imbalance;
- change_max = std::max(change_max, std::max(change, -change));
- }
- /* Limit the amount of scaling.
- * We need to use the same rescaling for all cells in one row,
- * otherwise the load balancing might not converge.
- */
- sc = relax;
- if (change_max > change_limit)
- {
- sc *= change_limit/change_max;
- }
- for (i = 0; i < ncd; i++)
- {
- /* Determine the relative imbalance of cell i */
- load_i = comm->load[d].load[i*comm->load[d].nload+2];
- imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
- /* Determine the change of the cell size using underrelaxation */
- change = -sc*imbalance;
- cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
- }
- }
-
- cellsize_limit_f = cellsize_min_dlb(comm, d, dim)/ddbox->box_size[dim];
- cellsize_limit_f *= DD_CELL_MARGIN;
- dist_min_f_hard = grid_jump_limit(comm, comm->cutoff, d)/ddbox->box_size[dim];
- dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
- if (ddbox->tric_dir[dim])
- {
- cellsize_limit_f /= ddbox->skew_fac[dim];
- dist_min_f /= ddbox->skew_fac[dim];
- }
- if (bDynamicBox && d > 0)
- {
- dist_min_f *= DD_PRES_SCALE_MARGIN;
- }
- if (d > 0 && !bUniform)
- {
- /* Make sure that the grid is not shifted too much */
- for (i = 1; i < ncd; i++)
- {
- if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
- {
- gmx_incons("Inconsistent DD boundary staggering limits!");
- }
- root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
- space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
- if (space > 0)
- {
- root->bound_min[i] += 0.5*space;
- }
- root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
- space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
- if (space < 0)
- {
- root->bound_max[i] += 0.5*space;
- }
- if (debug)
- {
- fprintf(debug,
- "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
- d, i,
- root->cell_f_max0[i-1] + dist_min_f,
- root->bound_min[i], root->cell_f[i], root->bound_max[i],
- root->cell_f_min1[i] - dist_min_f);
- }
- }
- }
- range[1] = ncd;
- root->cell_f[0] = 0;
- root->cell_f[ncd] = 1;
- dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
-
-
- /* After the checks above, the cells should obey the cut-off
- * restrictions, but it does not hurt to check.
- */
- for (i = 0; i < ncd; i++)
- {
- if (debug)
- {
- fprintf(debug, "Relative bounds dim %d cell %d: %f %f\n",
- dim, i, root->cell_f[i], root->cell_f[i+1]);
- }
-
- if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
- root->cell_f[i+1] - root->cell_f[i] <
- cellsize_limit_f/DD_CELL_MARGIN)
- {
- char buf[22];
- fprintf(stderr,
- "\nWARNING step %s: direction %c, cell %d too small: %f\n",
- gmx_step_str(step, buf), dim2char(dim), i,
- (root->cell_f[i+1] - root->cell_f[i])
- *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
- }
- }
-
- pos = ncd + 1;
- /* Store the cell boundaries of the lower dimensions at the end */
- for (d1 = 0; d1 < d; d1++)
- {
- root->cell_f[pos++] = comm->cell_f0[d1];
- root->cell_f[pos++] = comm->cell_f1[d1];
- }
-
- if (d < comm->npmedecompdim)
- {
- /* The master determines the maximum shift for
- * the coordinate communication between separate PME nodes.
- */
- set_pme_maxshift(dd, &comm->ddpme[d], bUniform, ddbox, root->cell_f);
- }
- root->cell_f[pos++] = comm->ddpme[0].maxshift;
- if (d >= 1)
- {
- root->cell_f[pos++] = comm->ddpme[1].maxshift;
- }
-}
-
-static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
- const gmx_ddbox_t *ddbox,
- int dimind)
-{
- gmx_domdec_comm_t *comm;
- int dim;
-
- comm = dd->comm;
-
- /* Set the cell dimensions */
- dim = dd->dim[dimind];
- comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
- comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
- if (dim >= ddbox->nboundeddim)
- {
- comm->cell_x0[dim] += ddbox->box0[dim];
- comm->cell_x1[dim] += ddbox->box0[dim];
- }
-}
-
-static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
- int d, int dim, real *cell_f_row,
- const gmx_ddbox_t *ddbox)
-{
- gmx_domdec_comm_t *comm;
- int d1, pos;
-
- comm = dd->comm;
-
-#if GMX_MPI
- /* Each node would only need to know two fractions,
- * but it is probably cheaper to broadcast the whole array.
- */
- MPI_Bcast(cell_f_row, DD_CELL_F_SIZE(dd, d)*sizeof(real), MPI_BYTE,
- 0, comm->mpi_comm_load[d]);
-#endif
- /* Copy the fractions for this dimension from the buffer */
- comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
- comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
- /* The whole array was communicated, so set the buffer position */
- pos = dd->nc[dim] + 1;
- for (d1 = 0; d1 <= d; d1++)
- {
- if (d1 < d)
- {
- /* Copy the cell fractions of the lower dimensions */
- comm->cell_f0[d1] = cell_f_row[pos++];
- comm->cell_f1[d1] = cell_f_row[pos++];
- }
- relative_to_absolute_cell_bounds(dd, ddbox, d1);
- }
- /* Convert the communicated shift from float to int */
- comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
- if (d >= 1)
- {
- comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
- }
-}
-
-static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
- const gmx_ddbox_t *ddbox,
- gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_int64_t step)
-{
- gmx_domdec_comm_t *comm;
- int d, dim, d1;
- gmx_bool bRowMember, bRowRoot;
- real *cell_f_row;
-
- comm = dd->comm;
-
- for (d = 0; d < dd->ndim; d++)
- {
- dim = dd->dim[d];
- bRowMember = TRUE;
- bRowRoot = TRUE;
- for (d1 = d; d1 < dd->ndim; d1++)
- {
- if (dd->ci[dd->dim[d1]] > 0)
- {
- if (d1 != d)
- {
- bRowMember = FALSE;
- }
- bRowRoot = FALSE;
- }
- }
- if (bRowMember)
- {
- if (bRowRoot)
- {
- set_dd_cell_sizes_dlb_root(dd, d, dim, comm->root[d],
- ddbox, bDynamicBox, bUniform, step);
- cell_f_row = comm->root[d]->cell_f;
- }
- else
- {
- cell_f_row = comm->cell_f_row;
- }
- distribute_dd_cell_sizes_dlb(dd, d, dim, cell_f_row, ddbox);
- }
- }
-}
-
-static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,
- const gmx_ddbox_t *ddbox)
-{
- int d;
-
- /* This function assumes the box is static and should therefore
- * not be called when the box has changed since the last
- * call to dd_partition_system.
- */
- for (d = 0; d < dd->ndim; d++)
- {
- relative_to_absolute_cell_bounds(dd, ddbox, d);
- }
-}
-
-
-
-static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
- const gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
- gmx_wallcycle_t wcycle)
-{
- gmx_domdec_comm_t *comm;
- int dim;
-
- comm = dd->comm;
-
- if (bDoDLB)
- {
- wallcycle_start(wcycle, ewcDDCOMMBOUND);
- set_dd_cell_sizes_dlb_change(dd, ddbox, bDynamicBox, bUniform, step);
- wallcycle_stop(wcycle, ewcDDCOMMBOUND);
- }
- else if (bDynamicBox)
- {
- set_dd_cell_sizes_dlb_nochange(dd, ddbox);
- }
-
- /* Set the dimensions for which no DD is used */
- for (dim = 0; dim < DIM; dim++)
- {
- if (dd->nc[dim] == 1)
- {
- comm->cell_x0[dim] = 0;
- comm->cell_x1[dim] = ddbox->box_size[dim];
- if (dim >= ddbox->nboundeddim)
- {
- comm->cell_x0[dim] += ddbox->box0[dim];
- comm->cell_x1[dim] += ddbox->box0[dim];
- }
- }
- }
-}
-
-static void realloc_comm_ind(gmx_domdec_t *dd, ivec npulse)
-{
- int d, np, i;
- gmx_domdec_comm_dim_t *cd;
-
- for (d = 0; d < dd->ndim; d++)
- {
- cd = &dd->comm->cd[d];
- np = npulse[dd->dim[d]];
- if (np > cd->np_nalloc)
- {
- if (debug)
- {
- fprintf(debug, "(Re)allocing cd for %c to %d pulses\n",
- dim2char(dd->dim[d]), np);
- }
- if (DDMASTER(dd) && cd->np_nalloc > 0)
- {
- fprintf(stderr, "\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n", dim2char(dd->dim[d]), np);
- }
- srenew(cd->ind, np);
- for (i = cd->np_nalloc; i < np; i++)
- {
- cd->ind[i].index = nullptr;
- cd->ind[i].nalloc = 0;
- }
- cd->np_nalloc = np;
- }
- cd->np = np;
- }
-}
-
-
-static void set_dd_cell_sizes(gmx_domdec_t *dd,
- gmx_ddbox_t *ddbox, gmx_bool bDynamicBox,
- gmx_bool bUniform, gmx_bool bDoDLB, gmx_int64_t step,
- gmx_wallcycle_t wcycle)
-{
- gmx_domdec_comm_t *comm;
- int d;
- ivec npulse;
-
- comm = dd->comm;
-
- /* Copy the old cell boundaries for the cg displacement check */
- copy_rvec(comm->cell_x0, comm->old_cell_x0);
- copy_rvec(comm->cell_x1, comm->old_cell_x1);
-
- if (isDlbOn(comm))
- {
- if (DDMASTER(dd))
- {
- check_box_size(dd, ddbox);
- }
- set_dd_cell_sizes_dlb(dd, ddbox, bDynamicBox, bUniform, bDoDLB, step, wcycle);
- }
- else
- {
- set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbLOCAL, npulse);
- realloc_comm_ind(dd, npulse);
- }
-
- if (debug)
- {
- for (d = 0; d < DIM; d++)
- {
- fprintf(debug, "cell_x[%d] %f - %f skew_fac %f\n",
- d, comm->cell_x0[d], comm->cell_x1[d], ddbox->skew_fac[d]);
- }
- }
-}
-
-static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
- gmx_ddbox_t *ddbox,
- rvec cell_ns_x0, rvec cell_ns_x1,
- gmx_int64_t step)
-{
- gmx_domdec_comm_t *comm;
- int dim_ind, dim;
-
- comm = dd->comm;
-
- for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
- {
- dim = dd->dim[dim_ind];
-
- /* Without PBC we don't have restrictions on the outer cells */
- if (!(dim >= ddbox->npbcdim &&
- (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
- isDlbOn(comm) &&
- (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
- comm->cellsize_min[dim])
- {
- char buf[22];
- gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
- gmx_step_str(step, buf), dim2char(dim),
- comm->cell_x1[dim] - comm->cell_x0[dim],
- ddbox->skew_fac[dim],
- dd->comm->cellsize_min[dim],
- dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
- }
- }
-
- if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
- {
- /* Communicate the boundaries and update cell_ns_x0/1 */
- dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
- if (isDlbOn(dd->comm) && dd->ndim > 1)
- {
- check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
- }
- }
-}
-
-static void make_tric_corr_matrix(int npbcdim, const matrix box, matrix tcm)
-{
- if (YY < npbcdim)
- {
- tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
- }
- else
- {
- tcm[YY][XX] = 0;
- }
- if (ZZ < npbcdim)
- {
- tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
- tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
- }
- else
- {
- tcm[ZZ][XX] = 0;
- tcm[ZZ][YY] = 0;
- }
-}
-
-static void check_screw_box(const matrix box)
-{
- /* Mathematical limitation */
- if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
- {
- gmx_fatal(FARGS, "With screw pbc the unit cell can not have non-zero off-diagonal x-components");
- }
-
- /* Limitation due to the asymmetry of the eighth shell method */
- if (box[ZZ][YY] != 0)
- {
- gmx_fatal(FARGS, "pbc=screw with non-zero box_zy is not supported");
- }
-}
-
-static void distribute_cg(FILE *fplog,
- const matrix box, ivec tric_dir, t_block *cgs, rvec pos[],
- gmx_domdec_t *dd)
-{
- gmx_domdec_master_t *ma;
- int **tmp_ind = nullptr, *tmp_nalloc = nullptr;
- int i, icg, j, k, k0, k1, d;
- matrix tcm;
- rvec cg_cm;
- ivec ind;
- real nrcg, inv_ncg, pos_d;
- int *cgindex;
- gmx_bool bScrew;
-
- ma = dd->ma;
-
- snew(tmp_nalloc, dd->nnodes);
- snew(tmp_ind, dd->nnodes);
- for (i = 0; i < dd->nnodes; i++)
- {
- tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
- snew(tmp_ind[i], tmp_nalloc[i]);
- }
-
- /* Clear the count */
- for (i = 0; i < dd->nnodes; i++)
- {
- ma->ncg[i] = 0;
- ma->nat[i] = 0;
- }
-
- make_tric_corr_matrix(dd->npbcdim, box, tcm);
-
- cgindex = cgs->index;
-
- /* Compute the center of geometry for all charge groups */
- for (icg = 0; icg < cgs->nr; icg++)
- {
- k0 = cgindex[icg];
- k1 = cgindex[icg+1];
- nrcg = k1 - k0;
- if (nrcg == 1)
- {
- copy_rvec(pos[k0], cg_cm);
- }
- else
- {
- inv_ncg = 1.0/nrcg;
-
- clear_rvec(cg_cm);
- for (k = k0; (k < k1); k++)
- {
- rvec_inc(cg_cm, pos[k]);
- }
- for (d = 0; (d < DIM); d++)
- {
- cg_cm[d] *= inv_ncg;
- }
- }
- /* Put the charge group in the box and determine the cell index */
- for (d = DIM-1; d >= 0; d--)
- {
- pos_d = cg_cm[d];
- if (d < dd->npbcdim)
- {
- bScrew = (dd->bScrewPBC && d == XX);
- if (tric_dir[d] && dd->nc[d] > 1)
- {
- /* Use triclinic coordintates for this dimension */
- for (j = d+1; j < DIM; j++)
- {
- pos_d += cg_cm[j]*tcm[j][d];
- }
- }
- while (pos_d >= box[d][d])
- {
- pos_d -= box[d][d];
- rvec_dec(cg_cm, box[d]);
- if (bScrew)
- {
- cg_cm[YY] = box[YY][YY] - cg_cm[YY];
- cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
- }
- for (k = k0; (k < k1); k++)
- {
- rvec_dec(pos[k], box[d]);
- if (bScrew)
- {
- pos[k][YY] = box[YY][YY] - pos[k][YY];
- pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
- }
- }
- }
- while (pos_d < 0)
- {
- pos_d += box[d][d];
- rvec_inc(cg_cm, box[d]);
- if (bScrew)
- {
- cg_cm[YY] = box[YY][YY] - cg_cm[YY];
- cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
- }
- for (k = k0; (k < k1); k++)
- {
- rvec_inc(pos[k], box[d]);
- if (bScrew)
- {
- pos[k][YY] = box[YY][YY] - pos[k][YY];
- pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
- }
- }
- }
- }
- /* This could be done more efficiently */
- ind[d] = 0;
- while (ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
- {
- ind[d]++;
- }
- }
- i = dd_index(dd->nc, ind);
- if (ma->ncg[i] == tmp_nalloc[i])
- {
- tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
- srenew(tmp_ind[i], tmp_nalloc[i]);
- }
- tmp_ind[i][ma->ncg[i]] = icg;
- ma->ncg[i]++;
- ma->nat[i] += cgindex[icg+1] - cgindex[icg];
- }
-
- k1 = 0;
- for (i = 0; i < dd->nnodes; i++)
- {
- ma->index[i] = k1;
- for (k = 0; k < ma->ncg[i]; k++)
- {
- ma->cg[k1++] = tmp_ind[i][k];
- }
- }
- ma->index[dd->nnodes] = k1;
-
- for (i = 0; i < dd->nnodes; i++)
- {
- sfree(tmp_ind[i]);
- }
- sfree(tmp_ind);
- sfree(tmp_nalloc);
-
- if (fplog)
- {
- // Use double for the sums to avoid natoms^2 overflowing
- // (65537^2 > 2^32)
- int nat_sum, nat_min, nat_max;
- double nat2_sum;
-
- nat_sum = 0;
- nat2_sum = 0;
- nat_min = ma->nat[0];
- nat_max = ma->nat[0];
- for (i = 0; i < dd->nnodes; i++)
- {
- nat_sum += ma->nat[i];
- // cast to double to avoid integer overflows when squaring
- nat2_sum += gmx::square(static_cast<double>(ma->nat[i]));
- nat_min = std::min(nat_min, ma->nat[i]);
- nat_max = std::max(nat_max, ma->nat[i]);
- }
- nat_sum /= dd->nnodes;
- nat2_sum /= dd->nnodes;
-
- fprintf(fplog, "Atom distribution over %d domains: av %d stddev %d min %d max %d\n",
- dd->nnodes,
- nat_sum,
- static_cast<int>(std::sqrt(nat2_sum - gmx::square(static_cast<double>(nat_sum)) + 0.5)),
- nat_min, nat_max);
- }
-}
-
-static void get_cg_distribution(FILE *fplog, gmx_domdec_t *dd,
- t_block *cgs, const matrix box, gmx_ddbox_t *ddbox,
- rvec pos[])
-{
- gmx_domdec_master_t *ma = nullptr;
- ivec npulse;
- int i, cg_gl;
- int *ibuf, buf2[2] = { 0, 0 };
- gmx_bool bMaster = DDMASTER(dd);
-
- if (bMaster)
- {
- ma = dd->ma;
-
- if (dd->bScrewPBC)
- {
- check_screw_box(box);
- }
-
- set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbMASTER, npulse);
-
- distribute_cg(fplog, box, ddbox->tric_dir, cgs, pos, dd);
- for (i = 0; i < dd->nnodes; i++)
- {
- ma->ibuf[2*i] = ma->ncg[i];
- ma->ibuf[2*i+1] = ma->nat[i];
- }
- ibuf = ma->ibuf;
- }
- else
- {
- ibuf = nullptr;
- }
- dd_scatter(dd, 2*sizeof(int), ibuf, buf2);
-
- dd->ncg_home = buf2[0];
- dd->nat_home = buf2[1];
- dd->ncg_tot = dd->ncg_home;
- dd->nat_tot = dd->nat_home;
- if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
- {
- dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
- srenew(dd->index_gl, dd->cg_nalloc);
- srenew(dd->cgindex, dd->cg_nalloc+1);
- }
- if (bMaster)
- {
- for (i = 0; i < dd->nnodes; i++)
- {
- ma->ibuf[i] = ma->ncg[i]*sizeof(int);
- ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
- }
- }
-
- dd_scatterv(dd,
- bMaster ? ma->ibuf : nullptr,
- bMaster ? ma->ibuf+dd->nnodes : nullptr,
- bMaster ? ma->cg : nullptr,
- dd->ncg_home*sizeof(int), dd->index_gl);
-
- /* Determine the home charge group sizes */
- dd->cgindex[0] = 0;
- for (i = 0; i < dd->ncg_home; i++)
- {
- cg_gl = dd->index_gl[i];
- dd->cgindex[i+1] =
- dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
- }
-
- if (debug)
- {
- fprintf(debug, "Home charge groups:\n");
- for (i = 0; i < dd->ncg_home; i++)
- {
- fprintf(debug, " %d", dd->index_gl[i]);
- if (i % 10 == 9)
- {
- fprintf(debug, "\n");
- }
- }
- fprintf(debug, "\n");
- }
-}
-
-static int compact_and_copy_vec_at(int ncg, int *move,
- int *cgindex,
- int nvec, int vec,
- rvec *src, gmx_domdec_comm_t *comm,
- gmx_bool bCompact)
-{
- int m, icg, i, i0, i1, nrcg;
- int home_pos;
- int pos_vec[DIM*2];
-
- home_pos = 0;
-
- for (m = 0; m < DIM*2; m++)
- {
- pos_vec[m] = 0;
- }
-
- i0 = 0;
- for (icg = 0; icg < ncg; icg++)
- {
- i1 = cgindex[icg+1];
- m = move[icg];
- if (m == -1)
- {
- if (bCompact)
- {
- /* Compact the home array in place */
- for (i = i0; i < i1; i++)
- {
- copy_rvec(src[i], src[home_pos++]);
- }
- }
- }
- else
- {
- /* Copy to the communication buffer */
- nrcg = i1 - i0;
- pos_vec[m] += 1 + vec*nrcg;
- for (i = i0; i < i1; i++)
- {
- copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
- }
- pos_vec[m] += (nvec - vec - 1)*nrcg;
- }
- if (!bCompact)
- {
- home_pos += i1 - i0;
- }
- i0 = i1;
- }
-
- return home_pos;
-}
-
-static int compact_and_copy_vec_cg(int ncg, int *move,
- int *cgindex,
- int nvec, rvec *src, gmx_domdec_comm_t *comm,
- gmx_bool bCompact)
-{
- int m, icg, i0, i1, nrcg;
- int home_pos;
- int pos_vec[DIM*2];
-
- home_pos = 0;
-
- for (m = 0; m < DIM*2; m++)
- {
- pos_vec[m] = 0;
- }
-
- i0 = 0;
- for (icg = 0; icg < ncg; icg++)
- {
- i1 = cgindex[icg+1];
- m = move[icg];
- if (m == -1)
- {
- if (bCompact)
- {
- /* Compact the home array in place */
- copy_rvec(src[icg], src[home_pos++]);
- }
- }
- else
- {
- nrcg = i1 - i0;
- /* Copy to the communication buffer */
- copy_rvec(src[icg], comm->cgcm_state[m][pos_vec[m]]);
- pos_vec[m] += 1 + nrcg*nvec;
- }
- i0 = i1;
- }
- if (!bCompact)
- {
- home_pos = ncg;
- }
-
- return home_pos;
-}
-
-static int compact_ind(int ncg, int *move,
- int *index_gl, int *cgindex,
- int *gatindex,
- gmx_ga2la_t *ga2la, char *bLocalCG,
- int *cginfo)
-{
- int cg, nat, a0, a1, a, a_gl;
- int home_pos;
-
- home_pos = 0;
- nat = 0;
- for (cg = 0; cg < ncg; cg++)
- {
- a0 = cgindex[cg];
- a1 = cgindex[cg+1];
- if (move[cg] == -1)
- {
- /* Compact the home arrays in place.
- * Anything that can be done here avoids access to global arrays.
- */
- cgindex[home_pos] = nat;
- for (a = a0; a < a1; a++)
- {
- a_gl = gatindex[a];
- gatindex[nat] = a_gl;
- /* The cell number stays 0, so we don't need to set it */
- ga2la_change_la(ga2la, a_gl, nat);
- nat++;
- }
- index_gl[home_pos] = index_gl[cg];
- cginfo[home_pos] = cginfo[cg];
- /* The charge group remains local, so bLocalCG does not change */
- home_pos++;
- }
- else
- {
- /* Clear the global indices */
- for (a = a0; a < a1; a++)
- {
- ga2la_del(ga2la, gatindex[a]);
- }
- if (bLocalCG)
- {
- bLocalCG[index_gl[cg]] = FALSE;
- }
- }
- }
- cgindex[home_pos] = nat;
-
- return home_pos;
-}
-
-static void clear_and_mark_ind(int ncg, int *move,
- int *index_gl, int *cgindex, int *gatindex,
- gmx_ga2la_t *ga2la, char *bLocalCG,
- int *cell_index)
-{
- int cg, a0, a1, a;
-
- for (cg = 0; cg < ncg; cg++)
- {
- if (move[cg] >= 0)
- {
- a0 = cgindex[cg];
- a1 = cgindex[cg+1];
- /* Clear the global indices */
- for (a = a0; a < a1; a++)
- {
- ga2la_del(ga2la, gatindex[a]);
- }
- if (bLocalCG)
- {
- bLocalCG[index_gl[cg]] = FALSE;
- }
- /* Signal that this cg has moved using the ns cell index.
- * Here we set it to -1. fill_grid will change it
- * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
- */
- cell_index[cg] = -1;
- }
- }
-}
-
-static void print_cg_move(FILE *fplog,
- gmx_domdec_t *dd,
- gmx_int64_t step, int cg, int dim, int dir,
- gmx_bool bHaveCgcmOld, real limitd,
- rvec cm_old, rvec cm_new, real pos_d)
-{
- gmx_domdec_comm_t *comm;
- char buf[22];
-
- comm = dd->comm;
-
- fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
- if (limitd > 0)
- {
- fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
- dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
- ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
- }
- else
- {
- /* We don't have a limiting distance available: don't print it */
- fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
- dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
- ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
- }
- fprintf(fplog, "distance out of cell %f\n",
- dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
- if (bHaveCgcmOld)
- {
- fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
- cm_old[XX], cm_old[YY], cm_old[ZZ]);
- }
- fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
- cm_new[XX], cm_new[YY], cm_new[ZZ]);
- fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
- dim2char(dim),
- comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
- fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
- dim2char(dim),
- comm->cell_x0[dim], comm->cell_x1[dim]);
-}
-
-static void cg_move_error(FILE *fplog,
- gmx_domdec_t *dd,
- gmx_int64_t step, int cg, int dim, int dir,
- gmx_bool bHaveCgcmOld, real limitd,
- rvec cm_old, rvec cm_new, real pos_d)
-{
- if (fplog)
- {
- print_cg_move(fplog, dd, step, cg, dim, dir,
- bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
- }
- print_cg_move(stderr, dd, step, cg, dim, dir,
- bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
- gmx_fatal(FARGS,
- "%s moved too far between two domain decomposition steps\n"
- "This usually means that your system is not well equilibrated",
- dd->comm->bCGs ? "A charge group" : "An atom");
-}
-
-static void rotate_state_atom(t_state *state, int a)
-{
- if (state->flags & (1 << estX))
- {
- /* Rotate the complete state; for a rectangular box only */
- state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
- state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
- }
- if (state->flags & (1 << estV))
- {
- state->v[a][YY] = -state->v[a][YY];
- state->v[a][ZZ] = -state->v[a][ZZ];
- }
- if (state->flags & (1 << estCGP))
- {
- state->cg_p[a][YY] = -state->cg_p[a][YY];
- state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
- }
-}
-
-static int *get_moved(gmx_domdec_comm_t *comm, int natoms)
-{
- if (natoms > comm->moved_nalloc)
- {
- /* Contents should be preserved here */
- comm->moved_nalloc = over_alloc_dd(natoms);
- srenew(comm->moved, comm->moved_nalloc);
- }
-
- return comm->moved;
-}
-
-static void calc_cg_move(FILE *fplog, gmx_int64_t step,
- gmx_domdec_t *dd,
- t_state *state,
- ivec tric_dir, matrix tcm,
- rvec cell_x0, rvec cell_x1,
- rvec limitd, rvec limit0, rvec limit1,
- const int *cgindex,
- int cg_start, int cg_end,
- rvec *cg_cm,
- int *move)
-{
- int npbcdim;
- int cg, k, k0, k1, d, dim, d2;
- int mc, nrcg;
- int flag;
- gmx_bool bScrew;
- ivec dev;
- real inv_ncg, pos_d;
- rvec cm_new;
-
- npbcdim = dd->npbcdim;
-
- for (cg = cg_start; cg < cg_end; cg++)
- {
- k0 = cgindex[cg];
- k1 = cgindex[cg+1];
- nrcg = k1 - k0;
- if (nrcg == 1)
- {
- copy_rvec(state->x[k0], cm_new);
- }
- else
- {
- inv_ncg = 1.0/nrcg;
-
- clear_rvec(cm_new);
- for (k = k0; (k < k1); k++)
- {
- rvec_inc(cm_new, state->x[k]);
- }
- for (d = 0; (d < DIM); d++)
- {
- cm_new[d] = inv_ncg*cm_new[d];
- }
- }
-
- clear_ivec(dev);
- /* Do pbc and check DD cell boundary crossings */
- for (d = DIM-1; d >= 0; d--)
- {
- if (dd->nc[d] > 1)
- {
- bScrew = (dd->bScrewPBC && d == XX);
- /* Determine the location of this cg in lattice coordinates */
- pos_d = cm_new[d];
- if (tric_dir[d])
- {
- for (d2 = d+1; d2 < DIM; d2++)
- {
- pos_d += cm_new[d2]*tcm[d2][d];
- }
- }
- /* Put the charge group in the triclinic unit-cell */
- if (pos_d >= cell_x1[d])
- {
- if (pos_d >= limit1[d])
- {
- cg_move_error(fplog, dd, step, cg, d, 1,
- cg_cm != as_rvec_array(state->x.data()), limitd[d],
- cg_cm[cg], cm_new, pos_d);
- }
- dev[d] = 1;
- if (dd->ci[d] == dd->nc[d] - 1)
- {
- rvec_dec(cm_new, state->box[d]);
- if (bScrew)
- {
- cm_new[YY] = state->box[YY][YY] - cm_new[YY];
- cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
- }
- for (k = k0; (k < k1); k++)
- {
- rvec_dec(state->x[k], state->box[d]);
- if (bScrew)
- {
- rotate_state_atom(state, k);
- }
- }
- }
- }
- else if (pos_d < cell_x0[d])
- {
- if (pos_d < limit0[d])
- {
- cg_move_error(fplog, dd, step, cg, d, -1,
- cg_cm != as_rvec_array(state->x.data()), limitd[d],
- cg_cm[cg], cm_new, pos_d);
- }
- dev[d] = -1;
- if (dd->ci[d] == 0)
- {
- rvec_inc(cm_new, state->box[d]);
- if (bScrew)
- {
- cm_new[YY] = state->box[YY][YY] - cm_new[YY];
- cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
- }
- for (k = k0; (k < k1); k++)
- {
- rvec_inc(state->x[k], state->box[d]);
- if (bScrew)
- {
- rotate_state_atom(state, k);
- }
- }
- }
- }
- }
- else if (d < npbcdim)
- {
- /* Put the charge group in the rectangular unit-cell */
- while (cm_new[d] >= state->box[d][d])
- {
- rvec_dec(cm_new, state->box[d]);
- for (k = k0; (k < k1); k++)
- {
- rvec_dec(state->x[k], state->box[d]);
- }
- }
- while (cm_new[d] < 0)
- {
- rvec_inc(cm_new, state->box[d]);
- for (k = k0; (k < k1); k++)
- {
- rvec_inc(state->x[k], state->box[d]);
- }
- }
- }
- }
-
- copy_rvec(cm_new, cg_cm[cg]);
-
- /* Determine where this cg should go */
- flag = 0;
- mc = -1;
- for (d = 0; d < dd->ndim; d++)
- {
- dim = dd->dim[d];
- if (dev[dim] == 1)
- {
- flag |= DD_FLAG_FW(d);
- if (mc == -1)
- {
- mc = d*2;
- }
- }
- else if (dev[dim] == -1)
- {
- flag |= DD_FLAG_BW(d);
- if (mc == -1)
- {
- if (dd->nc[dim] > 2)
- {
- mc = d*2 + 1;
- }
- else
- {
- mc = d*2;
- }
- }
- }
- }
- /* Temporarily store the flag in move */
- move[cg] = mc + flag;
- }
-}
-
-static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
- gmx_domdec_t *dd, ivec tric_dir,
- t_state *state, PaddedRVecVector *f,
- t_forcerec *fr,
- gmx_bool bCompact,
- t_nrnb *nrnb,
- int *ncg_stay_home,
- int *ncg_moved)
-{
- int *move;
- int npbcdim;
- int ncg[DIM*2] = { 0 }, nat[DIM*2] = { 0 };
- int i, cg, k, d, dim, dim2, dir, d2, d3;
- int mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
- int sbuf[2], rbuf[2];
- int home_pos_cg, home_pos_at, buf_pos;
- int flag;
- real pos_d;
- matrix tcm;
- rvec *cg_cm = nullptr, cell_x0, cell_x1, limitd, limit0, limit1;
- int *cgindex;
- cginfo_mb_t *cginfo_mb;
- gmx_domdec_comm_t *comm;
- int *moved;
- int nthread, thread;
-
- if (dd->bScrewPBC)
- {
- check_screw_box(state->box);
- }
-
- comm = dd->comm;
- if (fr->cutoff_scheme == ecutsGROUP)
- {
- cg_cm = fr->cg_cm;
- }
-
- // Positions are always present, so there's nothing to flag
- bool bV = state->flags & (1<<estV);
- bool bCGP = state->flags & (1<<estCGP);
-
- if (dd->ncg_tot > comm->nalloc_int)
- {
- comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
- srenew(comm->buf_int, comm->nalloc_int);
- }
- move = comm->buf_int;
-
- npbcdim = dd->npbcdim;
-
- for (d = 0; (d < DIM); d++)
- {
- limitd[d] = dd->comm->cellsize_min[d];
- if (d >= npbcdim && dd->ci[d] == 0)
- {
- cell_x0[d] = -GMX_FLOAT_MAX;
- }
- else
- {
- cell_x0[d] = comm->cell_x0[d];
- }
- if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
- {
- cell_x1[d] = GMX_FLOAT_MAX;
- }
- else
- {
- cell_x1[d] = comm->cell_x1[d];
- }
- if (d < npbcdim)
- {
- limit0[d] = comm->old_cell_x0[d] - limitd[d];
- limit1[d] = comm->old_cell_x1[d] + limitd[d];
- }
- else
- {
- /* We check after communication if a charge group moved
- * more than one cell. Set the pre-comm check limit to float_max.
- */
- limit0[d] = -GMX_FLOAT_MAX;
- limit1[d] = GMX_FLOAT_MAX;
- }
- }
-
- make_tric_corr_matrix(npbcdim, state->box, tcm);
-
- cgindex = dd->cgindex;
-
- nthread = gmx_omp_nthreads_get(emntDomdec);
-
- /* Compute the center of geometry for all home charge groups
- * and put them in the box and determine where they should go.
- */
-#pragma omp parallel for num_threads(nthread) schedule(static)
- for (thread = 0; thread < nthread; thread++)
- {
- try
- {
- calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
- cell_x0, cell_x1, limitd, limit0, limit1,
- cgindex,
- ( thread *dd->ncg_home)/nthread,
- ((thread+1)*dd->ncg_home)/nthread,
- fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
- move);
- }
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
- }
-
- for (cg = 0; cg < dd->ncg_home; cg++)
- {
- if (move[cg] >= 0)
- {
- mc = move[cg];
- flag = mc & ~DD_FLAG_NRCG;
- mc = mc & DD_FLAG_NRCG;
- move[cg] = mc;
-
- if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
- {
- comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
- srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
- }
- comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
- /* We store the cg size in the lower 16 bits
- * and the place where the charge group should go
- * in the next 6 bits. This saves some communication volume.
- */
- nrcg = cgindex[cg+1] - cgindex[cg];
- comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
- ncg[mc] += 1;
- nat[mc] += nrcg;
- }
- }
-
- inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
- inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);
-
- *ncg_moved = 0;
- for (i = 0; i < dd->ndim*2; i++)
- {
- *ncg_moved += ncg[i];
- }
-
- nvec = 1;
- if (bV)
- {
- nvec++;
- }
- if (bCGP)
- {
- nvec++;
- }
-
- /* Make sure the communication buffers are large enough */
- for (mc = 0; mc < dd->ndim*2; mc++)
- {
- nvr = ncg[mc] + nat[mc]*nvec;
- if (nvr > comm->cgcm_state_nalloc[mc])
- {
- comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
- srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
- }
- }
-
- switch (fr->cutoff_scheme)
- {
- case ecutsGROUP:
- /* Recalculating cg_cm might be cheaper than communicating,
- * but that could give rise to rounding issues.
- */
- home_pos_cg =
- compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
- nvec, cg_cm, comm, bCompact);
- break;
- case ecutsVERLET:
- /* Without charge groups we send the moved atom coordinates
- * over twice. This is so the code below can be used without
- * many conditionals for both for with and without charge groups.
- */
- home_pos_cg =
- compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
- nvec, as_rvec_array(state->x.data()), comm, FALSE);
- if (bCompact)
- {
- home_pos_cg -= *ncg_moved;
- }
- break;
- default:
- gmx_incons("unimplemented");
- home_pos_cg = 0;
- }
-
- vec = 0;
- home_pos_at =
- compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
- nvec, vec++, as_rvec_array(state->x.data()),
- comm, bCompact);
- if (bV)
- {
- compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
- nvec, vec++, as_rvec_array(state->v.data()),
- comm, bCompact);
- }
- if (bCGP)
- {
- compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
- nvec, vec++, as_rvec_array(state->cg_p.data()),
- comm, bCompact);
- }
-
- if (bCompact)
- {
- compact_ind(dd->ncg_home, move,
- dd->index_gl, dd->cgindex, dd->gatindex,
- dd->ga2la, comm->bLocalCG,
- fr->cginfo);
- }
- else
- {
- if (fr->cutoff_scheme == ecutsVERLET)
- {
- moved = get_moved(comm, dd->ncg_home);
-
- for (k = 0; k < dd->ncg_home; k++)
- {
- moved[k] = 0;
- }
- }
- else
- {
- moved = fr->ns->grid->cell_index;
- }
-
- clear_and_mark_ind(dd->ncg_home, move,
- dd->index_gl, dd->cgindex, dd->gatindex,
- dd->ga2la, comm->bLocalCG,
- moved);
- }
-
- cginfo_mb = fr->cginfo_mb;
-
- *ncg_stay_home = home_pos_cg;
- for (d = 0; d < dd->ndim; d++)
- {
- dim = dd->dim[d];
- ncg_recv = 0;
- nvr = 0;
- for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
- {
- cdd = d*2 + dir;
- /* Communicate the cg and atom counts */
- sbuf[0] = ncg[cdd];
- sbuf[1] = nat[cdd];
- if (debug)
- {
- fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
- d, dir, sbuf[0], sbuf[1]);
- }
- dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
-
- if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
- {
- comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
- srenew(comm->buf_int, comm->nalloc_int);
- }
-
- /* Communicate the charge group indices, sizes and flags */
- dd_sendrecv_int(dd, d, dir,
- comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
- comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
-
- nvs = ncg[cdd] + nat[cdd]*nvec;
- i = rbuf[0] + rbuf[1] *nvec;
- vec_rvec_check_alloc(&comm->vbuf, nvr+i);
-
- /* Communicate cgcm and state */
- dd_sendrecv_rvec(dd, d, dir,
- comm->cgcm_state[cdd], nvs,
- comm->vbuf.v+nvr, i);
- ncg_recv += rbuf[0];
- nvr += i;
- }
-
- dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
- if (fr->cutoff_scheme == ecutsGROUP)
- {
- /* Here we resize to more than necessary and shrink later */
- dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
- }
-
- /* Process the received charge groups */
- buf_pos = 0;
- for (cg = 0; cg < ncg_recv; cg++)
- {
- flag = comm->buf_int[cg*DD_CGIBS+1];
-
- if (dim >= npbcdim && dd->nc[dim] > 2)
- {
- /* No pbc in this dim and more than one domain boundary.
- * We do a separate check if a charge group didn't move too far.
- */
- if (((flag & DD_FLAG_FW(d)) &&
- comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
- ((flag & DD_FLAG_BW(d)) &&
- comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
- {
- cg_move_error(fplog, dd, step, cg, dim,
- (flag & DD_FLAG_FW(d)) ? 1 : 0,
- fr->cutoff_scheme == ecutsGROUP, 0,
- comm->vbuf.v[buf_pos],
- comm->vbuf.v[buf_pos],
- comm->vbuf.v[buf_pos][dim]);
- }
- }
-
- mc = -1;
- if (d < dd->ndim-1)
- {
- /* Check which direction this cg should go */
- for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
- {
- if (isDlbOn(dd->comm))
- {
- /* The cell boundaries for dimension d2 are not equal
- * for each cell row of the lower dimension(s),
- * therefore we might need to redetermine where
- * this cg should go.
- */
- dim2 = dd->dim[d2];
- /* If this cg crosses the box boundary in dimension d2
- * we can use the communicated flag, so we do not
- * have to worry about pbc.
- */
- if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
- (flag & DD_FLAG_FW(d2))) ||
- (dd->ci[dim2] == 0 &&
- (flag & DD_FLAG_BW(d2)))))
- {
- /* Clear the two flags for this dimension */
- flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
- /* Determine the location of this cg
- * in lattice coordinates
- */
- pos_d = comm->vbuf.v[buf_pos][dim2];
- if (tric_dir[dim2])
- {
- for (d3 = dim2+1; d3 < DIM; d3++)
- {
- pos_d +=
- comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
- }
- }
- /* Check of we are not at the box edge.
- * pbc is only handled in the first step above,
- * but this check could move over pbc while
- * the first step did not due to different rounding.
- */
- if (pos_d >= cell_x1[dim2] &&
- dd->ci[dim2] != dd->nc[dim2]-1)
- {
- flag |= DD_FLAG_FW(d2);
- }
- else if (pos_d < cell_x0[dim2] &&
- dd->ci[dim2] != 0)
- {
- flag |= DD_FLAG_BW(d2);
- }
- comm->buf_int[cg*DD_CGIBS+1] = flag;
- }
- }
- /* Set to which neighboring cell this cg should go */
- if (flag & DD_FLAG_FW(d2))
- {
- mc = d2*2;
- }
- else if (flag & DD_FLAG_BW(d2))
- {
- if (dd->nc[dd->dim[d2]] > 2)
- {
- mc = d2*2+1;
- }
- else
- {
- mc = d2*2;
- }
- }
- }
- }
-
- nrcg = flag & DD_FLAG_NRCG;
- if (mc == -1)
- {
- if (home_pos_cg+1 > dd->cg_nalloc)
- {
- dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
- srenew(dd->index_gl, dd->cg_nalloc);
- srenew(dd->cgindex, dd->cg_nalloc+1);
- }
- /* Set the global charge group index and size */
- dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
- dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
- /* Copy the state from the buffer */
- if (fr->cutoff_scheme == ecutsGROUP)
- {
- cg_cm = fr->cg_cm;
- copy_rvec(comm->vbuf.v[buf_pos], cg_cm[home_pos_cg]);
- }
- buf_pos++;
-
- /* Set the cginfo */
- fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
- dd->index_gl[home_pos_cg]);
- if (comm->bLocalCG)
- {
- comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
- }
-
- for (i = 0; i < nrcg; i++)
- {
- copy_rvec(comm->vbuf.v[buf_pos++],
- state->x[home_pos_at+i]);
- }
- if (bV)
- {
- for (i = 0; i < nrcg; i++)
- {
- copy_rvec(comm->vbuf.v[buf_pos++],
- state->v[home_pos_at+i]);
- }
- }
- if (bCGP)
- {
- for (i = 0; i < nrcg; i++)
- {
- copy_rvec(comm->vbuf.v[buf_pos++],
- state->cg_p[home_pos_at+i]);
- }
- }
- home_pos_cg += 1;
- home_pos_at += nrcg;
- }
- else
- {
- /* Reallocate the buffers if necessary */
- if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
- {
- comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
- srenew(comm->cggl_flag[mc], comm->cggl_flag_nalloc[mc]*DD_CGIBS);
- }
- nvr = ncg[mc] + nat[mc]*nvec;
- if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
- {
- comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
- srenew(comm->cgcm_state[mc], comm->cgcm_state_nalloc[mc]);
- }
- /* Copy from the receive to the send buffers */
- memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
- comm->buf_int + cg*DD_CGIBS,
- DD_CGIBS*sizeof(int));
- memcpy(comm->cgcm_state[mc][nvr],
- comm->vbuf.v[buf_pos],
- (1+nrcg*nvec)*sizeof(rvec));
- buf_pos += 1 + nrcg*nvec;
- ncg[mc] += 1;
- nat[mc] += nrcg;
- }
- }
- }
+ gmx_domdec_comm_t *comm;
+ int dim_ind, dim;
- /* With sorting (!bCompact) the indices are now only partially up to date
- * and ncg_home and nat_home are not the real count, since there are
- * "holes" in the arrays for the charge groups that moved to neighbors.
- */
- if (fr->cutoff_scheme == ecutsVERLET)
+ comm = dd->comm;
+
+ for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
{
- moved = get_moved(comm, home_pos_cg);
+ dim = dd->dim[dim_ind];
- for (i = dd->ncg_home; i < home_pos_cg; i++)
+ /* Without PBC we don't have restrictions on the outer cells */
+ if (!(dim >= ddbox->npbcdim &&
+ (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
+ isDlbOn(comm) &&
+ (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
+ comm->cellsize_min[dim])
{
- moved[i] = 0;
+ char buf[22];
+ gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
+ gmx_step_str(step, buf), dim2char(dim),
+ comm->cell_x1[dim] - comm->cell_x0[dim],
+ ddbox->skew_fac[dim],
+ dd->comm->cellsize_min[dim],
+ dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
}
- dd->ncg_home = home_pos_cg;
- dd->nat_home = home_pos_at;
-
- if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
- {
- /* We overallocated before, we need to set the right size here */
- dd_resize_state(state, f, dd->nat_home);
- }
- if (debug)
+ if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
{
- fprintf(debug,
- "Finished repartitioning: cgs moved out %d, new home %d\n",
- *ncg_moved, dd->ncg_home-*ncg_moved);
-
+ /* Communicate the boundaries and update cell_ns_x0/1 */
+ dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
+ if (isDlbOn(dd->comm) && dd->ndim > 1)
+ {
+ check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
+ }
}
}
{
gmx_domdec_comm_t *comm;
domdec_load_t *load;
- domdec_root_t *root = nullptr;
- int d, dim, i, pos;
float cell_frac = 0, sbuf[DD_NLOAD_MAX];
gmx_bool bSepPME;
comm->load[0].pme = comm->cycl[ddCyclPME];
}
- for (d = dd->ndim-1; d >= 0; d--)
+ for (int d = dd->ndim - 1; d >= 0; d--)
{
- dim = dd->dim[d];
+ const DDCellsizesWithDlb &cellsizes = comm->cellsizesWithDlb[d];
+ const int dim = dd->dim[d];
/* Check if we participate in the communication in this dimension */
if (d == dd->ndim-1 ||
(dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
load = &comm->load[d];
if (isDlbOn(dd->comm))
{
- cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
+ cell_frac = cellsizes.fracUpper - cellsizes.fracLower;
}
- pos = 0;
+ int pos = 0;
if (d == dd->ndim-1)
{
sbuf[pos++] = dd_force_load(comm);
sbuf[pos++] = cell_frac;
if (d > 0)
{
- sbuf[pos++] = comm->cell_f_max0[d];
- sbuf[pos++] = comm->cell_f_min1[d];
+ sbuf[pos++] = cellsizes.fracLowerMax;
+ sbuf[pos++] = cellsizes.fracUpperMin;
}
}
if (bSepPME)
sbuf[pos++] = comm->load[d+1].flags;
if (d > 0)
{
- sbuf[pos++] = comm->cell_f_max0[d];
- sbuf[pos++] = comm->cell_f_min1[d];
+ sbuf[pos++] = cellsizes.fracLowerMax;
+ sbuf[pos++] = cellsizes.fracUpperMin;
}
}
if (bSepPME)
#endif
if (dd->ci[dim] == dd->master_ci[dim])
{
- /* We are the root, process this row */
+ /* We are the master along this row, process this row */
+ RowMaster *rowMaster = nullptr;
+
if (isDlbOn(comm))
{
- root = comm->root[d];
+ rowMaster = cellsizes.rowMaster.get();
}
load->sum = 0;
load->max = 0;
load->flags = 0;
load->mdf = 0;
load->pme = 0;
- pos = 0;
- for (i = 0; i < dd->nc[dim]; i++)
+ int pos = 0;
+ for (int i = 0; i < dd->nc[dim]; i++)
{
load->sum += load->load[pos++];
load->max = std::max(load->max, load->load[pos]);
pos++;
if (isDlbOn(dd->comm))
{
- if (root->bLimited)
+ if (rowMaster->dlbIsLimited)
{
/* This direction could not be load balanced properly,
* therefore we need to use the maximum iso the average load.
pos++;
if (d < dd->ndim-1)
{
- load->flags = (int)(load->load[pos++] + 0.5);
+ load->flags = static_cast<int>(load->load[pos++] + 0.5);
}
if (d > 0)
{
- root->cell_f_max0[i] = load->load[pos++];
- root->cell_f_min1[i] = load->load[pos++];
+ rowMaster->bounds[i].cellFracLowerMax = load->load[pos++];
+ rowMaster->bounds[i].cellFracUpperMin = load->load[pos++];
}
}
if (bSepPME)
pos++;
}
}
- if (isDlbOn(comm) && root->bLimited)
+ if (isDlbOn(comm) && rowMaster->dlbIsLimited)
{
load->sum_m *= dd->nc[dim];
load->flags |= (1<<d);
comm->load_max += comm->load[0].max;
if (isDlbOn(comm))
{
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
if (comm->load[0].flags & (1<<d))
{
lossFraction = dd_force_imb_perf_loss(dd);
std::string msg = "\n Dynamic load balancing report:\n";
- std::string dlbStateStr = "";
+ std::string dlbStateStr;
switch (dd->comm->dlbState)
{
sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
fprintf(fplog, "%s", buf);
fprintf(stderr, "%s", buf);
- sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossFractionPme)*100);
+ sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", std::fabs(lossFractionPme)*100);
fprintf(fplog, "%s", buf);
fprintf(stderr, "%s", buf);
}
fprintf(fplog, "%s\n", buf);
fprintf(stderr, "%s\n", buf);
}
- if (numPmeRanks > 0 && fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
+ if (numPmeRanks > 0 && std::fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
{
sprintf(buf,
"NOTE: %.1f %% performance was lost because the PME ranks\n"
" had %s work to do than the PP ranks.\n"
" You might want to %s the number of PME ranks\n"
" or %s the cut-off and the grid spacing.\n",
- fabs(lossFractionPme*100),
+ std::fabs(lossFractionPme*100),
(lossFractionPme < 0) ? "less" : "more",
(lossFractionPme < 0) ? "decrease" : "increase",
(lossFractionPme < 0) ? "decrease" : "increase");
return dd->comm->load[0].cvol_min*dd->nnodes;
}
-static gmx_bool dd_load_flags(gmx_domdec_t *dd)
+static int dd_load_flags(gmx_domdec_t *dd)
{
return dd->comm->load[0].flags;
}
}
}
-static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
+static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, int64_t step)
{
int flags, d;
char buf[22];
}
if (dd->nnodes > 1)
{
- fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
+ fprintf(stderr, "imb F %2d%% ", static_cast<int>(dd_f_imbal(dd)*100+0.5));
}
if (dd->comm->cycl_n[ddCyclPME])
{
MPI_Comm c_row;
int dim, i, rank;
ivec loc_c;
- domdec_root_t *root;
gmx_bool bPartOfGroup = FALSE;
dim = dd->dim[dim_ind];
dd->comm->mpi_comm_load[dim_ind] = c_row;
if (!isDlbDisabled(dd->comm))
{
+ DDCellsizesWithDlb &cellsizes = dd->comm->cellsizesWithDlb[dim_ind];
+
if (dd->ci[dim] == dd->master_ci[dim])
{
/* This is the root process of this row */
- snew(dd->comm->root[dim_ind], 1);
- root = dd->comm->root[dim_ind];
- snew(root->cell_f, DD_CELL_F_SIZE(dd, dim_ind));
- snew(root->old_cell_f, dd->nc[dim]+1);
- snew(root->bCellMin, dd->nc[dim]);
+ cellsizes.rowMaster = gmx::compat::make_unique<RowMaster>();
+
+ RowMaster &rowMaster = *cellsizes.rowMaster;
+ rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
+ rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
+ rowMaster.isCellMin.resize(dd->nc[dim]);
if (dim_ind > 0)
{
- snew(root->cell_f_max0, dd->nc[dim]);
- snew(root->cell_f_min1, dd->nc[dim]);
- snew(root->bound_min, dd->nc[dim]);
- snew(root->bound_max, dd->nc[dim]);
+ rowMaster.bounds.resize(dd->nc[dim]);
}
- snew(root->buf_ncd, dd->nc[dim]);
+ rowMaster.buf_ncd.resize(dd->nc[dim]);
}
else
{
/* This is not a root process, we only need to receive cell_f */
- snew(dd->comm->cell_f_row, DD_CELL_F_SIZE(dd, dim_ind));
+ cellsizes.fracRow.resize(ddCellFractionBufferSize(dd, dim_ind));
}
}
if (dd->ci[dim] == dd->master_ci[dim])
}
/* Split the PP communicator over the physical nodes */
/* TODO: See if we should store this (before), as it's also used for
- * for the nodecomm summution.
+ * for the nodecomm summation.
*/
+ // TODO PhysicalNodeCommunicator could be extended/used to handle
+ // the need for per-node per-group communicators.
MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
&mpi_comm_pp_physicalnode);
MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
if (!isDlbDisabled(dd->comm))
{
- snew(dd->comm->root, dd->ndim);
+ dd->comm->cellsizesWithDlb.resize(dd->ndim);
}
if (dd->comm->bRecordLoad)
static void make_pp_communicator(FILE *fplog,
gmx_domdec_t *dd,
t_commrec gmx_unused *cr,
- int gmx_unused reorder)
+ bool gmx_unused reorder)
{
#if GMX_MPI
gmx_domdec_comm_t *comm;
{
periods[i] = TRUE;
}
- MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
+ MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder),
&comm_cart);
/* We overwrite the old communicator with the new cartesian one */
cr->mpi_comm_mygroup = comm_cart;
#endif
}
-static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
- int ncg, int natoms)
-{
- gmx_domdec_master_t *ma;
- int i;
-
- snew(ma, 1);
-
- snew(ma->ncg, dd->nnodes);
- snew(ma->index, dd->nnodes+1);
- snew(ma->cg, ncg);
- snew(ma->nat, dd->nnodes);
- snew(ma->ibuf, dd->nnodes*2);
- snew(ma->cell_x, DIM);
- for (i = 0; i < DIM; i++)
- {
- snew(ma->cell_x[i], dd->nc[i]+1);
- }
-
- if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
- {
- ma->vbuf = nullptr;
- }
- else
- {
- snew(ma->vbuf, natoms);
- }
-
- return ma;
-}
-
static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
DdRankOrder gmx_unused rankOrder,
- int gmx_unused reorder)
+ bool gmx_unused reorder)
{
gmx_domdec_comm_t *comm;
int i;
{
periods[i] = TRUE;
}
- MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
+ MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, static_cast<int>(reorder),
&comm_cart);
MPI_Comm_rank(comm_cart, &rank);
if (MASTER(cr) && rank != 0)
gmx_domdec_t *dd, DdRankOrder ddRankOrder)
{
gmx_domdec_comm_t *comm;
- int CartReorder;
+ bool CartReorder;
comm = dd->comm;
* Real reordering is only supported on very few architectures,
* Blue Gene is one of them.
*/
- CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
+ CartReorder = getenv("GMX_NO_CART_REORDER") == nullptr;
if (cr->npmenodes > 0)
{
dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
if (debug)
{
- fprintf(debug, "My pme_nodeid %d receive ener %d\n",
- dd->pme_nodeid, dd->pme_receive_vir_ener);
+ fprintf(debug, "My pme_nodeid %d receive ener %s\n",
+ dd->pme_nodeid, gmx::boolToString(dd->pme_receive_vir_ener));
}
}
else
if (DDMASTER(dd))
{
- dd->ma = init_gmx_domdec_master_t(dd,
- comm->cgs_gl.nr,
- comm->cgs_gl.index[comm->cgs_gl.nr]);
+ dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
+ comm->cgs_gl.nr,
+ comm->cgs_gl.index[comm->cgs_gl.nr]);
}
}
{
int n, nmol, ftype;
gmx_mtop_ilistloop_t iloop;
- t_ilist *il;
+ const t_ilist *il;
n = 0;
iloop = gmx_mtop_ilistloop_init(mtop);
return nst;
}
-static void dd_warning(t_commrec *cr, FILE *fplog, const char *warn_string)
+static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
{
if (MASTER(cr))
{
if (cmdlineDlbState == edlbsOnUser)
{
- gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
+ gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
}
else if (cmdlineDlbState == edlbsOffCanTurnOn)
{
break;
case edlbsOffCanTurnOn:
return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
- break;
case edlbsOnCanTurnOff:
GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
break;
case edlbsOnUser:
return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
- break;
default:
gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
- break;
}
}
static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
{
- int dim;
-
dd->ndim = 0;
if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
{
{
fprintf(fplog, "Using domain decomposition order z, y, x\n");
}
- for (dim = DIM-1; dim >= 0; dim--)
+ for (int dim = DIM-1; dim >= 0; dim--)
{
if (dd->nc[dim] > 1)
{
else
{
/* Decomposition order x,y,z */
- for (dim = 0; dim < DIM; dim++)
+ for (int dim = 0; dim < DIM; dim++)
{
if (dd->nc[dim] > 1)
{
}
}
}
-}
-
-static gmx_domdec_comm_t *init_dd_comm()
-{
- gmx_domdec_comm_t *comm;
- int i;
- snew(comm, 1);
- snew(comm->cggl_flag, DIM*2);
- snew(comm->cgcm_state, DIM*2);
- for (i = 0; i < DIM*2; i++)
+ if (dd->ndim == 0)
{
- comm->cggl_flag_nalloc[i] = 0;
- comm->cgcm_state_nalloc[i] = 0;
+ /* Set dim[0] to avoid extra checks on ndim in several places */
+ dd->dim[0] = XX;
}
+}
- comm->nalloc_int = 0;
- comm->buf_int = nullptr;
+static gmx_domdec_comm_t *init_dd_comm()
+{
+ gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
- vec_rvec_init(&comm->vbuf);
+ comm->n_load_have = 0;
+ comm->n_load_collect = 0;
- comm->n_load_have = 0;
- comm->n_load_collect = 0;
+ comm->haveTurnedOffDlb = false;
- for (i = 0; i < ddnatNR-ddnatZONE; i++)
+ for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
{
comm->sum_nat[i] = 0;
}
const MdrunOptions &mdrunOptions,
const gmx_mtop_t *mtop,
const t_inputrec *ir,
- const matrix box, const rvec *xGlobal,
- gmx_ddbox_t *ddbox,
- int *npme_x, int *npme_y)
+ const matrix box,
+ gmx::ArrayRef<const gmx::RVec> xGlobal,
+ gmx_ddbox_t *ddbox)
{
real r_bonded = -1;
real r_bonded_limit = -1;
const real tenPercentMargin = 1.1;
gmx_domdec_comm_t *comm = dd->comm;
- snew(comm->cggl_flag, DIM*2);
- snew(comm->cgcm_state, DIM*2);
-
dd->npbcdim = ePBC2npbcdim(ir->ePBC);
dd->bScrewPBC = (ir->ePBC == epbcSCREW);
comm->bPMELoadBalDLBLimits = FALSE;
/* Allocate the charge group/atom sorting struct */
- snew(comm->sort, 1);
+ comm->sort = gmx::compat::make_unique<gmx_domdec_sort_t>();
comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
- comm->bInterCGBondeds = ((ncg_mtop(mtop) > mtop->mols.nr) ||
+ comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
mtop->bIntermolecularInteractions);
if (comm->bInterCGBondeds)
{
comm->bInterCGMultiBody = FALSE;
}
- dd->bInterCGcons = inter_charge_group_constraints(mtop);
- dd->bInterCGsettles = inter_charge_group_settles(mtop);
+ dd->bInterCGcons = gmx::inter_charge_group_constraints(*mtop);
+ dd->bInterCGsettles = gmx::inter_charge_group_settles(*mtop);
if (ir->rlist == 0)
{
if (MASTER(cr))
{
- dd_bonded_cg_distance(fplog, mtop, ir, xGlobal, box,
+ dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
options.checkBondedInteractions,
&r_2b, &r_mb);
}
if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
{
/* There is a cell size limit due to the constraints (P-LINCS) */
- rconstr = constr_r_max(fplog, mtop, ir);
+ rconstr = gmx::constr_r_max(fplog, mtop, ir);
if (fplog)
{
fprintf(fplog,
{
copy_ivec(options.numCells, dd->nc);
set_dd_dim(fplog, dd);
- set_ddbox_cr(cr, &dd->nc, ir, box, &comm->cgs_gl, xGlobal, ddbox);
+ set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
if (options.numPmeRanks >= 0)
{
}
else
{
- set_ddbox_cr(cr, nullptr, ir, box, &comm->cgs_gl, xGlobal, ddbox);
+ set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
/* We need to choose the optimal DD grid and possibly PME nodes */
real limit =
comm->npmenodes_y = 0;
}
- /* Technically we don't need both of these,
- * but it simplifies code not having to recalculate it.
- */
- *npme_x = comm->npmenodes_x;
- *npme_y = comm->npmenodes_y;
-
snew(comm->slb_frac, DIM);
if (isDlbDisabled(comm))
{
if (debug)
{
- fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
+ fprintf(debug, "Bonded atom communication beyond the cut-off: %s\n"
"cellsize limit %f\n",
- comm->bBondComm, comm->cellsize_limit);
+ gmx::boolToString(comm->bBondComm), comm->cellsize_limit);
}
if (MASTER(cr))
static void set_dlb_limits(gmx_domdec_t *dd)
{
- int d;
-
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
- dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
+ /* Set the number of pulses to the value for DLB */
+ dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
+
dd->comm->cellsize_min[dd->dim[d]] =
dd->comm->cellsize_min_dlb[dd->dim[d]];
}
}
-static void turn_on_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
+static void turn_on_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
{
- gmx_domdec_t *dd;
- gmx_domdec_comm_t *comm;
- real cellsize_min;
- int d, nc, i;
-
- dd = cr->dd;
- comm = dd->comm;
+ gmx_domdec_t *dd = cr->dd;
+ gmx_domdec_comm_t *comm = dd->comm;
- cellsize_min = comm->cellsize_min[dd->dim[0]];
- for (d = 1; d < dd->ndim; d++)
+ real cellsize_min = comm->cellsize_min[dd->dim[0]];
+ for (int d = 1; d < dd->ndim; d++)
{
cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
}
/* Turn off DLB if we're too close to the cell size limit. */
if (cellsize_min < comm->cellsize_limit*1.05)
{
- auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
+ auto str = gmx::formatString("step %" PRId64 " Measured %.1f %% performance loss due to load imbalance, "
"but the minimum cell size is smaller than 1.05 times the cell size limit."
"Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
dd_warning(cr, fplog, str.c_str());
}
char buf[STRLEN];
- sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
+ sprintf(buf, "step %" PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
dd_warning(cr, fplog, buf);
comm->dlbState = edlbsOnCanTurnOff;
* so we do not need to communicate this.
* The grid is completely uniform.
*/
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
- if (comm->root[d])
+ RowMaster *rowMaster = comm->cellsizesWithDlb[d].rowMaster.get();
+
+ if (rowMaster)
{
comm->load[d].sum_m = comm->load[d].sum;
- nc = dd->nc[dd->dim[d]];
- for (i = 0; i < nc; i++)
+ int nc = dd->nc[dd->dim[d]];
+ for (int i = 0; i < nc; i++)
{
- comm->root[d]->cell_f[i] = i/(real)nc;
+ rowMaster->cellFrac[i] = i/static_cast<real>(nc);
if (d > 0)
{
- comm->root[d]->cell_f_max0[i] = i /(real)nc;
- comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
+ rowMaster->bounds[i].cellFracLowerMax = i /static_cast<real>(nc);
+ rowMaster->bounds[i].cellFracUpperMin = (i + 1)/static_cast<real>(nc);
}
}
- comm->root[d]->cell_f[nc] = 1.0;
+ rowMaster->cellFrac[nc] = 1.0;
}
}
}
-static void turn_off_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
+static void turn_off_dlb(FILE *fplog, const t_commrec *cr, int64_t step)
{
gmx_domdec_t *dd = cr->dd;
char buf[STRLEN];
- sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
+ sprintf(buf, "step %" PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
dd_warning(cr, fplog, buf);
dd->comm->dlbState = edlbsOffCanTurnOn;
dd->comm->haveTurnedOffDlb = true;
dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
}
-static void turn_off_dlb_forever(FILE *fplog, t_commrec *cr, gmx_int64_t step)
+static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, int64_t step)
{
GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
char buf[STRLEN];
- sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
+ sprintf(buf, "step %" PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
dd_warning(cr, fplog, buf);
cr->dd->comm->dlbState = edlbsOffForever;
}
fprintf(fplog, "\n\n");
}
- gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
+ gmx_bool bInterCGVsites = count_intercg_vsites(mtop) != 0;
if (comm->bInterCGBondeds ||
bInterCGVsites ||
* Later cellsize_limit is redetermined,
* so we can not miss interactions due to this rounding.
*/
- npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
+ npulse = static_cast<int>(0.96 + comm->cutoff/comm->cellsize_limit);
}
else
{
for (d = 0; d < dd->ndim; d++)
{
dim = dd->dim[d];
- npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
- /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
+ npulse_d = static_cast<int>(1 + dd->nc[dim]*comm->cutoff
+ /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
npulse_d_max = std::max(npulse_d_max, npulse_d);
}
npulse = std::min(npulse, npulse_d_max);
for (d = 0; d < dd->ndim; d++)
{
comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
- comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
- snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
- comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
+ comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
{
comm->bVacDLBNoLimit = FALSE;
if (ir->ePBC == epbcNONE)
{
- vol_frac = 1 - 1/(double)dd->nnodes;
+ vol_frac = 1 - 1/static_cast<double>(dd->nnodes);
}
else
{
vol_frac =
- (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
+ (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/static_cast<double>(dd->nnodes);
}
if (debug)
{
}
natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
- dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
+ dd->ga2la = new gmx_ga2la_t(natoms_tot,
+ static_cast<int>(vol_frac*natoms_tot));
}
/*! \brief Set some important DD parameters that can be modified by env.vars */
{
gmx_domdec_comm_t *comm = dd->comm;
- dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
+ dd->bSendRecv2 = (dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0) != 0);
comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
const gmx_mtop_t *mtop,
const t_inputrec *ir,
const matrix box,
- const rvec *xGlobal,
- gmx_ddbox_t *ddbox,
- int *npme_x, int *npme_y)
+ gmx::ArrayRef<const gmx::RVec> xGlobal,
+ gmx::LocalAtomSetManager *atomSets)
{
gmx_domdec_t *dd;
"\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
}
- snew(dd, 1);
+ dd = new gmx_domdec_t;
dd->comm = init_dd_comm();
+ /* Initialize DD paritioning counters */
+ dd->comm->partition_step = INT_MIN;
+ dd->ddp_count = 0;
+
set_dd_envvar_options(fplog, dd, cr->nodeid);
+ gmx_ddbox_t ddbox = {0};
set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
mtop, ir,
box, xGlobal,
- ddbox,
- npme_x, npme_y);
+ &ddbox);
make_dd_communicators(fplog, cr, dd, options.rankOrder);
if (thisRankHasDuty(cr, DUTY_PP))
{
- set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, ddbox);
+ set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
setup_neighbor_relations(dd);
}
/* Set overallocation to avoid frequent reallocation of arrays */
set_over_alloc_dd(TRUE);
- /* Initialize DD paritioning counters */
- dd->comm->partition_step = INT_MIN;
- dd->ddp_count = 0;
-
- /* We currently don't know the number of threads yet, we set this later */
- dd->comm->nth = 0;
-
clear_dd_cycle_counts(dd);
+ dd->atomSets = atomSets;
+
return dd;
}
dd = cr->dd;
- set_ddbox(dd, FALSE, cr, ir, state->box,
- TRUE, &dd->comm->cgs_gl, as_rvec_array(state->x.data()), &ddbox);
+ set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
LocallyLimited = 0;
inv_cell_size *= DD_PRES_SCALE_MARGIN;
}
- np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
+ np = 1 + static_cast<int>(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
{
{
dd->comm->bCheckWhetherToTurnDlbOn = bValue;
- if (bValue == TRUE)
+ if (bValue)
{
/* Store the DD partitioning count, so we can ignore cycle counts
* over the next nstlist steps, which are often slower.
/* We check whether we should use DLB every c_checkTurnDlbOnInterval
* partitionings (we do not do this every partioning, so that we
* avoid excessive communication). */
- if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
- {
- return TRUE;
- }
-
- return FALSE;
+ return dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1;
}
gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
static void merge_cg_buffers(int ncell,
gmx_domdec_comm_dim_t *cd, int pulse,
int *ncg_cell,
- int *index_gl, int *recv_i,
+ gmx::ArrayRef<int> index_gl,
+ const int *recv_i,
rvec *cg_cm, rvec *recv_vr,
- int *cgindex,
+ gmx::ArrayRef<int> cgindex,
cginfo_mb_t *cginfo_mb, int *cginfo)
{
gmx_domdec_ind_t *ind, *ind_p;
}
}
-static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
- int nzone, int cg0, const int *cgindex)
+static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
+ int nzone,
+ int atomGroupStart,
+ const gmx::RangePartitioning &atomGroups)
{
- int cg, zone, p;
-
/* Store the atom block boundaries for easy copying of communication buffers
*/
- cg = cg0;
- for (zone = 0; zone < nzone; zone++)
+ int g = atomGroupStart;
+ for (int zone = 0; zone < nzone; zone++)
{
- for (p = 0; p < cd->np; p++)
+ for (gmx_domdec_ind_t &ind : cd->ind)
{
- cd->ind[p].cell2at0[zone] = cgindex[cg];
- cg += cd->ind[p].nrecv[zone];
- cd->ind[p].cell2at1[zone] = cgindex[cg];
+ const auto range = atomGroups.subRange(g, g + ind.nrecv[zone]);
+ ind.cell2at0[zone] = range.begin();
+ ind.cell2at1[zone] = range.end();
+ g += ind.nrecv[zone];
}
}
}
-static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
+static gmx_bool missing_link(t_blocka *link, int cg_gl, const char *bLocalCG)
{
int i;
gmx_bool bMiss;
}
}
-/* Determine which cg's we need to send in this pulse from this zone */
+/* Add the atom groups we need to send in this pulse from this zone to
+ * \p localAtomGroups and \p work
+ */
static void
get_zone_pulse_cgs(gmx_domdec_t *dd,
int zonei, int zone,
int cg0, int cg1,
- const int *index_gl,
- const int *cgindex,
+ gmx::ArrayRef<const int> globalAtomGroupIndices,
+ const gmx::RangePartitioning &atomGroups,
int dim, int dim_ind,
int dim0, int dim1, int dim2,
real r_comm2, real r_bcomm2,
matrix box,
- ivec tric_dist,
+ bool distanceIsTriclinic,
rvec *normal,
real skew_fac2_d, real skew_fac_01,
rvec *v_d, rvec *v_0, rvec *v_1,
const dd_corners_t *c,
- rvec sf2_round,
+ const rvec sf2_round,
gmx_bool bDistBonded,
gmx_bool bBondComm,
gmx_bool bDist2B,
gmx_bool bDistMB,
rvec *cg_cm,
- int *cginfo,
- gmx_domdec_ind_t *ind,
- int **ibuf, int *ibuf_nalloc,
- vec_rvec_t *vbuf,
- int *nsend_ptr,
- int *nat_ptr,
- int *nsend_z_ptr)
+ const int *cginfo,
+ std::vector<int> *localAtomGroups,
+ dd_comm_setup_work_t *work)
{
gmx_domdec_comm_t *comm;
gmx_bool bScrew;
real r2, rb2, r, tric_sh;
rvec rn, rb;
int dimd;
- int nsend_z, nsend, nat;
+ int nsend_z, nat;
comm = dd->comm;
bDistMB_pulse = (bDistMB && bDistBonded);
- nsend_z = 0;
- nsend = *nsend_ptr;
- nat = *nat_ptr;
+ /* Unpack the work data */
+ std::vector<int> &ibuf = work->atomGroupBuffer;
+ std::vector<gmx::RVec> &vbuf = work->positionBuffer;
+ nsend_z = 0;
+ nat = work->nat;
for (cg = cg0; cg < cg1; cg++)
{
r2 = 0;
rb2 = 0;
- if (tric_dist[dim_ind] == 0)
+ if (!distanceIsTriclinic)
{
/* Rectangular direction, easy */
r = cg_cm[cg][dim] - c->c[dim_ind][zone];
(bDist2B && r2 < r_bcomm2)) &&
(!bBondComm ||
(GET_CGINFO_BOND_INTER(cginfo[cg]) &&
- missing_link(comm->cglink, index_gl[cg],
+ missing_link(comm->cglink, globalAtomGroupIndices[cg],
comm->bLocalCG)))))
{
- /* Make an index to the local charge groups */
- if (nsend+1 > ind->nalloc)
- {
- ind->nalloc = over_alloc_large(nsend+1);
- srenew(ind->index, ind->nalloc);
- }
- if (nsend+1 > *ibuf_nalloc)
- {
- *ibuf_nalloc = over_alloc_large(nsend+1);
- srenew(*ibuf, *ibuf_nalloc);
- }
- ind->index[nsend] = cg;
- (*ibuf)[nsend] = index_gl[cg];
+ /* Store the local and global atom group indices and position */
+ localAtomGroups->push_back(cg);
+ ibuf.push_back(globalAtomGroupIndices[cg]);
nsend_z++;
- vec_rvec_check_alloc(vbuf, nsend+1);
+ rvec posPbc;
if (dd->ci[dim] == 0)
{
/* Correct cg_cm for pbc */
- rvec_add(cg_cm[cg], box[dim], vbuf->v[nsend]);
+ rvec_add(cg_cm[cg], box[dim], posPbc);
if (bScrew)
{
- vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
- vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
+ posPbc[YY] = box[YY][YY] - posPbc[YY];
+ posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
}
}
else
{
- copy_rvec(cg_cm[cg], vbuf->v[nsend]);
+ copy_rvec(cg_cm[cg], posPbc);
}
- nsend++;
- nat += cgindex[cg+1] - cgindex[cg];
+ vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
+
+ nat += atomGroups.block(cg).size();
}
}
- *nsend_ptr = nsend;
- *nat_ptr = nat;
- *nsend_z_ptr = nsend_z;
+ work->nat = nat;
+ work->nsend_zone = nsend_z;
+}
+
+static void clearCommSetupData(dd_comm_setup_work_t *work)
+{
+ work->localAtomGroupBuffer.clear();
+ work->atomGroupBuffer.clear();
+ work->positionBuffer.clear();
+ work->nat = 0;
+ work->nsend_zone = 0;
}
static void setup_dd_communication(gmx_domdec_t *dd,
t_forcerec *fr,
t_state *state, PaddedRVecVector *f)
{
- int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
+ int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
int nzone, nzone_send, zone, zonei, cg0, cg1;
int c, i, cg, cg_gl, nrcg;
- int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
+ int *zone_cg_range, pos_cg;
gmx_domdec_comm_t *comm;
gmx_domdec_zones_t *zones;
gmx_domdec_comm_dim_t *cd;
- gmx_domdec_ind_t *ind;
cginfo_mb_t *cginfo_mb;
gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
real r_comm2, r_bcomm2;
dd_corners_t corners;
- ivec tric_dist;
- rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr, *recv_vr;
+ rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
real skew_fac2_d, skew_fac_01;
rvec sf2_round;
- int nsend, nat;
- int th;
if (debug)
{
comm = dd->comm;
- if (comm->nth == 0)
+ if (comm->dth.empty())
{
/* Initialize the thread data.
* This can not be done in init_domain_decomposition,
* as the numbers of threads is determined later.
*/
- comm->nth = gmx_omp_nthreads_get(emntDomdec);
- if (comm->nth > 1)
- {
- snew(comm->dth, comm->nth);
- }
+ int numThreads = gmx_omp_nthreads_get(emntDomdec);
+ comm->dth.resize(numThreads);
}
switch (fr->cutoff_scheme)
break;
default:
gmx_incons("unimplemented");
- cg_cm = nullptr;
- }
-
- for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
- {
- /* Check if we need to use triclinic distances */
- tric_dist[dim_ind] = 0;
- for (i = 0; i <= dim_ind; i++)
- {
- if (ddbox->tric_dir[dd->dim[i]])
- {
- tric_dist[dim_ind] = 1;
- }
- }
}
bBondComm = comm->bBondComm;
if (debug)
{
- fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
+ fprintf(debug, "bBondComm %s, r_bc %f\n", gmx::boolToString(bBondComm), std::sqrt(r_bcomm2));
}
zones = &comm->zones;
}
zone_cg_range = zones->cg_range;
- index_gl = dd->index_gl;
- cgindex = dd->cgindex;
cginfo_mb = fr->cginfo_mb;
zone_cg_range[0] = 0;
comm->zone_ncg1[0] = dd->ncg_home;
pos_cg = dd->ncg_home;
- nat_tot = dd->nat_home;
+ nat_tot = comm->atomRanges.numHomeAtoms();
nzone = 1;
for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
{
dim = dd->dim[dim_ind];
cd = &comm->cd[dim_ind];
+ /* Check if we need to compute triclinic distances along this dim */
+ bool distanceIsTriclinic = false;
+ for (i = 0; i <= dim_ind; i++)
+ {
+ if (ddbox->tric_dir[dd->dim[i]])
+ {
+ distanceIsTriclinic = true;
+ }
+ }
+
if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
{
/* No pbc in this dimension, the first node should not comm. */
nzone_send = nzone;
}
- v_d = ddbox->v[dim];
- skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
+ v_d = ddbox->v[dim];
+ skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
- cd->bInPlace = TRUE;
- for (p = 0; p < cd->np; p++)
+ cd->receiveInPlace = true;
+ for (int p = 0; p < cd->numPulses(); p++)
{
/* Only atoms communicated in the first pulse are used
* for multi-body bonded interactions or for bBondComm.
*/
bDistBonded = ((bDistMB || bDist2B) && p == 0);
- ind = &cd->ind[p];
- nsend = 0;
- nat = 0;
+ gmx_domdec_ind_t *ind = &cd->ind[p];
+
+ /* Thread 0 writes in the global index array */
+ ind->index.clear();
+ clearCommSetupData(&comm->dth[0]);
+
for (zone = 0; zone < nzone_send; zone++)
{
- if (tric_dist[dim_ind] && dim_ind > 0)
+ if (dim_ind > 0 && distanceIsTriclinic)
{
/* Determine slightly more optimized skew_fac's
* for rounding.
cg0 = cg1 - cd->ind[p-1].nrecv[zone];
}
-#pragma omp parallel for num_threads(comm->nth) schedule(static)
- for (th = 0; th < comm->nth; th++)
+ const int numThreads = static_cast<int>(comm->dth.size());
+#pragma omp parallel for num_threads(numThreads) schedule(static)
+ for (int th = 0; th < numThreads; th++)
{
try
{
- gmx_domdec_ind_t *ind_p;
- int **ibuf_p, *ibuf_nalloc_p;
- vec_rvec_t *vbuf_p;
- int *nsend_p, *nat_p;
- int *nsend_zone_p;
- int cg0_th, cg1_th;
-
- if (th == 0)
- {
- /* Thread 0 writes in the comm buffers */
- ind_p = ind;
- ibuf_p = &comm->buf_int;
- ibuf_nalloc_p = &comm->nalloc_int;
- vbuf_p = &comm->vbuf;
- nsend_p = &nsend;
- nat_p = &nat;
- nsend_zone_p = &ind->nsend[zone];
- }
- else
- {
- /* Other threads write into temp buffers */
- ind_p = &comm->dth[th].ind;
- ibuf_p = &comm->dth[th].ibuf;
- ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
- vbuf_p = &comm->dth[th].vbuf;
- nsend_p = &comm->dth[th].nsend;
- nat_p = &comm->dth[th].nat;
- nsend_zone_p = &comm->dth[th].nsend_zone;
-
- comm->dth[th].nsend = 0;
- comm->dth[th].nat = 0;
- comm->dth[th].nsend_zone = 0;
- }
+ dd_comm_setup_work_t &work = comm->dth[th];
- if (comm->nth == 1)
- {
- cg0_th = cg0;
- cg1_th = cg1;
- }
- else
+ /* Retain data accumulated into buffers of thread 0 */
+ if (th > 0)
{
- cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
- cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
+ clearCommSetupData(&work);
}
+ int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
+ int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
+
/* Get the cg's for this pulse in this zone */
get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
- index_gl, cgindex,
+ dd->globalAtomGroupIndices,
+ dd->atomGrouping(),
dim, dim_ind, dim0, dim1, dim2,
r_comm2, r_bcomm2,
- box, tric_dist,
+ box, distanceIsTriclinic,
normal, skew_fac2_d, skew_fac_01,
v_d, v_0, v_1, &corners, sf2_round,
bDistBonded, bBondComm,
bDist2B, bDistMB,
cg_cm, fr->cginfo,
- ind_p,
- ibuf_p, ibuf_nalloc_p,
- vbuf_p,
- nsend_p, nat_p,
- nsend_zone_p);
+ th == 0 ? &ind->index : &work.localAtomGroupBuffer,
+ &work);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
} // END
+ std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
+ std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
+ ind->nsend[zone] = comm->dth[0].nsend_zone;
/* Append data of threads>=1 to the communication buffers */
- for (th = 1; th < comm->nth; th++)
+ for (int th = 1; th < numThreads; th++)
{
- dd_comm_setup_work_t *dth;
- int i, ns1;
-
- dth = &comm->dth[th];
-
- ns1 = nsend + dth->nsend_zone;
- if (ns1 > ind->nalloc)
- {
- ind->nalloc = over_alloc_dd(ns1);
- srenew(ind->index, ind->nalloc);
- }
- if (ns1 > comm->nalloc_int)
- {
- comm->nalloc_int = over_alloc_dd(ns1);
- srenew(comm->buf_int, comm->nalloc_int);
- }
- if (ns1 > comm->vbuf.nalloc)
- {
- comm->vbuf.nalloc = over_alloc_dd(ns1);
- srenew(comm->vbuf.v, comm->vbuf.nalloc);
- }
+ const dd_comm_setup_work_t &dth = comm->dth[th];
- for (i = 0; i < dth->nsend_zone; i++)
- {
- ind->index[nsend] = dth->ind.index[i];
- comm->buf_int[nsend] = dth->ibuf[i];
- copy_rvec(dth->vbuf.v[i],
- comm->vbuf.v[nsend]);
- nsend++;
- }
- nat += dth->nat;
- ind->nsend[zone] += dth->nsend_zone;
+ ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
+ atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
+ positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
+ comm->dth[0].nat += dth.nat;
+ ind->nsend[zone] += dth.nsend_zone;
}
}
/* Clear the counts in case we do not have pbc */
{
ind->nsend[zone] = 0;
}
- ind->nsend[nzone] = nsend;
- ind->nsend[nzone+1] = nat;
+ ind->nsend[nzone] = ind->index.size();
+ ind->nsend[nzone + 1] = comm->dth[0].nat;
/* Communicate the number of cg's and atoms to receive */
- dd_sendrecv_int(dd, dim_ind, dddirBackward,
- ind->nsend, nzone+2,
- ind->nrecv, nzone+2);
-
- /* The rvec buffer is also required for atom buffers of size nsend
- * in dd_move_x and dd_move_f.
- */
- vec_rvec_check_alloc(&comm->vbuf, ind->nsend[nzone+1]);
+ ddSendrecv(dd, dim_ind, dddirBackward,
+ ind->nsend, nzone+2,
+ ind->nrecv, nzone+2);
if (p > 0)
{
{
if (ind->nrecv[zone] > 0)
{
- cd->bInPlace = FALSE;
- }
- }
- if (!cd->bInPlace)
- {
- /* The int buffer is only required here for the cg indices */
- if (ind->nrecv[nzone] > comm->nalloc_int2)
- {
- comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
- srenew(comm->buf_int2, comm->nalloc_int2);
+ cd->receiveInPlace = false;
}
- /* The rvec buffer is also required for atom buffers
- * of size nrecv in dd_move_x and dd_move_f.
- */
- i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
- vec_rvec_check_alloc(&comm->vbuf2, i);
}
}
- /* Make space for the global cg indices */
- if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
- || dd->cg_nalloc == 0)
+ int receiveBufferSize = 0;
+ if (!cd->receiveInPlace)
{
- dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
- srenew(index_gl, dd->cg_nalloc);
- srenew(cgindex, dd->cg_nalloc+1);
+ receiveBufferSize = ind->nrecv[nzone];
}
+ /* These buffer are actually only needed with in-place */
+ DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
+ DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
+
+ dd_comm_setup_work_t &work = comm->dth[0];
+
+ /* Make space for the global cg indices */
+ int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
+ dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
/* Communicate the global cg indices */
- if (cd->bInPlace)
+ gmx::ArrayRef<int> integerBufferRef;
+ if (cd->receiveInPlace)
{
- recv_i = index_gl + pos_cg;
+ integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
}
else
{
- recv_i = comm->buf_int2;
+ integerBufferRef = globalAtomGroupBuffer.buffer;
}
- dd_sendrecv_int(dd, dim_ind, dddirBackward,
- comm->buf_int, nsend,
- recv_i, ind->nrecv[nzone]);
+ ddSendrecv<int>(dd, dim_ind, dddirBackward,
+ work.atomGroupBuffer, integerBufferRef);
/* Make space for cg_cm */
dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
cg_cm = as_rvec_array(state->x.data());
}
/* Communicate cg_cm */
- if (cd->bInPlace)
+ gmx::ArrayRef<gmx::RVec> rvecBufferRef;
+ if (cd->receiveInPlace)
{
- recv_vr = cg_cm + pos_cg;
+ rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
}
else
{
- recv_vr = comm->vbuf2.v;
+ rvecBufferRef = rvecBuffer.buffer;
}
- dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
- comm->vbuf.v, nsend,
- recv_vr, ind->nrecv[nzone]);
+ ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
+ work.positionBuffer, rvecBufferRef);
/* Make the charge group index */
- if (cd->bInPlace)
+ if (cd->receiveInPlace)
{
zone = (p == 0 ? 0 : nzone - 1);
while (zone < nzone)
{
for (cg = 0; cg < ind->nrecv[zone]; cg++)
{
- cg_gl = index_gl[pos_cg];
- fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
- nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
- cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
+ cg_gl = dd->globalAtomGroupIndices[pos_cg];
+ fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
+ nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
+ dd->atomGrouping_.appendBlock(nrcg);
if (bBondComm)
{
/* Update the charge group presence,
else
{
/* This part of the code is never executed with bBondComm. */
+ std::vector<int> &atomGroupsIndex = dd->atomGrouping_.rawIndex();
+ atomGroupsIndex.resize(numAtomGroupsNew + 1);
+
merge_cg_buffers(nzone, cd, p, zone_cg_range,
- index_gl, recv_i, cg_cm, recv_vr,
- cgindex, fr->cginfo_mb, fr->cginfo);
+ dd->globalAtomGroupIndices, integerBufferRef.data(),
+ cg_cm, as_rvec_array(rvecBufferRef.data()),
+ atomGroupsIndex,
+ fr->cginfo_mb, fr->cginfo);
pos_cg += ind->nrecv[nzone];
}
nat_tot += ind->nrecv[nzone+1];
}
- if (!cd->bInPlace)
+ if (!cd->receiveInPlace)
{
/* Store the atom block for easy copying of communication buffers */
- make_cell2at_index(cd, nzone, zone_cg_range[nzone], cgindex);
+ make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGrouping());
}
nzone += nzone;
}
- dd->index_gl = index_gl;
- dd->cgindex = cgindex;
- dd->ncg_tot = zone_cg_range[zones->n];
- dd->nat_tot = nat_tot;
- comm->nat[ddnatHOME] = dd->nat_home;
- for (i = ddnatZONE; i < ddnatNR; i++)
- {
- comm->nat[i] = dd->nat_tot;
- }
+ comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
if (!bBondComm)
{
/* We don't need to update cginfo, since that was alrady done above.
* So we pass NULL for the forcerec.
*/
- dd_set_cginfo(dd->index_gl, dd->ncg_home, dd->ncg_tot,
+ dd_set_cginfo(dd->globalAtomGroupIndices,
+ dd->ncg_home, dd->globalAtomGroupIndices.size(),
nullptr, comm->bLocalCG);
}
{
if (zones->shift[zi][dim] == 0)
{
- for (z = zones->izone[zi].j0; z < zones->izone[zi].j1; z++)
+ /* We should only use zones up to zone_end */
+ int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
+ for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
{
if (zones->shift[z][dim] > 0)
{
}
}
-static int comp_cgsort(const void *a, const void *b)
+static bool comp_cgsort(const gmx_cgsort_t &a, const gmx_cgsort_t &b)
{
- int comp;
- gmx_cgsort_t *cga, *cgb;
- cga = (gmx_cgsort_t *)a;
- cgb = (gmx_cgsort_t *)b;
-
- comp = cga->nsc - cgb->nsc;
- if (comp == 0)
+ if (a.nsc == b.nsc)
{
- comp = cga->ind_gl - cgb->ind_gl;
+ return a.ind_gl < b.ind_gl;
}
-
- return comp;
+ return a.nsc < b.nsc;
}
-static void order_int_cg(int n, const gmx_cgsort_t *sort,
- int *a, int *buf)
+/* Order data in \p dataToSort according to \p sort
+ *
+ * Note: both buffers should have at least \p sort.size() elements.
+ */
+template <typename T>
+static void
+orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
+ gmx::ArrayRef<T> dataToSort,
+ gmx::ArrayRef<T> sortBuffer)
{
- int i;
+ GMX_ASSERT(dataToSort.size() >= sort.size(), "The vector needs to be sufficiently large");
+ GMX_ASSERT(sortBuffer.size() >= sort.size(), "The sorting buffer needs to be sufficiently large");
- /* Order the data */
- for (i = 0; i < n; i++)
+ /* Order the data into the temporary buffer */
+ size_t i = 0;
+ for (const gmx_cgsort_t &entry : sort)
{
- buf[i] = a[sort[i].ind];
+ sortBuffer[i++] = dataToSort[entry.ind];
}
/* Copy back to the original array */
- for (i = 0; i < n; i++)
- {
- a[i] = buf[i];
- }
+ std::copy(sortBuffer.begin(), sortBuffer.begin() + sort.size(),
+ dataToSort.begin());
}
-static void order_vec_cg(int n, const gmx_cgsort_t *sort,
- rvec *v, rvec *buf)
+/* Order data in \p dataToSort according to \p sort
+ *
+ * Note: \p vectorToSort should have at least \p sort.size() elements,
+ * \p workVector is resized when it is too small.
+ */
+template <typename T>
+static void
+orderVector(gmx::ArrayRef<const gmx_cgsort_t> sort,
+ gmx::ArrayRef<T> vectorToSort,
+ std::vector<T> *workVector)
{
- int i;
-
- /* Order the data */
- for (i = 0; i < n; i++)
- {
- copy_rvec(v[sort[i].ind], buf[i]);
- }
-
- /* Copy back to the original array */
- for (i = 0; i < n; i++)
+ if (gmx::index(workVector->size()) < sort.size())
{
- copy_rvec(buf[i], v[i]);
+ workVector->resize(sort.size());
}
+ orderVector<T>(sort, vectorToSort, *workVector);
}
-static void order_vec_atom(int ncg, const int *cgindex, const gmx_cgsort_t *sort,
- rvec *v, rvec *buf)
+static void order_vec_atom(const gmx::RangePartitioning *atomGroups,
+ gmx::ArrayRef<const gmx_cgsort_t> sort,
+ gmx::ArrayRef<gmx::RVec> v,
+ gmx::ArrayRef<gmx::RVec> buf)
{
- int a, atot, cg, cg0, cg1, i;
-
- if (cgindex == nullptr)
+ if (atomGroups == nullptr)
{
/* Avoid the useless loop of the atoms within a cg */
- order_vec_cg(ncg, sort, v, buf);
+ orderVector(sort, v, buf);
return;
}
/* Order the data */
- a = 0;
- for (cg = 0; cg < ncg; cg++)
+ int a = 0;
+ for (const gmx_cgsort_t &entry : sort)
{
- cg0 = cgindex[sort[cg].ind];
- cg1 = cgindex[sort[cg].ind+1];
- for (i = cg0; i < cg1; i++)
+ for (int i : atomGroups->block(entry.ind))
{
copy_rvec(v[i], buf[a]);
a++;
}
}
- atot = a;
+ int atot = a;
/* Copy back to the original array */
- for (a = 0; a < atot; a++)
+ for (int a = 0; a < atot; a++)
{
copy_rvec(buf[a], v[a]);
}
}
-static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
- int nsort_new, gmx_cgsort_t *sort_new,
- gmx_cgsort_t *sort1)
+/* Returns whether a < b */
+static bool compareCgsort(const gmx_cgsort_t &a,
+ const gmx_cgsort_t &b)
{
- int i1, i2, i_new;
+ return (a.nsc < b.nsc ||
+ (a.nsc == b.nsc && a.ind_gl < b.ind_gl));
+}
+static void orderedSort(gmx::ArrayRef<const gmx_cgsort_t> stationary,
+ gmx::ArrayRef<gmx_cgsort_t> moved,
+ std::vector<gmx_cgsort_t> *sort1)
+{
/* The new indices are not very ordered, so we qsort them */
- gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
+ std::sort(moved.begin(), moved.end(), comp_cgsort);
- /* sort2 is already ordered, so now we can merge the two arrays */
- i1 = 0;
- i2 = 0;
- i_new = 0;
- while (i2 < nsort2 || i_new < nsort_new)
- {
- if (i2 == nsort2)
- {
- sort1[i1++] = sort_new[i_new++];
- }
- else if (i_new == nsort_new)
- {
- sort1[i1++] = sort2[i2++];
- }
- else if (sort2[i2].nsc < sort_new[i_new].nsc ||
- (sort2[i2].nsc == sort_new[i_new].nsc &&
- sort2[i2].ind_gl < sort_new[i_new].ind_gl))
- {
- sort1[i1++] = sort2[i2++];
- }
- else
- {
- sort1[i1++] = sort_new[i_new++];
- }
- }
+ /* stationary is already ordered, so now we can merge the two arrays */
+ sort1->resize(stationary.size() + moved.size());
+ std::merge(stationary.begin(), stationary.end(),
+ moved.begin(), moved.end(),
+ sort1->begin(),
+ compareCgsort);
}
-static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
+/* Set the sorting order for systems with charge groups, returned in sort->sort.
+ * The order is according to the global charge group index.
+ * This adds and removes charge groups that moved between domains.
+ */
+static void dd_sort_order(const gmx_domdec_t *dd,
+ const t_forcerec *fr,
+ int ncg_home_old,
+ gmx_domdec_sort_t *sort)
{
- gmx_domdec_sort_t *sort;
- gmx_cgsort_t *cgsort, *sort_i;
- int ncg_new, nsort2, nsort_new, i, *a, moved;
-
- sort = dd->comm->sort;
+ const int *a = fr->ns->grid->cell_index;
- a = fr->ns->grid->cell_index;
-
- moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
+ const int movedValue = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
if (ncg_home_old >= 0)
{
+ std::vector<gmx_cgsort_t> &stationary = sort->stationary;
+ std::vector<gmx_cgsort_t> &moved = sort->moved;
+
/* The charge groups that remained in the same ns grid cell
* are completely ordered. So we can sort efficiently by sorting
* the charge groups that did move into the stationary list.
+ * Note: push_back() seems to be slightly slower than direct access.
*/
- ncg_new = 0;
- nsort2 = 0;
- nsort_new = 0;
- for (i = 0; i < dd->ncg_home; i++)
+ stationary.clear();
+ moved.clear();
+ for (int i = 0; i < dd->ncg_home; i++)
{
/* Check if this cg did not move to another node */
- if (a[i] < moved)
+ if (a[i] < movedValue)
{
- if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
+ gmx_cgsort_t entry;
+ entry.nsc = a[i];
+ entry.ind_gl = dd->globalAtomGroupIndices[i];
+ entry.ind = i;
+
+ if (i >= ncg_home_old || a[i] != sort->sorted[i].nsc)
{
/* This cg is new on this node or moved ns grid cell */
- if (nsort_new >= sort->sort_new_nalloc)
- {
- sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
- srenew(sort->sort_new, sort->sort_new_nalloc);
- }
- sort_i = &(sort->sort_new[nsort_new++]);
+ moved.push_back(entry);
}
else
{
/* This cg did not move */
- sort_i = &(sort->sort2[nsort2++]);
+ stationary.push_back(entry);
}
- /* Sort on the ns grid cell indices
- * and the global topology index.
- * index_gl is irrelevant with cell ns,
- * but we set it here anyhow to avoid a conditional.
- */
- sort_i->nsc = a[i];
- sort_i->ind_gl = dd->index_gl[i];
- sort_i->ind = i;
- ncg_new++;
}
}
+
if (debug)
{
- fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
- nsort2, nsort_new);
+ fprintf(debug, "ordered sort cgs: stationary %zu moved %zu\n",
+ stationary.size(), moved.size());
}
/* Sort efficiently */
- ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
- sort->sort);
+ orderedSort(stationary, moved, &sort->sorted);
}
else
{
- cgsort = sort->sort;
- ncg_new = 0;
- for (i = 0; i < dd->ncg_home; i++)
+ std::vector<gmx_cgsort_t> &cgsort = sort->sorted;
+ cgsort.clear();
+ cgsort.reserve(dd->ncg_home);
+ int numCGNew = 0;
+ for (int i = 0; i < dd->ncg_home; i++)
{
/* Sort on the ns grid cell indices
* and the global topology index
*/
- cgsort[i].nsc = a[i];
- cgsort[i].ind_gl = dd->index_gl[i];
- cgsort[i].ind = i;
- if (cgsort[i].nsc < moved)
+ gmx_cgsort_t entry;
+ entry.nsc = a[i];
+ entry.ind_gl = dd->globalAtomGroupIndices[i];
+ entry.ind = i;
+ cgsort.push_back(entry);
+ if (cgsort[i].nsc < movedValue)
{
- ncg_new++;
+ numCGNew++;
}
}
if (debug)
{
- fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
+ fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, numCGNew);
}
/* Determine the order of the charge groups using qsort */
- gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
- }
+ std::sort(cgsort.begin(), cgsort.end(), comp_cgsort);
- return ncg_new;
+ /* Remove the charge groups which are no longer at home here */
+ cgsort.resize(numCGNew);
+ }
}
-static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
+/* Returns the sorting order for atoms based on the nbnxn grid order in sort */
+static void dd_sort_order_nbnxn(const t_forcerec *fr,
+ std::vector<gmx_cgsort_t> *sort)
{
- gmx_cgsort_t *sort;
- int ncg_new, i, na;
- const int *a;
-
- sort = dd->comm->sort->sort;
+ gmx::ArrayRef<const int> atomOrder = nbnxn_get_atomorder(fr->nbv->nbs.get());
- nbnxn_get_atomorder(fr->nbv->nbs, &a, &na);
-
- ncg_new = 0;
- for (i = 0; i < na; i++)
+ /* Using push_back() instead of this resize results in much slower code */
+ sort->resize(atomOrder.size());
+ gmx::ArrayRef<gmx_cgsort_t> buffer = *sort;
+ size_t numSorted = 0;
+ for (int i : atomOrder)
{
- if (a[i] >= 0)
+ if (i >= 0)
{
- sort[ncg_new].ind = a[i];
- ncg_new++;
+ /* The values of nsc and ind_gl are not used in this case */
+ buffer[numSorted++].ind = i;
}
}
-
- return ncg_new;
+ sort->resize(numSorted);
}
static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
int ncg_home_old)
{
- gmx_domdec_sort_t *sort;
- gmx_cgsort_t *cgsort;
- int *cgindex;
- int ncg_new, i, *ibuf, cgsize;
- rvec *vbuf;
-
- sort = dd->comm->sort;
-
- if (dd->ncg_home > sort->sort_nalloc)
- {
- sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
- srenew(sort->sort, sort->sort_nalloc);
- srenew(sort->sort2, sort->sort_nalloc);
- }
- cgsort = sort->sort;
+ gmx_domdec_sort_t *sort = dd->comm->sort.get();
switch (fr->cutoff_scheme)
{
case ecutsGROUP:
- ncg_new = dd_sort_order(dd, fr, ncg_home_old);
+ dd_sort_order(dd, fr, ncg_home_old, sort);
break;
case ecutsVERLET:
- ncg_new = dd_sort_order_nbnxn(dd, fr);
+ dd_sort_order_nbnxn(fr, &sort->sorted);
break;
default:
gmx_incons("unimplemented");
- ncg_new = 0;
}
+ const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();
+
/* We alloc with the old size, since cgindex is still old */
- vec_rvec_check_alloc(&dd->comm->vbuf, dd->cgindex[dd->ncg_home]);
- vbuf = dd->comm->vbuf.v;
+ GMX_ASSERT(atomGrouping.numBlocks() == dd->ncg_home, "atomGroups and dd should be consistent");
+ DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGrouping.fullRange().end());
- if (dd->comm->bCGs)
- {
- cgindex = dd->cgindex;
- }
- else
- {
- cgindex = nullptr;
- }
+ const gmx::RangePartitioning *atomGroupsPtr = (dd->comm->bCGs ? &atomGrouping : nullptr);
- /* Remove the charge groups which are no longer at home here */
- dd->ncg_home = ncg_new;
+ /* Set the new home atom/charge group count */
+ dd->ncg_home = sort->sorted.size();
if (debug)
{
fprintf(debug, "Set the new home charge group count to %d\n",
}
/* Reorder the state */
+ gmx::ArrayRef<const gmx_cgsort_t> cgsort = sort->sorted;
+ GMX_RELEASE_ASSERT(cgsort.size() == dd->ncg_home, "We should sort all the home atom groups");
+
if (state->flags & (1 << estX))
{
- order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->x.data()), vbuf);
+ order_vec_atom(atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
}
if (state->flags & (1 << estV))
{
- order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->v.data()), vbuf);
+ order_vec_atom(atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
}
if (state->flags & (1 << estCGP))
{
- order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->cg_p.data()), vbuf);
+ order_vec_atom(atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
}
if (fr->cutoff_scheme == ecutsGROUP)
{
/* Reorder cgcm */
- order_vec_cg(dd->ncg_home, cgsort, cgcm, vbuf);
+ gmx::ArrayRef<gmx::RVec> cgcmRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cgcm[0]), cgsort.size());
+ orderVector(cgsort, cgcmRef, rvecBuffer.buffer);
}
- if (dd->ncg_home+1 > sort->ibuf_nalloc)
- {
- sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
- srenew(sort->ibuf, sort->ibuf_nalloc);
- }
- ibuf = sort->ibuf;
/* Reorder the global cg index */
- order_int_cg(dd->ncg_home, cgsort, dd->index_gl, ibuf);
+ orderVector<int>(cgsort, dd->globalAtomGroupIndices, &sort->intBuffer);
/* Reorder the cginfo */
- order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
+ orderVector<int>(cgsort, gmx::arrayRefFromArray(fr->cginfo, cgsort.size()), &sort->intBuffer);
/* Rebuild the local cg index */
if (dd->comm->bCGs)
{
- ibuf[0] = 0;
- for (i = 0; i < dd->ncg_home; i++)
- {
- cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
- ibuf[i+1] = ibuf[i] + cgsize;
- }
- for (i = 0; i < dd->ncg_home+1; i++)
+ /* We make a new, ordered atomGroups object and assign it to
+ * the old one. This causes some allocation overhead, but saves
+ * a copy back of the whole index.
+ */
+ gmx::RangePartitioning ordered;
+ for (const gmx_cgsort_t &entry : cgsort)
{
- dd->cgindex[i] = ibuf[i];
+ ordered.appendBlock(atomGrouping.block(entry.ind).size());
}
+ dd->atomGrouping_ = ordered;
}
else
{
- for (i = 0; i < dd->ncg_home+1; i++)
- {
- dd->cgindex[i] = i;
- }
+ dd->atomGrouping_.setAllBlocksSizeOne(dd->ncg_home);
}
/* Set the home atom number */
- dd->nat_home = dd->cgindex[dd->ncg_home];
+ dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGrouping().fullRange().end());
if (fr->cutoff_scheme == ecutsVERLET)
{
/* The atoms are now exactly in grid order, update the grid order */
- nbnxn_set_atomorder(fr->nbv->nbs);
+ nbnxn_set_atomorder(fr->nbv->nbs.get());
}
else
{
/* Copy the sorted ns cell indices back to the ns grid struct */
- for (i = 0; i < dd->ncg_home; i++)
+ for (gmx::index i = 0; i < cgsort.size(); i++)
{
fr->ns->grid->cell_index[i] = cgsort[i].nsc;
}
- fr->ns->grid->nr = dd->ncg_home;
+ fr->ns->grid->nr = cgsort.size();
}
}
static void add_dd_statistics(gmx_domdec_t *dd)
{
- gmx_domdec_comm_t *comm;
- int ddnat;
-
- comm = dd->comm;
+ gmx_domdec_comm_t *comm = dd->comm;
- for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
+ for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
{
- comm->sum_nat[ddnat-ddnatZONE] +=
- comm->nat[ddnat] - comm->nat[ddnat-1];
+ auto range = static_cast<DDAtomRanges::Type>(i);
+ comm->sum_nat[i] +=
+ comm->atomRanges.end(range) - comm->atomRanges.start(range);
}
comm->ndecomp++;
}
void reset_dd_statistics_counters(gmx_domdec_t *dd)
{
- gmx_domdec_comm_t *comm;
- int ddnat;
-
- comm = dd->comm;
+ gmx_domdec_comm_t *comm = dd->comm;
/* Reset all the statistics and counters for total run counting */
- for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
+ for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
{
- comm->sum_nat[ddnat-ddnatZONE] = 0;
+ comm->sum_nat[i] = 0;
}
comm->ndecomp = 0;
comm->nload = 0;
comm->load_pme = 0;
}
-void print_dd_statistics(t_commrec *cr, const t_inputrec *ir, FILE *fplog)
+void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
{
- gmx_domdec_comm_t *comm;
- int ddnat;
- double av;
-
- comm = cr->dd->comm;
+ gmx_domdec_comm_t *comm = cr->dd->comm;
- gmx_sumd(ddnatNR-ddnatZONE, comm->sum_nat, cr);
+ const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
+ gmx_sumd(numRanges, comm->sum_nat, cr);
if (fplog == nullptr)
{
fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
- for (ddnat = ddnatZONE; ddnat < ddnatNR; ddnat++)
+ for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
{
- av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
- switch (ddnat)
+ auto range = static_cast<DDAtomRanges::Type>(i);
+ double av = comm->sum_nat[i]/comm->ndecomp;
+ switch (range)
{
- case ddnatZONE:
+ case DDAtomRanges::Type::Zones:
fprintf(fplog,
" av. #atoms communicated per step for force: %d x %.1f\n",
2, av);
break;
- case ddnatVSITE:
+ case DDAtomRanges::Type::Vsites:
if (cr->dd->vsite_comm)
{
fprintf(fplog,
av);
}
break;
- case ddnatCON:
+ case DDAtomRanges::Type::Constraints:
if (cr->dd->constraint_comm)
{
fprintf(fplog,
}
void dd_partition_system(FILE *fplog,
- gmx_int64_t step,
- t_commrec *cr,
+ int64_t step,
+ const t_commrec *cr,
gmx_bool bMasterState,
int nstglobalcomm,
t_state *state_global,
gmx_localtop_t *top_local,
t_forcerec *fr,
gmx_vsite_t *vsite,
- gmx_constr_t constr,
+ gmx::Constraints *constr,
t_nrnb *nrnb,
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle *wcycle,
gmx_bool bVerbose)
{
gmx_domdec_t *dd;
gmx_domdec_comm_t *comm;
gmx_ddbox_t ddbox = {0};
t_block *cgs_gl;
- gmx_int64_t step_pcoupl;
+ int64_t step_pcoupl;
rvec cell_ns_x0, cell_ns_x1;
- int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
+ int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
- gmx_bool bRedist, bSortCG, bResortAll;
+ gmx_bool bRedist, bResortAll;
ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
real grid_density;
char sbuf[22];
dd = cr->dd;
comm = dd->comm;
+ // TODO if the update code becomes accessible here, use
+ // upd->deform for this logic.
bBoxChanged = (bMasterState || inputrecDeform(ir));
if (ir->epc != epcNO)
{
* We need to determine the last step in which p-coupling occurred.
* MRS -- need to validate this for vv?
*/
- n = ir->nstpcouple;
+ int n = ir->nstpcouple;
if (n == 1)
{
step_pcoupl = step - 1;
if (bMasterState)
{
/* Clear the old state */
- clear_dd_indices(dd, 0, 0);
+ clearDDStateIndices(dd, 0, 0);
ncgindex_set = 0;
- rvec *xGlobal = (SIMMASTER(cr) ? as_rvec_array(state_global->x.data()) : nullptr);
+ auto xGlobal = positionsFromStatePointer(state_global);
- set_ddbox(dd, bMasterState, cr, ir,
- SIMMASTER(cr) ? state_global->box : nullptr,
- TRUE, cgs_gl, xGlobal,
+ set_ddbox(dd, true, ir,
+ DDMASTER(dd) ? state_global->box : nullptr,
+ true, xGlobal,
&ddbox);
- get_cg_distribution(fplog, dd, cgs_gl,
- SIMMASTER(cr) ? state_global->box : nullptr,
- &ddbox, xGlobal);
-
- dd_distribute_state(dd, cgs_gl,
- state_global, state_local, f);
+ distributeState(fplog, dd, state_global, ddbox, state_local, f);
dd_make_local_cgs(dd, &top_local->cgs);
&top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
}
- inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
+ inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
- dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
+ dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
}
else if (state_local->ddp_count != dd->ddp_count)
{
if (state_local->ddp_count > dd->ddp_count)
{
- gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
+ gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%ld)", state_local->ddp_count, dd->ddp_count);
}
if (state_local->ddp_count_cg_gl != state_local->ddp_count)
}
/* Clear the old state */
- clear_dd_indices(dd, 0, 0);
+ clearDDStateIndices(dd, 0, 0);
- /* Build the new indices */
- rebuild_cgindex(dd, cgs_gl->index, state_local);
+ /* Restore the atom group indices from state_local */
+ restoreAtomGroups(dd, cgs_gl->index, state_local);
make_dd_indices(dd, cgs_gl->index, 0);
ncgindex_set = dd->ncg_home;
&top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
}
- inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
+ inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
- dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
+ dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
- set_ddbox(dd, bMasterState, cr, ir, state_local->box,
- TRUE, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
+ set_ddbox(dd, bMasterState, ir, state_local->box,
+ true, state_local->x, &ddbox);
bRedist = isDlbOn(comm);
}
/* We have the full state, only redistribute the cgs */
/* Clear the non-home indices */
- clear_dd_indices(dd, dd->ncg_home, dd->nat_home);
+ clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
ncgindex_set = 0;
/* Avoid global communication for dim's without pbc and -gcom */
copy_rvec(comm->box0, ddbox.box0 );
copy_rvec(comm->box_size, ddbox.box_size);
}
- set_ddbox(dd, bMasterState, cr, ir, state_local->box,
- bNStGlobalComm, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
+ set_ddbox(dd, bMasterState, ir, state_local->box,
+ bNStGlobalComm, state_local->x, &ddbox);
bBoxChanged = TRUE;
bRedist = TRUE;
}
/* Check if we should sort the charge groups */
- bSortCG = (bMasterState || bRedist);
+ const bool bSortCG = (bMasterState || bRedist);
ncg_home_old = dd->ncg_home;
{
wallcycle_sub_start(wcycle, ewcsDD_REDIST);
+ ncgindex_set = dd->ncg_home;
dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
state_local, f, fr,
- !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
+ nrnb, &ncg_moved);
+
+ GMX_RELEASE_ASSERT(bSortCG, "Sorting is required after redistribution");
wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
}
fr->rlist, grid_density);
break;
case ecutsVERLET:
- nbnxn_get_ncells(fr->nbv->nbs, &ncells_old[XX], &ncells_old[YY]);
+ nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
break;
default:
gmx_incons("unimplemented");
case ecutsVERLET:
set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
- nbnxn_put_on_grid(fr->nbv->nbs, fr->ePBC, state_local->box,
+ nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
0,
comm->zones.size[0].bb_x0,
comm->zones.size[0].bb_x1,
comm->zones.dens_zone0,
fr->cginfo,
as_rvec_array(state_local->x.data()),
- ncg_moved, bRedist ? comm->moved : nullptr,
+ ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
fr->nbv->grp[eintLocal].kernel_type,
fr->nbv->nbat);
- nbnxn_get_ncells(fr->nbv->nbs, &ncells_new[XX], &ncells_new[YY]);
+ nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
break;
case ecutsGROUP:
fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
bResortAll ? -1 : ncg_home_old);
/* After sorting and compacting we set the correct size */
- dd_resize_state(state_local, f, dd->nat_home);
+ dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
/* Rebuild all the indices */
- ga2la_clear(dd->ga2la);
+ dd->ga2la->clear();
ncgindex_set = 0;
wallcycle_sub_stop(wcycle, ewcsDD_GRID);
if (fr->cutoff_scheme == ecutsVERLET)
{
+ /* When bSortCG=true, we have already set the size for zone 0 */
set_zones_size(dd, state_local->box, &ddbox,
bSortCG ? 1 : 0, comm->zones.n,
0);
wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
/* Extract a local topology from the global topology */
- for (i = 0; i < dd->ndim; i++)
+ for (int i = 0; i < dd->ndim; i++)
{
- np[dd->dim[i]] = comm->cd[i].np;
+ np[dd->dim[i]] = comm->cd[i].numPulses();
}
dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
comm->cellsize_min, np,
wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
/* Set up the special atom communication */
- n = comm->nat[ddnatZONE];
- for (i = ddnatZONE+1; i < ddnatNR; i++)
+ int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
+ for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
{
- switch (i)
+ auto range = static_cast<DDAtomRanges::Type>(i);
+ switch (range)
{
- case ddnatVSITE:
+ case DDAtomRanges::Type::Vsites:
if (vsite && vsite->n_intercg_vsite)
{
n = dd_make_local_vsites(dd, n, top_local->idef.il);
}
break;
- case ddnatCON:
+ case DDAtomRanges::Type::Constraints:
if (dd->bInterCGcons || dd->bInterCGsettles)
{
/* Only for inter-cg constraints we need special code */
default:
gmx_incons("Unknown special atom type setup");
}
- comm->nat[i] = n;
+ comm->atomRanges.setEnd(range, n);
}
wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
/* Make space for the extra coordinates for virtual site
* or constraint communication.
*/
- state_local->natoms = comm->nat[ddnatNR-1];
+ state_local->natoms = comm->atomRanges.numAtomsTotal();
dd_resize_state(state_local, f, state_local->natoms);
{
if (vsite && vsite->n_intercg_vsite)
{
- nat_f_novirsum = comm->nat[ddnatVSITE];
+ nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
}
else
{
if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
{
- nat_f_novirsum = dd->nat_tot;
+ nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
}
else
{
- nat_f_novirsum = dd->nat_home;
+ nat_f_novirsum = comm->atomRanges.numHomeAtoms();
}
}
}
* allocation, zeroing and copying, but this is probably not worth
* the complications and checking.
*/
- forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
- dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
+ forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
+ comm->atomRanges.end(DDAtomRanges::Type::Zones),
+ comm->atomRanges.end(DDAtomRanges::Type::Constraints),
+ nat_f_novirsum);
/* Update atom data for mdatoms and several algorithms */
mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
- nullptr, mdAtoms, vsite, nullptr);
-
- if (ir->implicit_solvent)
- {
- make_local_gb(cr, fr->born, ir->gb_algorithm);
- }
+ nullptr, mdAtoms, constr, vsite, nullptr);
auto mdatoms = mdAtoms->mdatoms();
if (!thisRankHasDuty(cr, DUTY_PME))
/* Send the charges and/or c6/sigmas to our PME only node */
gmx_pme_send_parameters(cr,
fr->ic,
- mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
+ mdatoms->nChargePerturbed != 0, mdatoms->nTypePerturbed != 0,
mdatoms->chargeA, mdatoms->chargeB,
mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
mdatoms->sigmaA, mdatoms->sigmaB,
dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
}
- if (constr)
- {
- set_constraints(constr, top_local, ir, mdatoms, cr);
- }
-
if (ir->bPull)
{
/* Update the local pull groups */
- dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
- }
-
- if (ir->bRot)
- {
- /* Update the local rotation groups */
- dd_make_local_rotation_groups(dd, ir->rot);
+ dd_make_local_pull_groups(cr, ir->pull_work);
}
- if (ir->eSwapCoords != eswapNO)
+ if (dd->atomSets != nullptr)
{
- /* Update the local groups needed for ion swapping */
- dd_make_local_swap_groups(dd, ir->swap);
+ /* Update the local atom sets */
+ dd->atomSets->setIndicesInDomainDecomposition(*(dd->ga2la));
}
/* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
{
- dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
+ dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
write_dd_pdb("dd_dump", step, "dump", top_global, cr,
-1, as_rvec_array(state_local->x.data()), state_local->box);
}
wallcycle_stop(wcycle, ewcDOMDEC);
}
+
+/*! \brief Check whether bonded interactions are missing, if appropriate */
+void checkNumberOfBondedInteractions(FILE *fplog,
+ t_commrec *cr,
+ int totalNumberOfBondedInteractions,
+ const gmx_mtop_t *top_global,
+ const gmx_localtop_t *top_local,
+ const t_state *state,
+ bool *shouldCheckNumberOfBondedInteractions)
+{
+ if (*shouldCheckNumberOfBondedInteractions)
+ {
+ if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
+ {
+ dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
+ }
+ *shouldCheckNumberOfBondedInteractions = false;
+ }
+}