2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/compat/make_unique.h"
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlbtiming.h"
55 #include "gromacs/domdec/domdec_network.h"
56 #include "gromacs/domdec/ga2la.h"
57 #include "gromacs/ewald/pme.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/gmxlib/chargegroup.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/imd/imd.h"
66 #include "gromacs/listed-forces/manage-threading.h"
67 #include "gromacs/math/functions.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/forcerec.h"
73 #include "gromacs/mdlib/gmx_omp_nthreads.h"
74 #include "gromacs/mdlib/lincs.h"
75 #include "gromacs/mdlib/mdatoms.h"
76 #include "gromacs/mdlib/mdrun.h"
77 #include "gromacs/mdlib/mdsetup.h"
78 #include "gromacs/mdlib/nb_verlet.h"
79 #include "gromacs/mdlib/nbnxn_grid.h"
80 #include "gromacs/mdlib/nsgrid.h"
81 #include "gromacs/mdlib/vsite.h"
82 #include "gromacs/mdtypes/commrec.h"
83 #include "gromacs/mdtypes/df_history.h"
84 #include "gromacs/mdtypes/forcerec.h"
85 #include "gromacs/mdtypes/inputrec.h"
86 #include "gromacs/mdtypes/md_enums.h"
87 #include "gromacs/mdtypes/mdatom.h"
88 #include "gromacs/mdtypes/nblist.h"
89 #include "gromacs/mdtypes/state.h"
90 #include "gromacs/pbcutil/ishift.h"
91 #include "gromacs/pbcutil/pbc.h"
92 #include "gromacs/pulling/pull.h"
93 #include "gromacs/pulling/pull_rotation.h"
94 #include "gromacs/swap/swapcoords.h"
95 #include "gromacs/timing/wallcycle.h"
96 #include "gromacs/topology/block.h"
97 #include "gromacs/topology/idef.h"
98 #include "gromacs/topology/ifunc.h"
99 #include "gromacs/topology/mtop_lookup.h"
100 #include "gromacs/topology/mtop_util.h"
101 #include "gromacs/topology/topology.h"
102 #include "gromacs/utility/basedefinitions.h"
103 #include "gromacs/utility/basenetwork.h"
104 #include "gromacs/utility/cstringutil.h"
105 #include "gromacs/utility/exceptions.h"
106 #include "gromacs/utility/fatalerror.h"
107 #include "gromacs/utility/gmxmpi.h"
108 #include "gromacs/utility/qsort_threadsafe.h"
109 #include "gromacs/utility/real.h"
110 #include "gromacs/utility/smalloc.h"
111 #include "gromacs/utility/stringutil.h"
113 #include "atomdistribution.h"
114 #include "cellsizes.h"
115 #include "distribute.h"
116 #include "domdec_constraints.h"
117 #include "domdec_internal.h"
118 #include "domdec_vsite.h"
119 #include "redistribute.h"
122 #define DD_NLOAD_MAX 9
124 const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
126 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
129 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
130 #define DD_FLAG_NRCG 65535
131 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
132 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
134 /* The DD zone order */
135 static const ivec dd_zo[DD_MAXZONE] =
136 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
138 /* The non-bonded zone-pair setup for domain decomposition
139 * The first number is the i-zone, the second number the first j-zone seen by
140 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
141 * As is, this is for 3D decomposition, where there are 4 i-zones.
142 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
143 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
146 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
151 /* Turn on DLB when the load imbalance causes this amount of total loss.
152 * There is a bit of overhead with DLB and it's difficult to achieve
153 * a load imbalance of less than 2% with DLB.
155 #define DD_PERF_LOSS_DLB_ON 0.02
157 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
158 #define DD_PERF_LOSS_WARN 0.05
161 /* We check if to turn on DLB at the first and every 100 DD partitionings.
162 * With large imbalance DLB will turn on at the first step, so we can
163 * make the interval so large that the MPI overhead of the check is negligible.
165 static const int c_checkTurnDlbOnInterval = 100;
166 /* We need to check if DLB results in worse performance and then turn it off.
167 * We check this more often then for turning DLB on, because the DLB can scale
168 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
169 * and furthermore, we are already synchronizing often with DLB, so
170 * the overhead of the MPI Bcast is not that high.
172 static const int c_checkTurnDlbOffInterval = 20;
174 /* Forward declaration */
175 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
179 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
181 static void index2xyz(ivec nc,int ind,ivec xyz)
183 xyz[XX] = ind % nc[XX];
184 xyz[YY] = (ind / nc[XX]) % nc[YY];
185 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
189 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
191 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
192 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
193 xyz[ZZ] = ind % nc[ZZ];
196 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
201 ddindex = dd_index(dd->nc, c);
202 if (dd->comm->bCartesianPP_PME)
204 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
206 else if (dd->comm->bCartesianPP)
209 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
220 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
222 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
225 int ddglatnr(const gmx_domdec_t *dd, int i)
235 if (i >= dd->comm->atomRanges.numAtomsTotal())
237 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal());
239 atnr = dd->globalAtomIndices[i] + 1;
245 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
247 return &dd->comm->cgs_gl;
250 void dd_store_state(gmx_domdec_t *dd, t_state *state)
254 if (state->ddp_count != dd->ddp_count)
256 gmx_incons("The MD state does not match the domain decomposition state");
259 state->cg_gl.resize(dd->ncg_home);
260 for (i = 0; i < dd->ncg_home; i++)
262 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
265 state->ddp_count_cg_gl = dd->ddp_count;
268 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
270 return &dd->comm->zones;
273 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
274 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
276 gmx_domdec_zones_t *zones;
279 zones = &dd->comm->zones;
282 while (icg >= zones->izone[izone].cg1)
291 else if (izone < zones->nizone)
293 *jcg0 = zones->izone[izone].jcg0;
297 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
298 icg, izone, zones->nizone);
301 *jcg1 = zones->izone[izone].jcg1;
303 for (d = 0; d < dd->ndim; d++)
306 shift0[dim] = zones->izone[izone].shift0[dim];
307 shift1[dim] = zones->izone[izone].shift1[dim];
308 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
310 /* A conservative approach, this can be optimized */
317 int dd_numHomeAtoms(const gmx_domdec_t &dd)
319 return dd.comm->atomRanges.numHomeAtoms();
322 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
324 /* We currently set mdatoms entries for all atoms:
325 * local + non-local + communicated for vsite + constraints
328 return dd->comm->atomRanges.numAtomsTotal();
331 int dd_natoms_vsite(const gmx_domdec_t *dd)
333 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
336 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
338 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
339 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
342 void dd_move_x(gmx_domdec_t *dd,
344 gmx::ArrayRef<gmx::RVec> x,
345 gmx_wallcycle *wcycle)
347 wallcycle_start(wcycle, ewcMOVEX);
350 gmx_domdec_comm_t *comm;
351 gmx_domdec_comm_dim_t *cd;
352 rvec shift = {0, 0, 0};
353 gmx_bool bPBC, bScrew;
357 const BlockRanges &atomGroups = dd->atomGroups();
360 nat_tot = comm->atomRanges.numHomeAtoms();
361 for (int d = 0; d < dd->ndim; d++)
363 bPBC = (dd->ci[dd->dim[d]] == 0);
364 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
367 copy_rvec(box[dd->dim[d]], shift);
370 for (const gmx_domdec_ind_t &ind : cd->ind)
372 gmx::ArrayRef<const int> index = ind.index;
373 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
374 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
378 for (int i = 0; i < ind.nsend[nzone]; i++)
380 int at0 = atomGroups.index[index[i]];
381 int at1 = atomGroups.index[index[i] + 1];
382 for (int j = at0; j < at1; j++)
384 sendBuffer[n] = x[j];
391 for (int i = 0; i < ind.nsend[nzone]; i++)
393 int at0 = atomGroups.index[index[i]];
394 int at1 = atomGroups.index[index[i] + 1];
395 for (int j = at0; j < at1; j++)
397 /* We need to shift the coordinates */
398 for (int d = 0; d < DIM; d++)
400 sendBuffer[n][d] = x[j][d] + shift[d];
408 for (int i = 0; i < ind.nsend[nzone]; i++)
410 int at0 = atomGroups.index[index[i]];
411 int at1 = atomGroups.index[index[i]+1];
412 for (int j = at0; j < at1; j++)
415 sendBuffer[n][XX] = x[j][XX] + shift[XX];
417 * This operation requires a special shift force
418 * treatment, which is performed in calc_vir.
420 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
421 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
427 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
429 gmx::ArrayRef<gmx::RVec> receiveBuffer;
430 if (cd->receiveInPlace)
432 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
436 receiveBuffer = receiveBufferAccess.buffer;
438 /* Send and receive the coordinates */
439 ddSendrecv(dd, d, dddirBackward,
440 sendBuffer, receiveBuffer);
442 if (!cd->receiveInPlace)
445 for (int zone = 0; zone < nzone; zone++)
447 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
449 x[i] = receiveBuffer[j++];
453 nat_tot += ind.nrecv[nzone+1];
458 wallcycle_stop(wcycle, ewcMOVEX);
461 void dd_move_f(gmx_domdec_t *dd,
462 gmx::ArrayRef<gmx::RVec> f,
464 gmx_wallcycle *wcycle)
466 wallcycle_start(wcycle, ewcMOVEF);
469 gmx_domdec_comm_t *comm;
470 gmx_domdec_comm_dim_t *cd;
473 gmx_bool bShiftForcesNeedPbc, bScrew;
477 const BlockRanges &atomGroups = dd->atomGroups();
479 nzone = comm->zones.n/2;
480 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
481 for (int d = dd->ndim-1; d >= 0; d--)
483 /* Only forces in domains near the PBC boundaries need to
484 consider PBC in the treatment of fshift */
485 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
486 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
487 if (fshift == nullptr && !bScrew)
489 bShiftForcesNeedPbc = FALSE;
491 /* Determine which shift vector we need */
497 for (int p = cd->numPulses() - 1; p >= 0; p--)
499 const gmx_domdec_ind_t &ind = cd->ind[p];
500 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
501 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
503 nat_tot -= ind.nrecv[nzone+1];
505 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
507 gmx::ArrayRef<gmx::RVec> sendBuffer;
508 if (cd->receiveInPlace)
510 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
514 sendBuffer = sendBufferAccess.buffer;
516 for (int zone = 0; zone < nzone; zone++)
518 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
520 sendBuffer[j++] = f[i];
524 /* Communicate the forces */
525 ddSendrecv(dd, d, dddirForward,
526 sendBuffer, receiveBuffer);
527 gmx::ArrayRef<const int> index = ind.index;
528 /* Add the received forces */
530 if (!bShiftForcesNeedPbc)
532 for (int i = 0; i < ind.nsend[nzone]; i++)
534 int at0 = atomGroups.index[index[i]];
535 int at1 = atomGroups.index[index[i] + 1];
536 for (int j = at0; j < at1; j++)
538 for (int d = 0; d < DIM; d++)
540 f[j][d] += receiveBuffer[n][d];
548 /* fshift should always be defined if this function is
549 * called when bShiftForcesNeedPbc is true */
550 assert(NULL != fshift);
551 for (int i = 0; i < ind.nsend[nzone]; i++)
553 int at0 = atomGroups.index[index[i]];
554 int at1 = atomGroups.index[index[i] + 1];
555 for (int j = at0; j < at1; j++)
557 for (int d = 0; d < DIM; d++)
559 f[j][d] += receiveBuffer[n][d];
561 /* Add this force to the shift force */
562 for (int d = 0; d < DIM; d++)
564 fshift[is][d] += receiveBuffer[n][d];
572 for (int i = 0; i < ind.nsend[nzone]; i++)
574 int at0 = atomGroups.index[index[i]];
575 int at1 = atomGroups.index[index[i] + 1];
576 for (int j = at0; j < at1; j++)
578 /* Rotate the force */
579 f[j][XX] += receiveBuffer[n][XX];
580 f[j][YY] -= receiveBuffer[n][YY];
581 f[j][ZZ] -= receiveBuffer[n][ZZ];
584 /* Add this force to the shift force */
585 for (int d = 0; d < DIM; d++)
587 fshift[is][d] += receiveBuffer[n][d];
597 wallcycle_stop(wcycle, ewcMOVEF);
600 /* Convenience function for extracting a real buffer from an rvec buffer
602 * To reduce the number of temporary communication buffers and avoid
603 * cache polution, we reuse gmx::RVec buffers for storing reals.
604 * This functions return a real buffer reference with the same number
605 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
607 static gmx::ArrayRef<real>
608 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
610 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
614 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
617 gmx_domdec_comm_t *comm;
618 gmx_domdec_comm_dim_t *cd;
622 const BlockRanges &atomGroups = dd->atomGroups();
625 nat_tot = comm->atomRanges.numHomeAtoms();
626 for (int d = 0; d < dd->ndim; d++)
629 for (const gmx_domdec_ind_t &ind : cd->ind)
631 gmx::ArrayRef<const int> index = ind.index;
633 /* Note: We provision for RVec instead of real, so a factor of 3
634 * more than needed. The buffer actually already has this size
635 * and we pass a plain pointer below, so this does not matter.
637 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
638 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
640 for (int i = 0; i < ind.nsend[nzone]; i++)
642 int at0 = atomGroups.index[index[i]];
643 int at1 = atomGroups.index[index[i] + 1];
644 for (int j = at0; j < at1; j++)
646 sendBuffer[n++] = v[j];
650 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
652 gmx::ArrayRef<real> receiveBuffer;
653 if (cd->receiveInPlace)
655 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
659 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
661 /* Send and receive the data */
662 ddSendrecv(dd, d, dddirBackward,
663 sendBuffer, receiveBuffer);
664 if (!cd->receiveInPlace)
667 for (int zone = 0; zone < nzone; zone++)
669 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
671 v[i] = receiveBuffer[j++];
675 nat_tot += ind.nrecv[nzone+1];
681 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
684 gmx_domdec_comm_t *comm;
685 gmx_domdec_comm_dim_t *cd;
689 const gmx::BlockRanges &atomGroups = dd->atomGroups();
691 nzone = comm->zones.n/2;
692 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
693 for (int d = dd->ndim-1; d >= 0; d--)
696 for (int p = cd->numPulses() - 1; p >= 0; p--)
698 const gmx_domdec_ind_t &ind = cd->ind[p];
700 /* Note: We provision for RVec instead of real, so a factor of 3
701 * more than needed. The buffer actually already has this size
702 * and we typecast, so this works as intended.
704 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
705 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
706 nat_tot -= ind.nrecv[nzone + 1];
708 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
710 gmx::ArrayRef<real> sendBuffer;
711 if (cd->receiveInPlace)
713 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
717 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
719 for (int zone = 0; zone < nzone; zone++)
721 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
723 sendBuffer[j++] = v[i];
727 /* Communicate the forces */
728 ddSendrecv(dd, d, dddirForward,
729 sendBuffer, receiveBuffer);
730 gmx::ArrayRef<const int> index = ind.index;
731 /* Add the received forces */
733 for (int i = 0; i < ind.nsend[nzone]; i++)
735 int at0 = atomGroups.index[index[i]];
736 int at1 = atomGroups.index[index[i] + 1];
737 for (int j = at0; j < at1; j++)
739 v[j] += receiveBuffer[n];
748 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
750 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
752 zone->min0, zone->max1,
753 zone->mch0, zone->mch0,
754 zone->p1_0, zone->p1_1);
758 #define DDZONECOMM_MAXZONE 5
759 #define DDZONECOMM_BUFSIZE 3
761 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
762 int ddimind, int direction,
763 gmx_ddzone_t *buf_s, int n_s,
764 gmx_ddzone_t *buf_r, int n_r)
766 #define ZBS DDZONECOMM_BUFSIZE
767 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
768 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
771 for (i = 0; i < n_s; i++)
773 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
774 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
775 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
776 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
777 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
778 vbuf_s[i*ZBS+1][2] = 0;
779 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
780 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
781 vbuf_s[i*ZBS+2][2] = 0;
784 ddSendrecv(dd, ddimind, direction,
788 for (i = 0; i < n_r; i++)
790 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
791 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
792 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
793 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
794 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
795 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
796 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
802 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
803 rvec cell_ns_x0, rvec cell_ns_x1)
805 int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
807 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
808 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
809 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
810 rvec extr_s[2], extr_r[2];
812 real dist_d, c = 0, det;
813 gmx_domdec_comm_t *comm;
818 for (d = 1; d < dd->ndim; d++)
821 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
822 zp->min0 = cell_ns_x0[dim];
823 zp->max1 = cell_ns_x1[dim];
824 zp->min1 = cell_ns_x1[dim];
825 zp->mch0 = cell_ns_x0[dim];
826 zp->mch1 = cell_ns_x1[dim];
827 zp->p1_0 = cell_ns_x0[dim];
828 zp->p1_1 = cell_ns_x1[dim];
831 for (d = dd->ndim-2; d >= 0; d--)
834 bPBC = (dim < ddbox->npbcdim);
836 /* Use an rvec to store two reals */
837 extr_s[d][0] = comm->cell_f0[d+1];
838 extr_s[d][1] = comm->cell_f1[d+1];
839 extr_s[d][2] = comm->cell_f1[d+1];
842 /* Store the extremes in the backward sending buffer,
843 * so the get updated separately from the forward communication.
845 for (d1 = d; d1 < dd->ndim-1; d1++)
847 /* We invert the order to be able to use the same loop for buf_e */
848 buf_s[pos].min0 = extr_s[d1][1];
849 buf_s[pos].max1 = extr_s[d1][0];
850 buf_s[pos].min1 = extr_s[d1][2];
853 /* Store the cell corner of the dimension we communicate along */
854 buf_s[pos].p1_0 = comm->cell_x0[dim];
859 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
862 if (dd->ndim == 3 && d == 0)
864 buf_s[pos] = comm->zone_d2[0][1];
866 buf_s[pos] = comm->zone_d1[0];
870 /* We only need to communicate the extremes
871 * in the forward direction
873 npulse = comm->cd[d].numPulses();
876 /* Take the minimum to avoid double communication */
877 npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
881 /* Without PBC we should really not communicate over
882 * the boundaries, but implementing that complicates
883 * the communication setup and therefore we simply
884 * do all communication, but ignore some data.
888 for (p = 0; p < npulse_min; p++)
890 /* Communicate the extremes forward */
891 bUse = (bPBC || dd->ci[dim] > 0);
893 ddSendrecv(dd, d, dddirForward,
894 extr_s+d, dd->ndim-d-1,
895 extr_r+d, dd->ndim-d-1);
899 for (d1 = d; d1 < dd->ndim-1; d1++)
901 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
902 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
903 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
909 for (p = 0; p < npulse; p++)
911 /* Communicate all the zone information backward */
912 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
914 dd_sendrecv_ddzone(dd, d, dddirBackward,
921 for (d1 = d+1; d1 < dd->ndim; d1++)
923 /* Determine the decrease of maximum required
924 * communication height along d1 due to the distance along d,
925 * this avoids a lot of useless atom communication.
927 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
929 if (ddbox->tric_dir[dim])
931 /* c is the off-diagonal coupling between the cell planes
932 * along directions d and d1.
934 c = ddbox->v[dim][dd->dim[d1]][dim];
940 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
943 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
947 /* A negative value signals out of range */
953 /* Accumulate the extremes over all pulses */
954 for (i = 0; i < buf_size; i++)
964 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
965 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
966 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
969 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
977 if (bUse && dh[d1] >= 0)
979 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
980 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
983 /* Copy the received buffer to the send buffer,
984 * to pass the data through with the next pulse.
988 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
989 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
991 /* Store the extremes */
994 for (d1 = d; d1 < dd->ndim-1; d1++)
996 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
997 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
998 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
1002 if (d == 1 || (d == 0 && dd->ndim == 3))
1004 for (i = d; i < 2; i++)
1006 comm->zone_d2[1-d][i] = buf_e[pos];
1012 comm->zone_d1[1] = buf_e[pos];
1022 for (i = 0; i < 2; i++)
1026 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1028 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1029 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1035 for (i = 0; i < 2; i++)
1037 for (j = 0; j < 2; j++)
1041 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1043 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1044 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1048 for (d = 1; d < dd->ndim; d++)
1050 comm->cell_f_max0[d] = extr_s[d-1][0];
1051 comm->cell_f_min1[d] = extr_s[d-1][1];
1054 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1055 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
1060 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1061 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1063 rvec grid_s[2], *grid_r = nullptr, cx, r;
1064 char fname[STRLEN], buf[22];
1066 int a, i, d, z, y, x;
1070 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1071 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1075 snew(grid_r, 2*dd->nnodes);
1078 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1082 for (d = 0; d < DIM; d++)
1084 for (i = 0; i < DIM; i++)
1092 if (d < ddbox->npbcdim && dd->nc[d] > 1)
1094 tric[d][i] = box[i][d]/box[i][i];
1103 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1104 out = gmx_fio_fopen(fname, "w");
1105 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1107 for (i = 0; i < dd->nnodes; i++)
1109 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1110 for (d = 0; d < DIM; d++)
1112 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
1114 for (z = 0; z < 2; z++)
1116 for (y = 0; y < 2; y++)
1118 for (x = 0; x < 2; x++)
1120 cx[XX] = grid_r[i*2+x][XX];
1121 cx[YY] = grid_r[i*2+y][YY];
1122 cx[ZZ] = grid_r[i*2+z][ZZ];
1124 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1125 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
1129 for (d = 0; d < DIM; d++)
1131 for (x = 0; x < 4; x++)
1135 case 0: y = 1 + i*8 + 2*x; break;
1136 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1137 case 2: y = 1 + i*8 + x; break;
1139 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1143 gmx_fio_fclose(out);
1148 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
1149 const gmx_mtop_t *mtop, const t_commrec *cr,
1150 int natoms, const rvec x[], const matrix box)
1152 char fname[STRLEN], buf[22];
1155 const char *atomname, *resname;
1161 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
1164 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1166 out = gmx_fio_fopen(fname, "w");
1168 fprintf(out, "TITLE %s\n", title);
1169 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1171 for (int i = 0; i < natoms; i++)
1173 int ii = dd->globalAtomIndices[i];
1174 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1177 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
1180 while (i >= dd->atomGroups().index[dd->comm->zones.cg_range[c+1]])
1186 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1188 b = dd->comm->zones.n;
1192 b = dd->comm->zones.n + 1;
1194 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1195 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1197 fprintf(out, "TER\n");
1199 gmx_fio_fclose(out);
1202 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1204 gmx_domdec_comm_t *comm;
1211 if (comm->bInterCGBondeds)
1213 if (comm->cutoff_mbody > 0)
1215 r = comm->cutoff_mbody;
1219 /* cutoff_mbody=0 means we do not have DLB */
1220 r = comm->cellsize_min[dd->dim[0]];
1221 for (di = 1; di < dd->ndim; di++)
1223 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1225 if (comm->bBondComm)
1227 r = std::max(r, comm->cutoff_mbody);
1231 r = std::min(r, comm->cutoff);
1239 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1243 r_mb = dd_cutoff_multibody(dd);
1245 return std::max(dd->comm->cutoff, r_mb);
1249 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1254 nc = dd->nc[dd->comm->cartpmedim];
1255 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1256 copy_ivec(coord, coord_pme);
1257 coord_pme[dd->comm->cartpmedim] =
1258 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1261 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1266 npme = dd->comm->npmenodes;
1268 /* Here we assign a PME node to communicate with this DD node
1269 * by assuming that the major index of both is x.
1270 * We add cr->npmenodes/2 to obtain an even distribution.
1272 return (ddindex*npme + npme/2)/npp;
1275 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1280 snew(pme_rank, dd->comm->npmenodes);
1282 for (i = 0; i < dd->nnodes; i++)
1284 p0 = ddindex2pmeindex(dd, i);
1285 p1 = ddindex2pmeindex(dd, i+1);
1286 if (i+1 == dd->nnodes || p1 > p0)
1290 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1292 pme_rank[n] = i + 1 + n;
1300 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
1308 if (dd->comm->bCartesian) {
1309 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1310 dd_coords2pmecoords(dd,coords,coords_pme);
1311 copy_ivec(dd->ntot,nc);
1312 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1313 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1315 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1317 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
1323 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
1328 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1330 gmx_domdec_comm_t *comm;
1332 int ddindex, nodeid = -1;
1334 comm = cr->dd->comm;
1339 if (comm->bCartesianPP_PME)
1342 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1347 ddindex = dd_index(cr->dd->nc, coords);
1348 if (comm->bCartesianPP)
1350 nodeid = comm->ddindex2simnodeid[ddindex];
1356 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
1368 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1369 const t_commrec gmx_unused *cr,
1374 const gmx_domdec_comm_t *comm = dd->comm;
1376 /* This assumes a uniform x domain decomposition grid cell size */
1377 if (comm->bCartesianPP_PME)
1380 ivec coord, coord_pme;
1381 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1382 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1384 /* This is a PP node */
1385 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1386 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1390 else if (comm->bCartesianPP)
1392 if (sim_nodeid < dd->nnodes)
1394 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1399 /* This assumes DD cells with identical x coordinates
1400 * are numbered sequentially.
1402 if (dd->comm->pmenodes == nullptr)
1404 if (sim_nodeid < dd->nnodes)
1406 /* The DD index equals the nodeid */
1407 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1413 while (sim_nodeid > dd->comm->pmenodes[i])
1417 if (sim_nodeid < dd->comm->pmenodes[i])
1419 pmenode = dd->comm->pmenodes[i];
1427 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1431 return { dd->comm->npmenodes_x, dd->comm->npmenodes_y };
1439 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1443 ivec coord, coord_pme;
1447 std::vector<int> ddranks;
1448 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1450 for (x = 0; x < dd->nc[XX]; x++)
1452 for (y = 0; y < dd->nc[YY]; y++)
1454 for (z = 0; z < dd->nc[ZZ]; z++)
1456 if (dd->comm->bCartesianPP_PME)
1461 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1462 if (dd->ci[XX] == coord_pme[XX] &&
1463 dd->ci[YY] == coord_pme[YY] &&
1464 dd->ci[ZZ] == coord_pme[ZZ])
1466 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1471 /* The slab corresponds to the nodeid in the PME group */
1472 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1474 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1483 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1485 gmx_bool bReceive = TRUE;
1487 if (cr->npmenodes < dd->nnodes)
1489 gmx_domdec_comm_t *comm = dd->comm;
1490 if (comm->bCartesianPP_PME)
1493 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1495 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1496 coords[comm->cartpmedim]++;
1497 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1500 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1501 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1503 /* This is not the last PP node for pmenode */
1508 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1513 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1514 if (cr->sim_nodeid+1 < cr->nnodes &&
1515 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1517 /* This is not the last PP node for pmenode */
1526 static void set_zones_ncg_home(gmx_domdec_t *dd)
1528 gmx_domdec_zones_t *zones;
1531 zones = &dd->comm->zones;
1533 zones->cg_range[0] = 0;
1534 for (i = 1; i < zones->n+1; i++)
1536 zones->cg_range[i] = dd->ncg_home;
1538 /* zone_ncg1[0] should always be equal to ncg_home */
1539 dd->comm->zone_ncg1[0] = dd->ncg_home;
1542 static void restoreAtomGroups(gmx_domdec_t *dd,
1543 const int *gcgs_index, const t_state *state)
1545 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1547 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1548 gmx::BlockRanges &atomGroups = dd->atomGroups_;
1550 globalAtomGroupIndices.resize(atomGroupsState.size());
1551 atomGroups.index.resize(atomGroupsState.size() + 1);
1553 /* Copy back the global charge group indices from state
1554 * and rebuild the local charge group to atom index.
1557 for (unsigned int i = 0; i < atomGroupsState.size(); i++)
1559 atomGroups.index[i] = atomIndex;
1560 const int atomGroupGlobal = atomGroupsState[i];
1561 globalAtomGroupIndices[i] = atomGroupGlobal;
1562 atomIndex += gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1564 atomGroups.index[atomGroupsState.size()] = atomIndex;
1566 dd->ncg_home = atomGroupsState.size();
1567 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomIndex);
1569 set_zones_ncg_home(dd);
1572 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1573 t_forcerec *fr, char *bLocalCG)
1575 cginfo_mb_t *cginfo_mb;
1581 cginfo_mb = fr->cginfo_mb;
1582 cginfo = fr->cginfo;
1584 for (cg = cg0; cg < cg1; cg++)
1586 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1590 if (bLocalCG != nullptr)
1592 for (cg = cg0; cg < cg1; cg++)
1594 bLocalCG[index_gl[cg]] = TRUE;
1599 static void make_dd_indices(gmx_domdec_t *dd,
1600 const int *gcgs_index, int cg_start)
1602 const int numZones = dd->comm->zones.n;
1603 const int *zone2cg = dd->comm->zones.cg_range;
1604 const int *zone_ncg1 = dd->comm->zone_ncg1;
1605 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1606 const gmx_bool bCGs = dd->comm->bCGs;
1608 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
1610 if (zone2cg[1] != dd->ncg_home)
1612 gmx_incons("dd->ncg_zone is not up to date");
1615 /* Make the local to global and global to local atom index */
1616 int a = dd->atomGroups().index[cg_start];
1617 globalAtomIndices.resize(a);
1618 for (int zone = 0; zone < numZones; zone++)
1627 cg0 = zone2cg[zone];
1629 int cg1 = zone2cg[zone+1];
1630 int cg1_p1 = cg0 + zone_ncg1[zone];
1632 for (int cg = cg0; cg < cg1; cg++)
1637 /* Signal that this cg is from more than one pulse away */
1640 int cg_gl = globalAtomGroupIndices[cg];
1643 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1645 globalAtomIndices.push_back(a_gl);
1646 ga2la_set(dd->ga2la, a_gl, a, zone1);
1652 globalAtomIndices.push_back(cg_gl);
1653 ga2la_set(dd->ga2la, cg_gl, a, zone1);
1660 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1664 if (bLocalCG == nullptr)
1668 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1670 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1673 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1678 for (int i = 0; i < ncg_sys; i++)
1685 if (ngl != dd->globalAtomGroupIndices.size())
1687 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
1694 static void check_index_consistency(gmx_domdec_t *dd,
1695 int natoms_sys, int ncg_sys,
1700 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1702 if (dd->comm->DD_debug > 1)
1704 std::vector<int> have(natoms_sys);
1705 for (int a = 0; a < numAtomsInZones; a++)
1707 int globalAtomIndex = dd->globalAtomIndices[a];
1708 if (have[globalAtomIndex] > 0)
1710 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
1714 have[globalAtomIndex] = a + 1;
1719 std::vector<int> have(numAtomsInZones);
1722 for (int i = 0; i < natoms_sys; i++)
1726 if (ga2la_get(dd->ga2la, i, &a, &cell))
1728 if (a >= numAtomsInZones)
1730 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1736 if (dd->globalAtomIndices[a] != i)
1738 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1745 if (ngl != numAtomsInZones)
1748 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1749 dd->rank, where, ngl, numAtomsInZones);
1751 for (int a = 0; a < numAtomsInZones; a++)
1756 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1757 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1761 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1765 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1766 dd->rank, where, nerr);
1770 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
1771 static void clearDDStateIndices(gmx_domdec_t *dd,
1777 /* Clear the whole list without searching */
1778 ga2la_clear(dd->ga2la);
1782 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1783 for (int i = 0; i < numAtomsInZones; i++)
1785 ga2la_del(dd->ga2la, dd->globalAtomIndices[i]);
1789 char *bLocalCG = dd->comm->bLocalCG;
1792 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1794 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1798 dd_clear_local_vsite_indices(dd);
1800 if (dd->constraints)
1802 dd_clear_local_constraint_indices(dd);
1806 static gmx_bool check_grid_jump(gmx_int64_t step,
1812 gmx_domdec_comm_t *comm;
1821 for (d = 1; d < dd->ndim; d++)
1824 limit = grid_jump_limit(comm, cutoff, d);
1825 bfac = ddbox->box_size[dim];
1826 if (ddbox->tric_dir[dim])
1828 bfac *= ddbox->skew_fac[dim];
1830 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
1831 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
1839 /* This error should never be triggered under normal
1840 * circumstances, but you never know ...
1842 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1843 gmx_step_str(step, buf),
1844 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
1852 static float dd_force_load(gmx_domdec_comm_t *comm)
1859 if (comm->eFlop > 1)
1861 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
1866 load = comm->cycl[ddCyclF];
1867 if (comm->cycl_n[ddCyclF] > 1)
1869 /* Subtract the maximum of the last n cycle counts
1870 * to get rid of possible high counts due to other sources,
1871 * for instance system activity, that would otherwise
1872 * affect the dynamic load balancing.
1874 load -= comm->cycl_max[ddCyclF];
1878 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
1880 float gpu_wait, gpu_wait_sum;
1882 gpu_wait = comm->cycl[ddCyclWaitGPU];
1883 if (comm->cycl_n[ddCyclF] > 1)
1885 /* We should remove the WaitGPU time of the same MD step
1886 * as the one with the maximum F time, since the F time
1887 * and the wait time are not independent.
1888 * Furthermore, the step for the max F time should be chosen
1889 * the same on all ranks that share the same GPU.
1890 * But to keep the code simple, we remove the average instead.
1891 * The main reason for artificially long times at some steps
1892 * is spurious CPU activity or MPI time, so we don't expect
1893 * that changes in the GPU wait time matter a lot here.
1895 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
1897 /* Sum the wait times over the ranks that share the same GPU */
1898 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
1899 comm->mpi_comm_gpu_shared);
1900 /* Replace the wait time by the average over the ranks */
1901 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
1909 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1911 gmx_domdec_comm_t *comm;
1916 snew(*dim_f, dd->nc[dim]+1);
1918 for (i = 1; i < dd->nc[dim]; i++)
1920 if (comm->slb_frac[dim])
1922 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1926 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
1929 (*dim_f)[dd->nc[dim]] = 1;
1932 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1934 int pmeindex, slab, nso, i;
1937 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1943 ddpme->dim = dimind;
1945 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1947 ddpme->nslab = (ddpme->dim == 0 ?
1948 dd->comm->npmenodes_x :
1949 dd->comm->npmenodes_y);
1951 if (ddpme->nslab <= 1)
1956 nso = dd->comm->npmenodes/ddpme->nslab;
1957 /* Determine for each PME slab the PP location range for dimension dim */
1958 snew(ddpme->pp_min, ddpme->nslab);
1959 snew(ddpme->pp_max, ddpme->nslab);
1960 for (slab = 0; slab < ddpme->nslab; slab++)
1962 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1963 ddpme->pp_max[slab] = 0;
1965 for (i = 0; i < dd->nnodes; i++)
1967 ddindex2xyz(dd->nc, i, xyz);
1968 /* For y only use our y/z slab.
1969 * This assumes that the PME x grid size matches the DD grid size.
1971 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1973 pmeindex = ddindex2pmeindex(dd, i);
1976 slab = pmeindex/nso;
1980 slab = pmeindex % ddpme->nslab;
1982 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1983 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1987 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
1990 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1992 if (dd->comm->ddpme[0].dim == XX)
1994 return dd->comm->ddpme[0].maxshift;
2002 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
2004 if (dd->comm->ddpme[0].dim == YY)
2006 return dd->comm->ddpme[0].maxshift;
2008 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2010 return dd->comm->ddpme[1].maxshift;
2018 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
2020 rvec cell_ns_x0, rvec cell_ns_x1,
2023 gmx_domdec_comm_t *comm;
2028 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
2030 dim = dd->dim[dim_ind];
2032 /* Without PBC we don't have restrictions on the outer cells */
2033 if (!(dim >= ddbox->npbcdim &&
2034 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
2036 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
2037 comm->cellsize_min[dim])
2040 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
2041 gmx_step_str(step, buf), dim2char(dim),
2042 comm->cell_x1[dim] - comm->cell_x0[dim],
2043 ddbox->skew_fac[dim],
2044 dd->comm->cellsize_min[dim],
2045 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2049 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2051 /* Communicate the boundaries and update cell_ns_x0/1 */
2052 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2053 if (isDlbOn(dd->comm) && dd->ndim > 1)
2055 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
2060 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2062 /* Note that the cycles value can be incorrect, either 0 or some
2063 * extremely large value, when our thread migrated to another core
2064 * with an unsynchronized cycle counter. If this happens less often
2065 * that once per nstlist steps, this will not cause issues, since
2066 * we later subtract the maximum value from the sum over nstlist steps.
2067 * A zero count will slightly lower the total, but that's a small effect.
2068 * Note that the main purpose of the subtraction of the maximum value
2069 * is to avoid throwing off the load balancing when stalls occur due
2070 * e.g. system activity or network congestion.
2072 dd->comm->cycl[ddCycl] += cycles;
2073 dd->comm->cycl_n[ddCycl]++;
2074 if (cycles > dd->comm->cycl_max[ddCycl])
2076 dd->comm->cycl_max[ddCycl] = cycles;
2080 static double force_flop_count(t_nrnb *nrnb)
2087 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2089 /* To get closer to the real timings, we half the count
2090 * for the normal loops and again half it for water loops.
2093 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2095 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2099 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2102 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2105 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2107 sum += nrnb->n[i]*cost_nrnb(i);
2110 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2112 sum += nrnb->n[i]*cost_nrnb(i);
2118 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2120 if (dd->comm->eFlop)
2122 dd->comm->flop -= force_flop_count(nrnb);
2125 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2127 if (dd->comm->eFlop)
2129 dd->comm->flop += force_flop_count(nrnb);
2134 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2138 for (i = 0; i < ddCyclNr; i++)
2140 dd->comm->cycl[i] = 0;
2141 dd->comm->cycl_n[i] = 0;
2142 dd->comm->cycl_max[i] = 0;
2145 dd->comm->flop_n = 0;
2148 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2150 gmx_domdec_comm_t *comm;
2151 domdec_load_t *load;
2152 domdec_root_t *root = nullptr;
2154 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2159 fprintf(debug, "get_load_distribution start\n");
2162 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2166 bSepPME = (dd->pme_nodeid >= 0);
2168 if (dd->ndim == 0 && bSepPME)
2170 /* Without decomposition, but with PME nodes, we need the load */
2171 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2172 comm->load[0].pme = comm->cycl[ddCyclPME];
2175 for (d = dd->ndim-1; d >= 0; d--)
2178 /* Check if we participate in the communication in this dimension */
2179 if (d == dd->ndim-1 ||
2180 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2182 load = &comm->load[d];
2183 if (isDlbOn(dd->comm))
2185 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
2188 if (d == dd->ndim-1)
2190 sbuf[pos++] = dd_force_load(comm);
2191 sbuf[pos++] = sbuf[0];
2192 if (isDlbOn(dd->comm))
2194 sbuf[pos++] = sbuf[0];
2195 sbuf[pos++] = cell_frac;
2198 sbuf[pos++] = comm->cell_f_max0[d];
2199 sbuf[pos++] = comm->cell_f_min1[d];
2204 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2205 sbuf[pos++] = comm->cycl[ddCyclPME];
2210 sbuf[pos++] = comm->load[d+1].sum;
2211 sbuf[pos++] = comm->load[d+1].max;
2212 if (isDlbOn(dd->comm))
2214 sbuf[pos++] = comm->load[d+1].sum_m;
2215 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2216 sbuf[pos++] = comm->load[d+1].flags;
2219 sbuf[pos++] = comm->cell_f_max0[d];
2220 sbuf[pos++] = comm->cell_f_min1[d];
2225 sbuf[pos++] = comm->load[d+1].mdf;
2226 sbuf[pos++] = comm->load[d+1].pme;
2230 /* Communicate a row in DD direction d.
2231 * The communicators are setup such that the root always has rank 0.
2234 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2235 load->load, load->nload*sizeof(float), MPI_BYTE,
2236 0, comm->mpi_comm_load[d]);
2238 if (dd->ci[dim] == dd->master_ci[dim])
2240 /* We are the root, process this row */
2243 root = comm->root[d];
2253 for (i = 0; i < dd->nc[dim]; i++)
2255 load->sum += load->load[pos++];
2256 load->max = std::max(load->max, load->load[pos]);
2258 if (isDlbOn(dd->comm))
2262 /* This direction could not be load balanced properly,
2263 * therefore we need to use the maximum iso the average load.
2265 load->sum_m = std::max(load->sum_m, load->load[pos]);
2269 load->sum_m += load->load[pos];
2272 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
2276 load->flags = (int)(load->load[pos++] + 0.5);
2280 root->cell_f_max0[i] = load->load[pos++];
2281 root->cell_f_min1[i] = load->load[pos++];
2286 load->mdf = std::max(load->mdf, load->load[pos]);
2288 load->pme = std::max(load->pme, load->load[pos]);
2292 if (isDlbOn(comm) && root->bLimited)
2294 load->sum_m *= dd->nc[dim];
2295 load->flags |= (1<<d);
2303 comm->nload += dd_load_count(comm);
2304 comm->load_step += comm->cycl[ddCyclStep];
2305 comm->load_sum += comm->load[0].sum;
2306 comm->load_max += comm->load[0].max;
2309 for (d = 0; d < dd->ndim; d++)
2311 if (comm->load[0].flags & (1<<d))
2313 comm->load_lim[d]++;
2319 comm->load_mdf += comm->load[0].mdf;
2320 comm->load_pme += comm->load[0].pme;
2324 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2328 fprintf(debug, "get_load_distribution finished\n");
2332 static float dd_force_load_fraction(gmx_domdec_t *dd)
2334 /* Return the relative performance loss on the total run time
2335 * due to the force calculation load imbalance.
2337 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2339 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
2347 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2349 /* Return the relative performance loss on the total run time
2350 * due to the force calculation load imbalance.
2352 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2355 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2356 (dd->comm->load_step*dd->nnodes);
2364 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2366 gmx_domdec_comm_t *comm = dd->comm;
2368 /* Only the master rank prints loads and only if we measured loads */
2369 if (!DDMASTER(dd) || comm->nload == 0)
2375 int numPpRanks = dd->nnodes;
2376 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2377 int numRanks = numPpRanks + numPmeRanks;
2378 float lossFraction = 0;
2380 /* Print the average load imbalance and performance loss */
2381 if (dd->nnodes > 1 && comm->load_sum > 0)
2383 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2384 lossFraction = dd_force_imb_perf_loss(dd);
2386 std::string msg = "\n Dynamic load balancing report:\n";
2387 std::string dlbStateStr = "";
2389 switch (dd->comm->dlbState)
2392 dlbStateStr = "DLB was off during the run per user request.";
2394 case edlbsOffForever:
2395 /* Currectly this can happen due to performance loss observed, cell size
2396 * limitations or incompatibility with other settings observed during
2397 * determineInitialDlbState(). */
2398 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2400 case edlbsOffCanTurnOn:
2401 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2403 case edlbsOffTemporarilyLocked:
2404 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2406 case edlbsOnCanTurnOff:
2407 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2410 dlbStateStr = "DLB was permanently on during the run per user request.";
2413 GMX_ASSERT(false, "Undocumented DLB state");
2416 msg += " " + dlbStateStr + "\n";
2417 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2418 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2419 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
2420 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2422 fprintf(fplog, "%s", msg.c_str());
2423 fprintf(stderr, "%s", msg.c_str());
2426 /* Print during what percentage of steps the load balancing was limited */
2427 bool dlbWasLimited = false;
2430 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2431 for (int d = 0; d < dd->ndim; d++)
2433 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2434 sprintf(buf+strlen(buf), " %c %d %%",
2435 dim2char(dd->dim[d]), limitPercentage);
2436 if (limitPercentage >= 50)
2438 dlbWasLimited = true;
2441 sprintf(buf + strlen(buf), "\n");
2442 fprintf(fplog, "%s", buf);
2443 fprintf(stderr, "%s", buf);
2446 /* Print the performance loss due to separate PME - PP rank imbalance */
2447 float lossFractionPme = 0;
2448 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2450 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2451 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2452 if (lossFractionPme <= 0)
2454 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
2458 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2460 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2461 fprintf(fplog, "%s", buf);
2462 fprintf(stderr, "%s", buf);
2463 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossFractionPme)*100);
2464 fprintf(fplog, "%s", buf);
2465 fprintf(stderr, "%s", buf);
2467 fprintf(fplog, "\n");
2468 fprintf(stderr, "\n");
2470 if (lossFraction >= DD_PERF_LOSS_WARN)
2473 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2474 " in the domain decomposition.\n", lossFraction*100);
2477 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2479 else if (dlbWasLimited)
2481 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2483 fprintf(fplog, "%s\n", buf);
2484 fprintf(stderr, "%s\n", buf);
2486 if (numPmeRanks > 0 && fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2489 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2490 " had %s work to do than the PP ranks.\n"
2491 " You might want to %s the number of PME ranks\n"
2492 " or %s the cut-off and the grid spacing.\n",
2493 fabs(lossFractionPme*100),
2494 (lossFractionPme < 0) ? "less" : "more",
2495 (lossFractionPme < 0) ? "decrease" : "increase",
2496 (lossFractionPme < 0) ? "decrease" : "increase");
2497 fprintf(fplog, "%s\n", buf);
2498 fprintf(stderr, "%s\n", buf);
2502 static float dd_vol_min(gmx_domdec_t *dd)
2504 return dd->comm->load[0].cvol_min*dd->nnodes;
2507 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
2509 return dd->comm->load[0].flags;
2512 static float dd_f_imbal(gmx_domdec_t *dd)
2514 if (dd->comm->load[0].sum > 0)
2516 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2520 /* Something is wrong in the cycle counting, report no load imbalance */
2525 float dd_pme_f_ratio(gmx_domdec_t *dd)
2527 /* Should only be called on the DD master rank */
2528 assert(DDMASTER(dd));
2530 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2532 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
2540 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
2545 flags = dd_load_flags(dd);
2549 "DD load balancing is limited by minimum cell size in dimension");
2550 for (d = 0; d < dd->ndim; d++)
2554 fprintf(fplog, " %c", dim2char(dd->dim[d]));
2557 fprintf(fplog, "\n");
2559 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
2560 if (isDlbOn(dd->comm))
2562 fprintf(fplog, " vol min/aver %5.3f%c",
2563 dd_vol_min(dd), flags ? '!' : ' ');
2567 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2569 if (dd->comm->cycl_n[ddCyclPME])
2571 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2573 fprintf(fplog, "\n\n");
2576 static void dd_print_load_verbose(gmx_domdec_t *dd)
2578 if (isDlbOn(dd->comm))
2580 fprintf(stderr, "vol %4.2f%c ",
2581 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2585 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
2587 if (dd->comm->cycl_n[ddCyclPME])
2589 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
2594 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2599 domdec_root_t *root;
2600 gmx_bool bPartOfGroup = FALSE;
2602 dim = dd->dim[dim_ind];
2603 copy_ivec(loc, loc_c);
2604 for (i = 0; i < dd->nc[dim]; i++)
2607 rank = dd_index(dd->nc, loc_c);
2608 if (rank == dd->rank)
2610 /* This process is part of the group */
2611 bPartOfGroup = TRUE;
2614 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2618 dd->comm->mpi_comm_load[dim_ind] = c_row;
2619 if (!isDlbDisabled(dd->comm))
2621 if (dd->ci[dim] == dd->master_ci[dim])
2623 /* This is the root process of this row */
2624 snew(dd->comm->root[dim_ind], 1);
2625 root = dd->comm->root[dim_ind];
2626 snew(root->cell_f, ddCellFractionBufferSize(dd, dim_ind));
2627 snew(root->old_cell_f, dd->nc[dim]+1);
2628 snew(root->bCellMin, dd->nc[dim]);
2631 snew(root->cell_f_max0, dd->nc[dim]);
2632 snew(root->cell_f_min1, dd->nc[dim]);
2633 snew(root->bound_min, dd->nc[dim]);
2634 snew(root->bound_max, dd->nc[dim]);
2636 snew(root->buf_ncd, dd->nc[dim]);
2640 /* This is not a root process, we only need to receive cell_f */
2641 snew(dd->comm->cell_f_row, ddCellFractionBufferSize(dd, dim_ind));
2644 if (dd->ci[dim] == dd->master_ci[dim])
2646 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
2652 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2656 int physicalnode_id_hash;
2658 MPI_Comm mpi_comm_pp_physicalnode;
2660 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2662 /* Only ranks with short-ranged tasks (currently) use GPUs.
2663 * If we don't have GPUs assigned, there are no resources to share.
2668 physicalnode_id_hash = gmx_physicalnode_id_hash();
2674 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2675 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2676 dd->rank, physicalnode_id_hash, gpu_id);
2678 /* Split the PP communicator over the physical nodes */
2679 /* TODO: See if we should store this (before), as it's also used for
2680 * for the nodecomm summation.
2682 // TODO PhysicalNodeCommunicator could be extended/used to handle
2683 // the need for per-node per-group communicators.
2684 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2685 &mpi_comm_pp_physicalnode);
2686 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2687 &dd->comm->mpi_comm_gpu_shared);
2688 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2689 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2693 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2696 /* Note that some ranks could share a GPU, while others don't */
2698 if (dd->comm->nrank_gpu_shared == 1)
2700 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2703 GMX_UNUSED_VALUE(cr);
2704 GMX_UNUSED_VALUE(gpu_id);
2708 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2711 int dim0, dim1, i, j;
2716 fprintf(debug, "Making load communicators\n");
2719 snew(dd->comm->load, std::max(dd->ndim, 1));
2720 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2728 make_load_communicator(dd, 0, loc);
2732 for (i = 0; i < dd->nc[dim0]; i++)
2735 make_load_communicator(dd, 1, loc);
2741 for (i = 0; i < dd->nc[dim0]; i++)
2745 for (j = 0; j < dd->nc[dim1]; j++)
2748 make_load_communicator(dd, 2, loc);
2755 fprintf(debug, "Finished making load communicators\n");
2760 /*! \brief Sets up the relation between neighboring domains and zones */
2761 static void setup_neighbor_relations(gmx_domdec_t *dd)
2763 int d, dim, i, j, m;
2765 gmx_domdec_zones_t *zones;
2766 gmx_domdec_ns_ranges_t *izone;
2768 for (d = 0; d < dd->ndim; d++)
2771 copy_ivec(dd->ci, tmp);
2772 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2773 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2774 copy_ivec(dd->ci, tmp);
2775 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2776 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2779 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2782 dd->neighbor[d][1]);
2786 int nzone = (1 << dd->ndim);
2787 int nizone = (1 << std::max(dd->ndim - 1, 0));
2788 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2790 zones = &dd->comm->zones;
2792 for (i = 0; i < nzone; i++)
2795 clear_ivec(zones->shift[i]);
2796 for (d = 0; d < dd->ndim; d++)
2798 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2803 for (i = 0; i < nzone; i++)
2805 for (d = 0; d < DIM; d++)
2807 s[d] = dd->ci[d] - zones->shift[i][d];
2812 else if (s[d] >= dd->nc[d])
2818 zones->nizone = nizone;
2819 for (i = 0; i < zones->nizone; i++)
2821 assert(ddNonbondedZonePairRanges[i][0] == i);
2823 izone = &zones->izone[i];
2824 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2825 * j-zones up to nzone.
2827 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2828 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2829 for (dim = 0; dim < DIM; dim++)
2831 if (dd->nc[dim] == 1)
2833 /* All shifts should be allowed */
2834 izone->shift0[dim] = -1;
2835 izone->shift1[dim] = 1;
2839 /* Determine the min/max j-zone shift wrt the i-zone */
2840 izone->shift0[dim] = 1;
2841 izone->shift1[dim] = -1;
2842 for (j = izone->j0; j < izone->j1; j++)
2844 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2845 if (shift_diff < izone->shift0[dim])
2847 izone->shift0[dim] = shift_diff;
2849 if (shift_diff > izone->shift1[dim])
2851 izone->shift1[dim] = shift_diff;
2858 if (!isDlbDisabled(dd->comm))
2860 snew(dd->comm->root, dd->ndim);
2863 if (dd->comm->bRecordLoad)
2865 make_load_communicators(dd);
2869 static void make_pp_communicator(FILE *fplog,
2871 t_commrec gmx_unused *cr,
2872 int gmx_unused reorder)
2875 gmx_domdec_comm_t *comm;
2882 if (comm->bCartesianPP)
2884 /* Set up cartesian communication for the particle-particle part */
2887 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
2888 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2891 for (int i = 0; i < DIM; i++)
2895 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
2897 /* We overwrite the old communicator with the new cartesian one */
2898 cr->mpi_comm_mygroup = comm_cart;
2901 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2902 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2904 if (comm->bCartesianPP_PME)
2906 /* Since we want to use the original cartesian setup for sim,
2907 * and not the one after split, we need to make an index.
2909 snew(comm->ddindex2ddnodeid, dd->nnodes);
2910 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2911 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2912 /* Get the rank of the DD master,
2913 * above we made sure that the master node is a PP node.
2923 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2925 else if (comm->bCartesianPP)
2927 if (cr->npmenodes == 0)
2929 /* The PP communicator is also
2930 * the communicator for this simulation
2932 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2934 cr->nodeid = dd->rank;
2936 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2938 /* We need to make an index to go from the coordinates
2939 * to the nodeid of this simulation.
2941 snew(comm->ddindex2simnodeid, dd->nnodes);
2942 snew(buf, dd->nnodes);
2943 if (thisRankHasDuty(cr, DUTY_PP))
2945 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2947 /* Communicate the ddindex to simulation nodeid index */
2948 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2949 cr->mpi_comm_mysim);
2952 /* Determine the master coordinates and rank.
2953 * The DD master should be the same node as the master of this sim.
2955 for (int i = 0; i < dd->nnodes; i++)
2957 if (comm->ddindex2simnodeid[i] == 0)
2959 ddindex2xyz(dd->nc, i, dd->master_ci);
2960 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2965 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2970 /* No Cartesian communicators */
2971 /* We use the rank in dd->comm->all as DD index */
2972 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2973 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2975 clear_ivec(dd->master_ci);
2982 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2983 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2988 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2989 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2993 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
2997 gmx_domdec_comm_t *comm = dd->comm;
2999 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
3002 snew(comm->ddindex2simnodeid, dd->nnodes);
3003 snew(buf, dd->nnodes);
3004 if (thisRankHasDuty(cr, DUTY_PP))
3006 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
3008 /* Communicate the ddindex to simulation nodeid index */
3009 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
3010 cr->mpi_comm_mysim);
3014 GMX_UNUSED_VALUE(dd);
3015 GMX_UNUSED_VALUE(cr);
3019 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3020 DdRankOrder gmx_unused rankOrder,
3021 int gmx_unused reorder)
3023 gmx_domdec_comm_t *comm;
3032 if (comm->bCartesianPP)
3034 for (i = 1; i < DIM; i++)
3036 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
3038 if (bDiv[YY] || bDiv[ZZ])
3040 comm->bCartesianPP_PME = TRUE;
3041 /* If we have 2D PME decomposition, which is always in x+y,
3042 * we stack the PME only nodes in z.
3043 * Otherwise we choose the direction that provides the thinnest slab
3044 * of PME only nodes as this will have the least effect
3045 * on the PP communication.
3046 * But for the PME communication the opposite might be better.
3048 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
3050 dd->nc[YY] > dd->nc[ZZ]))
3052 comm->cartpmedim = ZZ;
3056 comm->cartpmedim = YY;
3058 comm->ntot[comm->cartpmedim]
3059 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3063 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3065 "Will not use a Cartesian communicator for PP <-> PME\n\n");
3069 if (comm->bCartesianPP_PME)
3077 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3080 for (i = 0; i < DIM; i++)
3084 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
3086 MPI_Comm_rank(comm_cart, &rank);
3087 if (MASTER(cr) && rank != 0)
3089 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3092 /* With this assigment we loose the link to the original communicator
3093 * which will usually be MPI_COMM_WORLD, unless have multisim.
3095 cr->mpi_comm_mysim = comm_cart;
3096 cr->sim_nodeid = rank;
3098 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3102 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
3103 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
3106 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3110 if (cr->npmenodes == 0 ||
3111 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3113 cr->duty = DUTY_PME;
3116 /* Split the sim communicator into PP and PME only nodes */
3117 MPI_Comm_split(cr->mpi_comm_mysim,
3118 getThisRankDuties(cr),
3119 dd_index(comm->ntot, dd->ci),
3120 &cr->mpi_comm_mygroup);
3127 case DdRankOrder::pp_pme:
3130 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
3133 case DdRankOrder::interleave:
3134 /* Interleave the PP-only and PME-only ranks */
3137 fprintf(fplog, "Interleaving PP and PME ranks\n");
3139 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3141 case DdRankOrder::cartesian:
3144 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
3147 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3149 cr->duty = DUTY_PME;
3156 /* Split the sim communicator into PP and PME only nodes */
3157 MPI_Comm_split(cr->mpi_comm_mysim,
3158 getThisRankDuties(cr),
3160 &cr->mpi_comm_mygroup);
3161 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3167 fprintf(fplog, "This rank does only %s work.\n\n",
3168 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3172 /*! \brief Generates the MPI communicators for domain decomposition */
3173 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
3174 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3176 gmx_domdec_comm_t *comm;
3181 copy_ivec(dd->nc, comm->ntot);
3183 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3184 comm->bCartesianPP_PME = FALSE;
3186 /* Reorder the nodes by default. This might change the MPI ranks.
3187 * Real reordering is only supported on very few architectures,
3188 * Blue Gene is one of them.
3190 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
3192 if (cr->npmenodes > 0)
3194 /* Split the communicator into a PP and PME part */
3195 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
3196 if (comm->bCartesianPP_PME)
3198 /* We (possibly) reordered the nodes in split_communicator,
3199 * so it is no longer required in make_pp_communicator.
3201 CartReorder = FALSE;
3206 /* All nodes do PP and PME */
3208 /* We do not require separate communicators */
3209 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3213 if (thisRankHasDuty(cr, DUTY_PP))
3215 /* Copy or make a new PP communicator */
3216 make_pp_communicator(fplog, dd, cr, CartReorder);
3220 receive_ddindex2simnodeid(dd, cr);
3223 if (!thisRankHasDuty(cr, DUTY_PME))
3225 /* Set up the commnuication to our PME node */
3226 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3227 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3230 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
3231 dd->pme_nodeid, dd->pme_receive_vir_ener);
3236 dd->pme_nodeid = -1;
3241 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3243 comm->cgs_gl.index[comm->cgs_gl.nr]);
3247 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
3249 real *slb_frac, tot;
3254 if (nc > 1 && size_string != nullptr)
3258 fprintf(fplog, "Using static load balancing for the %s direction\n",
3263 for (i = 0; i < nc; i++)
3266 sscanf(size_string, "%20lf%n", &dbl, &n);
3269 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3278 fprintf(fplog, "Relative cell sizes:");
3280 for (i = 0; i < nc; i++)
3285 fprintf(fplog, " %5.3f", slb_frac[i]);
3290 fprintf(fplog, "\n");
3297 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3300 gmx_mtop_ilistloop_t iloop;
3304 iloop = gmx_mtop_ilistloop_init(mtop);
3305 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3307 for (ftype = 0; ftype < F_NRE; ftype++)
3309 if ((interaction_function[ftype].flags & IF_BOND) &&
3312 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
3320 static int dd_getenv(FILE *fplog, const char *env_var, int def)
3326 val = getenv(env_var);
3329 if (sscanf(val, "%20d", &nst) <= 0)
3335 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
3343 static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
3347 fprintf(stderr, "\n%s\n", warn_string);
3351 fprintf(fplog, "\n%s\n", warn_string);
3355 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
3356 const t_inputrec *ir, FILE *fplog)
3358 if (ir->ePBC == epbcSCREW &&
3359 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3361 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3364 if (ir->ns_type == ensSIMPLE)
3366 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3369 if (ir->nstlist == 0)
3371 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3374 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3376 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
3380 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3385 r = ddbox->box_size[XX];
3386 for (di = 0; di < dd->ndim; di++)
3389 /* Check using the initial average cell size */
3390 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3396 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
3398 static int forceDlbOffOrBail(int cmdlineDlbState,
3399 const std::string &reasonStr,
3403 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3404 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3406 if (cmdlineDlbState == edlbsOnUser)
3408 gmx_fatal(FARGS, (dlbNotSupportedErr + reasonStr).c_str());
3410 else if (cmdlineDlbState == edlbsOffCanTurnOn)
3412 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
3414 return edlbsOffForever;
3417 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3419 * This function parses the parameters of "-dlb" command line option setting
3420 * corresponding state values. Then it checks the consistency of the determined
3421 * state with other run parameters and settings. As a result, the initial state
3422 * may be altered or an error may be thrown if incompatibility of options is detected.
3424 * \param [in] fplog Pointer to mdrun log file.
3425 * \param [in] cr Pointer to MPI communication object.
3426 * \param [in] dlbOption Enum value for the DLB option.
3427 * \param [in] bRecordLoad True if the load balancer is recording load information.
3428 * \param [in] mdrunOptions Options for mdrun.
3429 * \param [in] ir Pointer mdrun to input parameters.
3430 * \returns DLB initial/startup state.
3432 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
3433 DlbOption dlbOption, gmx_bool bRecordLoad,
3434 const MdrunOptions &mdrunOptions,
3435 const t_inputrec *ir)
3437 int dlbState = edlbsOffCanTurnOn;
3441 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
3442 case DlbOption::no: dlbState = edlbsOffUser; break;
3443 case DlbOption::yes: dlbState = edlbsOnUser; break;
3444 default: gmx_incons("Invalid dlbOption enum value");
3447 /* Reruns don't support DLB: bail or override auto mode */
3448 if (mdrunOptions.rerun)
3450 std::string reasonStr = "it is not supported in reruns.";
3451 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3454 /* Unsupported integrators */
3455 if (!EI_DYNAMICS(ir->eI))
3457 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3458 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3461 /* Without cycle counters we can't time work to balance on */
3464 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3465 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3468 if (mdrunOptions.reproducible)
3470 std::string reasonStr = "you started a reproducible run.";
3475 case edlbsOffForever:
3476 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
3478 case edlbsOffCanTurnOn:
3479 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3481 case edlbsOnCanTurnOff:
3482 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
3485 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
3488 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
3496 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
3499 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3501 /* Decomposition order z,y,x */
3504 fprintf(fplog, "Using domain decomposition order z, y, x\n");
3506 for (int dim = DIM-1; dim >= 0; dim--)
3508 if (dd->nc[dim] > 1)
3510 dd->dim[dd->ndim++] = dim;
3516 /* Decomposition order x,y,z */
3517 for (int dim = 0; dim < DIM; dim++)
3519 if (dd->nc[dim] > 1)
3521 dd->dim[dd->ndim++] = dim;
3528 /* Set dim[0] to avoid extra checks on ndim in several places */
3533 static gmx_domdec_comm_t *init_dd_comm()
3535 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3537 comm->n_load_have = 0;
3538 comm->n_load_collect = 0;
3540 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3542 comm->sum_nat[i] = 0;
3546 comm->load_step = 0;
3549 clear_ivec(comm->load_lim);
3553 /* This should be replaced by a unique pointer */
3554 comm->balanceRegion = ddBalanceRegionAllocate();
3559 /*! \brief Set the cell size and interaction limits, as well as the DD grid */
3560 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3561 const DomdecOptions &options,
3562 const MdrunOptions &mdrunOptions,
3563 const gmx_mtop_t *mtop,
3564 const t_inputrec *ir,
3566 gmx::ArrayRef<const gmx::RVec> xGlobal,
3570 real r_bonded_limit = -1;
3571 const real tenPercentMargin = 1.1;
3572 gmx_domdec_comm_t *comm = dd->comm;
3574 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3575 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3577 dd->pme_recv_f_alloc = 0;
3578 dd->pme_recv_f_buf = nullptr;
3580 /* Initialize to GPU share count to 0, might change later */
3581 comm->nrank_gpu_shared = 0;
3583 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3584 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3585 /* To consider turning DLB on after 2*nstlist steps we need to check
3586 * at partitioning count 3. Thus we need to increase the first count by 2.
3588 comm->ddPartioningCountFirstDlbOff += 2;
3592 fprintf(fplog, "Dynamic load balancing: %s\n",
3593 edlbs_names[comm->dlbState]);
3595 comm->bPMELoadBalDLBLimits = FALSE;
3597 /* Allocate the charge group/atom sorting struct */
3598 snew(comm->sort, 1);
3600 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3602 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3603 mtop->bIntermolecularInteractions);
3604 if (comm->bInterCGBondeds)
3606 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3610 comm->bInterCGMultiBody = FALSE;
3613 dd->bInterCGcons = inter_charge_group_constraints(*mtop);
3614 dd->bInterCGsettles = inter_charge_group_settles(*mtop);
3618 /* Set the cut-off to some very large value,
3619 * so we don't need if statements everywhere in the code.
3620 * We use sqrt, since the cut-off is squared in some places.
3622 comm->cutoff = GMX_CUTOFF_INF;
3626 comm->cutoff = ir->rlist;
3628 comm->cutoff_mbody = 0;
3630 comm->cellsize_limit = 0;
3631 comm->bBondComm = FALSE;
3633 /* Atoms should be able to move by up to half the list buffer size (if > 0)
3634 * within nstlist steps. Since boundaries are allowed to displace by half
3635 * a cell size, DD cells should be at least the size of the list buffer.
3637 comm->cellsize_limit = std::max(comm->cellsize_limit,
3638 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
3640 if (comm->bInterCGBondeds)
3642 if (options.minimumCommunicationRange > 0)
3644 comm->cutoff_mbody = options.minimumCommunicationRange;
3645 if (options.useBondedCommunication)
3647 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3651 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3653 r_bonded_limit = comm->cutoff_mbody;
3655 else if (ir->bPeriodicMols)
3657 /* Can not easily determine the required cut-off */
3658 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
3659 comm->cutoff_mbody = comm->cutoff/2;
3660 r_bonded_limit = comm->cutoff_mbody;
3668 dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3669 options.checkBondedInteractions,
3672 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3673 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3675 /* We use an initial margin of 10% for the minimum cell size,
3676 * except when we are just below the non-bonded cut-off.
3678 if (options.useBondedCommunication)
3680 if (std::max(r_2b, r_mb) > comm->cutoff)
3682 r_bonded = std::max(r_2b, r_mb);
3683 r_bonded_limit = tenPercentMargin*r_bonded;
3684 comm->bBondComm = TRUE;
3689 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3691 /* We determine cutoff_mbody later */
3695 /* No special bonded communication,
3696 * simply increase the DD cut-off.
3698 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3699 comm->cutoff_mbody = r_bonded_limit;
3700 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3706 "Minimum cell size due to bonded interactions: %.3f nm\n",
3709 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3713 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3715 /* There is a cell size limit due to the constraints (P-LINCS) */
3716 rconstr = constr_r_max(fplog, mtop, ir);
3720 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
3722 if (rconstr > comm->cellsize_limit)
3724 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
3728 else if (options.constraintCommunicationRange > 0 && fplog)
3730 /* Here we do not check for dd->bInterCGcons,
3731 * because one can also set a cell size limit for virtual sites only
3732 * and at this point we don't know yet if there are intercg v-sites.
3735 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
3736 options.constraintCommunicationRange);
3737 rconstr = options.constraintCommunicationRange;
3739 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3741 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3743 if (options.numCells[XX] > 0)
3745 copy_ivec(options.numCells, dd->nc);
3746 set_dd_dim(fplog, dd);
3747 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3749 if (options.numPmeRanks >= 0)
3751 cr->npmenodes = options.numPmeRanks;
3755 /* When the DD grid is set explicitly and -npme is set to auto,
3756 * don't use PME ranks. We check later if the DD grid is
3757 * compatible with the total number of ranks.
3762 real acs = average_cellsize_min(dd, ddbox);
3763 if (acs < comm->cellsize_limit)
3767 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
3769 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3770 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3771 acs, comm->cellsize_limit);
3776 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3778 /* We need to choose the optimal DD grid and possibly PME nodes */
3780 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
3781 options.numPmeRanks,
3782 !isDlbDisabled(comm),
3784 comm->cellsize_limit, comm->cutoff,
3785 comm->bInterCGBondeds);
3787 if (dd->nc[XX] == 0)
3790 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3791 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3792 !bC ? "-rdd" : "-rcon",
3793 comm->dlbState != edlbsOffUser ? " or -dds" : "",
3794 bC ? " or your LINCS settings" : "");
3796 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3797 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3799 "Look in the log file for details on the domain decomposition",
3800 cr->nnodes-cr->npmenodes, limit, buf);
3802 set_dd_dim(fplog, dd);
3808 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
3809 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3812 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3813 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3815 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3816 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3817 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3819 if (cr->npmenodes > dd->nnodes)
3821 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3822 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3824 if (cr->npmenodes > 0)
3826 comm->npmenodes = cr->npmenodes;
3830 comm->npmenodes = dd->nnodes;
3833 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3835 /* The following choices should match those
3836 * in comm_cost_est in domdec_setup.c.
3837 * Note that here the checks have to take into account
3838 * that the decomposition might occur in a different order than xyz
3839 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3840 * in which case they will not match those in comm_cost_est,
3841 * but since that is mainly for testing purposes that's fine.
3843 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3844 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3845 getenv("GMX_PMEONEDD") == nullptr)
3847 comm->npmedecompdim = 2;
3848 comm->npmenodes_x = dd->nc[XX];
3849 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3853 /* In case nc is 1 in both x and y we could still choose to
3854 * decompose pme in y instead of x, but we use x for simplicity.
3856 comm->npmedecompdim = 1;
3857 if (dd->dim[0] == YY)
3859 comm->npmenodes_x = 1;
3860 comm->npmenodes_y = comm->npmenodes;
3864 comm->npmenodes_x = comm->npmenodes;
3865 comm->npmenodes_y = 1;
3870 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
3871 comm->npmenodes_x, comm->npmenodes_y, 1);
3876 comm->npmedecompdim = 0;
3877 comm->npmenodes_x = 0;
3878 comm->npmenodes_y = 0;
3881 snew(comm->slb_frac, DIM);
3882 if (isDlbDisabled(comm))
3884 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
3885 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
3886 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
3889 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3891 if (comm->bBondComm || !isDlbDisabled(comm))
3893 /* Set the bonded communication distance to halfway
3894 * the minimum and the maximum,
3895 * since the extra communication cost is nearly zero.
3897 real acs = average_cellsize_min(dd, ddbox);
3898 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3899 if (!isDlbDisabled(comm))
3901 /* Check if this does not limit the scaling */
3902 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3903 options.dlbScaling*acs);
3905 if (!comm->bBondComm)
3907 /* Without bBondComm do not go beyond the n.b. cut-off */
3908 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3909 if (comm->cellsize_limit >= comm->cutoff)
3911 /* We don't loose a lot of efficieny
3912 * when increasing it to the n.b. cut-off.
3913 * It can even be slightly faster, because we need
3914 * less checks for the communication setup.
3916 comm->cutoff_mbody = comm->cutoff;
3919 /* Check if we did not end up below our original limit */
3920 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3922 if (comm->cutoff_mbody > comm->cellsize_limit)
3924 comm->cellsize_limit = comm->cutoff_mbody;
3927 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3932 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
3933 "cellsize limit %f\n",
3934 comm->bBondComm, comm->cellsize_limit);
3939 check_dd_restrictions(cr, dd, ir, fplog);
3943 static void set_dlb_limits(gmx_domdec_t *dd)
3946 for (int d = 0; d < dd->ndim; d++)
3948 /* Set the number of pulses to the value for DLB */
3949 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3951 dd->comm->cellsize_min[dd->dim[d]] =
3952 dd->comm->cellsize_min_dlb[dd->dim[d]];
3957 static void turn_on_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3960 gmx_domdec_comm_t *comm;
3967 cellsize_min = comm->cellsize_min[dd->dim[0]];
3968 for (d = 1; d < dd->ndim; d++)
3970 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3973 /* Turn off DLB if we're too close to the cell size limit. */
3974 if (cellsize_min < comm->cellsize_limit*1.05)
3976 auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
3977 "but the minimum cell size is smaller than 1.05 times the cell size limit."
3978 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
3979 dd_warning(cr, fplog, str.c_str());
3981 comm->dlbState = edlbsOffForever;
3986 sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
3987 dd_warning(cr, fplog, buf);
3988 comm->dlbState = edlbsOnCanTurnOff;
3990 /* Store the non-DLB performance, so we can check if DLB actually
3991 * improves performance.
3993 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3994 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
3998 /* We can set the required cell size info here,
3999 * so we do not need to communicate this.
4000 * The grid is completely uniform.
4002 for (d = 0; d < dd->ndim; d++)
4006 comm->load[d].sum_m = comm->load[d].sum;
4008 nc = dd->nc[dd->dim[d]];
4009 for (i = 0; i < nc; i++)
4011 comm->root[d]->cell_f[i] = i/(real)nc;
4014 comm->root[d]->cell_f_max0[i] = i /(real)nc;
4015 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
4018 comm->root[d]->cell_f[nc] = 1.0;
4023 static void turn_off_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
4025 gmx_domdec_t *dd = cr->dd;
4028 sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
4029 dd_warning(cr, fplog, buf);
4030 dd->comm->dlbState = edlbsOffCanTurnOn;
4031 dd->comm->haveTurnedOffDlb = true;
4032 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4035 static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
4037 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
4039 sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
4040 dd_warning(cr, fplog, buf);
4041 cr->dd->comm->dlbState = edlbsOffForever;
4044 static char *init_bLocalCG(const gmx_mtop_t *mtop)
4049 ncg = ncg_mtop(mtop);
4050 snew(bLocalCG, ncg);
4051 for (cg = 0; cg < ncg; cg++)
4053 bLocalCG[cg] = FALSE;
4059 void dd_init_bondeds(FILE *fplog,
4061 const gmx_mtop_t *mtop,
4062 const gmx_vsite_t *vsite,
4063 const t_inputrec *ir,
4064 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
4066 gmx_domdec_comm_t *comm;
4068 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
4072 if (comm->bBondComm)
4074 /* Communicate atoms beyond the cut-off for bonded interactions */
4077 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
4079 comm->bLocalCG = init_bLocalCG(mtop);
4083 /* Only communicate atoms based on cut-off */
4084 comm->cglink = nullptr;
4085 comm->bLocalCG = nullptr;
4089 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
4090 const gmx_mtop_t *mtop, const t_inputrec *ir,
4091 gmx_bool bDynLoadBal, real dlb_scale,
4092 const gmx_ddbox_t *ddbox)
4094 gmx_domdec_comm_t *comm;
4100 if (fplog == nullptr)
4109 fprintf(fplog, "The maximum number of communication pulses is:");
4110 for (d = 0; d < dd->ndim; d++)
4112 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4114 fprintf(fplog, "\n");
4115 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
4116 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
4117 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
4118 for (d = 0; d < DIM; d++)
4122 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4129 comm->cellsize_min_dlb[d]/
4130 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4132 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
4135 fprintf(fplog, "\n");
4139 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4140 fprintf(fplog, "The initial number of communication pulses is:");
4141 for (d = 0; d < dd->ndim; d++)
4143 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4145 fprintf(fplog, "\n");
4146 fprintf(fplog, "The initial domain decomposition cell size is:");
4147 for (d = 0; d < DIM; d++)
4151 fprintf(fplog, " %c %.2f nm",
4152 dim2char(d), dd->comm->cellsize_min[d]);
4155 fprintf(fplog, "\n\n");
4158 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
4160 if (comm->bInterCGBondeds ||
4162 dd->bInterCGcons || dd->bInterCGsettles)
4164 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
4165 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4166 "non-bonded interactions", "", comm->cutoff);
4170 limit = dd->comm->cellsize_limit;
4174 if (dynamic_dd_box(ddbox, ir))
4176 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
4178 limit = dd->comm->cellsize_min[XX];
4179 for (d = 1; d < DIM; d++)
4181 limit = std::min(limit, dd->comm->cellsize_min[d]);
4185 if (comm->bInterCGBondeds)
4187 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4188 "two-body bonded interactions", "(-rdd)",
4189 std::max(comm->cutoff, comm->cutoff_mbody));
4190 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4191 "multi-body bonded interactions", "(-rdd)",
4192 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4196 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4197 "virtual site constructions", "(-rcon)", limit);
4199 if (dd->bInterCGcons || dd->bInterCGsettles)
4201 sprintf(buf, "atoms separated by up to %d constraints",
4203 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4204 buf, "(-rcon)", limit);
4206 fprintf(fplog, "\n");
4212 static void set_cell_limits_dlb(gmx_domdec_t *dd,
4214 const t_inputrec *ir,
4215 const gmx_ddbox_t *ddbox)
4217 gmx_domdec_comm_t *comm;
4218 int d, dim, npulse, npulse_d_max, npulse_d;
4223 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4225 /* Determine the maximum number of comm. pulses in one dimension */
4227 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4229 /* Determine the maximum required number of grid pulses */
4230 if (comm->cellsize_limit >= comm->cutoff)
4232 /* Only a single pulse is required */
4235 else if (!bNoCutOff && comm->cellsize_limit > 0)
4237 /* We round down slightly here to avoid overhead due to the latency
4238 * of extra communication calls when the cut-off
4239 * would be only slightly longer than the cell size.
4240 * Later cellsize_limit is redetermined,
4241 * so we can not miss interactions due to this rounding.
4243 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
4247 /* There is no cell size limit */
4248 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4251 if (!bNoCutOff && npulse > 1)
4253 /* See if we can do with less pulses, based on dlb_scale */
4255 for (d = 0; d < dd->ndim; d++)
4258 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
4259 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4260 npulse_d_max = std::max(npulse_d_max, npulse_d);
4262 npulse = std::min(npulse, npulse_d_max);
4265 /* This env var can override npulse */
4266 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
4273 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4274 for (d = 0; d < dd->ndim; d++)
4276 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4277 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4278 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4280 comm->bVacDLBNoLimit = FALSE;
4284 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4285 if (!comm->bVacDLBNoLimit)
4287 comm->cellsize_limit = std::max(comm->cellsize_limit,
4288 comm->cutoff/comm->maxpulse);
4290 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4291 /* Set the minimum cell size for each DD dimension */
4292 for (d = 0; d < dd->ndim; d++)
4294 if (comm->bVacDLBNoLimit ||
4295 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4297 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4301 comm->cellsize_min_dlb[dd->dim[d]] =
4302 comm->cutoff/comm->cd[d].np_dlb;
4305 if (comm->cutoff_mbody <= 0)
4307 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
4315 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4317 /* If each molecule is a single charge group
4318 * or we use domain decomposition for each periodic dimension,
4319 * we do not need to take pbc into account for the bonded interactions.
4321 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4324 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4327 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4328 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
4329 const gmx_mtop_t *mtop, const t_inputrec *ir,
4330 const gmx_ddbox_t *ddbox)
4332 gmx_domdec_comm_t *comm;
4338 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4340 init_ddpme(dd, &comm->ddpme[0], 0);
4341 if (comm->npmedecompdim >= 2)
4343 init_ddpme(dd, &comm->ddpme[1], 1);
4348 comm->npmenodes = 0;
4349 if (dd->pme_nodeid >= 0)
4351 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4352 "Can not have separate PME ranks without PME electrostatics");
4358 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4360 if (!isDlbDisabled(comm))
4362 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
4365 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
4366 if (comm->dlbState == edlbsOffCanTurnOn)
4370 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
4372 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
4375 if (ir->ePBC == epbcNONE)
4377 vol_frac = 1 - 1/(double)dd->nnodes;
4382 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
4386 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4388 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4390 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
4393 /*! \brief Set some important DD parameters that can be modified by env.vars */
4394 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
4396 gmx_domdec_comm_t *comm = dd->comm;
4398 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
4399 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
4400 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
4401 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
4402 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
4403 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
4404 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
4406 if (dd->bSendRecv2 && fplog)
4408 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
4415 fprintf(fplog, "Will load balance based on FLOP count\n");
4417 if (comm->eFlop > 1)
4419 srand(1 + rank_mysim);
4421 comm->bRecordLoad = TRUE;
4425 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
4429 DomdecOptions::DomdecOptions() :
4430 checkBondedInteractions(TRUE),
4431 useBondedCommunication(TRUE),
4433 rankOrder(DdRankOrder::pp_pme),
4434 minimumCommunicationRange(0),
4435 constraintCommunicationRange(0),
4436 dlbOption(DlbOption::turnOnWhenUseful),
4442 clear_ivec(numCells);
4445 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
4446 const DomdecOptions &options,
4447 const MdrunOptions &mdrunOptions,
4448 const gmx_mtop_t *mtop,
4449 const t_inputrec *ir,
4451 gmx::ArrayRef<const gmx::RVec> xGlobal)
4458 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
4461 dd = new gmx_domdec_t;
4463 dd->comm = init_dd_comm();
4465 /* Initialize DD paritioning counters */
4466 dd->comm->partition_step = INT_MIN;
4469 set_dd_envvar_options(fplog, dd, cr->nodeid);
4471 gmx_ddbox_t ddbox = {0};
4472 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
4477 make_dd_communicators(fplog, cr, dd, options.rankOrder);
4479 if (thisRankHasDuty(cr, DUTY_PP))
4481 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
4483 setup_neighbor_relations(dd);
4486 /* Set overallocation to avoid frequent reallocation of arrays */
4487 set_over_alloc_dd(TRUE);
4489 clear_dd_cycle_counts(dd);
4494 static gmx_bool test_dd_cutoff(t_commrec *cr,
4495 t_state *state, const t_inputrec *ir,
4506 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4510 for (d = 0; d < dd->ndim; d++)
4514 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4515 if (dynamic_dd_box(&ddbox, ir))
4517 inv_cell_size *= DD_PRES_SCALE_MARGIN;
4520 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4522 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4524 if (np > dd->comm->cd[d].np_dlb)
4529 /* If a current local cell size is smaller than the requested
4530 * cut-off, we could still fix it, but this gets very complicated.
4531 * Without fixing here, we might actually need more checks.
4533 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4540 if (!isDlbDisabled(dd->comm))
4542 /* If DLB is not active yet, we don't need to check the grid jumps.
4543 * Actually we shouldn't, because then the grid jump data is not set.
4545 if (isDlbOn(dd->comm) &&
4546 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
4551 gmx_sumi(1, &LocallyLimited, cr);
4553 if (LocallyLimited > 0)
4562 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4565 gmx_bool bCutoffAllowed;
4567 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4571 cr->dd->comm->cutoff = cutoff_req;
4574 return bCutoffAllowed;
4577 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4579 gmx_domdec_comm_t *comm;
4581 comm = cr->dd->comm;
4583 /* Turn on the DLB limiting (might have been on already) */
4584 comm->bPMELoadBalDLBLimits = TRUE;
4586 /* Change the cut-off limit */
4587 comm->PMELoadBal_max_cutoff = cutoff;
4591 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4592 comm->PMELoadBal_max_cutoff);
4596 /* Sets whether we should later check the load imbalance data, so that
4597 * we can trigger dynamic load balancing if enough imbalance has
4600 * Used after PME load balancing unlocks DLB, so that the check
4601 * whether DLB will be useful can happen immediately.
4603 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
4605 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4607 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4611 /* Store the DD partitioning count, so we can ignore cycle counts
4612 * over the next nstlist steps, which are often slower.
4614 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4619 /* Returns if we should check whether there has been enough load
4620 * imbalance to trigger dynamic load balancing.
4622 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4624 if (dd->comm->dlbState != edlbsOffCanTurnOn)
4629 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4631 /* We ignore the first nstlist steps at the start of the run
4632 * or after PME load balancing or after turning DLB off, since
4633 * these often have extra allocation or cache miss overhead.
4638 if (dd->comm->cycl_n[ddCyclStep] == 0)
4640 /* We can have zero timed steps when dd_partition_system is called
4641 * more than once at the same step, e.g. with replica exchange.
4642 * Turning on DLB would trigger an assertion failure later, but is
4643 * also useless right after exchanging replicas.
4648 /* We should check whether we should use DLB directly after
4650 if (dd->comm->bCheckWhetherToTurnDlbOn)
4652 /* This flag was set when the PME load-balancing routines
4653 unlocked DLB, and should now be cleared. */
4654 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4657 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4658 * partitionings (we do not do this every partioning, so that we
4659 * avoid excessive communication). */
4660 if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
4668 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4670 return isDlbOn(dd->comm);
4673 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4675 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
4678 void dd_dlb_lock(gmx_domdec_t *dd)
4680 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4681 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4683 dd->comm->dlbState = edlbsOffTemporarilyLocked;
4687 void dd_dlb_unlock(gmx_domdec_t *dd)
4689 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4690 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
4692 dd->comm->dlbState = edlbsOffCanTurnOn;
4693 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
4697 static void merge_cg_buffers(int ncell,
4698 gmx_domdec_comm_dim_t *cd, int pulse,
4700 gmx::ArrayRef<int> index_gl,
4702 rvec *cg_cm, rvec *recv_vr,
4703 gmx::ArrayRef<int> cgindex,
4704 cginfo_mb_t *cginfo_mb, int *cginfo)
4706 gmx_domdec_ind_t *ind, *ind_p;
4707 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4708 int shift, shift_at;
4710 ind = &cd->ind[pulse];
4712 /* First correct the already stored data */
4713 shift = ind->nrecv[ncell];
4714 for (cell = ncell-1; cell >= 0; cell--)
4716 shift -= ind->nrecv[cell];
4719 /* Move the cg's present from previous grid pulses */
4720 cg0 = ncg_cell[ncell+cell];
4721 cg1 = ncg_cell[ncell+cell+1];
4722 cgindex[cg1+shift] = cgindex[cg1];
4723 for (cg = cg1-1; cg >= cg0; cg--)
4725 index_gl[cg+shift] = index_gl[cg];
4726 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4727 cgindex[cg+shift] = cgindex[cg];
4728 cginfo[cg+shift] = cginfo[cg];
4730 /* Correct the already stored send indices for the shift */
4731 for (p = 1; p <= pulse; p++)
4733 ind_p = &cd->ind[p];
4735 for (c = 0; c < cell; c++)
4737 cg0 += ind_p->nsend[c];
4739 cg1 = cg0 + ind_p->nsend[cell];
4740 for (cg = cg0; cg < cg1; cg++)
4742 ind_p->index[cg] += shift;
4748 /* Merge in the communicated buffers */
4752 for (cell = 0; cell < ncell; cell++)
4754 cg1 = ncg_cell[ncell+cell+1] + shift;
4757 /* Correct the old cg indices */
4758 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4760 cgindex[cg+1] += shift_at;
4763 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4765 /* Copy this charge group from the buffer */
4766 index_gl[cg1] = recv_i[cg0];
4767 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4768 /* Add it to the cgindex */
4769 cg_gl = index_gl[cg1];
4770 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4771 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4772 cgindex[cg1+1] = cgindex[cg1] + nat;
4777 shift += ind->nrecv[cell];
4778 ncg_cell[ncell+cell+1] = cg1;
4782 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4785 const BlockRanges &atomGroups)
4787 /* Store the atom block boundaries for easy copying of communication buffers
4790 for (int zone = 0; zone < nzone; zone++)
4792 for (gmx_domdec_ind_t &ind : cd->ind)
4794 ind.cell2at0[zone] = atomGroups.index[cg];
4795 cg += ind.nrecv[zone];
4796 ind.cell2at1[zone] = atomGroups.index[cg];
4801 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
4807 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4809 if (!bLocalCG[link->a[i]])
4818 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4820 real c[DIM][4]; /* the corners for the non-bonded communication */
4821 real cr0; /* corner for rounding */
4822 real cr1[4]; /* corners for rounding */
4823 real bc[DIM]; /* corners for bounded communication */
4824 real bcr1; /* corner for rounding for bonded communication */
4827 /* Determine the corners of the domain(s) we are communicating with */
4829 set_dd_corners(const gmx_domdec_t *dd,
4830 int dim0, int dim1, int dim2,
4834 const gmx_domdec_comm_t *comm;
4835 const gmx_domdec_zones_t *zones;
4840 zones = &comm->zones;
4842 /* Keep the compiler happy */
4846 /* The first dimension is equal for all cells */
4847 c->c[0][0] = comm->cell_x0[dim0];
4850 c->bc[0] = c->c[0][0];
4855 /* This cell row is only seen from the first row */
4856 c->c[1][0] = comm->cell_x0[dim1];
4857 /* All rows can see this row */
4858 c->c[1][1] = comm->cell_x0[dim1];
4859 if (isDlbOn(dd->comm))
4861 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4864 /* For the multi-body distance we need the maximum */
4865 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4868 /* Set the upper-right corner for rounding */
4869 c->cr0 = comm->cell_x1[dim0];
4874 for (j = 0; j < 4; j++)
4876 c->c[2][j] = comm->cell_x0[dim2];
4878 if (isDlbOn(dd->comm))
4880 /* Use the maximum of the i-cells that see a j-cell */
4881 for (i = 0; i < zones->nizone; i++)
4883 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4888 std::max(c->c[2][j-4],
4889 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4895 /* For the multi-body distance we need the maximum */
4896 c->bc[2] = comm->cell_x0[dim2];
4897 for (i = 0; i < 2; i++)
4899 for (j = 0; j < 2; j++)
4901 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4907 /* Set the upper-right corner for rounding */
4908 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4909 * Only cell (0,0,0) can see cell 7 (1,1,1)
4911 c->cr1[0] = comm->cell_x1[dim1];
4912 c->cr1[3] = comm->cell_x1[dim1];
4913 if (isDlbOn(dd->comm))
4915 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4918 /* For the multi-body distance we need the maximum */
4919 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4926 /* Add the atom groups we need to send in this pulse from this zone to
4927 * \p localAtomGroups and \p work
4930 get_zone_pulse_cgs(gmx_domdec_t *dd,
4931 int zonei, int zone,
4933 gmx::ArrayRef<const int> globalAtomGroupIndices,
4934 const gmx::BlockRanges &atomGroups,
4935 int dim, int dim_ind,
4936 int dim0, int dim1, int dim2,
4937 real r_comm2, real r_bcomm2,
4939 bool distanceIsTriclinic,
4941 real skew_fac2_d, real skew_fac_01,
4942 rvec *v_d, rvec *v_0, rvec *v_1,
4943 const dd_corners_t *c,
4945 gmx_bool bDistBonded,
4951 std::vector<int> *localAtomGroups,
4952 dd_comm_setup_work_t *work)
4954 gmx_domdec_comm_t *comm;
4956 gmx_bool bDistMB_pulse;
4958 real r2, rb2, r, tric_sh;
4965 bScrew = (dd->bScrewPBC && dim == XX);
4967 bDistMB_pulse = (bDistMB && bDistBonded);
4969 /* Unpack the work data */
4970 std::vector<int> &ibuf = work->atomGroupBuffer;
4971 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4975 for (cg = cg0; cg < cg1; cg++)
4979 if (!distanceIsTriclinic)
4981 /* Rectangular direction, easy */
4982 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4989 r = cg_cm[cg][dim] - c->bc[dim_ind];
4995 /* Rounding gives at most a 16% reduction
4996 * in communicated atoms
4998 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
5000 r = cg_cm[cg][dim0] - c->cr0;
5001 /* This is the first dimension, so always r >= 0 */
5008 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5010 r = cg_cm[cg][dim1] - c->cr1[zone];
5017 r = cg_cm[cg][dim1] - c->bcr1;
5027 /* Triclinic direction, more complicated */
5030 /* Rounding, conservative as the skew_fac multiplication
5031 * will slightly underestimate the distance.
5033 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
5035 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
5036 for (i = dim0+1; i < DIM; i++)
5038 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
5040 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
5043 rb[dim0] = rn[dim0];
5046 /* Take care that the cell planes along dim0 might not
5047 * be orthogonal to those along dim1 and dim2.
5049 for (i = 1; i <= dim_ind; i++)
5052 if (normal[dim0][dimd] > 0)
5054 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
5057 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
5062 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5064 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
5066 for (i = dim1+1; i < DIM; i++)
5068 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
5070 rn[dim1] += tric_sh;
5073 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
5074 /* Take care of coupling of the distances
5075 * to the planes along dim0 and dim1 through dim2.
5077 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
5078 /* Take care that the cell planes along dim1
5079 * might not be orthogonal to that along dim2.
5081 if (normal[dim1][dim2] > 0)
5083 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5089 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5092 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5093 /* Take care of coupling of the distances
5094 * to the planes along dim0 and dim1 through dim2.
5096 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5097 /* Take care that the cell planes along dim1
5098 * might not be orthogonal to that along dim2.
5100 if (normal[dim1][dim2] > 0)
5102 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5107 /* The distance along the communication direction */
5108 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5110 for (i = dim+1; i < DIM; i++)
5112 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5117 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5118 /* Take care of coupling of the distances
5119 * to the planes along dim0 and dim1 through dim2.
5121 if (dim_ind == 1 && zonei == 1)
5123 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5129 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5132 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5133 /* Take care of coupling of the distances
5134 * to the planes along dim0 and dim1 through dim2.
5136 if (dim_ind == 1 && zonei == 1)
5138 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
5146 ((bDistMB && rb2 < r_bcomm2) ||
5147 (bDist2B && r2 < r_bcomm2)) &&
5149 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5150 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5153 /* Store the local and global atom group indices and position */
5154 localAtomGroups->push_back(cg);
5155 ibuf.push_back(globalAtomGroupIndices[cg]);
5159 if (dd->ci[dim] == 0)
5161 /* Correct cg_cm for pbc */
5162 rvec_add(cg_cm[cg], box[dim], posPbc);
5165 posPbc[YY] = box[YY][YY] - posPbc[YY];
5166 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5171 copy_rvec(cg_cm[cg], posPbc);
5173 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5175 nat += atomGroups.index[cg+1] - atomGroups.index[cg];
5180 work->nsend_zone = nsend_z;
5183 static void clearCommSetupData(dd_comm_setup_work_t *work)
5185 work->localAtomGroupBuffer.clear();
5186 work->atomGroupBuffer.clear();
5187 work->positionBuffer.clear();
5189 work->nsend_zone = 0;
5192 static void setup_dd_communication(gmx_domdec_t *dd,
5193 matrix box, gmx_ddbox_t *ddbox,
5195 t_state *state, PaddedRVecVector *f)
5197 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5198 int nzone, nzone_send, zone, zonei, cg0, cg1;
5199 int c, i, cg, cg_gl, nrcg;
5200 int *zone_cg_range, pos_cg;
5201 gmx_domdec_comm_t *comm;
5202 gmx_domdec_zones_t *zones;
5203 gmx_domdec_comm_dim_t *cd;
5204 cginfo_mb_t *cginfo_mb;
5205 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5206 real r_comm2, r_bcomm2;
5207 dd_corners_t corners;
5208 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5209 real skew_fac2_d, skew_fac_01;
5214 fprintf(debug, "Setting up DD communication\n");
5219 if (comm->dth.empty())
5221 /* Initialize the thread data.
5222 * This can not be done in init_domain_decomposition,
5223 * as the numbers of threads is determined later.
5225 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5226 comm->dth.resize(numThreads);
5229 switch (fr->cutoff_scheme)
5235 cg_cm = as_rvec_array(state->x.data());
5238 gmx_incons("unimplemented");
5242 bBondComm = comm->bBondComm;
5244 /* Do we need to determine extra distances for multi-body bondeds? */
5245 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5247 /* Do we need to determine extra distances for only two-body bondeds? */
5248 bDist2B = (bBondComm && !bDistMB);
5250 r_comm2 = gmx::square(comm->cutoff);
5251 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5255 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
5258 zones = &comm->zones;
5261 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5262 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5264 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5266 /* Triclinic stuff */
5267 normal = ddbox->normal;
5271 v_0 = ddbox->v[dim0];
5272 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5274 /* Determine the coupling coefficient for the distances
5275 * to the cell planes along dim0 and dim1 through dim2.
5276 * This is required for correct rounding.
5279 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5282 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5288 v_1 = ddbox->v[dim1];
5291 zone_cg_range = zones->cg_range;
5292 cginfo_mb = fr->cginfo_mb;
5294 zone_cg_range[0] = 0;
5295 zone_cg_range[1] = dd->ncg_home;
5296 comm->zone_ncg1[0] = dd->ncg_home;
5297 pos_cg = dd->ncg_home;
5299 nat_tot = comm->atomRanges.numHomeAtoms();
5301 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5303 dim = dd->dim[dim_ind];
5304 cd = &comm->cd[dim_ind];
5306 /* Check if we need to compute triclinic distances along this dim */
5307 bool distanceIsTriclinic = false;
5308 for (i = 0; i <= dim_ind; i++)
5310 if (ddbox->tric_dir[dd->dim[i]])
5312 distanceIsTriclinic = true;
5316 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5318 /* No pbc in this dimension, the first node should not comm. */
5326 v_d = ddbox->v[dim];
5327 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5329 cd->receiveInPlace = true;
5330 for (int p = 0; p < cd->numPulses(); p++)
5332 /* Only atoms communicated in the first pulse are used
5333 * for multi-body bonded interactions or for bBondComm.
5335 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5337 gmx_domdec_ind_t *ind = &cd->ind[p];
5339 /* Thread 0 writes in the global index array */
5341 clearCommSetupData(&comm->dth[0]);
5343 for (zone = 0; zone < nzone_send; zone++)
5345 if (dim_ind > 0 && distanceIsTriclinic)
5347 /* Determine slightly more optimized skew_fac's
5349 * This reduces the number of communicated atoms
5350 * by about 10% for 3D DD of rhombic dodecahedra.
5352 for (dimd = 0; dimd < dim; dimd++)
5354 sf2_round[dimd] = 1;
5355 if (ddbox->tric_dir[dimd])
5357 for (i = dd->dim[dimd]+1; i < DIM; i++)
5359 /* If we are shifted in dimension i
5360 * and the cell plane is tilted forward
5361 * in dimension i, skip this coupling.
5363 if (!(zones->shift[nzone+zone][i] &&
5364 ddbox->v[dimd][i][dimd] >= 0))
5367 gmx::square(ddbox->v[dimd][i][dimd]);
5370 sf2_round[dimd] = 1/sf2_round[dimd];
5375 zonei = zone_perm[dim_ind][zone];
5378 /* Here we permutate the zones to obtain a convenient order
5379 * for neighbor searching
5381 cg0 = zone_cg_range[zonei];
5382 cg1 = zone_cg_range[zonei+1];
5386 /* Look only at the cg's received in the previous grid pulse
5388 cg1 = zone_cg_range[nzone+zone+1];
5389 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
5392 const int numThreads = static_cast<int>(comm->dth.size());
5393 #pragma omp parallel for num_threads(numThreads) schedule(static)
5394 for (int th = 0; th < numThreads; th++)
5398 dd_comm_setup_work_t &work = comm->dth[th];
5400 /* Retain data accumulated into buffers of thread 0 */
5403 clearCommSetupData(&work);
5406 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5407 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5409 /* Get the cg's for this pulse in this zone */
5410 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5411 dd->globalAtomGroupIndices,
5413 dim, dim_ind, dim0, dim1, dim2,
5415 box, distanceIsTriclinic,
5416 normal, skew_fac2_d, skew_fac_01,
5417 v_d, v_0, v_1, &corners, sf2_round,
5418 bDistBonded, bBondComm,
5421 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5424 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5427 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5428 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5429 ind->nsend[zone] = comm->dth[0].nsend_zone;
5430 /* Append data of threads>=1 to the communication buffers */
5431 for (int th = 1; th < numThreads; th++)
5433 const dd_comm_setup_work_t &dth = comm->dth[th];
5435 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5436 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5437 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5438 comm->dth[0].nat += dth.nat;
5439 ind->nsend[zone] += dth.nsend_zone;
5442 /* Clear the counts in case we do not have pbc */
5443 for (zone = nzone_send; zone < nzone; zone++)
5445 ind->nsend[zone] = 0;
5447 ind->nsend[nzone] = ind->index.size();
5448 ind->nsend[nzone + 1] = comm->dth[0].nat;
5449 /* Communicate the number of cg's and atoms to receive */
5450 ddSendrecv(dd, dim_ind, dddirBackward,
5451 ind->nsend, nzone+2,
5452 ind->nrecv, nzone+2);
5456 /* We can receive in place if only the last zone is not empty */
5457 for (zone = 0; zone < nzone-1; zone++)
5459 if (ind->nrecv[zone] > 0)
5461 cd->receiveInPlace = false;
5466 int receiveBufferSize = 0;
5467 if (!cd->receiveInPlace)
5469 receiveBufferSize = ind->nrecv[nzone];
5471 /* These buffer are actually only needed with in-place */
5472 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5473 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5475 dd_comm_setup_work_t &work = comm->dth[0];
5477 /* Make space for the global cg indices */
5478 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5479 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5480 dd->atomGroups_.index.resize(numAtomGroupsNew + 1);
5481 /* Communicate the global cg indices */
5482 gmx::ArrayRef<int> integerBufferRef;
5483 if (cd->receiveInPlace)
5485 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5489 integerBufferRef = globalAtomGroupBuffer.buffer;
5491 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5492 work.atomGroupBuffer, integerBufferRef);
5494 /* Make space for cg_cm */
5495 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5496 if (fr->cutoff_scheme == ecutsGROUP)
5502 cg_cm = as_rvec_array(state->x.data());
5504 /* Communicate cg_cm */
5505 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5506 if (cd->receiveInPlace)
5508 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5512 rvecBufferRef = rvecBuffer.buffer;
5514 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5515 work.positionBuffer, rvecBufferRef);
5517 /* Make the charge group index */
5518 if (cd->receiveInPlace)
5520 zone = (p == 0 ? 0 : nzone - 1);
5521 while (zone < nzone)
5523 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5525 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5526 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5527 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5528 dd->atomGroups_.index[pos_cg + 1] = dd->atomGroups_.index[pos_cg] + nrcg;
5531 /* Update the charge group presence,
5532 * so we can use it in the next pass of the loop.
5534 comm->bLocalCG[cg_gl] = TRUE;
5540 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5543 zone_cg_range[nzone+zone] = pos_cg;
5548 /* This part of the code is never executed with bBondComm. */
5549 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5550 dd->globalAtomGroupIndices, integerBufferRef.data(),
5551 cg_cm, as_rvec_array(rvecBufferRef.data()),
5552 dd->atomGroups_.index,
5553 fr->cginfo_mb, fr->cginfo);
5554 pos_cg += ind->nrecv[nzone];
5556 nat_tot += ind->nrecv[nzone+1];
5558 if (!cd->receiveInPlace)
5560 /* Store the atom block for easy copying of communication buffers */
5561 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGroups());
5566 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5570 /* We don't need to update cginfo, since that was alrady done above.
5571 * So we pass NULL for the forcerec.
5573 dd_set_cginfo(dd->globalAtomGroupIndices,
5574 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5575 nullptr, comm->bLocalCG);
5580 fprintf(debug, "Finished setting up DD communication, zones:");
5581 for (c = 0; c < zones->n; c++)
5583 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5585 fprintf(debug, "\n");
5589 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5593 for (c = 0; c < zones->nizone; c++)
5595 zones->izone[c].cg1 = zones->cg_range[c+1];
5596 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5597 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5601 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5603 * Also sets the atom density for the home zone when \p zone_start=0.
5604 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5605 * how many charge groups will move but are still part of the current range.
5606 * \todo When converting domdec to use proper classes, all these variables
5607 * should be private and a method should return the correct count
5608 * depending on an internal state.
5610 * \param[in,out] dd The domain decomposition struct
5611 * \param[in] box The box
5612 * \param[in] ddbox The domain decomposition box struct
5613 * \param[in] zone_start The start of the zone range to set sizes for
5614 * \param[in] zone_end The end of the zone range to set sizes for
5615 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should moved but are still present in dd->comm->zones.cg_range
5617 static void set_zones_size(gmx_domdec_t *dd,
5618 matrix box, const gmx_ddbox_t *ddbox,
5619 int zone_start, int zone_end,
5620 int numMovedChargeGroupsInHomeZone)
5622 gmx_domdec_comm_t *comm;
5623 gmx_domdec_zones_t *zones;
5632 zones = &comm->zones;
5634 /* Do we need to determine extra distances for multi-body bondeds? */
5635 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5637 for (z = zone_start; z < zone_end; z++)
5639 /* Copy cell limits to zone limits.
5640 * Valid for non-DD dims and non-shifted dims.
5642 copy_rvec(comm->cell_x0, zones->size[z].x0);
5643 copy_rvec(comm->cell_x1, zones->size[z].x1);
5646 for (d = 0; d < dd->ndim; d++)
5650 for (z = 0; z < zones->n; z++)
5652 /* With a staggered grid we have different sizes
5653 * for non-shifted dimensions.
5655 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5659 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5660 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5664 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5665 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5671 rcmbs = comm->cutoff_mbody;
5672 if (ddbox->tric_dir[dim])
5674 rcs /= ddbox->skew_fac[dim];
5675 rcmbs /= ddbox->skew_fac[dim];
5678 /* Set the lower limit for the shifted zone dimensions */
5679 for (z = zone_start; z < zone_end; z++)
5681 if (zones->shift[z][dim] > 0)
5684 if (!isDlbOn(dd->comm) || d == 0)
5686 zones->size[z].x0[dim] = comm->cell_x1[dim];
5687 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5691 /* Here we take the lower limit of the zone from
5692 * the lowest domain of the zone below.
5696 zones->size[z].x0[dim] =
5697 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5703 zones->size[z].x0[dim] =
5704 zones->size[zone_perm[2][z-4]].x0[dim];
5708 zones->size[z].x0[dim] =
5709 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5712 /* A temporary limit, is updated below */
5713 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5717 for (zi = 0; zi < zones->nizone; zi++)
5719 if (zones->shift[zi][dim] == 0)
5721 /* This takes the whole zone into account.
5722 * With multiple pulses this will lead
5723 * to a larger zone then strictly necessary.
5725 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5726 zones->size[zi].x1[dim]+rcmbs);
5734 /* Loop over the i-zones to set the upper limit of each
5737 for (zi = 0; zi < zones->nizone; zi++)
5739 if (zones->shift[zi][dim] == 0)
5741 /* We should only use zones up to zone_end */
5742 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5743 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5745 if (zones->shift[z][dim] > 0)
5747 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5748 zones->size[zi].x1[dim]+rcs);
5755 for (z = zone_start; z < zone_end; z++)
5757 /* Initialization only required to keep the compiler happy */
5758 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5761 /* To determine the bounding box for a zone we need to find
5762 * the extreme corners of 4, 2 or 1 corners.
5764 nc = 1 << (ddbox->nboundeddim - 1);
5766 for (c = 0; c < nc; c++)
5768 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5772 corner[YY] = zones->size[z].x0[YY];
5776 corner[YY] = zones->size[z].x1[YY];
5780 corner[ZZ] = zones->size[z].x0[ZZ];
5784 corner[ZZ] = zones->size[z].x1[ZZ];
5786 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5787 box[ZZ][1 - dd->dim[0]] != 0)
5789 /* With 1D domain decomposition the cg's are not in
5790 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5791 * Shift the corner of the z-vector back to along the box
5792 * vector of dimension d, so it will later end up at 0 along d.
5793 * This can affect the location of this corner along dd->dim[0]
5794 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5796 int d = 1 - dd->dim[0];
5798 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5800 /* Apply the triclinic couplings */
5801 assert(ddbox->npbcdim <= DIM);
5802 for (i = YY; i < ddbox->npbcdim; i++)
5804 for (j = XX; j < i; j++)
5806 corner[j] += corner[i]*box[i][j]/box[i][i];
5811 copy_rvec(corner, corner_min);
5812 copy_rvec(corner, corner_max);
5816 for (i = 0; i < DIM; i++)
5818 corner_min[i] = std::min(corner_min[i], corner[i]);
5819 corner_max[i] = std::max(corner_max[i], corner[i]);
5823 /* Copy the extreme cornes without offset along x */
5824 for (i = 0; i < DIM; i++)
5826 zones->size[z].bb_x0[i] = corner_min[i];
5827 zones->size[z].bb_x1[i] = corner_max[i];
5829 /* Add the offset along x */
5830 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5831 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5834 if (zone_start == 0)
5837 for (dim = 0; dim < DIM; dim++)
5839 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5841 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5846 for (z = zone_start; z < zone_end; z++)
5848 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5850 zones->size[z].x0[XX], zones->size[z].x1[XX],
5851 zones->size[z].x0[YY], zones->size[z].x1[YY],
5852 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5853 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5855 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5856 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5857 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5862 static int comp_cgsort(const void *a, const void *b)
5866 gmx_cgsort_t *cga, *cgb;
5867 cga = (gmx_cgsort_t *)a;
5868 cgb = (gmx_cgsort_t *)b;
5870 comp = cga->nsc - cgb->nsc;
5873 comp = cga->ind_gl - cgb->ind_gl;
5879 static void order_int_cg(int n, const gmx_cgsort_t *sort,
5884 /* Order the data */
5885 for (i = 0; i < n; i++)
5887 buf[i] = a[sort[i].ind];
5890 /* Copy back to the original array */
5891 for (i = 0; i < n; i++)
5897 static void order_vec_cg(int n,
5898 const gmx_cgsort_t *sort,
5900 gmx::ArrayRef<gmx::RVec> sortBuffer)
5902 GMX_ASSERT(sortBuffer.size() >= static_cast<size_t>(n), "Need a sufficiently large sorting buffer");
5904 rvec *buf = as_rvec_array(sortBuffer.data());
5906 /* Order the data */
5907 for (int i = 0; i < n; i++)
5909 copy_rvec(v[sort[i].ind], buf[i]);
5912 /* Copy back to the original array */
5913 for (int i = 0; i < n; i++)
5915 copy_rvec(buf[i], v[i]);
5919 static void order_vec_atom(int ncg,
5920 const gmx::BlockRanges *atomGroups,
5921 const gmx_cgsort_t *sort,
5922 gmx::ArrayRef<gmx::RVec> v,
5923 gmx::ArrayRef<gmx::RVec> buf)
5925 int a, atot, cg, cg0, cg1, i;
5927 if (atomGroups == nullptr)
5929 /* Avoid the useless loop of the atoms within a cg */
5930 order_vec_cg(ncg, sort, as_rvec_array(v.data()), buf);
5935 /* Order the data */
5937 for (cg = 0; cg < ncg; cg++)
5939 cg0 = atomGroups->index[sort[cg].ind];
5940 cg1 = atomGroups->index[sort[cg].ind+1];
5941 for (i = cg0; i < cg1; i++)
5943 copy_rvec(v[i], buf[a]);
5949 /* Copy back to the original array */
5950 for (a = 0; a < atot; a++)
5952 copy_rvec(buf[a], v[a]);
5956 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
5957 int nsort_new, gmx_cgsort_t *sort_new,
5958 gmx_cgsort_t *sort1)
5962 /* The new indices are not very ordered, so we qsort them */
5963 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
5965 /* sort2 is already ordered, so now we can merge the two arrays */
5969 while (i2 < nsort2 || i_new < nsort_new)
5973 sort1[i1++] = sort_new[i_new++];
5975 else if (i_new == nsort_new)
5977 sort1[i1++] = sort2[i2++];
5979 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
5980 (sort2[i2].nsc == sort_new[i_new].nsc &&
5981 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
5983 sort1[i1++] = sort2[i2++];
5987 sort1[i1++] = sort_new[i_new++];
5992 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
5994 gmx_domdec_sort_t *sort;
5995 gmx_cgsort_t *cgsort, *sort_i;
5996 int ncg_new, nsort2, nsort_new, i, *a, moved;
5998 sort = dd->comm->sort;
6000 a = fr->ns->grid->cell_index;
6002 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
6004 if (ncg_home_old >= 0)
6006 /* The charge groups that remained in the same ns grid cell
6007 * are completely ordered. So we can sort efficiently by sorting
6008 * the charge groups that did move into the stationary list.
6013 for (i = 0; i < dd->ncg_home; i++)
6015 /* Check if this cg did not move to another node */
6018 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
6020 /* This cg is new on this node or moved ns grid cell */
6021 if (nsort_new >= sort->sort_new_nalloc)
6023 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
6024 srenew(sort->sort_new, sort->sort_new_nalloc);
6026 sort_i = &(sort->sort_new[nsort_new++]);
6030 /* This cg did not move */
6031 sort_i = &(sort->sort2[nsort2++]);
6033 /* Sort on the ns grid cell indices
6034 * and the global topology index.
6035 * index_gl is irrelevant with cell ns,
6036 * but we set it here anyhow to avoid a conditional.
6039 sort_i->ind_gl = dd->globalAtomGroupIndices[i];
6046 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
6049 /* Sort efficiently */
6050 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
6055 cgsort = sort->sort;
6057 for (i = 0; i < dd->ncg_home; i++)
6059 /* Sort on the ns grid cell indices
6060 * and the global topology index
6062 cgsort[i].nsc = a[i];
6063 cgsort[i].ind_gl = dd->globalAtomGroupIndices[i];
6065 if (cgsort[i].nsc < moved)
6072 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
6074 /* Determine the order of the charge groups using qsort */
6075 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
6081 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
6087 sort = dd->comm->sort->sort;
6089 nbnxn_get_atomorder(fr->nbv->nbs.get(), &a, &na);
6092 for (i = 0; i < na; i++)
6096 sort[ncg_new].ind = a[i];
6104 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6107 gmx_domdec_sort_t *sort;
6108 gmx_cgsort_t *cgsort;
6109 int ncg_new, i, *ibuf, cgsize;
6111 sort = dd->comm->sort;
6113 if (dd->ncg_home > sort->sort_nalloc)
6115 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
6116 srenew(sort->sort, sort->sort_nalloc);
6117 srenew(sort->sort2, sort->sort_nalloc);
6119 cgsort = sort->sort;
6121 switch (fr->cutoff_scheme)
6124 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
6127 ncg_new = dd_sort_order_nbnxn(dd, fr);
6130 gmx_incons("unimplemented");
6134 const gmx::BlockRanges &atomGroups = dd->atomGroups();
6136 /* We alloc with the old size, since cgindex is still old */
6137 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGroups.index[dd->ncg_home]);
6139 const gmx::BlockRanges *atomGroupsPtr = (dd->comm->bCGs ? &atomGroups : nullptr);
6141 /* Remove the charge groups which are no longer at home here */
6142 dd->ncg_home = ncg_new;
6145 fprintf(debug, "Set the new home charge group count to %d\n",
6149 /* Reorder the state */
6150 if (state->flags & (1 << estX))
6152 order_vec_atom(dd->ncg_home, atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6154 if (state->flags & (1 << estV))
6156 order_vec_atom(dd->ncg_home, atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6158 if (state->flags & (1 << estCGP))
6160 order_vec_atom(dd->ncg_home, atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6163 if (fr->cutoff_scheme == ecutsGROUP)
6166 order_vec_cg(dd->ncg_home, cgsort, cgcm, rvecBuffer.buffer);
6169 if (dd->ncg_home+1 > sort->ibuf_nalloc)
6171 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
6172 srenew(sort->ibuf, sort->ibuf_nalloc);
6175 /* Reorder the global cg index */
6176 order_int_cg(dd->ncg_home, cgsort, dd->globalAtomGroupIndices.data(), ibuf);
6177 /* Reorder the cginfo */
6178 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
6179 /* Rebuild the local cg index */
6183 for (i = 0; i < dd->ncg_home; i++)
6185 cgsize = atomGroups.index[cgsort[i].ind+1] - atomGroups.index[cgsort[i].ind];
6186 ibuf[i+1] = ibuf[i] + cgsize;
6188 for (i = 0; i < dd->ncg_home+1; i++)
6190 dd->atomGroups_.index[i] = ibuf[i];
6195 for (i = 0; i < dd->ncg_home+1; i++)
6197 dd->atomGroups_.index[i] = i;
6200 /* Set the home atom number */
6201 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGroups().index[dd->ncg_home]);
6203 if (fr->cutoff_scheme == ecutsVERLET)
6205 /* The atoms are now exactly in grid order, update the grid order */
6206 nbnxn_set_atomorder(fr->nbv->nbs.get());
6210 /* Copy the sorted ns cell indices back to the ns grid struct */
6211 for (i = 0; i < dd->ncg_home; i++)
6213 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6215 fr->ns->grid->nr = dd->ncg_home;
6219 static void add_dd_statistics(gmx_domdec_t *dd)
6221 gmx_domdec_comm_t *comm = dd->comm;
6223 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6225 auto range = static_cast<DDAtomRanges::Type>(i);
6227 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6232 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6234 gmx_domdec_comm_t *comm = dd->comm;
6236 /* Reset all the statistics and counters for total run counting */
6237 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6239 comm->sum_nat[i] = 0;
6243 comm->load_step = 0;
6246 clear_ivec(comm->load_lim);
6251 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6253 gmx_domdec_comm_t *comm = cr->dd->comm;
6255 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6256 gmx_sumd(numRanges, comm->sum_nat, cr);
6258 if (fplog == nullptr)
6263 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6265 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6267 auto range = static_cast<DDAtomRanges::Type>(i);
6268 double av = comm->sum_nat[i]/comm->ndecomp;
6271 case DDAtomRanges::Type::Zones:
6273 " av. #atoms communicated per step for force: %d x %.1f\n",
6276 case DDAtomRanges::Type::Vsites:
6277 if (cr->dd->vsite_comm)
6280 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6281 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6285 case DDAtomRanges::Type::Constraints:
6286 if (cr->dd->constraint_comm)
6289 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6290 1 + ir->nLincsIter, av);
6294 gmx_incons(" Unknown type for DD statistics");
6297 fprintf(fplog, "\n");
6299 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6301 print_dd_load_av(fplog, cr->dd);
6305 void dd_partition_system(FILE *fplog,
6307 const t_commrec *cr,
6308 gmx_bool bMasterState,
6310 t_state *state_global,
6311 const gmx_mtop_t *top_global,
6312 const t_inputrec *ir,
6313 t_state *state_local,
6314 PaddedRVecVector *f,
6315 gmx::MDAtoms *mdAtoms,
6316 gmx_localtop_t *top_local,
6319 gmx::Constraints *constr,
6321 gmx_wallcycle *wcycle,
6325 gmx_domdec_comm_t *comm;
6326 gmx_ddbox_t ddbox = {0};
6328 gmx_int64_t step_pcoupl;
6329 rvec cell_ns_x0, cell_ns_x1;
6330 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6331 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6332 gmx_bool bRedist, bSortCG, bResortAll;
6333 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6337 wallcycle_start(wcycle, ewcDOMDEC);
6342 // TODO if the update code becomes accessible here, use
6343 // upd->deform for this logic.
6344 bBoxChanged = (bMasterState || inputrecDeform(ir));
6345 if (ir->epc != epcNO)
6347 /* With nstpcouple > 1 pressure coupling happens.
6348 * one step after calculating the pressure.
6349 * Box scaling happens at the end of the MD step,
6350 * after the DD partitioning.
6351 * We therefore have to do DLB in the first partitioning
6352 * after an MD step where P-coupling occurred.
6353 * We need to determine the last step in which p-coupling occurred.
6354 * MRS -- need to validate this for vv?
6356 int n = ir->nstpcouple;
6359 step_pcoupl = step - 1;
6363 step_pcoupl = ((step - 1)/n)*n + 1;
6365 if (step_pcoupl >= comm->partition_step)
6371 bNStGlobalComm = (step % nstglobalcomm == 0);
6379 /* Should we do dynamic load balacing this step?
6380 * Since it requires (possibly expensive) global communication,
6381 * we might want to do DLB less frequently.
6383 if (bBoxChanged || ir->epc != epcNO)
6385 bDoDLB = bBoxChanged;
6389 bDoDLB = bNStGlobalComm;
6393 /* Check if we have recorded loads on the nodes */
6394 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6396 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6398 /* Print load every nstlog, first and last step to the log file */
6399 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6400 comm->n_load_collect == 0 ||
6402 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6404 /* Avoid extra communication due to verbose screen output
6405 * when nstglobalcomm is set.
6407 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6408 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6410 get_load_distribution(dd, wcycle);
6415 dd_print_load(fplog, dd, step-1);
6419 dd_print_load_verbose(dd);
6422 comm->n_load_collect++;
6428 /* Add the measured cycles to the running average */
6429 const float averageFactor = 0.1f;
6430 comm->cyclesPerStepDlbExpAverage =
6431 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6432 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6434 if (comm->dlbState == edlbsOnCanTurnOff &&
6435 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6437 gmx_bool turnOffDlb;
6440 /* If the running averaged cycles with DLB are more
6441 * than before we turned on DLB, turn off DLB.
6442 * We will again run and check the cycles without DLB
6443 * and we can then decide if to turn off DLB forever.
6445 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6446 comm->cyclesPerStepBeforeDLB);
6448 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6451 /* To turn off DLB, we need to redistribute the atoms */
6452 dd_collect_state(dd, state_local, state_global);
6453 bMasterState = TRUE;
6454 turn_off_dlb(fplog, cr, step);
6458 else if (bCheckWhetherToTurnDlbOn)
6460 gmx_bool turnOffDlbForever = FALSE;
6461 gmx_bool turnOnDlb = FALSE;
6463 /* Since the timings are node dependent, the master decides */
6466 /* If we recently turned off DLB, we want to check if
6467 * performance is better without DLB. We want to do this
6468 * ASAP to minimize the chance that external factors
6469 * slowed down the DLB step are gone here and we
6470 * incorrectly conclude that DLB was causing the slowdown.
6471 * So we measure one nstlist block, no running average.
6473 if (comm->haveTurnedOffDlb &&
6474 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6475 comm->cyclesPerStepDlbExpAverage)
6477 /* After turning off DLB we ran nstlist steps in fewer
6478 * cycles than with DLB. This likely means that DLB
6479 * in not benefical, but this could be due to a one
6480 * time unlucky fluctuation, so we require two such
6481 * observations in close succession to turn off DLB
6484 if (comm->dlbSlowerPartitioningCount > 0 &&
6485 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6487 turnOffDlbForever = TRUE;
6489 comm->haveTurnedOffDlb = false;
6490 /* Register when we last measured DLB slowdown */
6491 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6495 /* Here we check if the max PME rank load is more than 0.98
6496 * the max PP force load. If so, PP DLB will not help,
6497 * since we are (almost) limited by PME. Furthermore,
6498 * DLB will cause a significant extra x/f redistribution
6499 * cost on the PME ranks, which will then surely result
6500 * in lower total performance.
6502 if (cr->npmenodes > 0 &&
6503 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6509 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6515 gmx_bool turnOffDlbForever;
6519 turnOffDlbForever, turnOnDlb
6521 dd_bcast(dd, sizeof(bools), &bools);
6522 if (bools.turnOffDlbForever)
6524 turn_off_dlb_forever(fplog, cr, step);
6526 else if (bools.turnOnDlb)
6528 turn_on_dlb(fplog, cr, step);
6533 comm->n_load_have++;
6536 cgs_gl = &comm->cgs_gl;
6541 /* Clear the old state */
6542 clearDDStateIndices(dd, 0, 0);
6545 auto xGlobal = positionsFromStatePointer(state_global);
6547 set_ddbox(dd, true, ir,
6548 DDMASTER(dd) ? state_global->box : nullptr,
6552 distributeState(fplog, dd, state_global, *cgs_gl, ddbox, state_local, f);
6554 dd_make_local_cgs(dd, &top_local->cgs);
6556 /* Ensure that we have space for the new distribution */
6557 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6559 if (fr->cutoff_scheme == ecutsGROUP)
6561 calc_cgcm(fplog, 0, dd->ncg_home,
6562 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6565 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6567 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6569 else if (state_local->ddp_count != dd->ddp_count)
6571 if (state_local->ddp_count > dd->ddp_count)
6573 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
6576 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6578 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6581 /* Clear the old state */
6582 clearDDStateIndices(dd, 0, 0);
6584 /* Restore the atom group indices from state_local */
6585 restoreAtomGroups(dd, cgs_gl->index, state_local);
6586 make_dd_indices(dd, cgs_gl->index, 0);
6587 ncgindex_set = dd->ncg_home;
6589 if (fr->cutoff_scheme == ecutsGROUP)
6591 /* Redetermine the cg COMs */
6592 calc_cgcm(fplog, 0, dd->ncg_home,
6593 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6596 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6598 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6600 set_ddbox(dd, bMasterState, ir, state_local->box,
6601 true, state_local->x, &ddbox);
6603 bRedist = isDlbOn(comm);
6607 /* We have the full state, only redistribute the cgs */
6609 /* Clear the non-home indices */
6610 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6613 /* Avoid global communication for dim's without pbc and -gcom */
6614 if (!bNStGlobalComm)
6616 copy_rvec(comm->box0, ddbox.box0 );
6617 copy_rvec(comm->box_size, ddbox.box_size);
6619 set_ddbox(dd, bMasterState, ir, state_local->box,
6620 bNStGlobalComm, state_local->x, &ddbox);
6625 /* For dim's without pbc and -gcom */
6626 copy_rvec(ddbox.box0, comm->box0 );
6627 copy_rvec(ddbox.box_size, comm->box_size);
6629 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6632 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6634 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6637 /* Check if we should sort the charge groups */
6638 bSortCG = (bMasterState || bRedist);
6640 ncg_home_old = dd->ncg_home;
6642 /* When repartitioning we mark charge groups that will move to neighboring
6643 * DD cells, but we do not move them right away for performance reasons.
6644 * Thus we need to keep track of how many charge groups will move for
6645 * obtaining correct local charge group / atom counts.
6650 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6652 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6654 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
6656 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6659 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6661 &comm->cell_x0, &comm->cell_x1,
6662 dd->ncg_home, fr->cg_cm,
6663 cell_ns_x0, cell_ns_x1, &grid_density);
6667 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6670 switch (fr->cutoff_scheme)
6673 copy_ivec(fr->ns->grid->n, ncells_old);
6674 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6675 state_local->box, cell_ns_x0, cell_ns_x1,
6676 fr->rlist, grid_density);
6679 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6682 gmx_incons("unimplemented");
6684 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6685 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6689 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6691 /* Sort the state on charge group position.
6692 * This enables exact restarts from this step.
6693 * It also improves performance by about 15% with larger numbers
6694 * of atoms per node.
6697 /* Fill the ns grid with the home cell,
6698 * so we can sort with the indices.
6700 set_zones_ncg_home(dd);
6702 switch (fr->cutoff_scheme)
6705 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6707 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6709 comm->zones.size[0].bb_x0,
6710 comm->zones.size[0].bb_x1,
6712 comm->zones.dens_zone0,
6714 as_rvec_array(state_local->x.data()),
6715 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6716 fr->nbv->grp[eintLocal].kernel_type,
6719 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6722 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6723 0, dd->ncg_home, fr->cg_cm);
6725 copy_ivec(fr->ns->grid->n, ncells_new);
6728 gmx_incons("unimplemented");
6731 bResortAll = bMasterState;
6733 /* Check if we can user the old order and ns grid cell indices
6734 * of the charge groups to sort the charge groups efficiently.
6736 if (ncells_new[XX] != ncells_old[XX] ||
6737 ncells_new[YY] != ncells_old[YY] ||
6738 ncells_new[ZZ] != ncells_old[ZZ])
6745 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6746 gmx_step_str(step, sbuf), dd->ncg_home);
6748 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6749 bResortAll ? -1 : ncg_home_old);
6751 /* After sorting and compacting we set the correct size */
6752 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6754 /* Rebuild all the indices */
6755 ga2la_clear(dd->ga2la);
6758 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6761 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6763 /* Setup up the communication and communicate the coordinates */
6764 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6766 /* Set the indices */
6767 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6769 /* Set the charge group boundaries for neighbor searching */
6770 set_cg_boundaries(&comm->zones);
6772 if (fr->cutoff_scheme == ecutsVERLET)
6774 /* When bSortCG=true, we have already set the size for zone 0 */
6775 set_zones_size(dd, state_local->box, &ddbox,
6776 bSortCG ? 1 : 0, comm->zones.n,
6780 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6783 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6784 -1,as_rvec_array(state_local->x.data()),state_local->box);
6787 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6789 /* Extract a local topology from the global topology */
6790 for (int i = 0; i < dd->ndim; i++)
6792 np[dd->dim[i]] = comm->cd[i].numPulses();
6794 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6795 comm->cellsize_min, np,
6797 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6798 vsite, top_global, top_local);
6800 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6802 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6804 /* Set up the special atom communication */
6805 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6806 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6808 auto range = static_cast<DDAtomRanges::Type>(i);
6811 case DDAtomRanges::Type::Vsites:
6812 if (vsite && vsite->n_intercg_vsite)
6814 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6817 case DDAtomRanges::Type::Constraints:
6818 if (dd->bInterCGcons || dd->bInterCGsettles)
6820 /* Only for inter-cg constraints we need special code */
6821 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6822 constr, ir->nProjOrder,
6823 top_local->idef.il);
6827 gmx_incons("Unknown special atom type setup");
6829 comm->atomRanges.setEnd(range, n);
6832 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6834 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6836 /* Make space for the extra coordinates for virtual site
6837 * or constraint communication.
6839 state_local->natoms = comm->atomRanges.numAtomsTotal();
6841 dd_resize_state(state_local, f, state_local->natoms);
6843 if (fr->haveDirectVirialContributions)
6845 if (vsite && vsite->n_intercg_vsite)
6847 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6851 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6853 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6857 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6866 /* Set the number of atoms required for the force calculation.
6867 * Forces need to be constrained when doing energy
6868 * minimization. For simple simulations we could avoid some
6869 * allocation, zeroing and copying, but this is probably not worth
6870 * the complications and checking.
6872 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6873 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6874 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6877 /* Update atom data for mdatoms and several algorithms */
6878 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6879 nullptr, mdAtoms, constr, vsite, nullptr);
6881 auto mdatoms = mdAtoms->mdatoms();
6882 if (!thisRankHasDuty(cr, DUTY_PME))
6884 /* Send the charges and/or c6/sigmas to our PME only node */
6885 gmx_pme_send_parameters(cr,
6887 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
6888 mdatoms->chargeA, mdatoms->chargeB,
6889 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6890 mdatoms->sigmaA, mdatoms->sigmaB,
6891 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6896 /* Update the local pull groups */
6897 dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
6902 /* Update the local rotation groups */
6903 dd_make_local_rotation_groups(dd, ir->rot);
6906 if (ir->eSwapCoords != eswapNO)
6908 /* Update the local groups needed for ion swapping */
6909 dd_make_local_swap_groups(dd, ir->swap);
6912 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6913 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6915 add_dd_statistics(dd);
6917 /* Make sure we only count the cycles for this DD partitioning */
6918 clear_dd_cycle_counts(dd);
6920 /* Because the order of the atoms might have changed since
6921 * the last vsite construction, we need to communicate the constructing
6922 * atom coordinates again (for spreading the forces this MD step).
6924 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6926 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6928 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6930 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6931 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6932 -1, as_rvec_array(state_local->x.data()), state_local->box);
6935 /* Store the partitioning step */
6936 comm->partition_step = step;
6938 /* Increase the DD partitioning counter */
6940 /* The state currently matches this DD partitioning count, store it */
6941 state_local->ddp_count = dd->ddp_count;
6944 /* The DD master node knows the complete cg distribution,
6945 * store the count so we can possibly skip the cg info communication.
6947 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6950 if (comm->DD_debug > 0)
6952 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6953 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6954 "after partitioning");
6957 wallcycle_stop(wcycle, ewcDOMDEC);
6960 /*! \brief Check whether bonded interactions are missing, if appropriate */
6961 void checkNumberOfBondedInteractions(FILE *fplog,
6963 int totalNumberOfBondedInteractions,
6964 const gmx_mtop_t *top_global,
6965 const gmx_localtop_t *top_local,
6966 const t_state *state,
6967 bool *shouldCheckNumberOfBondedInteractions)
6969 if (*shouldCheckNumberOfBondedInteractions)
6971 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6973 dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6975 *shouldCheckNumberOfBondedInteractions = false;