2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/compat/make_unique.h"
53 #include "gromacs/domdec/collect.h"
54 #include "gromacs/domdec/dlbtiming.h"
55 #include "gromacs/domdec/domdec_network.h"
56 #include "gromacs/domdec/ga2la.h"
57 #include "gromacs/ewald/pme.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/gmxlib/chargegroup.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/gmxlib/nrnb.h"
63 #include "gromacs/gpu_utils/gpu_utils.h"
64 #include "gromacs/hardware/hw_info.h"
65 #include "gromacs/imd/imd.h"
66 #include "gromacs/listed-forces/manage-threading.h"
67 #include "gromacs/math/functions.h"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/math/vectypes.h"
70 #include "gromacs/mdlib/constr.h"
71 #include "gromacs/mdlib/constraintrange.h"
72 #include "gromacs/mdlib/forcerec.h"
73 #include "gromacs/mdlib/gmx_omp_nthreads.h"
74 #include "gromacs/mdlib/lincs.h"
75 #include "gromacs/mdlib/mdatoms.h"
76 #include "gromacs/mdlib/mdrun.h"
77 #include "gromacs/mdlib/mdsetup.h"
78 #include "gromacs/mdlib/nb_verlet.h"
79 #include "gromacs/mdlib/nbnxn_grid.h"
80 #include "gromacs/mdlib/nsgrid.h"
81 #include "gromacs/mdlib/vsite.h"
82 #include "gromacs/mdtypes/commrec.h"
83 #include "gromacs/mdtypes/df_history.h"
84 #include "gromacs/mdtypes/forcerec.h"
85 #include "gromacs/mdtypes/inputrec.h"
86 #include "gromacs/mdtypes/md_enums.h"
87 #include "gromacs/mdtypes/mdatom.h"
88 #include "gromacs/mdtypes/nblist.h"
89 #include "gromacs/mdtypes/state.h"
90 #include "gromacs/pbcutil/ishift.h"
91 #include "gromacs/pbcutil/pbc.h"
92 #include "gromacs/pulling/pull.h"
93 #include "gromacs/pulling/pull_rotation.h"
94 #include "gromacs/swap/swapcoords.h"
95 #include "gromacs/timing/wallcycle.h"
96 #include "gromacs/topology/block.h"
97 #include "gromacs/topology/idef.h"
98 #include "gromacs/topology/ifunc.h"
99 #include "gromacs/topology/mtop_lookup.h"
100 #include "gromacs/topology/mtop_util.h"
101 #include "gromacs/topology/topology.h"
102 #include "gromacs/utility/basedefinitions.h"
103 #include "gromacs/utility/basenetwork.h"
104 #include "gromacs/utility/cstringutil.h"
105 #include "gromacs/utility/exceptions.h"
106 #include "gromacs/utility/fatalerror.h"
107 #include "gromacs/utility/gmxmpi.h"
108 #include "gromacs/utility/qsort_threadsafe.h"
109 #include "gromacs/utility/real.h"
110 #include "gromacs/utility/smalloc.h"
111 #include "gromacs/utility/stringutil.h"
113 #include "atomdistribution.h"
114 #include "cellsizes.h"
115 #include "distribute.h"
116 #include "domdec_constraints.h"
117 #include "domdec_internal.h"
118 #include "domdec_vsite.h"
119 #include "redistribute.h"
/* Maximum number of load-balancing timing entries recorded per domain */
122 #define DD_NLOAD_MAX 9
/* User-readable names for the DLB states, indexed by the edlbs enum */
124 const char *edlbs_names[edlbsNR] = { "off", "auto", "locked", "on", "on" };
126 /* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
129 /* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
/* Lower 16 bits hold the charge-group count; per-dimension forward/backward
 * move flags are packed in the bits above.
 */
130 #define DD_FLAG_NRCG 65535
131 #define DD_FLAG_FW(d) (1<<(16+(d)*2))
132 #define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
134 /* The DD zone order */
135 static const ivec dd_zo[DD_MAXZONE] =
136 {{0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0}, {0, 1, 1}, {0, 0, 1}, {1, 0, 1}, {1, 1, 1}};
138 /* The non-bonded zone-pair setup for domain decomposition
139 * The first number is the i-zone, the second number the first j-zone seen by
140 * this i-zone, the third number the last+1 j-zone seen by this i-zone.
141 * As is, this is for 3D decomposition, where there are 4 i-zones.
142 * With 2D decomposition use only the first 2 i-zones and a last+1 j-zone of 4.
143 * With 1D decomposition use only the first i-zone and a last+1 j-zone of 2.
 */
/* NOTE(review): only the first row of the initializer is visible in this
 * view; the remaining i-zone rows are elided — verify against upstream. */
146 ddNonbondedZonePairRanges[DD_MAXIZONE][3] = {{0, 0, 8},
151 /* Turn on DLB when the load imbalance causes this amount of total loss.
152 * There is a bit of overhead with DLB and it's difficult to achieve
153 * a load imbalance of less than 2% with DLB.
 */
155 #define DD_PERF_LOSS_DLB_ON 0.02
157 /* Warn about imbalance due to PP or PP/PME load imbalance at this loss */
158 #define DD_PERF_LOSS_WARN 0.05
161 /* We check if to turn on DLB at the first and every 100 DD partitionings.
162 * With large imbalance DLB will turn on at the first step, so we can
163 * make the interval so large that the MPI overhead of the check is negligible.
 */
165 static const int c_checkTurnDlbOnInterval = 100;
166 /* We need to check if DLB results in worse performance and then turn it off.
167 * We check this more often than for turning DLB on, because the DLB can scale
168 * the domains very rapidly, so if unlucky the load imbalance can go up quickly
169 * and furthermore, we are already synchronizing often with DLB, so
170 * the overhead of the MPI Bcast is not that high.
 */
172 static const int c_checkTurnDlbOffInterval = 20;
174 /* Forward declaration */
175 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
179 #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
181 static void index2xyz(ivec nc,int ind,ivec xyz)
183 xyz[XX] = ind % nc[XX];
184 xyz[YY] = (ind / nc[XX]) % nc[YY];
185 xyz[ZZ] = ind / (nc[YY]*nc[XX]);
189 static void ddindex2xyz(ivec nc, int ind, ivec xyz)
191 xyz[XX] = ind / (nc[YY]*nc[ZZ]);
192 xyz[YY] = (ind / nc[ZZ]) % nc[YY];
193 xyz[ZZ] = ind % nc[ZZ];
/* Returns the DD rank id for DD cell coordinates c, using either the
 * precomputed lookup table or the MPI Cartesian communicator, depending
 * on the communication setup flags.
 */
196 static int ddcoord2ddnodeid(gmx_domdec_t *dd, ivec c)
201 ddindex = dd_index(dd->nc, c);
202 if (dd->comm->bCartesianPP_PME)
204 ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
206 else if (dd->comm->bCartesianPP)
/* With a PP-only Cartesian communicator, ask MPI for the rank directly */
209 MPI_Cart_rank(dd->mpi_comm_all, c, &ddnodeid);
220 static gmx_bool dynamic_dd_box(const gmx_ddbox_t *ddbox, const t_inputrec *ir)
222 return (ddbox->nboundeddim < DIM || inputrecDynamicBox(ir));
/* Returns the 1-based global atom number for local atom index i,
 * fatally erroring when i is outside the locally known atom range.
 */
225 int ddglatnr(const gmx_domdec_t *dd, int i)
235 if (i >= dd->comm->atomRanges.numAtomsTotal())
237 gmx_fatal(FARGS, "glatnr called with %d, which is larger than the local number of atoms (%d)", i, dd->comm->atomRanges.numAtomsTotal())
/* +1: global atom numbers presented to the user are 1-based */
239 atnr = dd->globalAtomIndices[i] + 1;
245 t_block *dd_charge_groups_global(gmx_domdec_t *dd)
247 return &dd->comm->cgs_gl;
/* Stores the current home charge-group global indices of dd into state,
 * after checking that state and DD partitioning counters agree.
 */
250 void dd_store_state(gmx_domdec_t *dd, t_state *state)
254 if (state->ddp_count != dd->ddp_count)
256 gmx_incons("The MD state does not match the domain decomposition state");
259 state->cg_gl.resize(dd->ncg_home);
260 for (i = 0; i < dd->ncg_home; i++)
262 state->cg_gl[i] = dd->globalAtomGroupIndices[i];
/* Record which partitioning these indices belong to */
265 state->ddp_count_cg_gl = dd->ddp_count;
268 gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
270 return &dd->comm->zones;
/* For charge group icg, returns the j-charge-group range [jcg0, jcg1)
 * this i-group interacts with during neighbor searching, plus the
 * corresponding zone shift ranges shift0/shift1 per DD dimension.
 */
273 void dd_get_ns_ranges(const gmx_domdec_t *dd, int icg,
274 int *jcg0, int *jcg1, ivec shift0, ivec shift1)
276 gmx_domdec_zones_t *zones;
279 zones = &dd->comm->zones;
/* Locate the i-zone that contains icg */
282 while (icg >= zones->izone[izone].cg1)
291 else if (izone < zones->nizone)
293 *jcg0 = zones->izone[izone].jcg0;
297 gmx_fatal(FARGS, "DD icg %d out of range: izone (%d) >= nizone (%d)",
298 icg, izone, zones->nizone);
301 *jcg1 = zones->izone[izone].jcg1;
303 for (d = 0; d < dd->ndim; d++)
306 shift0[dim] = zones->izone[izone].shift0[dim];
307 shift1[dim] = zones->izone[izone].shift1[dim];
/* With triclinic boxes or DLB the zone bounds are less regular */
308 if (dd->comm->tric_dir[dim] || (isDlbOn(dd->comm) && d > 0))
310 /* A conservative approach, this can be optimized */
317 int dd_numHomeAtoms(const gmx_domdec_t &dd)
319 return dd.comm->atomRanges.numHomeAtoms();
322 int dd_natoms_mdatoms(const gmx_domdec_t *dd)
324 /* We currently set mdatoms entries for all atoms:
325 * local + non-local + communicated for vsite + constraints
328 return dd->comm->atomRanges.numAtomsTotal();
331 int dd_natoms_vsite(const gmx_domdec_t *dd)
333 return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
336 void dd_get_constraint_range(const gmx_domdec_t *dd, int *at_start, int *at_end)
338 *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
339 *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
/* Communicates the coordinates x of the halo atoms along all DD
 * dimensions (backward direction), applying box shifts — and for screw
 * PBC a rotation — where the communication crosses a periodic boundary.
 */
342 void dd_move_x(gmx_domdec_t *dd,
344 gmx::ArrayRef<gmx::RVec> x,
345 gmx_wallcycle *wcycle)
347 wallcycle_start(wcycle, ewcMOVEX);
350 gmx_domdec_comm_t *comm;
351 gmx_domdec_comm_dim_t *cd;
352 rvec shift = {0, 0, 0};
353 gmx_bool bPBC, bScrew;
357 const BlockRanges &atomGroups = dd->atomGroups();
360 nat_tot = comm->atomRanges.numHomeAtoms();
361 for (int d = 0; d < dd->ndim; d++)
/* Only the first cell along this dimension communicates across PBC */
363 bPBC = (dd->ci[dd->dim[d]] == 0);
364 bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
367 copy_rvec(box[dd->dim[d]], shift);
370 for (const gmx_domdec_ind_t &ind : cd->ind)
372 gmx::ArrayRef<const int> index = ind.index;
373 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
374 gmx::ArrayRef<gmx::RVec> &sendBuffer = sendBufferAccess.buffer;
/* Case 1: no PBC shift needed, plain copy into the send buffer */
378 for (int i = 0; i < ind.nsend[nzone]; i++)
380 int at0 = atomGroups.index[index[i]];
381 int at1 = atomGroups.index[index[i] + 1];
382 for (int j = at0; j < at1; j++)
384 sendBuffer[n] = x[j];
/* Case 2: shift the coordinates by one box vector */
391 for (int i = 0; i < ind.nsend[nzone]; i++)
393 int at0 = atomGroups.index[index[i]];
394 int at1 = atomGroups.index[index[i] + 1];
395 for (int j = at0; j < at1; j++)
397 /* We need to shift the coordinates */
398 for (int d = 0; d < DIM; d++)
400 sendBuffer[n][d] = x[j][d] + shift[d];
/* Case 3: screw PBC — shift along x and mirror y/z */
408 for (int i = 0; i < ind.nsend[nzone]; i++)
410 int at0 = atomGroups.index[index[i]];
411 int at1 = atomGroups.index[index[i]+1];
412 for (int j = at0; j < at1; j++)
415 sendBuffer[n][XX] = x[j][XX] + shift[XX];
417 * This operation requires a special shift force
418 * treatment, which is performed in calc_vir.
420 sendBuffer[n][YY] = box[YY][YY] - x[j][YY];
421 sendBuffer[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
427 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
429 gmx::ArrayRef<gmx::RVec> receiveBuffer;
430 if (cd->receiveInPlace)
/* Receive directly into x after the atoms already present */
432 receiveBuffer = gmx::arrayRefFromArray(x.data() + nat_tot, ind.nrecv[nzone + 1]);
436 receiveBuffer = receiveBufferAccess.buffer;
438 /* Send and receive the coordinates */
439 ddSendrecv(dd, d, dddirBackward,
440 sendBuffer, receiveBuffer);
442 if (!cd->receiveInPlace)
/* Scatter the staging buffer out to the per-zone atom ranges */
445 for (int zone = 0; zone < nzone; zone++)
447 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
449 x[i] = receiveBuffer[j++];
453 nat_tot += ind.nrecv[nzone+1];
458 wallcycle_stop(wcycle, ewcMOVEX);
/* Communicates and reduces the forces f of halo atoms back to their home
 * ranks (forward direction, reverse of dd_move_x), optionally
 * accumulating shift forces in fshift and applying the screw-PBC force
 * rotation where needed.
 */
461 void dd_move_f(gmx_domdec_t *dd,
462 gmx::ArrayRef<gmx::RVec> f,
464 gmx_wallcycle *wcycle)
466 wallcycle_start(wcycle, ewcMOVEF);
469 gmx_domdec_comm_t *comm;
470 gmx_domdec_comm_dim_t *cd;
473 gmx_bool bShiftForcesNeedPbc, bScrew;
477 const BlockRanges &atomGroups = dd->atomGroups();
479 nzone = comm->zones.n/2;
480 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
/* Walk the dimensions in reverse of the order used by dd_move_x */
481 for (int d = dd->ndim-1; d >= 0; d--)
483 /* Only forces in domains near the PBC boundaries need to
484 consider PBC in the treatment of fshift */
485 bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
486 bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
487 if (fshift == nullptr && !bScrew)
489 bShiftForcesNeedPbc = FALSE;
491 /* Determine which shift vector we need */
497 for (int p = cd->numPulses() - 1; p >= 0; p--)
499 const gmx_domdec_ind_t &ind = cd->ind[p];
500 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
501 gmx::ArrayRef<gmx::RVec> &receiveBuffer = receiveBufferAccess.buffer;
503 nat_tot -= ind.nrecv[nzone+1];
505 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
507 gmx::ArrayRef<gmx::RVec> sendBuffer;
508 if (cd->receiveInPlace)
/* Send straight out of f, no staging copy needed */
510 sendBuffer = gmx::arrayRefFromArray(f.data() + nat_tot, ind.nrecv[nzone + 1]);
514 sendBuffer = sendBufferAccess.buffer;
516 for (int zone = 0; zone < nzone; zone++)
518 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
520 sendBuffer[j++] = f[i];
524 /* Communicate the forces */
525 ddSendrecv(dd, d, dddirForward,
526 sendBuffer, receiveBuffer);
527 gmx::ArrayRef<const int> index = ind.index;
528 /* Add the received forces */
530 if (!bShiftForcesNeedPbc)
/* Plain accumulation, no shift-force bookkeeping */
532 for (int i = 0; i < ind.nsend[nzone]; i++)
534 int at0 = atomGroups.index[index[i]];
535 int at1 = atomGroups.index[index[i] + 1];
536 for (int j = at0; j < at1; j++)
538 for (int d = 0; d < DIM; d++)
540 f[j][d] += receiveBuffer[n][d];
548 /* fshift should always be defined if this function is
549 * called when bShiftForcesNeedPbc is true */
550 assert(NULL != fshift);
551 for (int i = 0; i < ind.nsend[nzone]; i++)
553 int at0 = atomGroups.index[index[i]];
554 int at1 = atomGroups.index[index[i] + 1];
555 for (int j = at0; j < at1; j++)
557 for (int d = 0; d < DIM; d++)
559 f[j][d] += receiveBuffer[n][d];
561 /* Add this force to the shift force */
562 for (int d = 0; d < DIM; d++)
564 fshift[is][d] += receiveBuffer[n][d];
/* Screw PBC: forces from the rotated image get y/z sign-flipped */
572 for (int i = 0; i < ind.nsend[nzone]; i++)
574 int at0 = atomGroups.index[index[i]];
575 int at1 = atomGroups.index[index[i] + 1];
576 for (int j = at0; j < at1; j++)
578 /* Rotate the force */
579 f[j][XX] += receiveBuffer[n][XX];
580 f[j][YY] -= receiveBuffer[n][YY];
581 f[j][ZZ] -= receiveBuffer[n][ZZ];
584 /* Add this force to the shift force */
585 for (int d = 0; d < DIM; d++)
587 fshift[is][d] += receiveBuffer[n][d];
597 wallcycle_stop(wcycle, ewcMOVEF);
600 /* Convenience function for extracting a real buffer from an rvec buffer
602 * To reduce the number of temporary communication buffers and avoid
603 * cache pollution, we reuse gmx::RVec buffers for storing reals.
604 * This function returns a real buffer reference with the same number
605 * of elements as the gmx::RVec buffer (so 1/3 of the size in bytes).
 */
607 static gmx::ArrayRef<real>
608 realArrayRefFromRvecArrayRef(gmx::ArrayRef<gmx::RVec> arrayRef)
610 return gmx::arrayRefFromArray(as_rvec_array(arrayRef.data())[0],
/* Spreads the per-atom real values v from home atoms out to the halo
 * atoms on neighboring ranks (same communication pattern as dd_move_x,
 * but for a single real per atom).
 */
614 void dd_atom_spread_real(gmx_domdec_t *dd, real v[])
617 gmx_domdec_comm_t *comm;
618 gmx_domdec_comm_dim_t *cd;
622 const BlockRanges &atomGroups = dd->atomGroups();
625 nat_tot = comm->atomRanges.numHomeAtoms();
626 for (int d = 0; d < dd->ndim; d++)
629 for (const gmx_domdec_ind_t &ind : cd->ind)
631 gmx::ArrayRef<const int> index = ind.index;
633 /* Note: We provision for RVec instead of real, so a factor of 3
634 * more than needed. The buffer actually already has this size
635 * and we pass a plain pointer below, so this does not matter.
 */
637 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
638 gmx::ArrayRef<real> sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
/* Pack the values of all atoms in the send charge groups */
640 for (int i = 0; i < ind.nsend[nzone]; i++)
642 int at0 = atomGroups.index[index[i]];
643 int at1 = atomGroups.index[index[i] + 1];
644 for (int j = at0; j < at1; j++)
646 sendBuffer[n++] = v[j];
650 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
652 gmx::ArrayRef<real> receiveBuffer;
653 if (cd->receiveInPlace)
655 receiveBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
659 receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
661 /* Send and receive the data */
662 ddSendrecv(dd, d, dddirBackward,
663 sendBuffer, receiveBuffer);
664 if (!cd->receiveInPlace)
/* Scatter the staging buffer out to the per-zone atom ranges */
667 for (int zone = 0; zone < nzone; zone++)
669 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
671 v[i] = receiveBuffer[j++];
675 nat_tot += ind.nrecv[nzone+1];
/* Sums the per-atom real values v of halo atoms back onto the home
 * atoms (same communication pattern as dd_move_f, but for a single
 * real per atom).
 */
681 void dd_atom_sum_real(gmx_domdec_t *dd, real v[])
684 gmx_domdec_comm_t *comm;
685 gmx_domdec_comm_dim_t *cd;
689 const gmx::BlockRanges &atomGroups = dd->atomGroups();
691 nzone = comm->zones.n/2;
692 nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
/* Walk the dimensions in reverse of the order used for spreading */
693 for (int d = dd->ndim-1; d >= 0; d--)
696 for (int p = cd->numPulses() - 1; p >= 0; p--)
698 const gmx_domdec_ind_t &ind = cd->ind[p];
700 /* Note: We provision for RVec instead of real, so a factor of 3
701 * more than needed. The buffer actually already has this size
702 * and we typecast, so this works as intended.
 */
704 DDBufferAccess<gmx::RVec> receiveBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
705 gmx::ArrayRef<real> receiveBuffer = realArrayRefFromRvecArrayRef(receiveBufferAccess.buffer);
706 nat_tot -= ind.nrecv[nzone + 1];
708 DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer2, cd->receiveInPlace ? 0 : ind.nrecv[nzone + 1]);
710 gmx::ArrayRef<real> sendBuffer;
711 if (cd->receiveInPlace)
713 sendBuffer = gmx::arrayRefFromArray(v + nat_tot, ind.nrecv[nzone + 1]);
717 sendBuffer = realArrayRefFromRvecArrayRef(sendBufferAccess.buffer);
/* Gather the per-zone values into the staging send buffer */
719 for (int zone = 0; zone < nzone; zone++)
721 for (int i = ind.cell2at0[zone]; i < ind.cell2at1[zone]; i++)
723 sendBuffer[j++] = v[i];
727 /* Communicate the forces */
728 ddSendrecv(dd, d, dddirForward,
729 sendBuffer, receiveBuffer);
730 gmx::ArrayRef<const int> index = ind.index;
731 /* Add the received forces */
733 for (int i = 0; i < ind.nsend[nzone]; i++)
735 int at0 = atomGroups.index[index[i]];
736 int at1 = atomGroups.index[index[i] + 1];
737 for (int j = at0; j < at1; j++)
739 v[j] += receiveBuffer[n];
748 static void print_ddzone(FILE *fp, int d, int i, int j, gmx_ddzone_t *zone)
750 fprintf(fp, "zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
752 zone->min0, zone->max1,
753 zone->mch0, zone->mch0,
754 zone->p1_0, zone->p1_1);
/* Maximum number of zone structs exchanged per call, and the number of
 * rvecs used to serialize one gmx_ddzone_t.
 */
758 #define DDZONECOMM_MAXZONE 5
759 #define DDZONECOMM_BUFSIZE 3
/* Exchanges n_s zone structs from buf_s for n_r zone structs into buf_r
 * with the neighbor along DD dimension ddimind in the given direction,
 * packing each struct into DDZONECOMM_BUFSIZE rvecs for transport.
 */
761 static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
762 int ddimind, int direction,
763 gmx_ddzone_t *buf_s, int n_s,
764 gmx_ddzone_t *buf_r, int n_r)
766 #define ZBS DDZONECOMM_BUFSIZE
767 rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
768 rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
/* Serialize: 7 fields per zone, padded with zeros to 3 rvecs */
771 for (i = 0; i < n_s; i++)
773 vbuf_s[i*ZBS ][0] = buf_s[i].min0;
774 vbuf_s[i*ZBS ][1] = buf_s[i].max1;
775 vbuf_s[i*ZBS ][2] = buf_s[i].min1;
776 vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
777 vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
778 vbuf_s[i*ZBS+1][2] = 0;
779 vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
780 vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
781 vbuf_s[i*ZBS+2][2] = 0;
784 ddSendrecv(dd, ddimind, direction,
/* Deserialize the received rvecs back into zone structs */
788 for (i = 0; i < n_r; i++)
790 buf_r[i].min0 = vbuf_r[i*ZBS ][0];
791 buf_r[i].max1 = vbuf_r[i*ZBS ][1];
792 buf_r[i].min1 = vbuf_r[i*ZBS ][2];
793 buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
794 buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
795 buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
796 buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
/* With dynamic load balancing, communicates the (staggered) cell
 * boundaries and required communication heights between neighboring DD
 * cells along all decomposition dimensions, and updates the
 * neighbor-search bounding box cell_ns_x0/cell_ns_x1 accordingly.
 */
802 static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
803 rvec cell_ns_x0, rvec cell_ns_x1)
805 int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
807 gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
808 gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
809 gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
810 rvec extr_s[2], extr_r[2];
812 real dist_d, c = 0, det;
813 gmx_domdec_comm_t *comm;
/* Initialize the local zone bounds from the current cell bounds */
818 for (d = 1; d < dd->ndim; d++)
821 zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
822 zp->min0 = cell_ns_x0[dim];
823 zp->max1 = cell_ns_x1[dim];
824 zp->min1 = cell_ns_x1[dim];
825 zp->mch0 = cell_ns_x0[dim];
826 zp->mch1 = cell_ns_x1[dim];
827 zp->p1_0 = cell_ns_x0[dim];
828 zp->p1_1 = cell_ns_x1[dim];
831 for (d = dd->ndim-2; d >= 0; d--)
834 bPBC = (dim < ddbox->npbcdim);
836 /* Use an rvec to store two reals */
837 extr_s[d][0] = comm->cell_f0[d+1];
838 extr_s[d][1] = comm->cell_f1[d+1];
839 extr_s[d][2] = comm->cell_f1[d+1];
842 /* Store the extremes in the backward sending buffer,
843 * so they get updated separately from the forward communication.
 */
845 for (d1 = d; d1 < dd->ndim-1; d1++)
847 /* We invert the order to be able to use the same loop for buf_e */
848 buf_s[pos].min0 = extr_s[d1][1];
849 buf_s[pos].max1 = extr_s[d1][0];
850 buf_s[pos].min1 = extr_s[d1][2];
853 /* Store the cell corner of the dimension we communicate along */
854 buf_s[pos].p1_0 = comm->cell_x0[dim];
859 buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
862 if (dd->ndim == 3 && d == 0)
864 buf_s[pos] = comm->zone_d2[0][1];
866 buf_s[pos] = comm->zone_d1[0];
870 /* We only need to communicate the extremes
871 * in the forward direction
 */
873 npulse = comm->cd[d].numPulses();
876 /* Take the minimum to avoid double communication */
877 npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
881 /* Without PBC we should really not communicate over
882 * the boundaries, but implementing that complicates
883 * the communication setup and therefore we simply
884 * do all communication, but ignore some data.
 */
888 for (p = 0; p < npulse_min; p++)
890 /* Communicate the extremes forward */
891 bUse = (bPBC || dd->ci[dim] > 0);
893 ddSendrecv(dd, d, dddirForward,
894 extr_s+d, dd->ndim-d-1,
895 extr_r+d, dd->ndim-d-1);
/* Merge the received extremes into the local ones */
899 for (d1 = d; d1 < dd->ndim-1; d1++)
901 extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
902 extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
903 extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
909 for (p = 0; p < npulse; p++)
911 /* Communicate all the zone information backward */
912 bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
914 dd_sendrecv_ddzone(dd, d, dddirBackward,
921 for (d1 = d+1; d1 < dd->ndim; d1++)
923 /* Determine the decrease of maximum required
924 * communication height along d1 due to the distance along d,
925 * this avoids a lot of useless atom communication.
 */
927 dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
929 if (ddbox->tric_dir[dim])
931 /* c is the off-diagonal coupling between the cell planes
932 * along directions d and d1.
 */
934 c = ddbox->v[dim][dd->dim[d1]][dim];
940 det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
943 dh[d1] = comm->cutoff - (c*dist_d + std::sqrt(det))/(1 + c*c);
947 /* A negative value signals out of range */
953 /* Accumulate the extremes over all pulses */
954 for (i = 0; i < buf_size; i++)
964 buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
965 buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
966 buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
969 if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
977 if (bUse && dh[d1] >= 0)
/* Reduce the communication height by the already covered distance */
979 buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
980 buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
983 /* Copy the received buffer to the send buffer,
984 * to pass the data through with the next pulse.
 */
988 if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
989 (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
991 /* Store the extremes */
994 for (d1 = d; d1 < dd->ndim-1; d1++)
996 extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
997 extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
998 extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
1002 if (d == 1 || (d == 0 && dd->ndim == 3))
1004 for (i = d; i < 2; i++)
1006 comm->zone_d2[1-d][i] = buf_e[pos];
1012 comm->zone_d1[1] = buf_e[pos];
/* Finally widen the neighbor-search bounds with the zone extremes */
1022 for (i = 0; i < 2; i++)
1026 print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
1028 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
1029 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
1035 for (i = 0; i < 2; i++)
1037 for (j = 0; j < 2; j++)
1041 print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
1043 cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
1044 cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
1048 for (d = 1; d < dd->ndim; d++)
1050 comm->cell_f_max0[d] = extr_s[d-1][0];
1051 comm->cell_f_min1[d] = extr_s[d-1][1];
1054 fprintf(debug, "Cell fraction d %d, max0 %f, min1 %f\n",
1055 d, comm->cell_f_max0[d], comm->cell_f_min1[d]);
/* Writes the DD cell grid for the given step as a PDB file: the corners
 * of each rank's cell become pseudo-atoms (with the cell volume in the
 * B-factor column) connected by CONECT records into a wireframe box.
 * Only the master rank writes, after gathering all cell bounds.
 */
1060 static void write_dd_grid_pdb(const char *fn, gmx_int64_t step,
1061 gmx_domdec_t *dd, matrix box, gmx_ddbox_t *ddbox)
1063 rvec grid_s[2], *grid_r = nullptr, cx, r;
1064 char fname[STRLEN], buf[22];
1066 int a, i, d, z, y, x;
1070 copy_rvec(dd->comm->cell_x0, grid_s[0]);
1071 copy_rvec(dd->comm->cell_x1, grid_s[1]);
1075 snew(grid_r, 2*dd->nnodes);
/* Gather every rank's lower/upper cell corners on the master */
1078 dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : nullptr);
1082 for (d = 0; d < DIM; d++)
1084 for (i = 0; i < DIM; i++)
1092 if (d < ddbox->npbcdim && dd->nc[d] > 1)
/* Triclinic correction factor for the cell corner coordinates */
1094 tric[d][i] = box[i][d]/box[i][i];
1103 sprintf(fname, "%s_%s.pdb", fn, gmx_step_str(step, buf));
1104 out = gmx_fio_fopen(fname, "w");
1105 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1107 for (i = 0; i < dd->nnodes; i++)
/* Relative cell volume: ratio of this cell to the average cell volume */
1109 vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
1110 for (d = 0; d < DIM; d++)
1112 vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
/* Emit the 8 corners of the cell as CA pseudo-atoms */
1114 for (z = 0; z < 2; z++)
1116 for (y = 0; y < 2; y++)
1118 for (x = 0; x < 2; x++)
1120 cx[XX] = grid_r[i*2+x][XX];
1121 cx[YY] = grid_r[i*2+y][YY];
1122 cx[ZZ] = grid_r[i*2+z][ZZ];
1124 gmx_fprintf_pdb_atomline(out, epdbATOM, a++, "CA", ' ', "GLY", ' ', i+1, ' ',
1125 10*r[XX], 10*r[YY], 10*r[ZZ], 1.0, vol, "");
/* Connect the corners into the 12 edges of the cell box */
1129 for (d = 0; d < DIM; d++)
1131 for (x = 0; x < 4; x++)
1135 case 0: y = 1 + i*8 + 2*x; break;
1136 case 1: y = 1 + i*8 + 2*x - (x % 2); break;
1137 case 2: y = 1 + i*8 + x; break;
1139 fprintf(out, "%6s%5d%5d\n", "CONECT", y, y+(1<<d));
1143 gmx_fio_fclose(out);
/* Writes the local atoms of this rank as a PDB file for debugging,
 * coloring each atom (via the B-factor column) by the DD zone it
 * belongs to; vsite- and constraint-communicated atoms get the two
 * values past the last zone.
 */
1148 void write_dd_pdb(const char *fn, gmx_int64_t step, const char *title,
1149 const gmx_mtop_t *mtop, const t_commrec *cr,
1150 int natoms, const rvec x[], const matrix box)
1152 char fname[STRLEN], buf[22];
1155 const char *atomname, *resname;
1161 natoms = dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
/* One file per rank, tagged with step and simulation node id */
1164 sprintf(fname, "%s_%s_n%d.pdb", fn, gmx_step_str(step, buf), cr->sim_nodeid);
1166 out = gmx_fio_fopen(fname, "w");
1168 fprintf(out, "TITLE %s\n", title);
1169 gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
1171 for (int i = 0; i < natoms; i++)
1173 int ii = dd->globalAtomIndices[i];
1174 mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
1177 if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Zones))
/* Advance to the zone containing local atom i */
1180 while (i >= dd->atomGroups().index[dd->comm->zones.cg_range[c+1]])
1186 else if (i < dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites))
1188 b = dd->comm->zones.n;
1192 b = dd->comm->zones.n + 1;
1194 gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, atomname, ' ', resname, ' ', resnr, ' ',
1195 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, b, "");
1197 fprintf(out, "TER\n");
1199 gmx_fio_fclose(out);
/* Returns the cut-off distance available for multi-body (bonded)
 * interactions between charge groups, limited by the minimum cell size
 * and the pairlist cut-off.
 */
1202 real dd_cutoff_multibody(const gmx_domdec_t *dd)
1204 gmx_domdec_comm_t *comm;
1211 if (comm->bInterCGBondeds)
1213 if (comm->cutoff_mbody > 0)
1215 r = comm->cutoff_mbody;
1219 /* cutoff_mbody=0 means we do not have DLB */
/* Without DLB the limit is the smallest cell size over all DD dims */
1220 r = comm->cellsize_min[dd->dim[0]];
1221 for (di = 1; di < dd->ndim; di++)
1223 r = std::min(r, comm->cellsize_min[dd->dim[di]]);
1225 if (comm->bBondComm)
1227 r = std::max(r, comm->cutoff_mbody);
1231 r = std::min(r, comm->cutoff);
1239 real dd_cutoff_twobody(const gmx_domdec_t *dd)
1243 r_mb = dd_cutoff_multibody(dd);
1245 return std::max(dd->comm->cutoff, r_mb);
1249 static void dd_cart_coord2pmecoord(const gmx_domdec_t *dd, const ivec coord,
1254 nc = dd->nc[dd->comm->cartpmedim];
1255 ntot = dd->comm->ntot[dd->comm->cartpmedim];
1256 copy_ivec(coord, coord_pme);
1257 coord_pme[dd->comm->cartpmedim] =
1258 nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
1261 static int ddindex2pmeindex(const gmx_domdec_t *dd, int ddindex)
1266 npme = dd->comm->npmenodes;
1268 /* Here we assign a PME node to communicate with this DD node
1269 * by assuming that the major index of both is x.
1270 * We add cr->npmenodes/2 to obtain an even distribution.
1272 return (ddindex*npme + npme/2)/npp;
/* Allocates and returns the list of PME rank ids for an interleaved
 * PP/PME rank layout: a PME rank is inserted after each run of PP ranks
 * that map to the same PME index. Caller owns the returned array.
 */
1275 static int *dd_interleaved_pme_ranks(const gmx_domdec_t *dd)
1280 snew(pme_rank, dd->comm->npmenodes);
1282 for (i = 0; i < dd->nnodes; i++)
1284 p0 = ddindex2pmeindex(dd, i);
1285 p1 = ddindex2pmeindex(dd, i+1);
/* A new PME rank starts where the mapped PME index increases */
1286 if (i+1 == dd->nnodes || p1 > p0)
1290 fprintf(debug, "pme_rank[%d] = %d\n", n, i+1+n);
1292 pme_rank[n] = i + 1 + n;
/* Returns the PME slab/rank index corresponding to DD cell (x, y, z) */
1300 static int gmx_ddcoord2pmeindex(const t_commrec *cr, int x, int y, int z)
/* NOTE(review): the bCartesian branch below references names not declared
 * in this file (dd_coords2pmecoords, dd->ntot, dd->cartpmedim on dd rather
 * than dd->comm); presumably this is dead/disabled code — verify upstream.
 */
1308 if (dd->comm->bCartesian) {
1309 gmx_ddindex2xyz(dd->nc,ddindex,coords);
1310 dd_coords2pmecoords(dd,coords,coords_pme);
1311 copy_ivec(dd->ntot,nc);
1312 nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1313 coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
1315 slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
1317 slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
/* Non-Cartesian case: map the linear DD index through ddindex2pmeindex */
1323 slab = ddindex2pmeindex(dd, dd_index(dd->nc, coords));
/* Returns the simulation rank id of the PP rank with DD cell
 * coordinates (x, y, z), taking the communicator layout into account.
 */
1328 static int ddcoord2simnodeid(const t_commrec *cr, int x, int y, int z)
1330 gmx_domdec_comm_t *comm;
1332 int ddindex, nodeid = -1;
1334 comm = cr->dd->comm;
1339 if (comm->bCartesianPP_PME)
/* Full Cartesian PP+PME layout: MPI provides the rank directly */
1342 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &nodeid);
1347 ddindex = dd_index(cr->dd->nc, coords);
1348 if (comm->bCartesianPP)
1350 nodeid = comm->ddindex2simnodeid[ddindex];
/* Interleaved layout: offset by the number of preceding PME ranks */
1356 nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
/* Returns the simulation rank id of the PME-only rank that serves the
 * PP rank sim_nodeid (or leaves the result unset for non-PP ranks),
 * handling the Cartesian, PP-Cartesian and interleaved layouts.
 */
1368 static int dd_simnode2pmenode(const gmx_domdec_t *dd,
1369 const t_commrec gmx_unused *cr,
1374 const gmx_domdec_comm_t *comm = dd->comm;
1376 /* This assumes a uniform x domain decomposition grid cell size */
1377 if (comm->bCartesianPP_PME)
1380 ivec coord, coord_pme;
1381 MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
1382 if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1384 /* This is a PP node */
1385 dd_cart_coord2pmecoord(dd, coord, coord_pme);
1386 MPI_Cart_rank(cr->mpi_comm_mysim, coord_pme, &pmenode);
1390 else if (comm->bCartesianPP)
1392 if (sim_nodeid < dd->nnodes)
/* PME ranks are numbered after all the PP ranks */
1394 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
1399 /* This assumes DD cells with identical x coordinates
1400 * are numbered sequentially.
 */
1402 if (dd->comm->pmenodes == nullptr)
1404 if (sim_nodeid < dd->nnodes)
1406 /* The DD index equals the nodeid */
1407 pmenode = dd->nnodes + ddindex2pmeindex(dd, sim_nodeid);
/* Explicit PME rank list: find the first PME rank above sim_nodeid */
1413 while (sim_nodeid > dd->comm->pmenodes[i])
1417 if (sim_nodeid < dd->comm->pmenodes[i])
1419 pmenode = dd->comm->pmenodes[i];
/* Returns the number of PME domains along x and y as stored in the
 * DD communication setup.
 */
1427 NumPmeDomains getNumPmeDomains(const gmx_domdec_t *dd)
1432 dd->comm->npmenodes_x, dd->comm->npmenodes_y
/* Returns the list of simulation rank ids of all PP ranks that send
 * their charges to PME rank pmenodeid, by scanning the full DD grid.
 */
1443 std::vector<int> get_pme_ddranks(const t_commrec *cr, int pmenodeid)
1447 ivec coord, coord_pme;
1451 std::vector<int> ddranks;
/* Reserve the average number of PP ranks served per PME rank */
1452 ddranks.reserve((dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
1454 for (x = 0; x < dd->nc[XX]; x++)
1456 for (y = 0; y < dd->nc[YY]; y++)
1458 for (z = 0; z < dd->nc[ZZ]; z++)
1460 if (dd->comm->bCartesianPP_PME)
1465 dd_cart_coord2pmecoord(dd, coord, coord_pme);
/* Match when this PP cell maps onto our own PME coordinates */
1466 if (dd->ci[XX] == coord_pme[XX] &&
1467 dd->ci[YY] == coord_pme[YY] &&
1468 dd->ci[ZZ] == coord_pme[ZZ])
1470 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
1475 /* The slab corresponds to the nodeid in the PME group */
1476 if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
1478 ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
/* Returns whether this PP rank is the designated one to send virial and
 * energy to its PME rank: with fewer PME than PP ranks, only the last PP
 * rank mapped to a given PME rank sends. When the next rank (Cartesian
 * neighbor or sim_nodeid+1) maps to the same PME rank, this rank is not
 * the last and (in elided code, presumably) bReceive is set FALSE.
 */
1487 static gmx_bool receive_vir_ener(const gmx_domdec_t *dd, const t_commrec *cr)
1489 gmx_bool bReceive = TRUE;
1491 if (cr->npmenodes < dd->nnodes)
1493 gmx_domdec_comm_t *comm = dd->comm;
1494 if (comm->bCartesianPP_PME)
1497 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1499 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
1500 coords[comm->cartpmedim]++;
1501 if (coords[comm->cartpmedim] < dd->nc[comm->cartpmedim])
1504 MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
1505 if (dd_simnode2pmenode(dd, cr, rank) == pmenode)
1507 /* This is not the last PP node for pmenode */
1512 GMX_RELEASE_ASSERT(false, "Without MPI we should not have Cartesian PP-PME with #PMEnodes < #DDnodes");
1517 int pmenode = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
1518 if (cr->sim_nodeid+1 < cr->nnodes &&
1519 dd_simnode2pmenode(dd, cr, cr->sim_nodeid+1) == pmenode)
1521 /* This is not the last PP node for pmenode */
/* Sets all zone charge-group ranges as if only the home zone is filled:
 * range [0] starts at 0 and every subsequent boundary equals ncg_home,
 * so all non-home zones are empty. Also resets zone_ncg1[0].
 */
1530 static void set_zones_ncg_home(gmx_domdec_t *dd)
1532 gmx_domdec_zones_t *zones;
1535 zones = &dd->comm->zones;
1537 zones->cg_range[0] = 0;
1538 for (i = 1; i < zones->n+1; i++)
1540 zones->cg_range[i] = dd->ncg_home;
1542 /* zone_ncg1[0] should always be equal to ncg_home */
1543 dd->comm->zone_ncg1[0] = dd->ncg_home;
/* Restores the local atom-group state from a checkpointed t_state:
 * copies the global charge-group indices from state->cg_gl and rebuilds
 * the local group-to-atom index using the global group sizes in
 * gcgs_index. Updates ncg_home, the home atom range and the zone ranges.
 */
1546 static void restoreAtomGroups(gmx_domdec_t *dd,
1547 const int *gcgs_index, const t_state *state)
1549 gmx::ArrayRef<const int> atomGroupsState = state->cg_gl;
1551 std::vector<int> &globalAtomGroupIndices = dd->globalAtomGroupIndices;
1552 gmx::BlockRanges &atomGroups = dd->atomGroups_;
1554 globalAtomGroupIndices.resize(atomGroupsState.size());
1555 atomGroups.index.resize(atomGroupsState.size() + 1);
1557 /* Copy back the global charge group indices from state
1558 * and rebuild the local charge group to atom index.
1561 for (unsigned int i = 0; i < atomGroupsState.size(); i++)
1563 atomGroups.index[i] = atomIndex;
1564 const int atomGroupGlobal = atomGroupsState[i];
1565 globalAtomGroupIndices[i] = atomGroupGlobal;
/* Group size = difference of consecutive global group offsets */
1566 atomIndex += gcgs_index[atomGroupGlobal + 1] - gcgs_index[atomGroupGlobal];
1568 atomGroups.index[atomGroupsState.size()] = atomIndex;
1570 dd->ncg_home = atomGroupsState.size();
1571 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, atomIndex);
1573 set_zones_ncg_home(dd);
/* Fills fr->cginfo for local charge groups [cg0,cg1) from the per-
 * molecule-block cginfo, and, when a bLocalCG flag array is provided,
 * marks the corresponding global groups as local.
 */
1576 static void dd_set_cginfo(gmx::ArrayRef<const int> index_gl, int cg0, int cg1,
1577 t_forcerec *fr, char *bLocalCG)
1579 cginfo_mb_t *cginfo_mb;
1585 cginfo_mb = fr->cginfo_mb;
1586 cginfo = fr->cginfo;
1588 for (cg = cg0; cg < cg1; cg++)
1590 cginfo[cg] = ddcginfo(cginfo_mb, index_gl[cg]);
1594 if (bLocalCG != nullptr)
1596 for (cg = cg0; cg < cg1; cg++)
1598 bLocalCG[index_gl[cg]] = TRUE;
/* Builds the local-to-global atom index (globalAtomIndices) and the
 * global-to-local lookup (ga2la) for all zones, starting at charge group
 * cg_start. With charge groups (bCGs) every atom of a group is entered;
 * otherwise group index == atom index. Groups beyond zone_ncg1 are more
 * than one communication pulse away (signalled via zone1 in elided code).
 */
1603 static void make_dd_indices(gmx_domdec_t *dd,
1604 const int *gcgs_index, int cg_start)
1606 const int numZones = dd->comm->zones.n;
1607 const int *zone2cg = dd->comm->zones.cg_range;
1608 const int *zone_ncg1 = dd->comm->zone_ncg1;
1609 gmx::ArrayRef<const int> globalAtomGroupIndices = dd->globalAtomGroupIndices;
1610 const gmx_bool bCGs = dd->comm->bCGs;
1612 std::vector<int> &globalAtomIndices = dd->globalAtomIndices;
/* Sanity check: the home zone boundary must match ncg_home */
1614 if (zone2cg[1] != dd->ncg_home)
1616 gmx_incons("dd->ncg_zone is not up to date");
1619 /* Make the local to global and global to local atom index */
1620 int a = dd->atomGroups().index[cg_start];
1621 globalAtomIndices.resize(a);
1622 for (int zone = 0; zone < numZones; zone++)
1631 cg0 = zone2cg[zone];
1633 int cg1 = zone2cg[zone+1];
1634 int cg1_p1 = cg0 + zone_ncg1[zone];
1636 for (int cg = cg0; cg < cg1; cg++)
1641 /* Signal that this cg is from more than one pulse away */
1644 int cg_gl = globalAtomGroupIndices[cg];
1647 for (int a_gl = gcgs_index[cg_gl]; a_gl < gcgs_index[cg_gl+1]; a_gl++)
1649 globalAtomIndices.push_back(a_gl);
1650 ga2la_set(dd->ga2la, a_gl, a, zone1);
1656 globalAtomIndices.push_back(cg_gl);
1657 ga2la_set(dd->ga2la, cg_gl, a, zone1);
/* Debug check of the bLocalCG flag array against the list of local
 * global atom-group indices: every local group must be flagged, and the
 * total number of flagged groups (ngl, counted in elided code over all
 * ncg_sys groups) must equal the number of local groups. Returns an
 * error count (accumulation lines elided from this view).
 */
1664 static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
1668 if (bLocalCG == nullptr)
1672 for (size_t i = 0; i < dd->globalAtomGroupIndices.size(); i++)
1674 if (!bLocalCG[dd->globalAtomGroupIndices[i]])
1677 "DD rank %d, %s: atom group %zu, global atom group %d is not marked in bLocalCG (ncg_home %d)\n", dd->rank, where, i + 1, dd->globalAtomGroupIndices[i] + 1, dd->ncg_home);
1682 for (int i = 0; i < ncg_sys; i++)
1689 if (ngl != dd->globalAtomGroupIndices.size())
1691 fprintf(stderr, "DD rank %d, %s: In bLocalCG %zu atom groups are marked as local, whereas there are %zu\n", dd->rank, where, ngl, dd->globalAtomGroupIndices.size());
/* Debug consistency check of the DD index tables. With DD_debug > 1 it
 * verifies no global atom appears twice locally; it then checks the
 * ga2la lookup round-trips through globalAtomIndices and that the count
 * of mapped atoms equals the number of atoms in the zones. Any
 * inconsistency is reported and finally escalated via gmx_fatal.
 */
1698 static void check_index_consistency(gmx_domdec_t *dd,
1699 int natoms_sys, int ncg_sys,
1704 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1706 if (dd->comm->DD_debug > 1)
1708 std::vector<int> have(natoms_sys);
1709 for (int a = 0; a < numAtomsInZones; a++)
1711 int globalAtomIndex = dd->globalAtomIndices[a];
1712 if (have[globalAtomIndex] > 0)
1714 fprintf(stderr, "DD rank %d: global atom %d occurs twice: index %d and %d\n", dd->rank, globalAtomIndex + 1, have[globalAtomIndex], a+1);
/* Store local index + 1 so 0 means "not seen yet" */
1718 have[globalAtomIndex] = a + 1;
1723 std::vector<int> have(numAtomsInZones);
1726 for (int i = 0; i < natoms_sys; i++)
1730 if (ga2la_get(dd->ga2la, i, &a, &cell))
1732 if (a >= numAtomsInZones)
1734 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n", dd->rank, i+1, a+1, numAtomsInZones);
1740 if (dd->globalAtomIndices[a] != i)
1742 fprintf(stderr, "DD rank %d: global atom %d marked as local atom %d, which has global atom index %d\n", dd->rank, i+1, a+1, dd->globalAtomIndices[a]+1);
1749 if (ngl != numAtomsInZones)
1752 "DD rank %d, %s: %d global atom indices, %d local atoms\n",
1753 dd->rank, where, ngl, numAtomsInZones);
1755 for (int a = 0; a < numAtomsInZones; a++)
1760 "DD rank %d, %s: local atom %d, global %d has no global index\n",
1761 dd->rank, where, a + 1, dd->globalAtomIndices[a] + 1);
1765 nerr += check_bLocalCG(dd, ncg_sys, dd->comm->bLocalCG, where);
1769 gmx_fatal(FARGS, "DD rank %d, %s: %d atom(group) index inconsistencies",
1770 dd->rank, where, nerr);
1774 /* Clear all DD global state indices, starting from \p atomGroupStart and \p atomStart */
/* Clears the ga2la table (fully, or per-entry for the zone atoms — the
 * selecting condition between the two branches is elided here), unmarks
 * the bLocalCG flags from atomGroupStart onward, and clears local vsite
 * and (if present) constraint indices.
 */
1775 static void clearDDStateIndices(gmx_domdec_t *dd,
1781 /* Clear the whole list without searching */
1782 ga2la_clear(dd->ga2la);
1786 const int numAtomsInZones = dd->comm->atomRanges.end(DDAtomRanges::Type::Zones);
1787 for (int i = 0; i < numAtomsInZones; i++)
1789 ga2la_del(dd->ga2la, dd->globalAtomIndices[i]);
1793 char *bLocalCG = dd->comm->bLocalCG;
1796 for (size_t atomGroup = atomGroupStart; atomGroup < dd->globalAtomGroupIndices.size(); atomGroup++)
1798 bLocalCG[dd->globalAtomGroupIndices[atomGroup]] = FALSE;
1802 dd_clear_local_vsite_indices(dd);
1804 if (dd->constraints)
1806 dd_clear_local_constraint_indices(dd);
/* Checks whether dynamic-load-balanced cell boundaries have shifted so
 * far that a neighboring cell boundary would jump past the allowed
 * limit for the given cut-off. For each DD dimension > 0 the scaled
 * boundary displacement is compared against grid_jump_limit(); with a
 * triclinic direction the box size is scaled by the skew factor.
 * On violation (and, per the fatal-error branch, apparently when errors
 * are fatal — flag handling is elided here) the run is aborted.
 */
1810 static gmx_bool check_grid_jump(gmx_int64_t step,
1816 gmx_domdec_comm_t *comm;
1825 for (d = 1; d < dd->ndim; d++)
1828 limit = grid_jump_limit(comm, cutoff, d);
1829 bfac = ddbox->box_size[dim];
1830 if (ddbox->tric_dir[dim])
1832 bfac *= ddbox->skew_fac[dim];
1834 if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
1835 (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
1843 /* This error should never be triggered under normal
1844 * circumstances, but you never know ...
1846 gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
1847 gmx_step_str(step, buf),
1848 dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Returns the force-computation load measure for this rank: either a
 * (slightly randomized) flop count when flop counting is enabled, or the
 * accumulated force cycle count with the single highest count subtracted
 * to filter out spurious stalls. When ranks share a GPU, the GPU wait
 * time is replaced by the average wait over the sharing ranks so the
 * balancer sees a consistent value.
 */
2856 static float dd_force_load(gmx_domdec_comm_t *comm)
2863 if (comm->eFlop > 1)
/* Add +-5% noise per eFlop level to avoid flop-count resonance */
2865 load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
2870 load = comm->cycl[ddCyclF];
2871 if (comm->cycl_n[ddCyclF] > 1)
2873 /* Subtract the maximum of the last n cycle counts
2874 * to get rid of possible high counts due to other sources,
2875 * for instance system activity, that would otherwise
2876 * affect the dynamic load balancing.
2878 load -= comm->cycl_max[ddCyclF];
2882 if (comm->cycl_n[ddCyclWaitGPU] && comm->nrank_gpu_shared > 1)
2884 float gpu_wait, gpu_wait_sum;
2886 gpu_wait = comm->cycl[ddCyclWaitGPU];
2887 if (comm->cycl_n[ddCyclF] > 1)
2889 /* We should remove the WaitGPU time of the same MD step
2890 * as the one with the maximum F time, since the F time
2891 * and the wait time are not independent.
2892 * Furthermore, the step for the max F time should be chosen
2893 * the same on all ranks that share the same GPU.
2894 * But to keep the code simple, we remove the average instead.
2895 * The main reason for artificially long times at some steps
2896 * is spurious CPU activity or MPI time, so we don't expect
2897 * that changes in the GPU wait time matter a lot here.
2899 gpu_wait *= (comm->cycl_n[ddCyclF] - 1)/(float)comm->cycl_n[ddCyclF];
2901 /* Sum the wait times over the ranks that share the same GPU */
2902 MPI_Allreduce(&gpu_wait, &gpu_wait_sum, 1, MPI_FLOAT, MPI_SUM,
2903 comm->mpi_comm_gpu_shared);
2904 /* Replace the wait time by the average over the ranks */
2905 load += -gpu_wait + gpu_wait_sum/comm->nrank_gpu_shared;
/* Allocates and fills *dim_f with the cumulative cell-boundary fractions
 * along dimension dim for static (non-DLB) load balancing: either the
 * user-supplied slb_frac fractions accumulated, or a uniform grid.
 * The last entry is always exactly 1.
 */
1913 static void set_slb_pme_dim_f(gmx_domdec_t *dd, int dim, real **dim_f)
1915 gmx_domdec_comm_t *comm;
1920 snew(*dim_f, dd->nc[dim]+1);
1922 for (i = 1; i < dd->nc[dim]; i++)
1924 if (comm->slb_frac[dim])
1926 (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
1930 (*dim_f)[i] = (real)i/(real)dd->nc[dim];
1933 (*dim_f)[dd->nc[dim]] = 1;
/* Initializes the PP-to-PME mapping structure for PME slab dimension
 * dimind (0 or 1): determines the number of slabs and, for each slab,
 * the min/max PP cell coordinate range along the corresponding DD
 * dimension, by scanning all DD ranks. Finally sets up the static
 * boundary fractions via set_slb_pme_dim_f.
 */
1936 static void init_ddpme(gmx_domdec_t *dd, gmx_ddpme_t *ddpme, int dimind)
1938 int pmeindex, slab, nso, i;
/* Special case: x decomposition absent, first DD dim is y with a
 * single PME rank in x (elided lines presumably adjust dimind/return) */
1941 if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
1947 ddpme->dim = dimind;
1949 ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
1951 ddpme->nslab = (ddpme->dim == 0 ?
1952 dd->comm->npmenodes_x :
1953 dd->comm->npmenodes_y);
1955 if (ddpme->nslab <= 1)
1960 nso = dd->comm->npmenodes/ddpme->nslab;
1961 /* Determine for each PME slab the PP location range for dimension dim */
1962 snew(ddpme->pp_min, ddpme->nslab);
1963 snew(ddpme->pp_max, ddpme->nslab);
1964 for (slab = 0; slab < ddpme->nslab; slab++)
/* Start with an empty (inverted) range, narrowed below */
1966 ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
1967 ddpme->pp_max[slab] = 0;
1969 for (i = 0; i < dd->nnodes; i++)
1971 ddindex2xyz(dd->nc, i, xyz);
1972 /* For y only use our y/z slab.
1973 * This assumes that the PME x grid size matches the DD grid size.
1975 if (dimind == 0 || xyz[XX] == dd->ci[XX])
1977 pmeindex = ddindex2pmeindex(dd, i);
1980 slab = pmeindex/nso;
1984 slab = pmeindex % ddpme->nslab;
1986 ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
1987 ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
1991 set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
/* Returns the maximum PME grid shift along x, or (in the elided else
 * branch, presumably) 0 when PME is not decomposed along x.
 */
1994 int dd_pme_maxshift_x(const gmx_domdec_t *dd)
1996 if (dd->comm->ddpme[0].dim == XX)
1998 return dd->comm->ddpme[0].maxshift;
/* Returns the maximum PME grid shift along y, checking both PME
 * decomposition dimensions; the elided fallback presumably returns 0.
 */
2006 int dd_pme_maxshift_y(const gmx_domdec_t *dd)
2008 if (dd->comm->ddpme[0].dim == YY)
2010 return dd->comm->ddpme[0].maxshift;
2012 else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
2014 return dd->comm->ddpme[1].maxshift;
/* Validates the local cell sizes for neighbor searching and, when DLB is
 * on with multiple dimensions or the box is not fully bounded,
 * communicates the cell boundaries (dd_move_cellx) and checks for grid
 * jumps. A cell that has shrunk below the minimum allowed size (after
 * triclinic skew scaling) aborts the run with a descriptive message.
 */
2022 static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
2024 rvec cell_ns_x0, rvec cell_ns_x1,
2027 gmx_domdec_comm_t *comm;
2032 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
2034 dim = dd->dim[dim_ind];
2036 /* Without PBC we don't have restrictions on the outer cells */
2037 if (!(dim >= ddbox->npbcdim &&
2038 (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
2040 (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
2041 comm->cellsize_min[dim])
2044 gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
2045 gmx_step_str(step, buf), dim2char(dim),
2046 comm->cell_x1[dim] - comm->cell_x0[dim],
2047 ddbox->skew_fac[dim],
2048 dd->comm->cellsize_min[dim],
2049 dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2053 if ((isDlbOn(dd->comm) && dd->ndim > 1) || ddbox->nboundeddim < DIM)
2055 /* Communicate the boundaries and update cell_ns_x0/1 */
2056 dd_move_cellx(dd, ddbox, cell_ns_x0, cell_ns_x1);
2057 if (isDlbOn(dd->comm) && dd->ndim > 1)
2059 check_grid_jump(step, dd, dd->comm->cutoff, ddbox, TRUE);
/* Accumulates a cycle count into the given DD cycle counter, tracking
 * the count, the number of samples, and the running maximum (which is
 * later subtracted to filter out stalls).
 */
2064 void dd_cycles_add(const gmx_domdec_t *dd, float cycles, int ddCycl)
2066 /* Note that the cycles value can be incorrect, either 0 or some
2067 * extremely large value, when our thread migrated to another core
2068 * with an unsynchronized cycle counter. If this happens less often
2069 * that once per nstlist steps, this will not cause issues, since
2070 * we later subtract the maximum value from the sum over nstlist steps.
2071 * A zero count will slightly lower the total, but that's a small effect.
2072 * Note that the main purpose of the subtraction of the maximum value
2073 * is to avoid throwing off the load balancing when stalls occur due
2074 * e.g. system activity or network congestion.
2076 dd->comm->cycl[ddCycl] += cycles;
2077 dd->comm->cycl_n[ddCycl]++;
2078 if (cycles > dd->comm->cycl_max[ddCycl])
2080 dd->comm->cycl_max[ddCycl] = cycles;
/* Returns a weighted flop count for the force calculation from the nrnb
 * counters: nonbonded kernel counts are scaled down (x0.5, and x0.25 for
 * water loops identified by "W3"/"W4" in the kernel name) to better
 * approximate real timings; free-energy/1-4 and bonded terms count fully.
 */
2084 static double force_flop_count(t_nrnb *nrnb)
2091 for (i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
2093 /* To get closer to the real timings, we half the count
2094 * for the normal loops and again half it for water loops.
2097 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2099 sum += nrnb->n[i]*0.25*cost_nrnb(i);
2103 sum += nrnb->n[i]*0.50*cost_nrnb(i);
2106 for (i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
2109 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
2111 sum += nrnb->n[i]*cost_nrnb(i);
2114 for (i = eNR_BONDS; i <= eNR_WALLS; i++)
2116 sum += nrnb->n[i]*cost_nrnb(i);
/* Starts flop-based load measurement: records the negative of the
 * current flop count so that a later stop yields the interval's flops.
 */
2122 void dd_force_flop_start(gmx_domdec_t *dd, t_nrnb *nrnb)
2124 if (dd->comm->eFlop)
2126 dd->comm->flop -= force_flop_count(nrnb);
/* Stops flop-based load measurement: adds the current flop count,
 * completing the interval started by dd_force_flop_start.
 */
2129 void dd_force_flop_stop(gmx_domdec_t *dd, t_nrnb *nrnb)
2131 if (dd->comm->eFlop)
2133 dd->comm->flop += force_flop_count(nrnb);
/* Resets all DD cycle counters (sum, sample count, maximum) and the
 * flop sample counter for the next load-measurement interval.
 */
2138 static void clear_dd_cycle_counts(gmx_domdec_t *dd)
2142 for (i = 0; i < ddCyclNr; i++)
2144 dd->comm->cycl[i] = 0;
2145 dd->comm->cycl_n[i] = 0;
2146 dd->comm->cycl_max[i] = 0;
2149 dd->comm->flop_n = 0;
/* Gathers the load measurements of all ranks onto the row roots,
 * dimension by dimension from the last DD dimension to the first.
 * Each participating rank packs its local load (force load, cell
 * fractions/limits under DLB, PP/PME timings with separate PME ranks,
 * or the already-reduced values from the next dimension) into sbuf,
 * which is gathered over the row communicator; the row root then
 * reduces sums, maxima, minimum cell volumes and limitation flags.
 * Finally the global rank accumulates the run totals used for the
 * load-balance report. Wrapped in the ewcDDCOMMLOAD cycle counter.
 */
2152 static void get_load_distribution(gmx_domdec_t *dd, gmx_wallcycle_t wcycle)
2154 gmx_domdec_comm_t *comm;
2155 domdec_load_t *load;
2156 domdec_root_t *root = nullptr;
2158 float cell_frac = 0, sbuf[DD_NLOAD_MAX];
2163 fprintf(debug, "get_load_distribution start\n");
2166 wallcycle_start(wcycle, ewcDDCOMMLOAD);
2170 bSepPME = (dd->pme_nodeid >= 0);
2172 if (dd->ndim == 0 && bSepPME)
2174 /* Without decomposition, but with PME nodes, we need the load */
2175 comm->load[0].mdf = comm->cycl[ddCyclPPduringPME];
2176 comm->load[0].pme = comm->cycl[ddCyclPME];
2179 for (d = dd->ndim-1; d >= 0; d--)
2182 /* Check if we participate in the communication in this dimension */
2183 if (d == dd->ndim-1 ||
2184 (dd->ci[dd->dim[d+1]] == 0 && dd->ci[dd->dim[dd->ndim-1]] == 0))
2186 load = &comm->load[d];
2187 if (isDlbOn(dd->comm))
2189 cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
/* Innermost dimension: pack the raw local measurements */
2192 if (d == dd->ndim-1)
2194 sbuf[pos++] = dd_force_load(comm);
2195 sbuf[pos++] = sbuf[0];
2196 if (isDlbOn(dd->comm))
2198 sbuf[pos++] = sbuf[0];
2199 sbuf[pos++] = cell_frac;
2202 sbuf[pos++] = comm->cell_f_max0[d];
2203 sbuf[pos++] = comm->cell_f_min1[d];
2208 sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
2209 sbuf[pos++] = comm->cycl[ddCyclPME];
/* Outer dimensions: forward the reduction from dimension d+1 */
2214 sbuf[pos++] = comm->load[d+1].sum;
2215 sbuf[pos++] = comm->load[d+1].max;
2216 if (isDlbOn(dd->comm))
2218 sbuf[pos++] = comm->load[d+1].sum_m;
2219 sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
2220 sbuf[pos++] = comm->load[d+1].flags;
2223 sbuf[pos++] = comm->cell_f_max0[d];
2224 sbuf[pos++] = comm->cell_f_min1[d];
2229 sbuf[pos++] = comm->load[d+1].mdf;
2230 sbuf[pos++] = comm->load[d+1].pme;
2234 /* Communicate a row in DD direction d.
2235 * The communicators are setup such that the root always has rank 0.
2238 MPI_Gather(sbuf, load->nload*sizeof(float), MPI_BYTE,
2239 load->load, load->nload*sizeof(float), MPI_BYTE,
2240 0, comm->mpi_comm_load[d]);
2242 if (dd->ci[dim] == dd->master_ci[dim])
2244 /* We are the root, process this row */
2247 root = comm->root[d];
2257 for (i = 0; i < dd->nc[dim]; i++)
2259 load->sum += load->load[pos++];
2260 load->max = std::max(load->max, load->load[pos]);
2262 if (isDlbOn(dd->comm))
2266 /* This direction could not be load balanced properly,
2267 * therefore we need to use the maximum iso the average load.
2269 load->sum_m = std::max(load->sum_m, load->load[pos]);
2273 load->sum_m += load->load[pos];
2276 load->cvol_min = std::min(load->cvol_min, load->load[pos]);
/* Round the float-encoded flags back to an int bitmask */
2280 load->flags = (int)(load->load[pos++] + 0.5);
2284 root->cell_f_max0[i] = load->load[pos++];
2285 root->cell_f_min1[i] = load->load[pos++];
2290 load->mdf = std::max(load->mdf, load->load[pos]);
2292 load->pme = std::max(load->pme, load->load[pos]);
2296 if (isDlbOn(comm) && root->bLimited)
2298 load->sum_m *= dd->nc[dim];
2299 load->flags |= (1<<d);
/* Global root: accumulate run totals for the final report */
2307 comm->nload += dd_load_count(comm);
2308 comm->load_step += comm->cycl[ddCyclStep];
2309 comm->load_sum += comm->load[0].sum;
2310 comm->load_max += comm->load[0].max;
2313 for (d = 0; d < dd->ndim; d++)
2315 if (comm->load[0].flags & (1<<d))
2317 comm->load_lim[d]++;
2323 comm->load_mdf += comm->load[0].mdf;
2324 comm->load_pme += comm->load[0].pme;
2328 wallcycle_stop(wcycle, ewcDDCOMMLOAD);
2332 fprintf(debug, "get_load_distribution finished\n");
/* Returns the fraction of the total step time spent in the balanceable
 * force calculation (average load over total step cycles per rank);
 * the elided fallback presumably returns 0 when no load was measured.
 */
2336 static float dd_force_load_fraction(gmx_domdec_t *dd)
2338 /* Return the relative performance loss on the total run time
2339 * due to the force calculation load imbalance.
2341 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2343 return dd->comm->load_sum/(dd->comm->load_step*dd->nnodes);
/* Returns the relative performance loss due to force-load imbalance:
 * (max - average) load as a fraction of the total step time; the elided
 * fallback presumably returns 0 when no load was measured.
 */
2351 static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
2353 /* Return the relative performance loss on the total run time
2354 * due to the force calculation load imbalance.
2356 if (dd->comm->nload > 0 && dd->comm->load_step > 0)
2359 (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
2360 (dd->comm->load_step*dd->nnodes);
/* Prints the end-of-run load-balancing report (master rank only, and
 * only when loads were measured): the DLB state, average load imbalance
 * and resulting performance loss, the fraction of steps where DLB was
 * limited by -rdd/-rcon/-dds, and the PP/PME imbalance. Emits warnings
 * to both log and stderr when the losses exceed DD_PERF_LOSS_WARN.
 */
2368 static void print_dd_load_av(FILE *fplog, gmx_domdec_t *dd)
2370 gmx_domdec_comm_t *comm = dd->comm;
2372 /* Only the master rank prints loads and only if we measured loads */
2373 if (!DDMASTER(dd) || comm->nload == 0)
2379 int numPpRanks = dd->nnodes;
2380 int numPmeRanks = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
2381 int numRanks = numPpRanks + numPmeRanks;
2382 float lossFraction = 0;
2384 /* Print the average load imbalance and performance loss */
2385 if (dd->nnodes > 1 && comm->load_sum > 0)
2387 float imbalance = comm->load_max*numPpRanks/comm->load_sum - 1;
2388 lossFraction = dd_force_imb_perf_loss(dd);
2390 std::string msg = "\n Dynamic load balancing report:\n";
2391 std::string dlbStateStr;
2393 switch (dd->comm->dlbState)
2396 dlbStateStr = "DLB was off during the run per user request.";
2398 case edlbsOffForever:
2399 /* Currectly this can happen due to performance loss observed, cell size
2400 * limitations or incompatibility with other settings observed during
2401 * determineInitialDlbState(). */
2402 dlbStateStr = "DLB got disabled because it was unsuitable to use.";
2404 case edlbsOffCanTurnOn:
2405 dlbStateStr = "DLB was off during the run due to low measured imbalance.";
2407 case edlbsOffTemporarilyLocked:
2408 dlbStateStr = "DLB was locked at the end of the run due to unfinished PP-PME balancing.";
2410 case edlbsOnCanTurnOff:
2411 dlbStateStr = "DLB was turned on during the run due to measured imbalance.";
2414 dlbStateStr = "DLB was permanently on during the run per user request.";
2417 GMX_ASSERT(false, "Undocumented DLB state");
2420 msg += " " + dlbStateStr + "\n";
2421 msg += gmx::formatString(" Average load imbalance: %.1f%%.\n", imbalance*100);
2422 msg += gmx::formatString(" The balanceable part of the MD step is %d%%, load imbalance is computed from this.\n",
2423 static_cast<int>(dd_force_load_fraction(dd)*100 + 0.5));
2424 msg += gmx::formatString(" Part of the total run time spent waiting due to load imbalance: %.1f%%.\n",
2426 fprintf(fplog, "%s", msg.c_str());
2427 fprintf(stderr, "%s", msg.c_str());
2430 /* Print during what percentage of steps the load balancing was limited */
2431 bool dlbWasLimited = false;
2434 sprintf(buf, " Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
2435 for (int d = 0; d < dd->ndim; d++)
/* +1/2 trick: rounded percentage of steps limited in dimension d */
2437 int limitPercentage = (200*comm->load_lim[d] + 1)/(2*comm->nload);
2438 sprintf(buf+strlen(buf), " %c %d %%",
2439 dim2char(dd->dim[d]), limitPercentage);
2440 if (limitPercentage >= 50)
2442 dlbWasLimited = true;
2445 sprintf(buf + strlen(buf), "\n");
2446 fprintf(fplog, "%s", buf);
2447 fprintf(stderr, "%s", buf);
2450 /* Print the performance loss due to separate PME - PP rank imbalance */
2451 float lossFractionPme = 0;
2452 if (numPmeRanks > 0 && comm->load_mdf > 0 && comm->load_step > 0)
2454 float pmeForceRatio = comm->load_pme/comm->load_mdf;
2455 lossFractionPme = (comm->load_pme - comm->load_mdf)/comm->load_step;
2456 if (lossFractionPme <= 0)
/* PME ranks idle: loss scales with the PME rank fraction */
2458 lossFractionPme *= numPmeRanks/static_cast<float>(numRanks);
/* PP ranks idle: loss scales with the PP rank fraction */
2462 lossFractionPme *= numPpRanks/static_cast<float>(numRanks);
2464 sprintf(buf, " Average PME mesh/force load: %5.3f\n", pmeForceRatio);
2465 fprintf(fplog, "%s", buf);
2466 fprintf(stderr, "%s", buf);
2467 sprintf(buf, " Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n", fabs(lossFractionPme)*100);
2468 fprintf(fplog, "%s", buf);
2469 fprintf(stderr, "%s", buf);
2471 fprintf(fplog, "\n");
2472 fprintf(stderr, "\n");
2474 if (lossFraction >= DD_PERF_LOSS_WARN)
2477 "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
2478 " in the domain decomposition.\n", lossFraction*100);
2481 sprintf(buf+strlen(buf), " You might want to use dynamic load balancing (option -dlb.)\n");
2483 else if (dlbWasLimited)
2485 sprintf(buf+strlen(buf), " You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
2487 fprintf(fplog, "%s\n", buf);
2488 fprintf(stderr, "%s\n", buf);
2490 if (numPmeRanks > 0 && fabs(lossFractionPme) >= DD_PERF_LOSS_WARN)
2493 "NOTE: %.1f %% performance was lost because the PME ranks\n"
2494 " had %s work to do than the PP ranks.\n"
2495 " You might want to %s the number of PME ranks\n"
2496 " or %s the cut-off and the grid spacing.\n",
2497 fabs(lossFractionPme*100),
2498 (lossFractionPme < 0) ? "less" : "more",
2499 (lossFractionPme < 0) ? "decrease" : "increase",
2500 (lossFractionPme < 0) ? "decrease" : "increase");
2501 fprintf(fplog, "%s\n", buf);
2502 fprintf(stderr, "%s\n", buf);
/* Returns the minimum cell volume relative to the average (cvol_min is
 * normalized per rank, hence the multiplication by nnodes). */
2506 static float dd_vol_min(gmx_domdec_t *dd)
2508 return dd->comm->load[0].cvol_min*dd->nnodes;
/* Returns the per-dimension DLB limitation flags gathered in load[0]. */
2511 static gmx_bool dd_load_flags(gmx_domdec_t *dd)
2513 return dd->comm->load[0].flags;
/* Returns the force-load imbalance: max/average - 1 over all PP ranks,
 * or (per the comment below, in elided code) 0 when cycle counting
 * produced no positive sum.
 */
2516 static float dd_f_imbal(gmx_domdec_t *dd)
2518 if (dd->comm->load[0].sum > 0)
2520 return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1.0f;
2524 /* Something is wrong in the cycle counting, report no load imbalance */
/* Returns the ratio of PME mesh time to PP force time (master rank
 * only); the elided fallback presumably returns a sentinel when no PME
 * cycles were counted.
 */
2529 float dd_pme_f_ratio(gmx_domdec_t *dd)
2531 /* Should only be called on the DD master rank */
2532 assert(DDMASTER(dd));
2534 if (dd->comm->load[0].mdf > 0 && dd->comm->cycl_n[ddCyclPME] > 0)
2536 return dd->comm->load[0].pme/dd->comm->load[0].mdf;
/* Writes one DD load-balance status line to the log for the given step:
 * which dimensions were limited, the min/average cell volume ratio (DLB)
 * or force imbalance, and the PME/force ratio when PME cycles exist.
 */
2544 static void dd_print_load(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step)
2549 flags = dd_load_flags(dd);
2553 "DD load balancing is limited by minimum cell size in dimension");
2554 for (d = 0; d < dd->ndim; d++)
2558 fprintf(fplog, " %c", dim2char(dd->dim[d]));
2561 fprintf(fplog, "\n");
2563 fprintf(fplog, "DD step %s", gmx_step_str(step, buf));
2564 if (isDlbOn(dd->comm))
2566 fprintf(fplog, " vol min/aver %5.3f%c",
2567 dd_vol_min(dd), flags ? '!' : ' ');
2571 fprintf(fplog, " load imb.: force %4.1f%%", dd_f_imbal(dd)*100);
2573 if (dd->comm->cycl_n[ddCyclPME])
2575 fprintf(fplog, " pme mesh/force %5.3f", dd_pme_f_ratio(dd));
2577 fprintf(fplog, "\n\n");
/* Prints a compact load status to stderr (verbose mode): volume ratio
 * with DLB, force imbalance percentage, and PME/force ratio if measured.
 */
2580 static void dd_print_load_verbose(gmx_domdec_t *dd)
2582 if (isDlbOn(dd->comm))
2584 fprintf(stderr, "vol %4.2f%c ",
2585 dd_vol_min(dd), dd_load_flags(dd) ? '!' : ' ');
2589 fprintf(stderr, "imb F %2d%% ", (int)(dd_f_imbal(dd)*100+0.5));
2591 if (dd->comm->cycl_n[ddCyclPME])
2593 fprintf(stderr, "pme/F %4.2f ", dd_pme_f_ratio(dd));
/* Creates the load-reduction communicator for DD dimension dim_ind for
 * the row of cells through 'loc' along that dimension: determines
 * whether this rank belongs to the row, splits mpi_comm_all accordingly,
 * and on the row root allocates the DLB cell-fraction/bound buffers
 * (plus the gather buffer for the load values).
 */
2598 static void make_load_communicator(gmx_domdec_t *dd, int dim_ind, ivec loc)
2603 domdec_root_t *root;
2604 gmx_bool bPartOfGroup = FALSE;
2606 dim = dd->dim[dim_ind];
2607 copy_ivec(loc, loc_c);
2608 for (i = 0; i < dd->nc[dim]; i++)
2611 rank = dd_index(dd->nc, loc_c);
2612 if (rank == dd->rank)
2614 /* This process is part of the group */
2615 bPartOfGroup = TRUE;
/* Non-members pass MPI_UNDEFINED and get no communicator */
2618 MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup ? 0 : MPI_UNDEFINED, dd->rank,
2622 dd->comm->mpi_comm_load[dim_ind] = c_row;
2623 if (!isDlbDisabled(dd->comm))
2625 if (dd->ci[dim] == dd->master_ci[dim])
2627 /* This is the root process of this row */
2628 snew(dd->comm->root[dim_ind], 1);
2629 root = dd->comm->root[dim_ind];
2630 snew(root->cell_f, ddCellFractionBufferSize(dd, dim_ind));
2631 snew(root->old_cell_f, dd->nc[dim]+1);
2632 snew(root->bCellMin, dd->nc[dim]);
2635 snew(root->cell_f_max0, dd->nc[dim]);
2636 snew(root->cell_f_min1, dd->nc[dim]);
2637 snew(root->bound_min, dd->nc[dim]);
2638 snew(root->bound_max, dd->nc[dim]);
2640 snew(root->buf_ncd, dd->nc[dim]);
2644 /* This is not a root process, we only need to receive cell_f */
2645 snew(dd->comm->cell_f_row, ddCellFractionBufferSize(dd, dim_ind));
2648 if (dd->ci[dim] == dd->master_ci[dim])
2650 snew(dd->comm->load[dim_ind].load, dd->nc[dim]*DD_NLOAD_MAX);
/* Sets up the communicator of PP ranks that share the same GPU on the
 * same physical node, used to average GPU wait times for DLB. Ranks
 * without a short-ranged duty or without a GPU do nothing. The PP
 * communicator is split first by physical-node hash, then by gpu_id;
 * a single-rank "sharing" group is freed again as sharing is pointless.
 */
2656 void dd_setup_dlb_resource_sharing(t_commrec *cr,
2660 int physicalnode_id_hash;
2662 MPI_Comm mpi_comm_pp_physicalnode;
2664 if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
2666 /* Only ranks with short-ranged tasks (currently) use GPUs.
2667 * If we don't have GPUs assigned, there are no resources to share.
2672 physicalnode_id_hash = gmx_physicalnode_id_hash();
2678 fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
2679 fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n",
2680 dd->rank, physicalnode_id_hash, gpu_id);
2682 /* Split the PP communicator over the physical nodes */
2683 /* TODO: See if we should store this (before), as it's also used for
2684 * for the nodecomm summation.
2686 // TODO PhysicalNodeCommunicator could be extended/used to handle
2687 // the need for per-node per-group communicators.
2688 MPI_Comm_split(dd->mpi_comm_all, physicalnode_id_hash, dd->rank,
2689 &mpi_comm_pp_physicalnode);
2690 MPI_Comm_split(mpi_comm_pp_physicalnode, gpu_id, dd->rank,
2691 &dd->comm->mpi_comm_gpu_shared);
2692 MPI_Comm_free(&mpi_comm_pp_physicalnode);
2693 MPI_Comm_size(dd->comm->mpi_comm_gpu_shared, &dd->comm->nrank_gpu_shared);
2697 fprintf(debug, "nrank_gpu_shared %d\n", dd->comm->nrank_gpu_shared);
2700 /* Note that some ranks could share a GPU, while others don't */
2702 if (dd->comm->nrank_gpu_shared == 1)
2704 MPI_Comm_free(&dd->comm->mpi_comm_gpu_shared);
2707 GMX_UNUSED_VALUE(cr);
2708 GMX_UNUSED_VALUE(gpu_id);
/* Allocates the per-dimension load structures and creates the load
 * communicators for all DD dimensions: one row communicator for
 * dimension 0, then nested loops over the higher dimensions so every
 * row of cells gets its own group (see make_load_communicator).
 */
2712 static void make_load_communicators(gmx_domdec_t gmx_unused *dd)
2715 int dim0, dim1, i, j;
2720 fprintf(debug, "Making load communicators\n");
2723 snew(dd->comm->load, std::max(dd->ndim, 1));
2724 snew(dd->comm->mpi_comm_load, std::max(dd->ndim, 1));
2732 make_load_communicator(dd, 0, loc);
2736 for (i = 0; i < dd->nc[dim0]; i++)
2739 make_load_communicator(dd, 1, loc);
2745 for (i = 0; i < dd->nc[dim0]; i++)
2749 for (j = 0; j < dd->nc[dim1]; j++)
2752 make_load_communicator(dd, 2, loc);
2759 fprintf(debug, "Finished making load communicators\n");
2764 /*! \brief Sets up the relation between neighboring domains and zones */
/* For each DD dimension stores the +1/-1 periodic neighbor ranks; then
 * fills in the 2^ndim zone shifts and the i-zone/j-zone neighbor-search
 * ranges, including the allowed min/max j-zone shifts per Cartesian
 * dimension. Finally allocates DLB row-root pointers and, when load
 * recording is on, creates the load communicators.
 */
2765 static void setup_neighbor_relations(gmx_domdec_t *dd)
2767 int d, dim, i, j, m;
2769 gmx_domdec_zones_t *zones;
2770 gmx_domdec_ns_ranges_t *izone;
2772 for (d = 0; d < dd->ndim; d++)
2775 copy_ivec(dd->ci, tmp);
2776 tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
2777 dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
2778 copy_ivec(dd->ci, tmp);
/* +nc before the modulo keeps the operand non-negative */
2779 tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
2780 dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
2783 fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
2786 dd->neighbor[d][1]);
2790 int nzone = (1 << dd->ndim);
2791 int nizone = (1 << std::max(dd->ndim - 1, 0));
2792 assert(nizone >= 1 && nizone <= DD_MAXIZONE);
2794 zones = &dd->comm->zones;
2796 for (i = 0; i < nzone; i++)
2799 clear_ivec(zones->shift[i]);
2800 for (d = 0; d < dd->ndim; d++)
2802 zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
2807 for (i = 0; i < nzone; i++)
2809 for (d = 0; d < DIM; d++)
2811 s[d] = dd->ci[d] - zones->shift[i][d];
2816 else if (s[d] >= dd->nc[d])
2822 zones->nizone = nizone;
2823 for (i = 0; i < zones->nizone; i++)
2825 assert(ddNonbondedZonePairRanges[i][0] == i);
2827 izone = &zones->izone[i];
2828 /* dd_zp3 is for 3D decomposition, for fewer dimensions use only
2829 * j-zones up to nzone.
2831 izone->j0 = std::min(ddNonbondedZonePairRanges[i][1], nzone);
2832 izone->j1 = std::min(ddNonbondedZonePairRanges[i][2], nzone);
2833 for (dim = 0; dim < DIM; dim++)
2835 if (dd->nc[dim] == 1)
2837 /* All shifts should be allowed */
2838 izone->shift0[dim] = -1;
2839 izone->shift1[dim] = 1;
2843 /* Determine the min/max j-zone shift wrt the i-zone */
2844 izone->shift0[dim] = 1;
2845 izone->shift1[dim] = -1;
2846 for (j = izone->j0; j < izone->j1; j++)
2848 int shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
2849 if (shift_diff < izone->shift0[dim])
2851 izone->shift0[dim] = shift_diff;
2853 if (shift_diff > izone->shift1[dim])
2855 izone->shift1[dim] = shift_diff;
2862 if (!isDlbDisabled(dd->comm))
2864 snew(dd->comm->root, dd->ndim);
2867 if (dd->comm->bRecordLoad)
2869 make_load_communicators(dd);
/* Creates the particle-particle communicator. With Cartesian PP it
 * builds an MPI Cartesian communicator over the DD grid (replacing
 * mpi_comm_mygroup) and derives this rank's coordinates; otherwise the
 * plain rank is used as DD index. It also determines the DD master rank
 * for all three layouts (Cartesian PP+PME needs the ddindex2ddnodeid
 * table; Cartesian PP sums a ddindex2simnodeid table over all ranks).
 */
2873 static void make_pp_communicator(FILE *fplog,
2875 t_commrec gmx_unused *cr,
2876 int gmx_unused reorder)
2879 gmx_domdec_comm_t *comm;
2886 if (comm->bCartesianPP)
2888 /* Set up cartesian communication for the particle-particle part */
2891 fprintf(fplog, "Will use a Cartesian communicator: %d x %d x %d\n",
2892 dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
2895 for (int i = 0; i < DIM; i++)
2899 MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, reorder,
2901 /* We overwrite the old communicator with the new cartesian one */
2902 cr->mpi_comm_mygroup = comm_cart;
2905 dd->mpi_comm_all = cr->mpi_comm_mygroup;
2906 MPI_Comm_rank(dd->mpi_comm_all, &dd->rank);
2908 if (comm->bCartesianPP_PME)
2910 /* Since we want to use the original cartesian setup for sim,
2911 * and not the one after split, we need to make an index.
2913 snew(comm->ddindex2ddnodeid, dd->nnodes);
/* Each rank writes only its own slot; gmx_sumi merges the table */
2914 comm->ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
2915 gmx_sumi(dd->nnodes, comm->ddindex2ddnodeid, cr);
2916 /* Get the rank of the DD master,
2917 * above we made sure that the master node is a PP node.
2927 MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
2929 else if (comm->bCartesianPP)
2931 if (cr->npmenodes == 0)
2933 /* The PP communicator is also
2934 * the communicator for this simulation
2936 cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
2938 cr->nodeid = dd->rank;
2940 MPI_Cart_coords(dd->mpi_comm_all, dd->rank, DIM, dd->ci);
2942 /* We need to make an index to go from the coordinates
2943 * to the nodeid of this simulation.
2945 snew(comm->ddindex2simnodeid, dd->nnodes);
2946 snew(buf, dd->nnodes);
2947 if (thisRankHasDuty(cr, DUTY_PP))
2949 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
2951 /* Communicate the ddindex to simulation nodeid index */
2952 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
2953 cr->mpi_comm_mysim);
2956 /* Determine the master coordinates and rank.
2957 * The DD master should be the same node as the master of this sim.
2959 for (int i = 0; i < dd->nnodes; i++)
2961 if (comm->ddindex2simnodeid[i] == 0)
2963 ddindex2xyz(dd->nc, i, dd->master_ci);
2964 MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
2969 fprintf(debug, "The master rank is %d\n", dd->masterrank);
2974 /* No Cartesian communicators */
2975 /* We use the rank in dd->comm->all as DD index */
2976 ddindex2xyz(dd->nc, dd->rank, dd->ci);
2977 /* The simulation master nodeid is 0, so the DD master rank is also 0 */
2979 clear_ivec(dd->master_ci);
2986 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2987 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
2992 "Domain decomposition rank %d, coordinates %d %d %d\n\n",
2993 dd->rank, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Builds comm->ddindex2simnodeid on ranks that did not construct it in
 * make_pp_communicator (Cartesian PP without a combined PP-PME Cartesian
 * grid).  Each PP rank contributes its own entry and the table is combined
 * with a sum-reduction over the simulation communicator.  Compiles to a
 * no-op in builds without MPI (hence the GMX_UNUSED_VALUE calls). */
2997 static void receive_ddindex2simnodeid(gmx_domdec_t *dd,
3001 gmx_domdec_comm_t *comm = dd->comm;
3003 if (!comm->bCartesianPP_PME && comm->bCartesianPP)
3006 snew(comm->ddindex2simnodeid, dd->nnodes);
3007 snew(buf, dd->nnodes);
3008 if (thisRankHasDuty(cr, DUTY_PP))
3010 buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
3012 /* Communicate the ddindex to simulation nodeid index */
3013 MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
3014 cr->mpi_comm_mysim);
3018 GMX_UNUSED_VALUE(dd);
3019 GMX_UNUSED_VALUE(cr);
/* Splits the simulation communicator into a PP part and a PME-only part.
 *
 * For DdRankOrder::cartesian it first tries to extend the DD grid with the
 * PME ranks along y or z (whichever divides evenly and yields the thinnest
 * PME slab) and creates one Cartesian communicator covering both groups;
 * otherwise it falls back to an interleaved or PP-first ordering and uses
 * MPI_Comm_split keyed on each rank's duty.  Sets cr->duty, the group
 * communicator cr->mpi_comm_mygroup and cr->nodeid. */
3023 static void split_communicator(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3024 DdRankOrder gmx_unused rankOrder,
3025 int gmx_unused reorder)
3027 gmx_domdec_comm_t *comm;
3036 if (comm->bCartesianPP)
3038 for (i = 1; i < DIM; i++)
/* A Cartesian PP-PME layout requires the PME rank count to spread evenly
 * over the DD grid slices perpendicular to dimension i. */
3040 bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
3042 if (bDiv[YY] || bDiv[ZZ])
3044 comm->bCartesianPP_PME = TRUE;
3045 /* If we have 2D PME decomposition, which is always in x+y,
3046 * we stack the PME only nodes in z.
3047 * Otherwise we choose the direction that provides the thinnest slab
3048 * of PME only nodes as this will have the least effect
3049 * on the PP communication.
3050 * But for the PME communication the opposite might be better.
3052 if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
3054 dd->nc[YY] > dd->nc[ZZ]))
3056 comm->cartpmedim = ZZ;
3060 comm->cartpmedim = YY;
/* Extend the grid along the chosen dimension to make room for the
 * PME-only ranks. */
3062 comm->ntot[comm->cartpmedim]
3063 += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
3067 fprintf(fplog, "Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n", cr->npmenodes, dd->nc[XX], dd->nc[YY], dd->nc[XX], dd->nc[ZZ]);
3069 "Will not use a Cartesian communicator for PP <-> PME\n\n");
3073 if (comm->bCartesianPP_PME)
3081 fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
3084 for (i = 0; i < DIM; i++)
3088 MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
3090 MPI_Comm_rank(comm_cart, &rank);
/* Reordering rank 0 away from the master is not supported downstream. */
3091 if (MASTER(cr) && rank != 0)
3093 gmx_fatal(FARGS, "MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
3096 /* With this assignment we lose the link to the original communicator
3097 * which will usually be MPI_COMM_WORLD, unless have multisim.
3099 cr->mpi_comm_mysim = comm_cart;
3100 cr->sim_nodeid = rank;
3102 MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, dd->ci);
3106 fprintf(fplog, "Cartesian rank %d, coordinates %d %d %d\n\n",
3107 cr->sim_nodeid, dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
/* Ranks inside the original DD grid do PP work; those stacked beyond it
 * along cartpmedim become PME-only ranks. */
3110 if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
3114 if (cr->npmenodes == 0 ||
3115 dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
3117 cr->duty = DUTY_PME;
3120 /* Split the sim communicator into PP and PME only nodes */
3121 MPI_Comm_split(cr->mpi_comm_mysim,
3122 getThisRankDuties(cr),
3123 dd_index(comm->ntot, dd->ci),
3124 &cr->mpi_comm_mygroup);
3131 case DdRankOrder::pp_pme:
3134 fprintf(fplog, "Order of the ranks: PP first, PME last\n");
3137 case DdRankOrder::interleave:
3138 /* Interleave the PP-only and PME-only ranks */
3141 fprintf(fplog, "Interleaving PP and PME ranks\n");
3143 comm->pmenodes = dd_interleaved_pme_ranks(dd);
3145 case DdRankOrder::cartesian:
3148 gmx_fatal(FARGS, "Invalid ddRankOrder=%d", static_cast<int>(rankOrder));
/* A rank with no assigned PME partner is itself a PME-only rank. */
3151 if (dd_simnode2pmenode(dd, cr, cr->sim_nodeid) == -1)
3153 cr->duty = DUTY_PME;
3160 /* Split the sim communicator into PP and PME only nodes */
3161 MPI_Comm_split(cr->mpi_comm_mysim,
3162 getThisRankDuties(cr),
3164 &cr->mpi_comm_mygroup);
3165 MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid);
3171 fprintf(fplog, "This rank does only %s work.\n\n",
3172 thisRankHasDuty(cr, DUTY_PP) ? "particle-particle" : "PME-mesh");
3176 /*! \brief Generates the MPI communicators for domain decomposition
 *
 * Splits off PME-only ranks when requested (cr->npmenodes > 0), then builds
 * the PP communicator on PP ranks, distributes the ddindex-to-nodeid table,
 * and for PP ranks records which PME rank serves them.  Cartesian rank
 * reordering can be disabled via the GMX_NO_CART_REORDER env. var. */
3177 static void make_dd_communicators(FILE *fplog, t_commrec *cr,
3178 gmx_domdec_t *dd, DdRankOrder ddRankOrder)
3180 gmx_domdec_comm_t *comm;
3185 copy_ivec(dd->nc, comm->ntot);
3187 comm->bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
3188 comm->bCartesianPP_PME = FALSE;
3190 /* Reorder the nodes by default. This might change the MPI ranks.
3191 * Real reordering is only supported on very few architectures,
3192 * Blue Gene is one of them.
3194 CartReorder = (getenv("GMX_NO_CART_REORDER") == nullptr);
3196 if (cr->npmenodes > 0)
3198 /* Split the communicator into a PP and PME part */
3199 split_communicator(fplog, cr, dd, ddRankOrder, CartReorder);
3200 if (comm->bCartesianPP_PME)
3202 /* We (possibly) reordered the nodes in split_communicator,
3203 * so it is no longer required in make_pp_communicator.
3205 CartReorder = FALSE;
3210 /* All nodes do PP and PME */
3212 /* We do not require separate communicators */
3213 cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
3217 if (thisRankHasDuty(cr, DUTY_PP))
3219 /* Copy or make a new PP communicator */
3220 make_pp_communicator(fplog, dd, cr, CartReorder);
3224 receive_ddindex2simnodeid(dd, cr);
3227 if (!thisRankHasDuty(cr, DUTY_PME))
3229 /* Set up the communication to our PME node */
3230 dd->pme_nodeid = dd_simnode2pmenode(dd, cr, cr->sim_nodeid);
3231 dd->pme_receive_vir_ener = receive_vir_ener(dd, cr);
3234 fprintf(debug, "My pme_nodeid %d receive ener %d\n",
3235 dd->pme_nodeid, dd->pme_receive_vir_ener);
3240 dd->pme_nodeid = -1;
/* Presumably only on the master rank: allocate the structure used to
 * distribute/collect global atom data — TODO confirm (guard elided). */
3245 dd->ma = gmx::compat::make_unique<AtomDistribution>(dd->nc,
3247 comm->cgs_gl.index[comm->cgs_gl.nr]);
/* Parses the static load balancing cell-size fractions for one dimension.
 *
 * \p size_string holds nc whitespace-separated relative sizes (from the
 * -ddcsx/-ddcsy/-ddcsz options); returns a freshly allocated array of the
 * nc values (normalization elided from this view) or, presumably, nullptr
 * when nc <= 1 or no string was given — TODO confirm (early lines elided).
 * Issues a fatal error when the string has too few/invalid entries. */
3251 static real *get_slb_frac(FILE *fplog, const char *dir, int nc, const char *size_string)
3253 real *slb_frac, tot;
3258 if (nc > 1 && size_string != nullptr)
3262 fprintf(fplog, "Using static load balancing for the %s direction\n",
3267 for (i = 0; i < nc; i++)
/* %n records how many characters were consumed so the next entry can be
 * read from the remainder of the string. */
3270 sscanf(size_string, "%20lf%n", &dbl, &n);
3273 gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
3282 fprintf(fplog, "Relative cell sizes:");
3284 for (i = 0; i < nc; i++)
3289 fprintf(fplog, " %5.3f", slb_frac[i]);
3294 fprintf(fplog, "\n");
/* Counts the multi-body bonded interactions (more than two atoms involved)
 * in the whole topology, summed over all molecule instances.  Used to decide
 * whether multi-body bonded communication is needed. */
3301 static int multi_body_bondeds_count(const gmx_mtop_t *mtop)
3304 gmx_mtop_ilistloop_t iloop;
3308 iloop = gmx_mtop_ilistloop_init(mtop);
3309 while (gmx_mtop_ilistloop_next(iloop, &il, &nmol))
3311 for (ftype = 0; ftype < F_NRE; ftype++)
3313 if ((interaction_function[ftype].flags & IF_BOND) &&
/* il[ftype].nr counts parameter-index + atom entries per interaction,
 * so dividing by 1 + NRAL(ftype) yields the interaction count. */
3316 n += nmol*il[ftype].nr/(1 + NRAL(ftype));
/* Reads an integer from environment variable \p env_var, returning \p def
 * when it is unset (parse-failure handling elided from this view).  Logs
 * the value found to \p fplog. */
3324 static int dd_getenv(FILE *fplog, const char *env_var, int def)
3330 val = getenv(env_var);
3333 if (sscanf(val, "%20d", &nst) <= 0)
3339 fprintf(fplog, "Found env.var. %s = %s, using value %d\n",
/* Emits a warning to stderr and the log file.  NOTE(review): the guards are
 * elided here — presumably stderr output is master-only and the fplog write
 * is skipped when fplog is null; confirm against the full source. */
3347 static void dd_warning(const t_commrec *cr, FILE *fplog, const char *warn_string)
3351 fprintf(stderr, "\n%s\n", warn_string);
3355 fprintf(fplog, "\n%s\n", warn_string);
/* Validates run settings against domain-decomposition restrictions:
 * screw PBC only with x-only decomposition, no simple neighbor search,
 * no nstlist=0; warns for angular COM removal with PBC. */
3359 static void check_dd_restrictions(t_commrec *cr, const gmx_domdec_t *dd,
3360 const t_inputrec *ir, FILE *fplog)
3362 if (ir->ePBC == epbcSCREW &&
3363 (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
3365 gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction", epbc_names[ir->ePBC]);
3368 if (ir->ns_type == ensSIMPLE)
3370 gmx_fatal(FARGS, "Domain decomposition does not support simple neighbor searching, use grid searching or run with one MPI rank");
3373 if (ir->nstlist == 0)
3375 gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
3378 if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
3380 dd_warning(cr, fplog, "comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
/* Returns the smallest average cell size over the decomposed dimensions,
 * based on the initial box size scaled by the skew factor. */
3384 static real average_cellsize_min(gmx_domdec_t *dd, gmx_ddbox_t *ddbox)
3389 r = ddbox->box_size[XX];
3390 for (di = 0; di < dd->ndim; di++)
3393 /* Check using the initial average cell size */
3394 r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
3400 /*! \brief Depending on the DLB initial value return the DLB switched off state or issue an error.
 *
 * If the user explicitly requested DLB (-dlb yes), a fatal error is raised
 * with \p reasonStr appended; if DLB was in the auto state, a note is
 * printed instead.  Always returns edlbsOffForever (unreachable after the
 * fatal error in the first branch).
3402 static int forceDlbOffOrBail(int cmdlineDlbState,
3403 const std::string &reasonStr,
3407 std::string dlbNotSupportedErr = "Dynamic load balancing requested, but ";
3408 std::string dlbDisableNote = "NOTE: disabling dynamic load balancing as ";
3410 if (cmdlineDlbState == edlbsOnUser)
/* Pass the message as data, not as the format string: a '%' in reasonStr
 * would otherwise be treated as a printf conversion specifier. */
3412 gmx_fatal(FARGS, "%s", (dlbNotSupportedErr + reasonStr).c_str());
3414 else if (cmdlineDlbState == edlbsOffCanTurnOn)
3416 dd_warning(cr, fplog, (dlbDisableNote + reasonStr + "\n").c_str());
3418 return edlbsOffForever;
3421 /*! \brief Return the dynamic load balancer's initial state based on initial conditions and user inputs.
3423 * This function parses the parameters of "-dlb" command line option setting
3424 * corresponding state values. Then it checks the consistency of the determined
3425 * state with other run parameters and settings. As a result, the initial state
3426 * may be altered or an error may be thrown if incompatibility of options is detected.
3428 * \param [in] fplog Pointer to mdrun log file.
3429 * \param [in] cr Pointer to MPI communication object.
3430 * \param [in] dlbOption Enum value for the DLB option.
3431 * \param [in] bRecordLoad True if the load balancer is recording load information.
3432 * \param [in] mdrunOptions Options for mdrun.
3433 * \param [in] ir Pointer mdrun to input parameters.
3434 * \returns DLB initial/startup state.
3436 static int determineInitialDlbState(FILE *fplog, t_commrec *cr,
3437 DlbOption dlbOption, gmx_bool bRecordLoad,
3438 const MdrunOptions &mdrunOptions,
3439 const t_inputrec *ir)
3441 int dlbState = edlbsOffCanTurnOn;
3445 case DlbOption::turnOnWhenUseful: dlbState = edlbsOffCanTurnOn; break;
3446 case DlbOption::no: dlbState = edlbsOffUser; break;
3447 case DlbOption::yes: dlbState = edlbsOnUser; break;
3448 default: gmx_incons("Invalid dlbOption enum value");
3451 /* Reruns don't support DLB: bail or override auto mode */
3452 if (mdrunOptions.rerun)
3454 std::string reasonStr = "it is not supported in reruns.";
3455 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3458 /* Unsupported integrators */
3459 if (!EI_DYNAMICS(ir->eI))
3461 auto reasonStr = gmx::formatString("it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
3462 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3465 /* Without cycle counters we can't time work to balance on */
3468 std::string reasonStr = "cycle counters unsupported or not enabled in the operating system kernel.";
3469 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3472 if (mdrunOptions.reproducible)
3474 std::string reasonStr = "you started a reproducible run.";
3479 case edlbsOffForever:
3480 GMX_RELEASE_ASSERT(false, "edlbsOffForever is not a valid initial state");
3482 case edlbsOffCanTurnOn:
3483 return forceDlbOffOrBail(dlbState, reasonStr, cr, fplog);
3485 case edlbsOnCanTurnOff:
3486 GMX_RELEASE_ASSERT(false, "edlbsOffCanTurnOff is not a valid initial state");
3489 return forceDlbOffOrBail(dlbState, reasonStr + " In load balanced runs binary reproducibility cannot be ensured.", cr, fplog);
3492 gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice", dlbState);
/* Fills dd->dim with the decomposed dimensions (those with nc > 1) and sets
 * dd->ndim.  Default order is x,y,z; the GMX_DD_ORDER_ZYX env. var. reverses
 * it to z,y,x. */
3500 static void set_dd_dim(FILE *fplog, gmx_domdec_t *dd)
3503 if (getenv("GMX_DD_ORDER_ZYX") != nullptr)
3505 /* Decomposition order z,y,x */
3508 fprintf(fplog, "Using domain decomposition order z, y, x\n");
3510 for (int dim = DIM-1; dim >= 0; dim--)
3512 if (dd->nc[dim] > 1)
3514 dd->dim[dd->ndim++] = dim;
3520 /* Decomposition order x,y,z */
3521 for (int dim = 0; dim < DIM; dim++)
3523 if (dd->nc[dim] > 1)
3525 dd->dim[dd->ndim++] = dim;
3532 /* Set dim[0] to avoid extra checks on ndim in several places */
/* Allocates and zero-initializes the DD communication bookkeeping struct:
 * load-balancing counters, per-range atom counts, cycle/load accumulators
 * and the balance-region handle. */
3537 static gmx_domdec_comm_t *init_dd_comm()
3539 gmx_domdec_comm_t *comm = new gmx_domdec_comm_t;
3541 comm->n_load_have = 0;
3542 comm->n_load_collect = 0;
3544 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
3546 comm->sum_nat[i] = 0;
3550 comm->load_step = 0;
3553 clear_ivec(comm->load_lim);
3557 /* This should be replaced by a unique pointer */
3558 comm->balanceRegion = ddBalanceRegionAllocate();
3563 /*! \brief Set the cell size and interaction limits, as well as the DD grid
 *
 * Determines the initial DLB state, the communication cut-offs (non-bonded,
 * bonded, constraints), the minimum DD cell size, and either validates a
 * user-supplied DD grid or chooses one (plus the PME rank split).  Finally
 * derives the PME decomposition and static load-balancing fractions.
 * NOTE(review): many structural lines are elided in this view. */
3564 static void set_dd_limits_and_grid(FILE *fplog, t_commrec *cr, gmx_domdec_t *dd,
3565 const DomdecOptions &options,
3566 const MdrunOptions &mdrunOptions,
3567 const gmx_mtop_t *mtop,
3568 const t_inputrec *ir,
3570 gmx::ArrayRef<const gmx::RVec> xGlobal,
3574 real r_bonded_limit = -1;
3575 const real tenPercentMargin = 1.1;
3576 gmx_domdec_comm_t *comm = dd->comm;
3578 dd->npbcdim = ePBC2npbcdim(ir->ePBC);
3579 dd->bScrewPBC = (ir->ePBC == epbcSCREW);
3581 dd->pme_recv_f_alloc = 0;
3582 dd->pme_recv_f_buf = nullptr;
3584 /* Initialize to GPU share count to 0, might change later */
3585 comm->nrank_gpu_shared = 0;
3587 comm->dlbState = determineInitialDlbState(fplog, cr, options.dlbOption, comm->bRecordLoad, mdrunOptions, ir);
3588 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
3589 /* To consider turning DLB on after 2*nstlist steps we need to check
3590 * at partitioning count 3. Thus we need to increase the first count by 2.
3592 comm->ddPartioningCountFirstDlbOff += 2;
3596 fprintf(fplog, "Dynamic load balancing: %s\n",
3597 edlbs_names[comm->dlbState]);
3599 comm->bPMELoadBalDLBLimits = FALSE;
3601 /* Allocate the charge group/atom sorting struct */
3602 snew(comm->sort, 1);
/* bCGs: true when real (multi-atom) charge groups are present. */
3604 comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
3606 comm->bInterCGBondeds = ((ncg_mtop(mtop) > gmx_mtop_num_molecules(*mtop)) ||
3607 mtop->bIntermolecularInteractions);
3608 if (comm->bInterCGBondeds)
3610 comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
3614 comm->bInterCGMultiBody = FALSE;
3617 dd->bInterCGcons = inter_charge_group_constraints(*mtop);
3618 dd->bInterCGsettles = inter_charge_group_settles(*mtop);
3622 /* Set the cut-off to some very large value,
3623 * so we don't need if statements everywhere in the code.
3624 * We use sqrt, since the cut-off is squared in some places.
3626 comm->cutoff = GMX_CUTOFF_INF;
3630 comm->cutoff = ir->rlist;
3632 comm->cutoff_mbody = 0;
3634 comm->cellsize_limit = 0;
3635 comm->bBondComm = FALSE;
3637 /* Atoms should be able to move by up to half the list buffer size (if > 0)
3638 * within nstlist steps. Since boundaries are allowed to displace by half
3639 * a cell size, DD cells should be at least the size of the list buffer.
3641 comm->cellsize_limit = std::max(comm->cellsize_limit,
3642 ir->rlist - std::max(ir->rvdw, ir->rcoulomb));
3644 if (comm->bInterCGBondeds)
3646 if (options.minimumCommunicationRange > 0)
/* User supplied -rdd: take it as the bonded communication range. */
3648 comm->cutoff_mbody = options.minimumCommunicationRange;
3649 if (options.useBondedCommunication)
3651 comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
3655 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3657 r_bonded_limit = comm->cutoff_mbody;
3659 else if (ir->bPeriodicMols)
3661 /* Can not easily determine the required cut-off */
3662 dd_warning(cr, fplog, "NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
3663 comm->cutoff_mbody = comm->cutoff/2;
3664 r_bonded_limit = comm->cutoff_mbody;
/* Otherwise: measure the actual bonded distances from the starting
 * coordinates and broadcast the two-body (r_2b) and multi-body (r_mb)
 * maxima to all ranks. */
3672 dd_bonded_cg_distance(fplog, mtop, ir, as_rvec_array(xGlobal.data()), box,
3673 options.checkBondedInteractions,
3676 gmx_bcast(sizeof(r_2b), &r_2b, cr);
3677 gmx_bcast(sizeof(r_mb), &r_mb, cr);
3679 /* We use an initial margin of 10% for the minimum cell size,
3680 * except when we are just below the non-bonded cut-off.
3682 if (options.useBondedCommunication)
3684 if (std::max(r_2b, r_mb) > comm->cutoff)
3686 r_bonded = std::max(r_2b, r_mb);
3687 r_bonded_limit = tenPercentMargin*r_bonded;
3688 comm->bBondComm = TRUE;
3693 r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
3695 /* We determine cutoff_mbody later */
3699 /* No special bonded communication,
3700 * simply increase the DD cut-off.
3702 r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
3703 comm->cutoff_mbody = r_bonded_limit;
3704 comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
3710 "Minimum cell size due to bonded interactions: %.3f nm\n",
3713 comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
3717 if (dd->bInterCGcons && options.constraintCommunicationRange <= 0)
3719 /* There is a cell size limit due to the constraints (P-LINCS) */
3720 rconstr = constr_r_max(fplog, mtop, ir);
3724 "Estimated maximum distance required for P-LINCS: %.3f nm\n",
3726 if (rconstr > comm->cellsize_limit)
3728 fprintf(fplog, "This distance will limit the DD cell size, you can override this with -rcon\n");
3732 else if (options.constraintCommunicationRange > 0 && fplog)
3734 /* Here we do not check for dd->bInterCGcons,
3735 * because one can also set a cell size limit for virtual sites only
3736 * and at this point we don't know yet if there are intercg v-sites.
3739 "User supplied maximum distance required for P-LINCS: %.3f nm\n",
3740 options.constraintCommunicationRange);
3741 rconstr = options.constraintCommunicationRange;
3743 comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
3745 comm->cgs_gl = gmx_mtop_global_cgs(mtop);
3747 if (options.numCells[XX] > 0)
/* DD grid given explicitly with -dd: validate it against the limits. */
3749 copy_ivec(options.numCells, dd->nc);
3750 set_dd_dim(fplog, dd);
3751 set_ddbox_cr(cr, &dd->nc, ir, box, xGlobal, ddbox);
3753 if (options.numPmeRanks >= 0)
3755 cr->npmenodes = options.numPmeRanks;
3759 /* When the DD grid is set explicitly and -npme is set to auto,
3760 * don't use PME ranks. We check later if the DD grid is
3761 * compatible with the total number of ranks.
3766 real acs = average_cellsize_min(dd, ddbox);
3767 if (acs < comm->cellsize_limit)
3771 fprintf(fplog, "ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n", acs, comm->cellsize_limit);
3773 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3774 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
3775 acs, comm->cellsize_limit);
/* No explicit grid: let dd_choose_grid pick the DD grid and PME split. */
3780 set_ddbox_cr(cr, nullptr, ir, box, xGlobal, ddbox);
3782 /* We need to choose the optimal DD grid and possibly PME nodes */
3784 dd_choose_grid(fplog, cr, dd, ir, mtop, box, ddbox,
3785 options.numPmeRanks,
3786 !isDlbDisabled(comm),
3788 comm->cellsize_limit, comm->cutoff,
3789 comm->bInterCGBondeds);
3791 if (dd->nc[XX] == 0)
3794 gmx_bool bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
3795 sprintf(buf, "Change the number of ranks or mdrun option %s%s%s",
3796 !bC ? "-rdd" : "-rcon",
3797 comm->dlbState != edlbsOffUser ? " or -dds" : "",
3798 bC ? " or your LINCS settings" : "");
3800 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3801 "There is no domain decomposition for %d ranks that is compatible with the given box and a minimum cell size of %g nm\n"
3803 "Look in the log file for details on the domain decomposition",
3804 cr->nnodes-cr->npmenodes, limit, buf);
3806 set_dd_dim(fplog, dd);
3812 "Domain decomposition grid %d x %d x %d, separate PME ranks %d\n",
3813 dd->nc[XX], dd->nc[YY], dd->nc[ZZ], cr->npmenodes);
3816 dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
3817 if (cr->nnodes - dd->nnodes != cr->npmenodes)
3819 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3820 "The size of the domain decomposition grid (%d) does not match the number of ranks (%d). The total number of ranks is %d",
3821 dd->nnodes, cr->nnodes - cr->npmenodes, cr->nnodes);
3823 if (cr->npmenodes > dd->nnodes)
3825 gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
3826 "The number of separate PME ranks (%d) is larger than the number of PP ranks (%d), this is not supported.", cr->npmenodes, dd->nnodes);
3828 if (cr->npmenodes > 0)
3830 comm->npmenodes = cr->npmenodes;
3834 comm->npmenodes = dd->nnodes;
3837 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
3839 /* The following choices should match those
3840 * in comm_cost_est in domdec_setup.c.
3841 * Note that here the checks have to take into account
3842 * that the decomposition might occur in a different order than xyz
3843 * (for instance through the env.var. GMX_DD_ORDER_ZYX),
3844 * in which case they will not match those in comm_cost_est,
3845 * but since that is mainly for testing purposes that's fine.
3847 if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
3848 comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
3849 getenv("GMX_PMEONEDD") == nullptr)
3851 comm->npmedecompdim = 2;
3852 comm->npmenodes_x = dd->nc[XX];
3853 comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
3857 /* In case nc is 1 in both x and y we could still choose to
3858 * decompose pme in y instead of x, but we use x for simplicity.
3860 comm->npmedecompdim = 1;
3861 if (dd->dim[0] == YY)
3863 comm->npmenodes_x = 1;
3864 comm->npmenodes_y = comm->npmenodes;
3868 comm->npmenodes_x = comm->npmenodes;
3869 comm->npmenodes_y = 1;
3874 fprintf(fplog, "PME domain decomposition: %d x %d x %d\n",
3875 comm->npmenodes_x, comm->npmenodes_y, 1);
3880 comm->npmedecompdim = 0;
3881 comm->npmenodes_x = 0;
3882 comm->npmenodes_y = 0;
3885 snew(comm->slb_frac, DIM);
3886 if (isDlbDisabled(comm))
3888 comm->slb_frac[XX] = get_slb_frac(fplog, "x", dd->nc[XX], options.cellSizeX);
3889 comm->slb_frac[YY] = get_slb_frac(fplog, "y", dd->nc[YY], options.cellSizeY);
3890 comm->slb_frac[ZZ] = get_slb_frac(fplog, "z", dd->nc[ZZ], options.cellSizeZ);
3893 if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
3895 if (comm->bBondComm || !isDlbDisabled(comm))
3897 /* Set the bonded communication distance to halfway
3898 * the minimum and the maximum,
3899 * since the extra communication cost is nearly zero.
3901 real acs = average_cellsize_min(dd, ddbox);
3902 comm->cutoff_mbody = 0.5*(r_bonded + acs);
3903 if (!isDlbDisabled(comm))
3905 /* Check if this does not limit the scaling */
3906 comm->cutoff_mbody = std::min(comm->cutoff_mbody,
3907 options.dlbScaling*acs);
3909 if (!comm->bBondComm)
3911 /* Without bBondComm do not go beyond the n.b. cut-off */
3912 comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
3913 if (comm->cellsize_limit >= comm->cutoff)
3915 /* We don't lose a lot of efficiency
3916 * when increasing it to the n.b. cut-off.
3917 * It can even be slightly faster, because we need
3918 * less checks for the communication setup.
3920 comm->cutoff_mbody = comm->cutoff;
3923 /* Check if we did not end up below our original limit */
3924 comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
3926 if (comm->cutoff_mbody > comm->cellsize_limit)
3928 comm->cellsize_limit = comm->cutoff_mbody;
3931 /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
3936 fprintf(debug, "Bonded atom communication beyond the cut-off: %d\n"
3937 "cellsize limit %f\n",
3938 comm->bBondComm, comm->cellsize_limit);
3943 check_dd_restrictions(cr, dd, ir, fplog);
/* Applies the DLB limits to each decomposed dimension: resizes the pulse
 * index list to the DLB pulse count and installs the DLB minimum cell
 * sizes. */
3947 static void set_dlb_limits(gmx_domdec_t *dd)
3950 for (int d = 0; d < dd->ndim; d++)
3952 /* Set the number of pulses to the value for DLB */
3953 dd->comm->cd[d].ind.resize(dd->comm->cd[d].np_dlb);
3955 dd->comm->cellsize_min[dd->dim[d]] =
3956 dd->comm->cellsize_min_dlb[dd->dim[d]];
/* Turns on dynamic load balancing at \p step, unless the current minimum
 * cell size is already within 5% of the limit — in that case DLB is
 * disabled permanently instead.  When turned on, records the pre-DLB cycle
 * cost and initializes a uniform cell grid on the DLB root ranks. */
3961 static void turn_on_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
3964 gmx_domdec_comm_t *comm;
3971 cellsize_min = comm->cellsize_min[dd->dim[0]];
3972 for (d = 1; d < dd->ndim; d++)
3974 cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
3977 /* Turn off DLB if we're too close to the cell size limit. */
3978 if (cellsize_min < comm->cellsize_limit*1.05)
/* NOTE(review): the two concatenated literals produce "...limit.Will" with
 * no separating space in the emitted message — likely unintended; a fix
 * would change a runtime string, so only flagging it here. */
3980 auto str = gmx::formatString("step %" GMX_PRId64 " Measured %.1f %% performance loss due to load imbalance, "
3981 "but the minimum cell size is smaller than 1.05 times the cell size limit."
3982 "Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
3983 dd_warning(cr, fplog, str.c_str());
3985 comm->dlbState = edlbsOffForever;
3990 sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
3991 dd_warning(cr, fplog, buf);
3992 comm->dlbState = edlbsOnCanTurnOff;
3994 /* Store the non-DLB performance, so we can check if DLB actually
3995 * improves performance.
3997 GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
3998 comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
4002 /* We can set the required cell size info here,
4003 * so we do not need to communicate this.
4004 * The grid is completely uniform.
4006 for (d = 0; d < dd->ndim; d++)
4010 comm->load[d].sum_m = comm->load[d].sum;
4012 nc = dd->nc[dd->dim[d]];
4013 for (i = 0; i < nc; i++)
/* Uniform cell boundaries: fraction i/nc for boundary i. */
4015 comm->root[d]->cell_f[i] = i/(real)nc;
4018 comm->root[d]->cell_f_max0[i] = i /(real)nc;
4019 comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
4022 comm->root[d]->cell_f[nc] = 1.0;
/* Turns DLB off (reversibly) at \p step because it degraded performance;
 * DLB may be turned on again later.  Records the partitioning count so the
 * next turn-on check is delayed appropriately. */
4027 static void turn_off_dlb(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
4029 gmx_domdec_t *dd = cr->dd;
4032 sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
4033 dd_warning(cr, fplog, buf);
4034 dd->comm->dlbState = edlbsOffCanTurnOn;
4035 dd->comm->haveTurnedOffDlb = true;
4036 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
/* Permanently disables DLB at \p step after it degraded performance again;
 * only valid from the can-turn-on state (asserted). */
4039 static void turn_off_dlb_forever(FILE *fplog, const t_commrec *cr, gmx_int64_t step)
4041 GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
4043 sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
4044 dd_warning(cr, fplog, buf);
4045 cr->dd->comm->dlbState = edlbsOffForever;
/* Allocates and returns a per-charge-group flag array (one entry per global
 * charge group), initialized to FALSE; used to track which charge groups
 * are local to this domain. */
4048 static char *init_bLocalCG(const gmx_mtop_t *mtop)
4053 ncg = ncg_mtop(mtop);
4054 snew(bLocalCG, ncg);
4055 for (cg = 0; cg < ncg; cg++)
4057 bLocalCG[cg] = FALSE;
/* Initializes bonded-interaction bookkeeping for DD: builds the reverse
 * topology and, when bonded communication beyond the cut-off is enabled
 * (bBondComm), the charge-group link table and local-CG flags; otherwise
 * clears those pointers so only cut-off-based communication is used. */
4063 void dd_init_bondeds(FILE *fplog,
4065 const gmx_mtop_t *mtop,
4066 const gmx_vsite_t *vsite,
4067 const t_inputrec *ir,
4068 gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
4070 gmx_domdec_comm_t *comm;
4072 dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
4076 if (comm->bBondComm)
4078 /* Communicate atoms beyond the cut-off for bonded interactions */
4081 comm->cglink = make_charge_group_links(mtop, dd, cginfo_mb);
4083 comm->bLocalCG = init_bLocalCG(mtop);
4087 /* Only communicate atoms based on cut-off */
4088 comm->cglink = nullptr;
4089 comm->bLocalCG = nullptr;
/* Writes a summary of the DD setup to the log file: communication pulse
 * counts, cell size limits/shrink factors (with DLB) or initial cell sizes
 * (without), and the maximum allowed interaction distances for non-bonded,
 * bonded, virtual-site and constraint communication.  No-op without a log
 * file. */
4093 static void print_dd_settings(FILE *fplog, gmx_domdec_t *dd,
4094 const gmx_mtop_t *mtop, const t_inputrec *ir,
4095 gmx_bool bDynLoadBal, real dlb_scale,
4096 const gmx_ddbox_t *ddbox)
4098 gmx_domdec_comm_t *comm;
4104 if (fplog == nullptr)
4113 fprintf(fplog, "The maximum number of communication pulses is:");
4114 for (d = 0; d < dd->ndim; d++)
4116 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
4118 fprintf(fplog, "\n");
4119 fprintf(fplog, "The minimum size for domain decomposition cells is %.3f nm\n", comm->cellsize_limit);
4120 fprintf(fplog, "The requested allowed shrink of DD cells (option -dds) is: %.2f\n", dlb_scale);
4121 fprintf(fplog, "The allowed shrink of domain decomposition cells is:");
4122 for (d = 0; d < DIM; d++)
/* For non-PBC dimensions split in two cells there is no shrink limit. */
4126 if (d >= ddbox->npbcdim && dd->nc[d] == 2)
4133 comm->cellsize_min_dlb[d]/
4134 (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
4136 fprintf(fplog, " %c %.2f", dim2char(d), shrink);
4139 fprintf(fplog, "\n");
4143 set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
4144 fprintf(fplog, "The initial number of communication pulses is:");
4145 for (d = 0; d < dd->ndim; d++)
4147 fprintf(fplog, " %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
4149 fprintf(fplog, "\n");
4150 fprintf(fplog, "The initial domain decomposition cell size is:");
4151 for (d = 0; d < DIM; d++)
4155 fprintf(fplog, " %c %.2f nm",
4156 dim2char(d), dd->comm->cellsize_min[d]);
4159 fprintf(fplog, "\n\n");
4162 gmx_bool bInterCGVsites = count_intercg_vsites(mtop);
4164 if (comm->bInterCGBondeds ||
4166 dd->bInterCGcons || dd->bInterCGsettles)
4168 fprintf(fplog, "The maximum allowed distance for charge groups involved in interactions is:\n");
4169 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4170 "non-bonded interactions", "", comm->cutoff);
4174 limit = dd->comm->cellsize_limit;
4178 if (dynamic_dd_box(ddbox, ir))
4180 fprintf(fplog, "(the following are initial values, they could change due to box deformation)\n");
4182 limit = dd->comm->cellsize_min[XX];
4183 for (d = 1; d < DIM; d++)
4185 limit = std::min(limit, dd->comm->cellsize_min[d]);
4189 if (comm->bInterCGBondeds)
4191 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4192 "two-body bonded interactions", "(-rdd)",
4193 std::max(comm->cutoff, comm->cutoff_mbody));
4194 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4195 "multi-body bonded interactions", "(-rdd)",
4196 (comm->bBondComm || isDlbOn(dd->comm)) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
4200 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4201 "virtual site constructions", "(-rcon)", limit);
4203 if (dd->bInterCGcons || dd->bInterCGsettles)
4205 sprintf(buf, "atoms separated by up to %d constraints",
4207 fprintf(fplog, "%40s %-7s %6.3f nm\n",
4208 buf, "(-rcon)", limit);
4210 fprintf(fplog, "\n");
/* Determines the DLB cell-size limits and the maximum number of
 * communication pulses per dimension.
 *
 * The pulse count is derived from the ratio of the cut-off to the minimum
 * cell size (rounded down slightly to avoid marginal extra pulses),
 * optionally reduced by the -dds shrink factor, and can be overridden with
 * the GMX_DD_NPULSE env. var.  From the pulse count the per-dimension DLB
 * minimum cell sizes (cellsize_min_dlb) are set, and cutoff_mbody gets a
 * default when not yet determined. */
4216 static void set_cell_limits_dlb(gmx_domdec_t *dd,
4218 const t_inputrec *ir,
4219 const gmx_ddbox_t *ddbox)
4221 gmx_domdec_comm_t *comm;
4222 int d, dim, npulse, npulse_d_max, npulse_d;
4227 bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
4229 /* Determine the maximum number of comm. pulses in one dimension */
4231 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4233 /* Determine the maximum required number of grid pulses */
4234 if (comm->cellsize_limit >= comm->cutoff)
4236 /* Only a single pulse is required */
4239 else if (!bNoCutOff && comm->cellsize_limit > 0)
4241 /* We round down slightly here to avoid overhead due to the latency
4242 * of extra communication calls when the cut-off
4243 * would be only slightly longer than the cell size.
4244 * Later cellsize_limit is redetermined,
4245 * so we can not miss interactions due to this rounding.
4247 npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
4251 /* There is no cell size limit */
4252 npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
4255 if (!bNoCutOff && npulse > 1)
4257 /* See if we can do with less pulses, based on dlb_scale */
4259 for (d = 0; d < dd->ndim; d++)
4262 npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
4263 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
4264 npulse_d_max = std::max(npulse_d_max, npulse_d);
4266 npulse = std::min(npulse, npulse_d_max);
4269 /* This env var can override npulse */
4270 d = dd_getenv(debug, "GMX_DD_NPULSE", 0);
/* Vacuum DLB has no pulse-derived limit only while every dimension can use
 * the maximum pulse count. */
4277 comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
4278 for (d = 0; d < dd->ndim; d++)
4280 comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
4281 comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
4282 if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
4284 comm->bVacDLBNoLimit = FALSE;
4288 /* cellsize_limit is set for LINCS in init_domain_decomposition */
4289 if (!comm->bVacDLBNoLimit)
4291 comm->cellsize_limit = std::max(comm->cellsize_limit,
4292 comm->cutoff/comm->maxpulse);
4294 comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
4295 /* Set the minimum cell size for each DD dimension */
4296 for (d = 0; d < dd->ndim; d++)
4298 if (comm->bVacDLBNoLimit ||
4299 comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
4301 comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
4305 comm->cellsize_min_dlb[dd->dim[d]] =
4306 comm->cutoff/comm->cd[d].np_dlb;
4309 if (comm->cutoff_mbody <= 0)
4311 comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
/* Returns whether molecular (as opposed to whole-system) PBC treatment
 * is needed for bonded interactions with this decomposition and \p ePBC.
 * NOTE(review): part of the condition (lines between the two visible
 * return-expression lines) is elided here.
 */
4319 gmx_bool dd_bonded_molpbc(const gmx_domdec_t *dd, int ePBC)
4321 /* If each molecule is a single charge group
4322 * or we use domain decomposition for each periodic dimension,
4323 * we do not need to take pbc into account for the bonded interactions.
4325 return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
4328 (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
4331 /*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
4332 static void set_ddgrid_parameters(FILE *fplog, gmx_domdec_t *dd, real dlb_scale,
4333 const gmx_mtop_t *mtop, const t_inputrec *ir,
4334 const gmx_ddbox_t *ddbox)
4336 gmx_domdec_comm_t *comm;
/* Set up the PME grid decomposition when PME is used for
 * electrostatics and/or Lennard-Jones. */
4342 if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
4344 init_ddpme(dd, &comm->ddpme[0], 0);
4345 if (comm->npmedecompdim >= 2)
4347 init_ddpme(dd, &comm->ddpme[1], 1);
/* No PME: having separate PME ranks is a setup error */
4352 comm->npmenodes = 0;
4353 if (dd->pme_nodeid >= 0)
4355 gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
4356 "Can not have separate PME ranks without PME electrostatics");
4362 fprintf(debug, "The DD cut-off is %f\n", comm->cutoff);
4364 if (!isDlbDisabled(comm))
4366 set_cell_limits_dlb(dd, dlb_scale, ir, ddbox);
4369 print_dd_settings(fplog, dd, mtop, ir, isDlbOn(comm), dlb_scale, ddbox);
/* When DLB may turn on later, also show the settings it would use */
4370 if (comm->dlbState == edlbsOffCanTurnOn)
4374 fprintf(fplog, "When dynamic load balancing gets turned on, these settings will change to:\n");
4376 print_dd_settings(fplog, dd, mtop, ir, TRUE, dlb_scale, ddbox);
/* Estimate the fraction of the global volume covered by the local
 * zones; used to size the global-to-local atom lookup table. */
4379 if (ir->ePBC == epbcNONE)
4381 vol_frac = 1 - 1/(double)dd->nnodes;
4386 (1 + comm_box_frac(dd->nc, comm->cutoff, ddbox))/(double)dd->nnodes;
4390 fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
4392 natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
4394 dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
4397 /*! \brief Set some important DD parameters that can be modified by env.vars */
4398 static void set_dd_envvar_options(FILE *fplog, gmx_domdec_t *dd, int rank_mysim)
4400 gmx_domdec_comm_t *comm = dd->comm;
/* Each dd_getenv call reads one GMX_* environment variable,
 * falling back to the given default when it is unset. */
4402 dd->bSendRecv2 = dd_getenv(fplog, "GMX_DD_USE_SENDRECV2", 0);
4403 comm->dlb_scale_lim = dd_getenv(fplog, "GMX_DLB_MAX_BOX_SCALING", 10);
4404 comm->eFlop = dd_getenv(fplog, "GMX_DLB_BASED_ON_FLOPS", 0);
4405 int recload = dd_getenv(fplog, "GMX_DD_RECORD_LOAD", 1);
4406 comm->nstDDDump = dd_getenv(fplog, "GMX_DD_NST_DUMP", 0);
4407 comm->nstDDDumpGrid = dd_getenv(fplog, "GMX_DD_NST_DUMP_GRID", 0);
4408 comm->DD_debug = dd_getenv(fplog, "GMX_DD_DEBUG", 0);
4410 if (dd->bSendRecv2 && fplog)
4412 fprintf(fplog, "Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
4419 fprintf(fplog, "Will load balance based on FLOP count\n");
/* eFlop > 1 adds (seeded, per-rank) noise to the flop count;
 * useful for testing DLB behavior. */
4421 if (comm->eFlop > 1)
4423 srand(1 + rank_mysim);
4425 comm->bRecordLoad = TRUE;
/* Otherwise only record load when cycle counters are available
 * and recording was not disabled via GMX_DD_RECORD_LOAD. */
4429 comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
/* Default-constructs the user-facing domain decomposition options:
 * bonded-interaction checking and bonded communication enabled,
 * interleaved PP/PME rank order, automatic communication ranges (0),
 * DLB enabled when it is expected to be useful, and an unset (zero)
 * DD grid so that the cell count is chosen automatically. */
4433 DomdecOptions::DomdecOptions() :
4434 checkBondedInteractions(TRUE),
4435 useBondedCommunication(TRUE),
4437 rankOrder(DdRankOrder::pp_pme),
4438 minimumCommunicationRange(0),
4439 constraintCommunicationRange(0),
4440 dlbOption(DlbOption::turnOnWhenUseful),
4446 clear_ivec(numCells);
/* Creates and initializes the domain decomposition master struct:
 * reads env-var overrides, determines grid limits and the DD grid,
 * sets up the DD/PME communicators and, on PP ranks, the grid
 * parameters and neighbor relations. Returns the new gmx_domdec_t,
 * owned by the caller. */
4449 gmx_domdec_t *init_domain_decomposition(FILE *fplog, t_commrec *cr,
4450 const DomdecOptions &options,
4451 const MdrunOptions &mdrunOptions,
4452 const gmx_mtop_t *mtop,
4453 const t_inputrec *ir,
4455 gmx::ArrayRef<const gmx::RVec> xGlobal)
4462 "\nInitializing Domain Decomposition on %d ranks\n", cr->nnodes);
4465 dd = new gmx_domdec_t;
4467 dd->comm = init_dd_comm();
4469 /* Initialize DD partitioning counters */
4470 dd->comm->partition_step = INT_MIN;
4473 set_dd_envvar_options(fplog, dd, cr->nodeid);
4475 gmx_ddbox_t ddbox = {0};
4476 set_dd_limits_and_grid(fplog, cr, dd, options, mdrunOptions,
4481 make_dd_communicators(fplog, cr, dd, options.rankOrder);
/* Grid parameters and zone/neighbor setup are only needed on
 * particle-particle ranks, not on dedicated PME ranks. */
4483 if (thisRankHasDuty(cr, DUTY_PP))
4485 set_ddgrid_parameters(fplog, dd, options.dlbScaling, mtop, ir, &ddbox);
4487 setup_neighbor_relations(dd);
4490 /* Set overallocation to avoid frequent reallocation of arrays */
4491 set_over_alloc_dd(TRUE);
4493 clear_dd_cycle_counts(dd);
/* Checks whether the requested cut-off cutoff_req fits the current
 * DD grid (and, with DLB, the current pulse counts and cell sizes).
 * Performs a global sum over ranks so all ranks agree on the result.
 * NOTE(review): the listing is elided; the declarations of ddbox,
 * LocallyLimited, np etc. and the return statements are not visible.
 */
4498 static gmx_bool test_dd_cutoff(t_commrec *cr,
4499 t_state *state, const t_inputrec *ir,
4510 set_ddbox(dd, false, ir, state->box, true, state->x, &ddbox);
4514 for (d = 0; d < dd->ndim; d++)
/* DD_CELL_MARGIN leaves head room for cell-size fluctuations */
4518 inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
4519 if (dynamic_dd_box(&ddbox, ir))
4521 inv_cell_size *= DD_PRES_SCALE_MARGIN;
/* Number of pulses needed along this dimension for cutoff_req */
4524 np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
4526 if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
4528 if (np > dd->comm->cd[d].np_dlb)
4533 /* If a current local cell size is smaller than the requested
4534 * cut-off, we could still fix it, but this gets very complicated.
4535 * Without fixing here, we might actually need more checks.
4537 if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
4544 if (!isDlbDisabled(dd->comm))
4546 /* If DLB is not active yet, we don't need to check the grid jumps.
4547 * Actually we shouldn't, because then the grid jump data is not set.
4549 if (isDlbOn(dd->comm) &&
4550 check_grid_jump(0, dd, cutoff_req, &ddbox, FALSE))
/* Make the limitation decision globally consistent across ranks */
4555 gmx_sumi(1, &LocallyLimited, cr);
4557 if (LocallyLimited > 0)
/* Tries to change the DD communication cut-off to cutoff_req.
 * Only commits the new value when test_dd_cutoff says it fits the
 * current decomposition; returns whether the change was applied. */
4566 gmx_bool change_dd_cutoff(t_commrec *cr, t_state *state, const t_inputrec *ir,
4569 gmx_bool bCutoffAllowed;
4571 bCutoffAllowed = test_dd_cutoff(cr, state, ir, cutoff_req);
4575 cr->dd->comm->cutoff = cutoff_req;
4578 return bCutoffAllowed;
/* Called by PME load balancing: limits the DLB cell-size staggering
 * so that a cut-off of \p cutoff will still fit later. */
4581 void set_dd_dlb_max_cutoff(t_commrec *cr, real cutoff)
4583 gmx_domdec_comm_t *comm;
4585 comm = cr->dd->comm;
4587 /* Turn on the DLB limiting (might have been on already) */
4588 comm->bPMELoadBalDLBLimits = TRUE;
4590 /* Change the cut-off limit */
4591 comm->PMELoadBal_max_cutoff = cutoff;
4595 fprintf(debug, "PME load balancing set a limit to the DLB staggering such that a %f cut-off will continue to fit\n",
4596 comm->PMELoadBal_max_cutoff);
4600 /* Sets whether we should later check the load imbalance data, so that
4601 * we can trigger dynamic load balancing if enough imbalance has
4604 * Used after PME load balancing unlocks DLB, so that the check
4605 * whether DLB will be useful can happen immediately.
4607 static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue)
/* Only meaningful while DLB is off but allowed to turn on */
4609 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4611 dd->comm->bCheckWhetherToTurnDlbOn = bValue;
4615 /* Store the DD partitioning count, so we can ignore cycle counts
4616 * over the next nstlist steps, which are often slower.
4618 dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
4623 /* Returns if we should check whether there has been enough load
4624 * imbalance to trigger dynamic load balancing.
4626 static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
4628 if (dd->comm->dlbState != edlbsOffCanTurnOn)
4633 if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
4635 /* We ignore the first nstlist steps at the start of the run
4636 * or after PME load balancing or after turning DLB off, since
4637 * these often have extra allocation or cache miss overhead.
4642 if (dd->comm->cycl_n[ddCyclStep] == 0)
4644 /* We can have zero timed steps when dd_partition_system is called
4645 * more than once at the same step, e.g. with replica exchange.
4646 * Turning on DLB would trigger an assertion failure later, but is
4647 * also useless right after exchanging replicas.
4652 /* We should check whether we should use DLB directly after
4654 if (dd->comm->bCheckWhetherToTurnDlbOn)
4656 /* This flag was set when the PME load-balancing routines
4657 unlocked DLB, and should now be cleared. */
4658 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
4661 /* We check whether we should use DLB every c_checkTurnDlbOnInterval
4662 * partitionings (we do not do this every partitioning, so that we
4663 * avoid excessive communication). */
4664 if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
/* Returns whether dynamic load balancing is currently active */
4672 gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
4674 return isDlbOn(dd->comm);
/* Returns whether DLB is temporarily locked off (e.g. during PME tuning) */
4677 gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
4679 return (dd->comm->dlbState == edlbsOffTemporarilyLocked);
/* Temporarily prevents DLB from turning on; no-op unless DLB is in the
 * "off but may turn on" state. */
4682 void dd_dlb_lock(gmx_domdec_t *dd)
4684 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4685 if (dd->comm->dlbState == edlbsOffCanTurnOn)
4687 dd->comm->dlbState = edlbsOffTemporarilyLocked;
/* Releases a temporary DLB lock and requests an immediate check of
 * whether DLB should turn on. */
4691 void dd_dlb_unlock(gmx_domdec_t *dd)
4693 /* We can only lock the DLB when it is set to auto, otherwise don't do anything */
4694 if (dd->comm->dlbState == edlbsOffTemporarilyLocked)
4696 dd->comm->dlbState = edlbsOffCanTurnOn;
4697 dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
/* Merges a newly received communication buffer for pulse \p pulse into
 * the per-zone charge-group arrays: first shifts the already stored
 * groups of later zones to make room, corrects the send indices of
 * earlier pulses for that shift, then appends the received global
 * indices/positions and updates cgindex and cginfo.
 * NOTE(review): elided listing; declarations of recv_i, ncg_cell and
 * some loop bodies/braces are not visible here.
 */
4701 static void merge_cg_buffers(int ncell,
4702 gmx_domdec_comm_dim_t *cd, int pulse,
4704 gmx::ArrayRef<int> index_gl,
4706 rvec *cg_cm, rvec *recv_vr,
4707 gmx::ArrayRef<int> cgindex,
4708 cginfo_mb_t *cginfo_mb, int *cginfo)
4710 gmx_domdec_ind_t *ind, *ind_p;
4711 int p, cell, c, cg, cg0, cg1, cg_gl, nat;
4712 int shift, shift_at;
4714 ind = &cd->ind[pulse];
4716 /* First correct the already stored data */
4717 shift = ind->nrecv[ncell];
/* Walk the zones backwards so groups can be moved up in place */
4718 for (cell = ncell-1; cell >= 0; cell--)
4720 shift -= ind->nrecv[cell];
4723 /* Move the cg's present from previous grid pulses */
4724 cg0 = ncg_cell[ncell+cell];
4725 cg1 = ncg_cell[ncell+cell+1];
4726 cgindex[cg1+shift] = cgindex[cg1];
4727 for (cg = cg1-1; cg >= cg0; cg--)
4729 index_gl[cg+shift] = index_gl[cg];
4730 copy_rvec(cg_cm[cg], cg_cm[cg+shift]);
4731 cgindex[cg+shift] = cgindex[cg];
4732 cginfo[cg+shift] = cginfo[cg];
4734 /* Correct the already stored send indices for the shift */
4735 for (p = 1; p <= pulse; p++)
4737 ind_p = &cd->ind[p];
/* Find the start of this zone's sends in pulse p */
4739 for (c = 0; c < cell; c++)
4741 cg0 += ind_p->nsend[c];
4743 cg1 = cg0 + ind_p->nsend[cell];
4744 for (cg = cg0; cg < cg1; cg++)
4746 ind_p->index[cg] += shift;
4752 /* Merge in the communicated buffers */
4756 for (cell = 0; cell < ncell; cell++)
4758 cg1 = ncg_cell[ncell+cell+1] + shift;
4761 /* Correct the old cg indices */
4762 for (cg = ncg_cell[ncell+cell]; cg < cg1; cg++)
4764 cgindex[cg+1] += shift_at;
4767 for (cg = 0; cg < ind->nrecv[cell]; cg++)
4769 /* Copy this charge group from the buffer */
4770 index_gl[cg1] = recv_i[cg0];
4771 copy_rvec(recv_vr[cg0], cg_cm[cg1]);
4772 /* Add it to the cgindex */
4773 cg_gl = index_gl[cg1];
4774 cginfo[cg1] = ddcginfo(cginfo_mb, cg_gl);
4775 nat = GET_CGINFO_NATOMS(cginfo[cg1]);
4776 cgindex[cg1+1] = cgindex[cg1] + nat;
4781 shift += ind->nrecv[cell];
4782 ncg_cell[ncell+cell+1] = cg1;
/* Fills cell2at0/cell2at1 for every pulse of \p cd with the atom index
 * ranges of each zone, derived from the atom-group block boundaries.
 * Used for copying out-of-place communication buffers. */
4786 static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
4789 const BlockRanges &atomGroups)
4791 /* Store the atom block boundaries for easy copying of communication buffers
4794 for (int zone = 0; zone < nzone; zone++)
4796 for (gmx_domdec_ind_t &ind : cd->ind)
4798 ind.cell2at0[zone] = atomGroups.index[cg];
4799 cg += ind.nrecv[zone];
4800 ind.cell2at1[zone] = atomGroups.index[cg];
/* Returns whether charge group \p cg_gl is bonded-linked (via \p link)
 * to at least one charge group that is not present locally according
 * to \p bLocalCG. NOTE(review): the return statements are elided. */
4805 static gmx_bool missing_link(t_blocka *link, int cg_gl, char *bLocalCG)
4811 for (i = link->index[cg_gl]; i < link->index[cg_gl+1]; i++)
4813 if (!bLocalCG[link->a[i]])
4822 /* Domain corners for communication, a maximum of 4 i-zones see a j domain */
4824 real c[DIM][4]; /* the corners for the non-bonded communication */
4825 real cr0; /* corner for rounding */
4826 real cr1[4]; /* corners for rounding */
4827 real bc[DIM]; /* corners for bonded communication */
4828 real bcr1; /* corner for rounding for bonded communication */
4831 /* Determine the corners of the domain(s) we are communicating with */
4833 set_dd_corners(const gmx_domdec_t *dd,
4834 int dim0, int dim1, int dim2,
4838 const gmx_domdec_comm_t *comm;
4839 const gmx_domdec_zones_t *zones;
4844 zones = &comm->zones;
4846 /* Keep the compiler happy */
4850 /* The first dimension is equal for all cells */
4851 c->c[0][0] = comm->cell_x0[dim0];
/* With only rectangular cells (no DLB) the bonded corner equals
 * the non-bonded one; with DLB staggered corners are used below. */
4854 c->bc[0] = c->c[0][0];
4859 /* This cell row is only seen from the first row */
4860 c->c[1][0] = comm->cell_x0[dim1];
4861 /* All rows can see this row */
4862 c->c[1][1] = comm->cell_x0[dim1];
4863 if (isDlbOn(dd->comm))
4865 c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
4868 /* For the multi-body distance we need the maximum */
4869 c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
4872 /* Set the upper-right corner for rounding */
4873 c->cr0 = comm->cell_x1[dim0];
/* Third DD dimension: one corner per (potentially) visible j-zone */
4878 for (j = 0; j < 4; j++)
4880 c->c[2][j] = comm->cell_x0[dim2];
4882 if (isDlbOn(dd->comm))
4884 /* Use the maximum of the i-cells that see a j-cell */
4885 for (i = 0; i < zones->nizone; i++)
4887 for (j = zones->izone[i].j0; j < zones->izone[i].j1; j++)
4892 std::max(c->c[2][j-4],
4893 comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
4899 /* For the multi-body distance we need the maximum */
4900 c->bc[2] = comm->cell_x0[dim2];
4901 for (i = 0; i < 2; i++)
4903 for (j = 0; j < 2; j++)
4905 c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
4911 /* Set the upper-right corner for rounding */
4912 /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
4913 * Only cell (0,0,0) can see cell 7 (1,1,1)
4915 c->cr1[0] = comm->cell_x1[dim1];
4916 c->cr1[3] = comm->cell_x1[dim1];
4917 if (isDlbOn(dd->comm))
4919 c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
4922 /* For the multi-body distance we need the maximum */
4923 c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
4930 /* Add the atom groups we need to send in this pulse from this zone to
4931 * \p localAtomGroups and \p work
/* For each charge group in [cg0, cg1) the (squared) distance to the
 * relevant zone corner(s) is computed — rectangular when possible,
 * otherwise with full triclinic corrections and rounding — and groups
 * within the non-bonded cut-off r_comm2, or within the bonded cut-off
 * r_bcomm2 when bonded communication requires them, are appended to
 * the send buffers in \p work.
 * NOTE(review): this listing is elided; several parameters (cg0, cg1,
 * cg_cm, cginfo, box, normal, sf2_round, bDistMB, bDist2B, bBondComm),
 * local declarations (rn, rb, posPbc, nsend_z, nat) and braces are
 * not visible here.
 */
4934 get_zone_pulse_cgs(gmx_domdec_t *dd,
4935 int zonei, int zone,
4937 gmx::ArrayRef<const int> globalAtomGroupIndices,
4938 const gmx::BlockRanges &atomGroups,
4939 int dim, int dim_ind,
4940 int dim0, int dim1, int dim2,
4941 real r_comm2, real r_bcomm2,
4943 bool distanceIsTriclinic,
4945 real skew_fac2_d, real skew_fac_01,
4946 rvec *v_d, rvec *v_0, rvec *v_1,
4947 const dd_corners_t *c,
4949 gmx_bool bDistBonded,
4955 std::vector<int> *localAtomGroups,
4956 dd_comm_setup_work_t *work)
4958 gmx_domdec_comm_t *comm;
4960 gmx_bool bDistMB_pulse;
4962 real r2, rb2, r, tric_sh;
4969 bScrew = (dd->bScrewPBC && dim == XX);
/* Multi-body distances only matter in pulses flagged bDistBonded */
4971 bDistMB_pulse = (bDistMB && bDistBonded);
4973 /* Unpack the work data */
4974 std::vector<int> &ibuf = work->atomGroupBuffer;
4975 std::vector<gmx::RVec> &vbuf = work->positionBuffer;
4979 for (cg = cg0; cg < cg1; cg++)
4983 if (!distanceIsTriclinic)
4985 /* Rectangular direction, easy */
4986 r = cg_cm[cg][dim] - c->c[dim_ind][zone];
4993 r = cg_cm[cg][dim] - c->bc[dim_ind];
4999 /* Rounding gives at most a 16% reduction
5000 * in communicated atoms
5002 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
5004 r = cg_cm[cg][dim0] - c->cr0;
5005 /* This is the first dimension, so always r >= 0 */
5012 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5014 r = cg_cm[cg][dim1] - c->cr1[zone];
5021 r = cg_cm[cg][dim1] - c->bcr1;
5031 /* Triclinic direction, more complicated */
5034 /* Rounding, conservative as the skew_fac multiplication
5035 * will slightly underestimate the distance.
5037 if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
5039 rn[dim0] = cg_cm[cg][dim0] - c->cr0;
5040 for (i = dim0+1; i < DIM; i++)
5042 rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
5044 r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
5047 rb[dim0] = rn[dim0];
5050 /* Take care that the cell planes along dim0 might not
5051 * be orthogonal to those along dim1 and dim2.
5053 for (i = 1; i <= dim_ind; i++)
5056 if (normal[dim0][dimd] > 0)
5058 rn[dimd] -= rn[dim0]*normal[dim0][dimd];
5061 rb[dimd] -= rb[dim0]*normal[dim0][dimd];
5066 if (dim_ind == 2 && (zonei == 2 || zonei == 3))
5068 rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
5070 for (i = dim1+1; i < DIM; i++)
5072 tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
5074 rn[dim1] += tric_sh;
5077 r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
5078 /* Take care of coupling of the distances
5079 * to the planes along dim0 and dim1 through dim2.
5081 r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
5082 /* Take care that the cell planes along dim1
5083 * might not be orthogonal to that along dim2.
5085 if (normal[dim1][dim2] > 0)
5087 rn[dim2] -= rn[dim1]*normal[dim1][dim2];
5093 cg_cm[cg][dim1] - c->bcr1 + tric_sh;
5096 rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
5097 /* Take care of coupling of the distances
5098 * to the planes along dim0 and dim1 through dim2.
5100 rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
5101 /* Take care that the cell planes along dim1
5102 * might not be orthogonal to that along dim2.
5104 if (normal[dim1][dim2] > 0)
5106 rb[dim2] -= rb[dim1]*normal[dim1][dim2];
5111 /* The distance along the communication direction */
5112 rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
5114 for (i = dim+1; i < DIM; i++)
5116 tric_sh -= cg_cm[cg][i]*v_d[i][dim];
5121 r2 += rn[dim]*rn[dim]*skew_fac2_d;
5122 /* Take care of coupling of the distances
5123 * to the planes along dim0 and dim1 through dim2.
5125 if (dim_ind == 1 && zonei == 1)
5127 r2 -= rn[dim0]*rn[dim]*skew_fac_01;
5133 rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
5136 rb2 += rb[dim]*rb[dim]*skew_fac2_d;
5137 /* Take care of coupling of the distances
5138 * to the planes along dim0 and dim1 through dim2.
5140 if (dim_ind == 1 && zonei == 1)
5142 rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
/* Select the group when within the non-bonded cut-off, or within the
 * bonded cut-off and actually missing a bonded partner (bBondComm). */
5150 ((bDistMB && rb2 < r_bcomm2) ||
5151 (bDist2B && r2 < r_bcomm2)) &&
5153 (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
5154 missing_link(comm->cglink, globalAtomGroupIndices[cg],
5157 /* Store the local and global atom group indices and position */
5158 localAtomGroups->push_back(cg);
5159 ibuf.push_back(globalAtomGroupIndices[cg]);
5163 if (dd->ci[dim] == 0)
5165 /* Correct cg_cm for pbc */
5166 rvec_add(cg_cm[cg], box[dim], posPbc);
/* Screw PBC: mirror y and z when shifting over the x boundary */
5169 posPbc[YY] = box[YY][YY] - posPbc[YY];
5170 posPbc[ZZ] = box[ZZ][ZZ] - posPbc[ZZ];
5175 copy_rvec(cg_cm[cg], posPbc);
5177 vbuf.emplace_back(posPbc[XX], posPbc[YY], posPbc[ZZ]);
5179 nat += atomGroups.index[cg+1] - atomGroups.index[cg];
5184 work->nsend_zone = nsend_z;
/* Resets the per-thread communication setup buffers and counters so
 * \p work can be reused for the next zone/pulse. */
5187 static void clearCommSetupData(dd_comm_setup_work_t *work)
5189 work->localAtomGroupBuffer.clear();
5190 work->atomGroupBuffer.clear();
5191 work->positionBuffer.clear();
5193 work->nsend_zone = 0;
/* Sets up the halo communication for all DD dimensions and pulses:
 * selects the charge groups to send (multi-threaded, via
 * get_zone_pulse_cgs), exchanges counts and data with the neighboring
 * ranks, and extends the local charge-group/atom bookkeeping
 * (global indices, cginfo, zone ranges, atom ranges) with the
 * received groups.
 * NOTE(review): elided listing; the fr parameter, several local
 * declarations (rn/rb/sf2_round etc.) and many braces are not visible.
 */
5196 static void setup_dd_communication(gmx_domdec_t *dd,
5197 matrix box, gmx_ddbox_t *ddbox,
5199 t_state *state, PaddedRVecVector *f)
5201 int dim_ind, dim, dim0, dim1, dim2, dimd, nat_tot;
5202 int nzone, nzone_send, zone, zonei, cg0, cg1;
5203 int c, i, cg, cg_gl, nrcg;
5204 int *zone_cg_range, pos_cg;
5205 gmx_domdec_comm_t *comm;
5206 gmx_domdec_zones_t *zones;
5207 gmx_domdec_comm_dim_t *cd;
5208 cginfo_mb_t *cginfo_mb;
5209 gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
5210 real r_comm2, r_bcomm2;
5211 dd_corners_t corners;
5212 rvec *cg_cm, *normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
5213 real skew_fac2_d, skew_fac_01;
5218 fprintf(debug, "Setting up DD communication\n");
5223 if (comm->dth.empty())
5225 /* Initialize the thread data.
5226 * This can not be done in init_domain_decomposition,
5227 * as the numbers of threads is determined later.
5229 int numThreads = gmx_omp_nthreads_get(emntDomdec);
5230 comm->dth.resize(numThreads);
/* With the Verlet scheme the atom positions themselves serve as the
 * charge-group centers. */
5233 switch (fr->cutoff_scheme)
5239 cg_cm = as_rvec_array(state->x.data());
5242 gmx_incons("unimplemented");
5246 bBondComm = comm->bBondComm;
5248 /* Do we need to determine extra distances for multi-body bondeds? */
5249 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5251 /* Do we need to determine extra distances for only two-body bondeds? */
5252 bDist2B = (bBondComm && !bDistMB);
5254 r_comm2 = gmx::square(comm->cutoff);
5255 r_bcomm2 = gmx::square(comm->cutoff_mbody);
5259 fprintf(debug, "bBondComm %d, r_bc %f\n", bBondComm, std::sqrt(r_bcomm2));
5262 zones = &comm->zones;
5265 dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
5266 dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
5268 set_dd_corners(dd, dim0, dim1, dim2, bDistMB, &corners);
5270 /* Triclinic stuff */
5271 normal = ddbox->normal;
5275 v_0 = ddbox->v[dim0];
5276 if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
5278 /* Determine the coupling coefficient for the distances
5279 * to the cell planes along dim0 and dim1 through dim2.
5280 * This is required for correct rounding.
5283 ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
5286 fprintf(debug, "\nskew_fac_01 %f\n", skew_fac_01);
5292 v_1 = ddbox->v[dim1];
5295 zone_cg_range = zones->cg_range;
5296 cginfo_mb = fr->cginfo_mb;
/* Zone 0 (home zone) is populated before any communication */
5298 zone_cg_range[0] = 0;
5299 zone_cg_range[1] = dd->ncg_home;
5300 comm->zone_ncg1[0] = dd->ncg_home;
5301 pos_cg = dd->ncg_home;
5303 nat_tot = comm->atomRanges.numHomeAtoms();
5305 for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
5307 dim = dd->dim[dim_ind];
5308 cd = &comm->cd[dim_ind];
5310 /* Check if we need to compute triclinic distances along this dim */
5311 bool distanceIsTriclinic = false;
5312 for (i = 0; i <= dim_ind; i++)
5314 if (ddbox->tric_dir[dd->dim[i]])
5316 distanceIsTriclinic = true;
5320 if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
5322 /* No pbc in this dimension, the first node should not comm. */
5330 v_d = ddbox->v[dim];
5331 skew_fac2_d = gmx::square(ddbox->skew_fac[dim]);
5333 cd->receiveInPlace = true;
5334 for (int p = 0; p < cd->numPulses(); p++)
5336 /* Only atoms communicated in the first pulse are used
5337 * for multi-body bonded interactions or for bBondComm.
5339 bDistBonded = ((bDistMB || bDist2B) && p == 0);
5341 gmx_domdec_ind_t *ind = &cd->ind[p];
5343 /* Thread 0 writes in the global index array */
5345 clearCommSetupData(&comm->dth[0]);
5347 for (zone = 0; zone < nzone_send; zone++)
5349 if (dim_ind > 0 && distanceIsTriclinic)
5351 /* Determine slightly more optimized skew_fac's
5353 * This reduces the number of communicated atoms
5354 * by about 10% for 3D DD of rhombic dodecahedra.
5356 for (dimd = 0; dimd < dim; dimd++)
5358 sf2_round[dimd] = 1;
5359 if (ddbox->tric_dir[dimd])
5361 for (i = dd->dim[dimd]+1; i < DIM; i++)
5363 /* If we are shifted in dimension i
5364 * and the cell plane is tilted forward
5365 * in dimension i, skip this coupling.
5367 if (!(zones->shift[nzone+zone][i] &&
5368 ddbox->v[dimd][i][dimd] >= 0))
5371 gmx::square(ddbox->v[dimd][i][dimd]);
5374 sf2_round[dimd] = 1/sf2_round[dimd];
5379 zonei = zone_perm[dim_ind][zone];
5382 /* Here we permute the zones to obtain a convenient order
5383 * for neighbor searching
5385 cg0 = zone_cg_range[zonei];
5386 cg1 = zone_cg_range[zonei+1];
5390 /* Look only at the cg's received in the previous grid pulse
5392 cg1 = zone_cg_range[nzone+zone+1];
5393 cg0 = cg1 - cd->ind[p-1].nrecv[zone];
/* Split the [cg0,cg1) range statically over the OpenMP threads;
 * each thread fills its own dd_comm_setup_work_t buffers. */
5396 const int numThreads = static_cast<int>(comm->dth.size());
5397 #pragma omp parallel for num_threads(numThreads) schedule(static)
5398 for (int th = 0; th < numThreads; th++)
5402 dd_comm_setup_work_t &work = comm->dth[th];
5404 /* Retain data accumulated into buffers of thread 0 */
5407 clearCommSetupData(&work);
5410 int cg0_th = cg0 + ((cg1 - cg0)* th )/numThreads;
5411 int cg1_th = cg0 + ((cg1 - cg0)*(th+1))/numThreads;
5413 /* Get the cg's for this pulse in this zone */
5414 get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th,
5415 dd->globalAtomGroupIndices,
5417 dim, dim_ind, dim0, dim1, dim2,
5419 box, distanceIsTriclinic,
5420 normal, skew_fac2_d, skew_fac_01,
5421 v_d, v_0, v_1, &corners, sf2_round,
5422 bDistBonded, bBondComm,
5425 th == 0 ? &ind->index : &work.localAtomGroupBuffer,
5428 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
5431 std::vector<int> &atomGroups = comm->dth[0].atomGroupBuffer;
5432 std::vector<gmx::RVec> &positions = comm->dth[0].positionBuffer;
5433 ind->nsend[zone] = comm->dth[0].nsend_zone;
5434 /* Append data of threads>=1 to the communication buffers */
5435 for (int th = 1; th < numThreads; th++)
5437 const dd_comm_setup_work_t &dth = comm->dth[th];
5439 ind->index.insert(ind->index.end(), dth.localAtomGroupBuffer.begin(), dth.localAtomGroupBuffer.end());
5440 atomGroups.insert(atomGroups.end(), dth.atomGroupBuffer.begin(), dth.atomGroupBuffer.end());
5441 positions.insert(positions.end(), dth.positionBuffer.begin(), dth.positionBuffer.end());
5442 comm->dth[0].nat += dth.nat;
5443 ind->nsend[zone] += dth.nsend_zone;
5446 /* Clear the counts in case we do not have pbc */
5447 for (zone = nzone_send; zone < nzone; zone++)
5449 ind->nsend[zone] = 0;
/* The last two nsend slots hold the group and atom totals */
5451 ind->nsend[nzone] = ind->index.size();
5452 ind->nsend[nzone + 1] = comm->dth[0].nat;
5453 /* Communicate the number of cg's and atoms to receive */
5454 ddSendrecv(dd, dim_ind, dddirBackward,
5455 ind->nsend, nzone+2,
5456 ind->nrecv, nzone+2);
5460 /* We can receive in place if only the last zone is not empty */
5461 for (zone = 0; zone < nzone-1; zone++)
5463 if (ind->nrecv[zone] > 0)
5465 cd->receiveInPlace = false;
5470 int receiveBufferSize = 0;
5471 if (!cd->receiveInPlace)
5473 receiveBufferSize = ind->nrecv[nzone];
5475 /* These buffers are actually only needed with in-place */
/* NOTE(review): the buffers are only sized when !receiveInPlace, so
 * "in-place" in the comment above looks inverted — verify. */
5476 DDBufferAccess<int> globalAtomGroupBuffer(comm->intBuffer, receiveBufferSize);
5477 DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, receiveBufferSize);
5479 dd_comm_setup_work_t &work = comm->dth[0];
5481 /* Make space for the global cg indices */
5482 int numAtomGroupsNew = pos_cg + ind->nrecv[nzone];
5483 dd->globalAtomGroupIndices.resize(numAtomGroupsNew);
5484 dd->atomGroups_.index.resize(numAtomGroupsNew + 1);
5485 /* Communicate the global cg indices */
5486 gmx::ArrayRef<int> integerBufferRef;
5487 if (cd->receiveInPlace)
5489 integerBufferRef = gmx::arrayRefFromArray(dd->globalAtomGroupIndices.data() + pos_cg, ind->nrecv[nzone]);
5493 integerBufferRef = globalAtomGroupBuffer.buffer;
5495 ddSendrecv<int>(dd, dim_ind, dddirBackward,
5496 work.atomGroupBuffer, integerBufferRef);
5498 /* Make space for cg_cm */
5499 dd_check_alloc_ncg(fr, state, f, pos_cg + ind->nrecv[nzone]);
5500 if (fr->cutoff_scheme == ecutsGROUP)
5506 cg_cm = as_rvec_array(state->x.data());
5508 /* Communicate cg_cm */
5509 gmx::ArrayRef<gmx::RVec> rvecBufferRef;
5510 if (cd->receiveInPlace)
5512 rvecBufferRef = gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec *>(cg_cm + pos_cg), ind->nrecv[nzone]);
5516 rvecBufferRef = rvecBuffer.buffer;
5518 ddSendrecv<gmx::RVec>(dd, dim_ind, dddirBackward,
5519 work.positionBuffer, rvecBufferRef);
5521 /* Make the charge group index */
5522 if (cd->receiveInPlace)
5524 zone = (p == 0 ? 0 : nzone - 1);
5525 while (zone < nzone)
5527 for (cg = 0; cg < ind->nrecv[zone]; cg++)
5529 cg_gl = dd->globalAtomGroupIndices[pos_cg];
5530 fr->cginfo[pos_cg] = ddcginfo(cginfo_mb, cg_gl);
5531 nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
5532 dd->atomGroups_.index[pos_cg + 1] = dd->atomGroups_.index[pos_cg] + nrcg;
5535 /* Update the charge group presence,
5536 * so we can use it in the next pass of the loop.
5538 comm->bLocalCG[cg_gl] = TRUE;
5544 comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
5547 zone_cg_range[nzone+zone] = pos_cg;
5552 /* This part of the code is never executed with bBondComm. */
5553 merge_cg_buffers(nzone, cd, p, zone_cg_range,
5554 dd->globalAtomGroupIndices, integerBufferRef.data(),
5555 cg_cm, as_rvec_array(rvecBufferRef.data()),
5556 dd->atomGroups_.index,
5557 fr->cginfo_mb, fr->cginfo);
5558 pos_cg += ind->nrecv[nzone];
5560 nat_tot += ind->nrecv[nzone+1];
5562 if (!cd->receiveInPlace)
5564 /* Store the atom block for easy copying of communication buffers */
5565 make_cell2at_index(cd, nzone, zone_cg_range[nzone], dd->atomGroups());
5570 comm->atomRanges.setEnd(DDAtomRanges::Type::Zones, nat_tot);
5574 /* We don't need to update cginfo, since that was already done above.
5575 * So we pass NULL for the forcerec.
5577 dd_set_cginfo(dd->globalAtomGroupIndices,
5578 dd->ncg_home, dd->globalAtomGroupIndices.size(),
5579 nullptr, comm->bLocalCG);
5584 fprintf(debug, "Finished setting up DD communication, zones:");
5585 for (c = 0; c < zones->n; c++)
5587 fprintf(debug, " %d", zones->cg_range[c+1]-zones->cg_range[c]);
5589 fprintf(debug, "\n");
/* Derives the per-i-zone charge-group boundaries (cg1) and the
 * j-charge-group ranges (jcg0/jcg1) from the zone cg ranges. */
5593 static void set_cg_boundaries(gmx_domdec_zones_t *zones)
5597 for (c = 0; c < zones->nizone; c++)
5599 zones->izone[c].cg1 = zones->cg_range[c+1];
5600 zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
5601 zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
5605 /* \brief Set zone dimensions for zones \p zone_start to \p zone_end-1
5607 * Also sets the atom density for the home zone when \p zone_start=0.
5608 * For this \p numMovedChargeGroupsInHomeZone needs to be passed to tell
5609 * how many charge groups will move but are still part of the current range.
5610 * \todo When converting domdec to use proper classes, all these variables
5611 * should be private and a method should return the correct count
5612 * depending on an internal state.
5614 * \param[in,out] dd The domain decomposition struct
5615 * \param[in] box The box
5616 * \param[in] ddbox The domain decomposition box struct
5617 * \param[in] zone_start The start of the zone range to set sizes for
5618 * \param[in] zone_end The end of the zone range to set sizes for
5619 * \param[in] numMovedChargeGroupsInHomeZone The number of charge groups in the home zone that should be moved but are still present in dd->comm->zones.cg_range
5621 static void set_zones_size(gmx_domdec_t *dd,
5622 matrix box, const gmx_ddbox_t *ddbox,
5623 int zone_start, int zone_end,
5624 int numMovedChargeGroupsInHomeZone)
5626 gmx_domdec_comm_t *comm;
5627 gmx_domdec_zones_t *zones;
5636 zones = &comm->zones;
5638 /* Do we need to determine extra distances for multi-body bondeds? */
5639 bDistMB = (comm->bInterCGMultiBody && isDlbOn(dd->comm) && dd->ndim > 1);
5641 for (z = zone_start; z < zone_end; z++)
5643 /* Copy cell limits to zone limits.
5644 * Valid for non-DD dims and non-shifted dims.
5646 copy_rvec(comm->cell_x0, zones->size[z].x0);
5647 copy_rvec(comm->cell_x1, zones->size[z].x1);
5650 for (d = 0; d < dd->ndim; d++)
5654 for (z = 0; z < zones->n; z++)
5656 /* With a staggered grid we have different sizes
5657 * for non-shifted dimensions.
5659 if (isDlbOn(dd->comm) && zones->shift[z][dim] == 0)
5663 zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
5664 zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
5668 zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
5669 zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
5675 rcmbs = comm->cutoff_mbody;
5676 if (ddbox->tric_dir[dim])
5678 rcs /= ddbox->skew_fac[dim];
5679 rcmbs /= ddbox->skew_fac[dim];
5682 /* Set the lower limit for the shifted zone dimensions */
5683 for (z = zone_start; z < zone_end; z++)
5685 if (zones->shift[z][dim] > 0)
5688 if (!isDlbOn(dd->comm) || d == 0)
5690 zones->size[z].x0[dim] = comm->cell_x1[dim];
5691 zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
5695 /* Here we take the lower limit of the zone from
5696 * the lowest domain of the zone below.
5700 zones->size[z].x0[dim] =
5701 comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
5707 zones->size[z].x0[dim] =
5708 zones->size[zone_perm[2][z-4]].x0[dim];
5712 zones->size[z].x0[dim] =
5713 comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
5716 /* A temporary limit, is updated below */
5717 zones->size[z].x1[dim] = zones->size[z].x0[dim];
5721 for (zi = 0; zi < zones->nizone; zi++)
5723 if (zones->shift[zi][dim] == 0)
5725 /* This takes the whole zone into account.
5726 * With multiple pulses this will lead
5727 * to a larger zone then strictly necessary.
5729 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5730 zones->size[zi].x1[dim]+rcmbs);
5738 /* Loop over the i-zones to set the upper limit of each
5741 for (zi = 0; zi < zones->nizone; zi++)
5743 if (zones->shift[zi][dim] == 0)
5745 /* We should only use zones up to zone_end */
5746 int jZoneEnd = std::min(zones->izone[zi].j1, zone_end);
5747 for (z = zones->izone[zi].j0; z < jZoneEnd; z++)
5749 if (zones->shift[z][dim] > 0)
5751 zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
5752 zones->size[zi].x1[dim]+rcs);
5759 for (z = zone_start; z < zone_end; z++)
5761 /* Initialization only required to keep the compiler happy */
5762 rvec corner_min = {0, 0, 0}, corner_max = {0, 0, 0}, corner;
5765 /* To determine the bounding box for a zone we need to find
5766 * the extreme corners of 4, 2 or 1 corners.
5768 nc = 1 << (ddbox->nboundeddim - 1);
5770 for (c = 0; c < nc; c++)
5772 /* Set up a zone corner at x=0, ignoring trilinic couplings */
5776 corner[YY] = zones->size[z].x0[YY];
5780 corner[YY] = zones->size[z].x1[YY];
5784 corner[ZZ] = zones->size[z].x0[ZZ];
5788 corner[ZZ] = zones->size[z].x1[ZZ];
5790 if (dd->ndim == 1 && dd->dim[0] < ZZ && ZZ < dd->npbcdim &&
5791 box[ZZ][1 - dd->dim[0]] != 0)
5793 /* With 1D domain decomposition the cg's are not in
5794 * the triclinic box, but triclinic x-y and rectangular y/x-z.
5795 * Shift the corner of the z-vector back to along the box
5796 * vector of dimension d, so it will later end up at 0 along d.
5797 * This can affect the location of this corner along dd->dim[0]
5798 * through the matrix operation below if box[d][dd->dim[0]]!=0.
5800 int d = 1 - dd->dim[0];
5802 corner[d] -= corner[ZZ]*box[ZZ][d]/box[ZZ][ZZ];
5804 /* Apply the triclinic couplings */
5805 assert(ddbox->npbcdim <= DIM);
5806 for (i = YY; i < ddbox->npbcdim; i++)
5808 for (j = XX; j < i; j++)
5810 corner[j] += corner[i]*box[i][j]/box[i][i];
5815 copy_rvec(corner, corner_min);
5816 copy_rvec(corner, corner_max);
5820 for (i = 0; i < DIM; i++)
5822 corner_min[i] = std::min(corner_min[i], corner[i]);
5823 corner_max[i] = std::max(corner_max[i], corner[i]);
5827 /* Copy the extreme cornes without offset along x */
5828 for (i = 0; i < DIM; i++)
5830 zones->size[z].bb_x0[i] = corner_min[i];
5831 zones->size[z].bb_x1[i] = corner_max[i];
5833 /* Add the offset along x */
5834 zones->size[z].bb_x0[XX] += zones->size[z].x0[XX];
5835 zones->size[z].bb_x1[XX] += zones->size[z].x1[XX];
5838 if (zone_start == 0)
5841 for (dim = 0; dim < DIM; dim++)
5843 vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
5845 zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0] - numMovedChargeGroupsInHomeZone)/vol;
5850 for (z = zone_start; z < zone_end; z++)
5852 fprintf(debug, "zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5854 zones->size[z].x0[XX], zones->size[z].x1[XX],
5855 zones->size[z].x0[YY], zones->size[z].x1[YY],
5856 zones->size[z].x0[ZZ], zones->size[z].x1[ZZ]);
5857 fprintf(debug, "zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
5859 zones->size[z].bb_x0[XX], zones->size[z].bb_x1[XX],
5860 zones->size[z].bb_x0[YY], zones->size[z].bb_x1[YY],
5861 zones->size[z].bb_x0[ZZ], zones->size[z].bb_x1[ZZ]);
5866 static int comp_cgsort(const void *a, const void *b)
5870 gmx_cgsort_t *cga, *cgb;
5871 cga = (gmx_cgsort_t *)a;
5872 cgb = (gmx_cgsort_t *)b;
5874 comp = cga->nsc - cgb->nsc;
5877 comp = cga->ind_gl - cgb->ind_gl;
5883 static void order_int_cg(int n, const gmx_cgsort_t *sort,
5888 /* Order the data */
5889 for (i = 0; i < n; i++)
5891 buf[i] = a[sort[i].ind];
5894 /* Copy back to the original array */
5895 for (i = 0; i < n; i++)
5901 static void order_vec_cg(int n,
5902 const gmx_cgsort_t *sort,
5904 gmx::ArrayRef<gmx::RVec> sortBuffer)
5906 GMX_ASSERT(sortBuffer.size() >= static_cast<size_t>(n), "Need a sufficiently large sorting buffer");
5908 rvec *buf = as_rvec_array(sortBuffer.data());
5910 /* Order the data */
5911 for (int i = 0; i < n; i++)
5913 copy_rvec(v[sort[i].ind], buf[i]);
5916 /* Copy back to the original array */
5917 for (int i = 0; i < n; i++)
5919 copy_rvec(buf[i], v[i]);
5923 static void order_vec_atom(int ncg,
5924 const gmx::BlockRanges *atomGroups,
5925 const gmx_cgsort_t *sort,
5926 gmx::ArrayRef<gmx::RVec> v,
5927 gmx::ArrayRef<gmx::RVec> buf)
5929 int a, atot, cg, cg0, cg1, i;
5931 if (atomGroups == nullptr)
5933 /* Avoid the useless loop of the atoms within a cg */
5934 order_vec_cg(ncg, sort, as_rvec_array(v.data()), buf);
5939 /* Order the data */
5941 for (cg = 0; cg < ncg; cg++)
5943 cg0 = atomGroups->index[sort[cg].ind];
5944 cg1 = atomGroups->index[sort[cg].ind+1];
5945 for (i = cg0; i < cg1; i++)
5947 copy_rvec(v[i], buf[a]);
5953 /* Copy back to the original array */
5954 for (a = 0; a < atot; a++)
5956 copy_rvec(buf[a], v[a]);
5960 static void ordered_sort(int nsort2, gmx_cgsort_t *sort2,
5961 int nsort_new, gmx_cgsort_t *sort_new,
5962 gmx_cgsort_t *sort1)
5966 /* The new indices are not very ordered, so we qsort them */
5967 gmx_qsort_threadsafe(sort_new, nsort_new, sizeof(sort_new[0]), comp_cgsort);
5969 /* sort2 is already ordered, so now we can merge the two arrays */
5973 while (i2 < nsort2 || i_new < nsort_new)
5977 sort1[i1++] = sort_new[i_new++];
5979 else if (i_new == nsort_new)
5981 sort1[i1++] = sort2[i2++];
5983 else if (sort2[i2].nsc < sort_new[i_new].nsc ||
5984 (sort2[i2].nsc == sort_new[i_new].nsc &&
5985 sort2[i2].ind_gl < sort_new[i_new].ind_gl))
5987 sort1[i1++] = sort2[i2++];
5991 sort1[i1++] = sort_new[i_new++];
5996 static int dd_sort_order(gmx_domdec_t *dd, t_forcerec *fr, int ncg_home_old)
5998 gmx_domdec_sort_t *sort;
5999 gmx_cgsort_t *cgsort, *sort_i;
6000 int ncg_new, nsort2, nsort_new, i, *a, moved;
6002 sort = dd->comm->sort;
6004 a = fr->ns->grid->cell_index;
6006 moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns->grid->ncells;
6008 if (ncg_home_old >= 0)
6010 /* The charge groups that remained in the same ns grid cell
6011 * are completely ordered. So we can sort efficiently by sorting
6012 * the charge groups that did move into the stationary list.
6017 for (i = 0; i < dd->ncg_home; i++)
6019 /* Check if this cg did not move to another node */
6022 if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
6024 /* This cg is new on this node or moved ns grid cell */
6025 if (nsort_new >= sort->sort_new_nalloc)
6027 sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
6028 srenew(sort->sort_new, sort->sort_new_nalloc);
6030 sort_i = &(sort->sort_new[nsort_new++]);
6034 /* This cg did not move */
6035 sort_i = &(sort->sort2[nsort2++]);
6037 /* Sort on the ns grid cell indices
6038 * and the global topology index.
6039 * index_gl is irrelevant with cell ns,
6040 * but we set it here anyhow to avoid a conditional.
6043 sort_i->ind_gl = dd->globalAtomGroupIndices[i];
6050 fprintf(debug, "ordered sort cgs: stationary %d moved %d\n",
6053 /* Sort efficiently */
6054 ordered_sort(nsort2, sort->sort2, nsort_new, sort->sort_new,
6059 cgsort = sort->sort;
6061 for (i = 0; i < dd->ncg_home; i++)
6063 /* Sort on the ns grid cell indices
6064 * and the global topology index
6066 cgsort[i].nsc = a[i];
6067 cgsort[i].ind_gl = dd->globalAtomGroupIndices[i];
6069 if (cgsort[i].nsc < moved)
6076 fprintf(debug, "qsort cgs: %d new home %d\n", dd->ncg_home, ncg_new);
6078 /* Determine the order of the charge groups using qsort */
6079 gmx_qsort_threadsafe(cgsort, dd->ncg_home, sizeof(cgsort[0]), comp_cgsort);
6085 static int dd_sort_order_nbnxn(gmx_domdec_t *dd, t_forcerec *fr)
6091 sort = dd->comm->sort->sort;
6093 nbnxn_get_atomorder(fr->nbv->nbs.get(), &a, &na);
6096 for (i = 0; i < na; i++)
6100 sort[ncg_new].ind = a[i];
6108 static void dd_sort_state(gmx_domdec_t *dd, rvec *cgcm, t_forcerec *fr, t_state *state,
6111 gmx_domdec_sort_t *sort;
6112 gmx_cgsort_t *cgsort;
6113 int ncg_new, i, *ibuf, cgsize;
6115 sort = dd->comm->sort;
6117 if (dd->ncg_home > sort->sort_nalloc)
6119 sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
6120 srenew(sort->sort, sort->sort_nalloc);
6121 srenew(sort->sort2, sort->sort_nalloc);
6123 cgsort = sort->sort;
6125 switch (fr->cutoff_scheme)
6128 ncg_new = dd_sort_order(dd, fr, ncg_home_old);
6131 ncg_new = dd_sort_order_nbnxn(dd, fr);
6134 gmx_incons("unimplemented");
6138 const gmx::BlockRanges &atomGroups = dd->atomGroups();
6140 /* We alloc with the old size, since cgindex is still old */
6141 DDBufferAccess<gmx::RVec> rvecBuffer(dd->comm->rvecBuffer, atomGroups.index[dd->ncg_home]);
6143 const gmx::BlockRanges *atomGroupsPtr = (dd->comm->bCGs ? &atomGroups : nullptr);
6145 /* Remove the charge groups which are no longer at home here */
6146 dd->ncg_home = ncg_new;
6149 fprintf(debug, "Set the new home charge group count to %d\n",
6153 /* Reorder the state */
6154 if (state->flags & (1 << estX))
6156 order_vec_atom(dd->ncg_home, atomGroupsPtr, cgsort, state->x, rvecBuffer.buffer);
6158 if (state->flags & (1 << estV))
6160 order_vec_atom(dd->ncg_home, atomGroupsPtr, cgsort, state->v, rvecBuffer.buffer);
6162 if (state->flags & (1 << estCGP))
6164 order_vec_atom(dd->ncg_home, atomGroupsPtr, cgsort, state->cg_p, rvecBuffer.buffer);
6167 if (fr->cutoff_scheme == ecutsGROUP)
6170 order_vec_cg(dd->ncg_home, cgsort, cgcm, rvecBuffer.buffer);
6173 if (dd->ncg_home+1 > sort->ibuf_nalloc)
6175 sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
6176 srenew(sort->ibuf, sort->ibuf_nalloc);
6179 /* Reorder the global cg index */
6180 order_int_cg(dd->ncg_home, cgsort, dd->globalAtomGroupIndices.data(), ibuf);
6181 /* Reorder the cginfo */
6182 order_int_cg(dd->ncg_home, cgsort, fr->cginfo, ibuf);
6183 /* Rebuild the local cg index */
6187 for (i = 0; i < dd->ncg_home; i++)
6189 cgsize = atomGroups.index[cgsort[i].ind+1] - atomGroups.index[cgsort[i].ind];
6190 ibuf[i+1] = ibuf[i] + cgsize;
6192 for (i = 0; i < dd->ncg_home+1; i++)
6194 dd->atomGroups_.index[i] = ibuf[i];
6199 for (i = 0; i < dd->ncg_home+1; i++)
6201 dd->atomGroups_.index[i] = i;
6204 /* Set the home atom number */
6205 dd->comm->atomRanges.setEnd(DDAtomRanges::Type::Home, dd->atomGroups().index[dd->ncg_home]);
6207 if (fr->cutoff_scheme == ecutsVERLET)
6209 /* The atoms are now exactly in grid order, update the grid order */
6210 nbnxn_set_atomorder(fr->nbv->nbs.get());
6214 /* Copy the sorted ns cell indices back to the ns grid struct */
6215 for (i = 0; i < dd->ncg_home; i++)
6217 fr->ns->grid->cell_index[i] = cgsort[i].nsc;
6219 fr->ns->grid->nr = dd->ncg_home;
6223 static void add_dd_statistics(gmx_domdec_t *dd)
6225 gmx_domdec_comm_t *comm = dd->comm;
6227 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6229 auto range = static_cast<DDAtomRanges::Type>(i);
6231 comm->atomRanges.end(range) - comm->atomRanges.start(range);
6236 void reset_dd_statistics_counters(gmx_domdec_t *dd)
6238 gmx_domdec_comm_t *comm = dd->comm;
6240 /* Reset all the statistics and counters for total run counting */
6241 for (int i = 0; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6243 comm->sum_nat[i] = 0;
6247 comm->load_step = 0;
6250 clear_ivec(comm->load_lim);
6255 void print_dd_statistics(const t_commrec *cr, const t_inputrec *ir, FILE *fplog)
6257 gmx_domdec_comm_t *comm = cr->dd->comm;
6259 const int numRanges = static_cast<int>(DDAtomRanges::Type::Number);
6260 gmx_sumd(numRanges, comm->sum_nat, cr);
6262 if (fplog == nullptr)
6267 fprintf(fplog, "\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
6269 for (int i = static_cast<int>(DDAtomRanges::Type::Zones); i < numRanges; i++)
6271 auto range = static_cast<DDAtomRanges::Type>(i);
6272 double av = comm->sum_nat[i]/comm->ndecomp;
6275 case DDAtomRanges::Type::Zones:
6277 " av. #atoms communicated per step for force: %d x %.1f\n",
6280 case DDAtomRanges::Type::Vsites:
6281 if (cr->dd->vsite_comm)
6284 " av. #atoms communicated per step for vsites: %d x %.1f\n",
6285 (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD) ? 3 : 2,
6289 case DDAtomRanges::Type::Constraints:
6290 if (cr->dd->constraint_comm)
6293 " av. #atoms communicated per step for LINCS: %d x %.1f\n",
6294 1 + ir->nLincsIter, av);
6298 gmx_incons(" Unknown type for DD statistics");
6301 fprintf(fplog, "\n");
6303 if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
6305 print_dd_load_av(fplog, cr->dd);
6309 void dd_partition_system(FILE *fplog,
6311 const t_commrec *cr,
6312 gmx_bool bMasterState,
6314 t_state *state_global,
6315 const gmx_mtop_t *top_global,
6316 const t_inputrec *ir,
6317 t_state *state_local,
6318 PaddedRVecVector *f,
6319 gmx::MDAtoms *mdAtoms,
6320 gmx_localtop_t *top_local,
6323 gmx::Constraints *constr,
6325 gmx_wallcycle *wcycle,
6329 gmx_domdec_comm_t *comm;
6330 gmx_ddbox_t ddbox = {0};
6332 gmx_int64_t step_pcoupl;
6333 rvec cell_ns_x0, cell_ns_x1;
6334 int ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
6335 gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
6336 gmx_bool bRedist, bSortCG, bResortAll;
6337 ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
6341 wallcycle_start(wcycle, ewcDOMDEC);
6346 // TODO if the update code becomes accessible here, use
6347 // upd->deform for this logic.
6348 bBoxChanged = (bMasterState || inputrecDeform(ir));
6349 if (ir->epc != epcNO)
6351 /* With nstpcouple > 1 pressure coupling happens.
6352 * one step after calculating the pressure.
6353 * Box scaling happens at the end of the MD step,
6354 * after the DD partitioning.
6355 * We therefore have to do DLB in the first partitioning
6356 * after an MD step where P-coupling occurred.
6357 * We need to determine the last step in which p-coupling occurred.
6358 * MRS -- need to validate this for vv?
6360 int n = ir->nstpcouple;
6363 step_pcoupl = step - 1;
6367 step_pcoupl = ((step - 1)/n)*n + 1;
6369 if (step_pcoupl >= comm->partition_step)
6375 bNStGlobalComm = (step % nstglobalcomm == 0);
6383 /* Should we do dynamic load balacing this step?
6384 * Since it requires (possibly expensive) global communication,
6385 * we might want to do DLB less frequently.
6387 if (bBoxChanged || ir->epc != epcNO)
6389 bDoDLB = bBoxChanged;
6393 bDoDLB = bNStGlobalComm;
6397 /* Check if we have recorded loads on the nodes */
6398 if (comm->bRecordLoad && dd_load_count(comm) > 0)
6400 bCheckWhetherToTurnDlbOn = dd_dlb_get_should_check_whether_to_turn_dlb_on(dd);
6402 /* Print load every nstlog, first and last step to the log file */
6403 bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
6404 comm->n_load_collect == 0 ||
6406 (step + ir->nstlist > ir->init_step + ir->nsteps)));
6408 /* Avoid extra communication due to verbose screen output
6409 * when nstglobalcomm is set.
6411 if (bDoDLB || bLogLoad || bCheckWhetherToTurnDlbOn ||
6412 (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
6414 get_load_distribution(dd, wcycle);
6419 dd_print_load(fplog, dd, step-1);
6423 dd_print_load_verbose(dd);
6426 comm->n_load_collect++;
6432 /* Add the measured cycles to the running average */
6433 const float averageFactor = 0.1f;
6434 comm->cyclesPerStepDlbExpAverage =
6435 (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
6436 averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
6438 if (comm->dlbState == edlbsOnCanTurnOff &&
6439 dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
6441 gmx_bool turnOffDlb;
6444 /* If the running averaged cycles with DLB are more
6445 * than before we turned on DLB, turn off DLB.
6446 * We will again run and check the cycles without DLB
6447 * and we can then decide if to turn off DLB forever.
6449 turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
6450 comm->cyclesPerStepBeforeDLB);
6452 dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
6455 /* To turn off DLB, we need to redistribute the atoms */
6456 dd_collect_state(dd, state_local, state_global);
6457 bMasterState = TRUE;
6458 turn_off_dlb(fplog, cr, step);
6462 else if (bCheckWhetherToTurnDlbOn)
6464 gmx_bool turnOffDlbForever = FALSE;
6465 gmx_bool turnOnDlb = FALSE;
6467 /* Since the timings are node dependent, the master decides */
6470 /* If we recently turned off DLB, we want to check if
6471 * performance is better without DLB. We want to do this
6472 * ASAP to minimize the chance that external factors
6473 * slowed down the DLB step are gone here and we
6474 * incorrectly conclude that DLB was causing the slowdown.
6475 * So we measure one nstlist block, no running average.
6477 if (comm->haveTurnedOffDlb &&
6478 comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
6479 comm->cyclesPerStepDlbExpAverage)
6481 /* After turning off DLB we ran nstlist steps in fewer
6482 * cycles than with DLB. This likely means that DLB
6483 * in not benefical, but this could be due to a one
6484 * time unlucky fluctuation, so we require two such
6485 * observations in close succession to turn off DLB
6488 if (comm->dlbSlowerPartitioningCount > 0 &&
6489 dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
6491 turnOffDlbForever = TRUE;
6493 comm->haveTurnedOffDlb = false;
6494 /* Register when we last measured DLB slowdown */
6495 comm->dlbSlowerPartitioningCount = dd->ddp_count;
6499 /* Here we check if the max PME rank load is more than 0.98
6500 * the max PP force load. If so, PP DLB will not help,
6501 * since we are (almost) limited by PME. Furthermore,
6502 * DLB will cause a significant extra x/f redistribution
6503 * cost on the PME ranks, which will then surely result
6504 * in lower total performance.
6506 if (cr->npmenodes > 0 &&
6507 dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
6513 turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
6519 gmx_bool turnOffDlbForever;
6523 turnOffDlbForever, turnOnDlb
6525 dd_bcast(dd, sizeof(bools), &bools);
6526 if (bools.turnOffDlbForever)
6528 turn_off_dlb_forever(fplog, cr, step);
6530 else if (bools.turnOnDlb)
6532 turn_on_dlb(fplog, cr, step);
6537 comm->n_load_have++;
6540 cgs_gl = &comm->cgs_gl;
6545 /* Clear the old state */
6546 clearDDStateIndices(dd, 0, 0);
6549 auto xGlobal = positionsFromStatePointer(state_global);
6551 set_ddbox(dd, true, ir,
6552 DDMASTER(dd) ? state_global->box : nullptr,
6556 distributeState(fplog, dd, state_global, *cgs_gl, ddbox, state_local, f);
6558 dd_make_local_cgs(dd, &top_local->cgs);
6560 /* Ensure that we have space for the new distribution */
6561 dd_check_alloc_ncg(fr, state_local, f, dd->ncg_home);
6563 if (fr->cutoff_scheme == ecutsGROUP)
6565 calc_cgcm(fplog, 0, dd->ncg_home,
6566 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6569 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6571 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6573 else if (state_local->ddp_count != dd->ddp_count)
6575 if (state_local->ddp_count > dd->ddp_count)
6577 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)", state_local->ddp_count, dd->ddp_count);
6580 if (state_local->ddp_count_cg_gl != state_local->ddp_count)
6582 gmx_fatal(FARGS, "Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)", state_local->ddp_count_cg_gl, state_local->ddp_count);
6585 /* Clear the old state */
6586 clearDDStateIndices(dd, 0, 0);
6588 /* Restore the atom group indices from state_local */
6589 restoreAtomGroups(dd, cgs_gl->index, state_local);
6590 make_dd_indices(dd, cgs_gl->index, 0);
6591 ncgindex_set = dd->ncg_home;
6593 if (fr->cutoff_scheme == ecutsGROUP)
6595 /* Redetermine the cg COMs */
6596 calc_cgcm(fplog, 0, dd->ncg_home,
6597 &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
6600 inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
6602 dd_set_cginfo(dd->globalAtomGroupIndices, 0, dd->ncg_home, fr, comm->bLocalCG);
6604 set_ddbox(dd, bMasterState, ir, state_local->box,
6605 true, state_local->x, &ddbox);
6607 bRedist = isDlbOn(comm);
6611 /* We have the full state, only redistribute the cgs */
6613 /* Clear the non-home indices */
6614 clearDDStateIndices(dd, dd->ncg_home, comm->atomRanges.numHomeAtoms());
6617 /* Avoid global communication for dim's without pbc and -gcom */
6618 if (!bNStGlobalComm)
6620 copy_rvec(comm->box0, ddbox.box0 );
6621 copy_rvec(comm->box_size, ddbox.box_size);
6623 set_ddbox(dd, bMasterState, ir, state_local->box,
6624 bNStGlobalComm, state_local->x, &ddbox);
6629 /* For dim's without pbc and -gcom */
6630 copy_rvec(ddbox.box0, comm->box0 );
6631 copy_rvec(ddbox.box_size, comm->box_size);
6633 set_dd_cell_sizes(dd, &ddbox, dynamic_dd_box(&ddbox, ir), bMasterState, bDoDLB,
6636 if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
6638 write_dd_grid_pdb("dd_grid", step, dd, state_local->box, &ddbox);
6641 /* Check if we should sort the charge groups */
6642 bSortCG = (bMasterState || bRedist);
6644 ncg_home_old = dd->ncg_home;
6646 /* When repartitioning we mark charge groups that will move to neighboring
6647 * DD cells, but we do not move them right away for performance reasons.
6648 * Thus we need to keep track of how many charge groups will move for
6649 * obtaining correct local charge group / atom counts.
6654 wallcycle_sub_start(wcycle, ewcsDD_REDIST);
6656 dd_redistribute_cg(fplog, step, dd, ddbox.tric_dir,
6658 !bSortCG, nrnb, &ncgindex_set, &ncg_moved);
6660 wallcycle_sub_stop(wcycle, ewcsDD_REDIST);
6663 get_nsgrid_boundaries(ddbox.nboundeddim, state_local->box,
6665 &comm->cell_x0, &comm->cell_x1,
6666 dd->ncg_home, fr->cg_cm,
6667 cell_ns_x0, cell_ns_x1, &grid_density);
6671 comm_dd_ns_cell_sizes(dd, &ddbox, cell_ns_x0, cell_ns_x1, step);
6674 switch (fr->cutoff_scheme)
6677 copy_ivec(fr->ns->grid->n, ncells_old);
6678 grid_first(fplog, fr->ns->grid, dd, &ddbox,
6679 state_local->box, cell_ns_x0, cell_ns_x1,
6680 fr->rlist, grid_density);
6683 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_old[XX], &ncells_old[YY]);
6686 gmx_incons("unimplemented");
6688 /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
6689 copy_ivec(ddbox.tric_dir, comm->tric_dir);
6693 wallcycle_sub_start(wcycle, ewcsDD_GRID);
6695 /* Sort the state on charge group position.
6696 * This enables exact restarts from this step.
6697 * It also improves performance by about 15% with larger numbers
6698 * of atoms per node.
6701 /* Fill the ns grid with the home cell,
6702 * so we can sort with the indices.
6704 set_zones_ncg_home(dd);
6706 switch (fr->cutoff_scheme)
6709 set_zones_size(dd, state_local->box, &ddbox, 0, 1, ncg_moved);
6711 nbnxn_put_on_grid(fr->nbv->nbs.get(), fr->ePBC, state_local->box,
6713 comm->zones.size[0].bb_x0,
6714 comm->zones.size[0].bb_x1,
6716 comm->zones.dens_zone0,
6718 as_rvec_array(state_local->x.data()),
6719 ncg_moved, bRedist ? comm->movedBuffer.data() : nullptr,
6720 fr->nbv->grp[eintLocal].kernel_type,
6723 nbnxn_get_ncells(fr->nbv->nbs.get(), &ncells_new[XX], &ncells_new[YY]);
6726 fill_grid(&comm->zones, fr->ns->grid, dd->ncg_home,
6727 0, dd->ncg_home, fr->cg_cm);
6729 copy_ivec(fr->ns->grid->n, ncells_new);
6732 gmx_incons("unimplemented");
6735 bResortAll = bMasterState;
6737 /* Check if we can user the old order and ns grid cell indices
6738 * of the charge groups to sort the charge groups efficiently.
6740 if (ncells_new[XX] != ncells_old[XX] ||
6741 ncells_new[YY] != ncells_old[YY] ||
6742 ncells_new[ZZ] != ncells_old[ZZ])
6749 fprintf(debug, "Step %s, sorting the %d home charge groups\n",
6750 gmx_step_str(step, sbuf), dd->ncg_home);
6752 dd_sort_state(dd, fr->cg_cm, fr, state_local,
6753 bResortAll ? -1 : ncg_home_old);
6755 /* After sorting and compacting we set the correct size */
6756 dd_resize_state(state_local, f, comm->atomRanges.numHomeAtoms());
6758 /* Rebuild all the indices */
6759 ga2la_clear(dd->ga2la);
6762 wallcycle_sub_stop(wcycle, ewcsDD_GRID);
6765 wallcycle_sub_start(wcycle, ewcsDD_SETUPCOMM);
6767 /* Setup up the communication and communicate the coordinates */
6768 setup_dd_communication(dd, state_local->box, &ddbox, fr, state_local, f);
6770 /* Set the indices */
6771 make_dd_indices(dd, cgs_gl->index, ncgindex_set);
6773 /* Set the charge group boundaries for neighbor searching */
6774 set_cg_boundaries(&comm->zones);
6776 if (fr->cutoff_scheme == ecutsVERLET)
6778 /* When bSortCG=true, we have already set the size for zone 0 */
6779 set_zones_size(dd, state_local->box, &ddbox,
6780 bSortCG ? 1 : 0, comm->zones.n,
6784 wallcycle_sub_stop(wcycle, ewcsDD_SETUPCOMM);
6787 write_dd_pdb("dd_home",step,"dump",top_global,cr,
6788 -1,as_rvec_array(state_local->x.data()),state_local->box);
6791 wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
6793 /* Extract a local topology from the global topology */
6794 for (int i = 0; i < dd->ndim; i++)
6796 np[dd->dim[i]] = comm->cd[i].numPulses();
6798 dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
6799 comm->cellsize_min, np,
6801 fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
6802 vsite, top_global, top_local);
6804 wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
6806 wallcycle_sub_start(wcycle, ewcsDD_MAKECONSTR);
6808 /* Set up the special atom communication */
6809 int n = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6810 for (int i = static_cast<int>(DDAtomRanges::Type::Zones) + 1; i < static_cast<int>(DDAtomRanges::Type::Number); i++)
6812 auto range = static_cast<DDAtomRanges::Type>(i);
6815 case DDAtomRanges::Type::Vsites:
6816 if (vsite && vsite->n_intercg_vsite)
6818 n = dd_make_local_vsites(dd, n, top_local->idef.il);
6821 case DDAtomRanges::Type::Constraints:
6822 if (dd->bInterCGcons || dd->bInterCGsettles)
6824 /* Only for inter-cg constraints we need special code */
6825 n = dd_make_local_constraints(dd, n, top_global, fr->cginfo,
6826 constr, ir->nProjOrder,
6827 top_local->idef.il);
6831 gmx_incons("Unknown special atom type setup");
6833 comm->atomRanges.setEnd(range, n);
6836 wallcycle_sub_stop(wcycle, ewcsDD_MAKECONSTR);
6838 wallcycle_sub_start(wcycle, ewcsDD_TOPOTHER);
6840 /* Make space for the extra coordinates for virtual site
6841 * or constraint communication.
6843 state_local->natoms = comm->atomRanges.numAtomsTotal();
6845 dd_resize_state(state_local, f, state_local->natoms);
6847 if (fr->haveDirectVirialContributions)
6849 if (vsite && vsite->n_intercg_vsite)
6851 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Vsites);
6855 if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
6857 nat_f_novirsum = comm->atomRanges.end(DDAtomRanges::Type::Zones);
6861 nat_f_novirsum = comm->atomRanges.numHomeAtoms();
6870 /* Set the number of atoms required for the force calculation.
6871 * Forces need to be constrained when doing energy
6872 * minimization. For simple simulations we could avoid some
6873 * allocation, zeroing and copying, but this is probably not worth
6874 * the complications and checking.
6876 forcerec_set_ranges(fr, dd->ncg_home, dd->globalAtomGroupIndices.size(),
6877 comm->atomRanges.end(DDAtomRanges::Type::Zones),
6878 comm->atomRanges.end(DDAtomRanges::Type::Constraints),
6881 /* Update atom data for mdatoms and several algorithms */
6882 mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
6883 nullptr, mdAtoms, constr, vsite, nullptr);
6885 auto mdatoms = mdAtoms->mdatoms();
6886 if (!thisRankHasDuty(cr, DUTY_PME))
6888 /* Send the charges and/or c6/sigmas to our PME only node */
6889 gmx_pme_send_parameters(cr,
6891 mdatoms->nChargePerturbed, mdatoms->nTypePerturbed,
6892 mdatoms->chargeA, mdatoms->chargeB,
6893 mdatoms->sqrt_c6A, mdatoms->sqrt_c6B,
6894 mdatoms->sigmaA, mdatoms->sigmaB,
6895 dd_pme_maxshift_x(dd), dd_pme_maxshift_y(dd));
6900 /* Update the local pull groups */
6901 dd_make_local_pull_groups(cr, ir->pull_work, mdatoms);
6906 /* Update the local rotation groups */
6907 dd_make_local_rotation_groups(dd, ir->rot);
6910 if (ir->eSwapCoords != eswapNO)
6912 /* Update the local groups needed for ion swapping */
6913 dd_make_local_swap_groups(dd, ir->swap);
6916 /* Update the local atoms to be communicated via the IMD protocol if bIMD is TRUE. */
6917 dd_make_local_IMD_atoms(ir->bIMD, dd, ir->imd);
6919 add_dd_statistics(dd);
6921 /* Make sure we only count the cycles for this DD partitioning */
6922 clear_dd_cycle_counts(dd);
6924 /* Because the order of the atoms might have changed since
6925 * the last vsite construction, we need to communicate the constructing
6926 * atom coordinates again (for spreading the forces this MD step).
6928 dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
6930 wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
6932 if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
6934 dd_move_x(dd, state_local->box, state_local->x, nullWallcycle);
6935 write_dd_pdb("dd_dump", step, "dump", top_global, cr,
6936 -1, as_rvec_array(state_local->x.data()), state_local->box);
6939 /* Store the partitioning step */
6940 comm->partition_step = step;
6942 /* Increase the DD partitioning counter */
6944 /* The state currently matches this DD partitioning count, store it */
6945 state_local->ddp_count = dd->ddp_count;
6948 /* The DD master node knows the complete cg distribution,
6949 * store the count so we can possibly skip the cg info communication.
6951 comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
6954 if (comm->DD_debug > 0)
6956 /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
6957 check_index_consistency(dd, top_global->natoms, ncg_mtop(top_global),
6958 "after partitioning");
6961 wallcycle_stop(wcycle, ewcDOMDEC);
6964 /*! \brief Check whether bonded interactions are missing, if appropriate */
6965 void checkNumberOfBondedInteractions(FILE *fplog,
6967 int totalNumberOfBondedInteractions,
6968 const gmx_mtop_t *top_global,
6969 const gmx_localtop_t *top_local,
6970 const t_state *state,
6971 bool *shouldCheckNumberOfBondedInteractions)
6973 if (*shouldCheckNumberOfBondedInteractions)
6975 if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
6977 dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
6979 *shouldCheckNumberOfBondedInteractions = false;