2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2017,2018,2019,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "dlbtiming.h"
42 #include "gromacs/domdec/domdec.h"
43 #include "gromacs/gmxlib/nrnb.h"
44 #include "gromacs/utility/gmxassert.h"
46 #include "domdec_internal.h"
48 /*! \brief Struct for timing the region for dynamic load balancing */
51 /*! \brief Constructor */
53 isOpen(false), isOpenOnCpu(false), isOpenOnGpu(false), cyclesOpenCpu(0), cyclesLastCpu(0)
57 bool isOpen; /**< Are we in an open balancing region? */
58 bool isOpenOnCpu; /**< Is the, currently open, region still open on the CPU side? */
59 bool isOpenOnGpu; /**< Is the, currently open, region open on the GPU side? */
60 gmx_cycles_t cyclesOpenCpu; /**< Cycle count when opening the CPU region */
61 gmx_cycles_t cyclesLastCpu; /**< Cycle count at the last call to \p ddCloseBalanceRegionCpu() */
64 BalanceRegion* ddBalanceRegionAllocate()
66 return new BalanceRegion;
69 /*! \brief Returns the pointer to the balance region.
71 * This should be replaced by a properly managed BalanceRegion class,
72 * but that requires a lot of refactoring in domdec.cpp.
74 static BalanceRegion* getBalanceRegion(const gmx_domdec_t* dd)
76 GMX_ASSERT(dd != nullptr && dd->comm != nullptr, "Balance regions should only be used with DD");
77 BalanceRegion* region = dd->comm->balanceRegion;
78 GMX_ASSERT(region != nullptr, "Balance region should be initialized before use");
82 void DDBalanceRegionHandler::openRegionCpuImpl(DdAllowBalanceRegionReopen gmx_unused allowReopen) const
84 BalanceRegion* reg = getBalanceRegion(dd_);
85 if (dd_->comm->ddSettings.recordLoad)
87 GMX_ASSERT(allowReopen == DdAllowBalanceRegionReopen::yes || !reg->isOpen,
88 "Should not open an already opened region");
90 reg->cyclesOpenCpu = gmx_cycles_read();
92 reg->isOpenOnCpu = true;
93 reg->isOpenOnGpu = false;
97 void DDBalanceRegionHandler::openRegionGpuImpl() const
99 BalanceRegion* reg = getBalanceRegion(dd_);
100 GMX_ASSERT(reg->isOpen, "Can only open a GPU region inside an open CPU region");
101 GMX_ASSERT(!reg->isOpenOnGpu, "Can not re-open a GPU balance region");
102 reg->isOpenOnGpu = true;
105 void ddReopenBalanceRegionCpu(const gmx_domdec_t* dd)
107 BalanceRegion* reg = getBalanceRegion(dd);
108 /* If the GPU is busy, don't reopen as we are overlapping with work */
109 if (reg->isOpen && !reg->isOpenOnGpu)
111 reg->cyclesOpenCpu = gmx_cycles_read();
115 void DDBalanceRegionHandler::closeRegionCpuImpl() const
117 BalanceRegion* reg = getBalanceRegion(dd_);
118 if (reg->isOpen && reg->isOpenOnCpu)
120 GMX_ASSERT(reg->isOpenOnCpu, "Can only close an open region");
121 gmx_cycles_t cycles = gmx_cycles_read();
122 reg->isOpenOnCpu = false;
124 if (reg->isOpenOnGpu)
126 /* Store the cycles for estimating the GPU/CPU overlap time */
127 reg->cyclesLastCpu = cycles;
131 /* We can close the region */
132 float cyclesCpu = cycles - reg->cyclesOpenCpu;
133 dd_cycles_add(dd_, cyclesCpu, ddCyclF);
139 void DDBalanceRegionHandler::closeRegionGpuImpl(float waitGpuCyclesInCpuRegion,
140 DdBalanceRegionWaitedForGpu waitedForGpu) const
142 BalanceRegion* reg = getBalanceRegion(dd_);
145 GMX_ASSERT(reg->isOpenOnGpu, "Can not close a non-open GPU balance region");
146 GMX_ASSERT(!reg->isOpenOnCpu,
147 "The GPU region should be closed after closing the CPU region");
149 float waitGpuCyclesEstimate = gmx_cycles_read() - reg->cyclesLastCpu;
150 if (waitedForGpu == DdBalanceRegionWaitedForGpu::no)
152 /* The actual time could be anywhere between 0 and
153 * waitCyclesEstimate. Using half is the best we can do.
155 const float unknownWaitEstimateFactor = 0.5F;
156 waitGpuCyclesEstimate *= unknownWaitEstimateFactor;
159 float cyclesCpu = reg->cyclesLastCpu - reg->cyclesOpenCpu;
160 dd_cycles_add(dd_, cyclesCpu + waitGpuCyclesEstimate, ddCyclF);
162 /* Register the total GPU wait time, to redistribute with GPU sharing */
163 dd_cycles_add(dd_, waitGpuCyclesInCpuRegion + waitGpuCyclesEstimate, ddCyclWaitGPU);
165 /* Close the region */
166 reg->isOpenOnGpu = false;
171 //! Accumulates flop counts for force calculations.
172 static double force_flop_count(const t_nrnb* nrnb)
175 for (int i = 0; i < eNR_NBKERNEL_FREE_ENERGY; i++)
177 /* To get closer to the real timings, we half the count
178 * for the normal loops and again half it for water loops.
180 const char* name = nrnb_str(i);
181 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
183 sum += nrnb->n[i] * 0.25 * cost_nrnb(i);
187 sum += nrnb->n[i] * 0.50 * cost_nrnb(i);
190 for (int i = eNR_NBKERNEL_FREE_ENERGY; i <= eNR_NB14; i++)
192 const char* name = nrnb_str(i);
193 if (strstr(name, "W3") != nullptr || strstr(name, "W4") != nullptr)
195 sum += nrnb->n[i] * cost_nrnb(i);
198 for (int i = eNR_BONDS; i <= eNR_WALLS; i++)
200 sum += nrnb->n[i] * cost_nrnb(i);
206 void dd_force_flop_start(gmx_domdec_t* dd, t_nrnb* nrnb)
208 if (dd->comm->ddSettings.eFlop)
210 dd->comm->flop -= force_flop_count(nrnb);
214 void dd_force_flop_stop(gmx_domdec_t* dd, t_nrnb* nrnb)
216 if (dd->comm->ddSettings.eFlop)
218 dd->comm->flop += force_flop_count(nrnb);
223 void clear_dd_cycle_counts(gmx_domdec_t* dd)
225 for (int i = 0; i < ddCyclNr; i++)
227 dd->comm->cycl[i] = 0;
228 dd->comm->cycl_n[i] = 0;
229 dd->comm->cycl_max[i] = 0;
232 dd->comm->flop_n = 0;