2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 * This file defines functions for setting up kernel benchmarks
40 * \author Berk Hess <hess@kth.se>
41 * \ingroup module_nbnxm
46 #include "bench_setup.h"
48 #include "gromacs/compat/optional.h"
49 #include "gromacs/gmxlib/nrnb.h"
50 #include "gromacs/mdlib/dispersioncorrection.h"
51 #include "gromacs/mdlib/force_flags.h"
52 #include "gromacs/mdlib/forcerec.h"
53 #include "gromacs/mdlib/gmx_omp_nthreads.h"
54 #include "gromacs/mdtypes/enerdata.h"
55 #include "gromacs/mdtypes/forcerec.h"
56 #include "gromacs/mdtypes/interaction_const.h"
57 #include "gromacs/mdtypes/mdatom.h"
58 #include "gromacs/mdtypes/simulation_workload.h"
59 #include "gromacs/nbnxm/atomdata.h"
60 #include "gromacs/nbnxm/gridset.h"
61 #include "gromacs/nbnxm/nbnxm.h"
62 #include "gromacs/nbnxm/nbnxm_simd.h"
63 #include "gromacs/nbnxm/pairlistset.h"
64 #include "gromacs/nbnxm/pairlistsets.h"
65 #include "gromacs/nbnxm/pairsearch.h"
66 #include "gromacs/pbcutil/ishift.h"
67 #include "gromacs/pbcutil/pbc.h"
68 #include "gromacs/simd/simd.h"
69 #include "gromacs/timing/cyclecounter.h"
70 #include "gromacs/utility/enumerationhelpers.h"
71 #include "gromacs/utility/fatalerror.h"
72 #include "gromacs/utility/logger.h"
74 #include "bench_system.h"
79 /*! \brief Checks the kernel setup
81 * Returns an error string when the kernel is not available.
83 static gmx::compat::optional<std::string> checkKernelSetup(const KernelBenchOptions &options)
85 GMX_RELEASE_ASSERT(options.nbnxmSimd < BenchMarkKernels::Count &&
86 options.nbnxmSimd != BenchMarkKernels::SimdAuto, "Need a valid kernel SIMD type");
89 if ((options.nbnxmSimd != BenchMarkKernels::SimdNo && !GMX_SIMD)
90 #ifndef GMX_NBNXN_SIMD_4XN
91 || options.nbnxmSimd == BenchMarkKernels::Simd4XM
93 #ifndef GMX_NBNXN_SIMD_2XNN
94 || options.nbnxmSimd == BenchMarkKernels::Simd2XMM
98 return "the requested SIMD kernel was not set up at configuration time";
104 //! Helper to translate between the different enumeration values.
105 static KernelType translateBenchmarkEnum(const BenchMarkKernels &kernel)
107 int kernelInt = static_cast<int>(kernel);
108 return static_cast<KernelType>(kernelInt);
111 /*! \brief Returns the kernel setup
113 static KernelSetup getKernelSetup(const KernelBenchOptions &options)
115 auto messageWhenInvalid = checkKernelSetup(options);
116 GMX_RELEASE_ASSERT(!messageWhenInvalid, "Need valid options");
118 KernelSetup kernelSetup;
120 //The int enum options.nbnxnSimd is set up to match KernelType + 1
121 kernelSetup.kernelType = translateBenchmarkEnum(options.nbnxmSimd);
122 // The plain-C kernel does not support analytical ewald correction
123 if (kernelSetup.kernelType == KernelType::Cpu4x4_PlainC)
125 kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
129 kernelSetup.ewaldExclusionType = options.useTabulatedEwaldCorr ? EwaldExclusionType::Table : EwaldExclusionType::Analytical;
135 //! Return an interaction constants struct with members used in the benchmark set appropriately
136 static interaction_const_t setupInteractionConst(const KernelBenchOptions &options)
139 interaction_const_t ic;
141 ic.vdwtype = evdwCUT;
142 ic.vdw_modifier = eintmodPOTSHIFT;
143 ic.rvdw = options.pairlistCutoff;
145 ic.eeltype = (options.coulombType == BenchMarkCoulomb::Pme ? eelPME : eelRF);
146 ic.coulomb_modifier = eintmodPOTSHIFT;
147 ic.rcoulomb = options.pairlistCutoff;
149 // Reaction-field with epsilon_rf=inf
150 // TODO: Replace by calc_rffac() after refactoring that
151 ic.k_rf = 0.5*std::pow(ic.rcoulomb, -3);
152 ic.c_rf = 1/ic.rcoulomb + ic.k_rf*ic.rcoulomb*ic.rcoulomb;
154 if (EEL_PME_EWALD(ic.eeltype))
156 // Ewald coefficients, we ignore the potential shift
157 GMX_RELEASE_ASSERT(options.ewaldcoeff_q > 0, "Ewald coefficient should be > 0");
158 ic.ewaldcoeff_q = options.ewaldcoeff_q;
159 ic.coulombEwaldTables = std::make_unique<EwaldCorrectionTables>();
160 init_interaction_const_tables(nullptr, &ic);
166 //! Sets up and returns a Nbnxm object for the given benchmark options and system
167 static std::unique_ptr<nonbonded_verlet_t>
168 setupNbnxmForBenchInstance(const KernelBenchOptions &options,
169 const gmx::BenchmarkSystem &system)
171 const auto pinPolicy = (options.useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
172 const int numThreads = options.numThreads;
173 // Note: the options and Nbnxm combination rule enums values should match
174 const int combinationRule = static_cast<int>(options.ljCombinationRule);
176 auto messageWhenInvalid = checkKernelSetup(options);
177 if (messageWhenInvalid)
179 gmx_fatal(FARGS, "Requested kernel is unavailable because %s.",
180 messageWhenInvalid->c_str());
182 Nbnxm::KernelSetup kernelSetup = getKernelSetup(options);
184 PairlistParams pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
186 GridSet gridSet(epbcXYZ, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
188 auto pairlistSets = std::make_unique<PairlistSets>(pairlistParams, false, 0);
190 auto pairSearch = std::make_unique<PairSearch>(epbcXYZ, false, nullptr, nullptr,
191 pairlistParams.pairlistType,
192 false, numThreads, pinPolicy);
194 auto atomData = std::make_unique<nbnxn_atomdata_t>(pinPolicy);
196 // Put everything together
197 auto nbv = std::make_unique<nonbonded_verlet_t>(std::move(pairlistSets),
198 std::move(pairSearch),
204 nbnxn_atomdata_init(gmx::MDLogger(),
205 nbv->nbat.get(), kernelSetup.kernelType,
206 combinationRule, system.numAtomTypes, system.nonbondedParameters.data(),
211 GMX_RELEASE_ASSERT(!TRICLINIC(system.box), "Only rectangular unit-cells are supported here");
212 const rvec lowerCorner = { 0, 0, 0 };
213 const rvec upperCorner = {
219 gmx::ArrayRef<const int> atomInfo;
220 if (options.useHalfLJOptimization)
222 atomInfo = system.atomInfoOxygenVdw;
226 atomInfo = system.atomInfoAllVdw;
229 const real atomDensity = system.coordinates.size()/det(system.box);
231 nbnxn_put_on_grid(nbv.get(),
232 system.box, 0, lowerCorner, upperCorner,
233 nullptr, { 0, int(system.coordinates.size()) }, atomDensity,
234 atomInfo, system.coordinates,
237 nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
238 &system.excls, 0, &nrnb);
241 // We only use (read) the atom type and charge from mdatoms
242 mdatoms.typeA = const_cast<int *>(system.atomTypes.data());
243 mdatoms.chargeA = const_cast<real *>(system.charges.data());
244 nbv->setAtomProperties(mdatoms, atomInfo);
249 //! Add the options instance to the list for all requested kernel SIMD types
251 expandSimdOptionAndPushBack(const KernelBenchOptions &options,
252 std::vector<KernelBenchOptions> *optionsList)
254 if (options.nbnxmSimd == BenchMarkKernels::SimdAuto)
256 bool addedInstance = false;
257 #ifdef GMX_NBNXN_SIMD_4XN
258 optionsList->push_back(options);
259 optionsList->back().nbnxmSimd = BenchMarkKernels::Simd4XM;
260 addedInstance = true;
262 #ifdef GMX_NBNXN_SIMD_2XNN
263 optionsList->push_back(options);
264 optionsList->back().nbnxmSimd = BenchMarkKernels::Simd2XMM;
265 addedInstance = true;
269 optionsList->push_back(options);
270 optionsList->back().nbnxmSimd = BenchMarkKernels::SimdNo;
275 optionsList->push_back(options);
279 //! Sets up and runs the requested benchmark instance and prints the results
281 // When \p doWarmup is true runs the warmup iterations instead
282 // of the normal ones and does not print any results
283 static void setupAndRunInstance(const gmx::BenchmarkSystem &system,
284 const KernelBenchOptions &options,
287 // Generate an, accurate, estimate of the number of non-zero pair interactions
288 const real atomDensity = system.coordinates.size()/det(system.box);
289 const real numPairsWithinCutoff = atomDensity*4.0/3.0*M_PI*std::pow(options.pairlistCutoff, 3);
290 const real numUsefulPairs = system.coordinates.size()*0.5*(numPairsWithinCutoff + 1);
292 std::unique_ptr<nonbonded_verlet_t> nbv = setupNbnxmForBenchInstance(options, system);
294 // We set the interaction cut-off to the pairlist cut-off
295 interaction_const_t ic = setupInteractionConst(options);
299 gmx_enerdata_t enerd(1, 0);
301 gmx::StepWorkload stepWork;
302 stepWork.computeForces = true;
303 if (options.computeVirialAndEnergy)
305 stepWork.computeVirial = true;
306 stepWork.computeEnergy = true;
309 const gmx::EnumerationArray<BenchMarkKernels, std::string> kernelNames = { "auto", "no", "4xM", "2xMM" };
311 const gmx::EnumerationArray<BenchMarkCombRule, std::string> combruleNames = { "geom.", "LB", "none" };
315 fprintf(stdout, "%-7s %-4s %-5s %-4s ",
316 options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
317 options.useHalfLJOptimization ? "half" : "all",
318 combruleNames[options.ljCombinationRule].c_str(),
319 kernelNames[options.nbnxmSimd].c_str());
322 // Run pre-iteration to avoid cache misses
323 for (int iter = 0; iter < options.numPreIterations; iter++)
325 nbv->dispatchNonbondedKernel(InteractionLocality::Local,
326 ic, stepWork, enbvClearFYes, system.forceRec,
331 const int numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations);
332 const PairlistSet &pairlistSet = nbv->pairlistSets().pairlistSet(InteractionLocality::Local);
333 const gmx::index numPairs = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_;
334 gmx_cycles_t cycles = gmx_cycles_read();
335 for (int iter = 0; iter < numIterations; iter++)
337 // Run the kernel without force clearing
338 nbv->dispatchNonbondedKernel(InteractionLocality::Local,
339 ic, stepWork, enbvClearFNo, system.forceRec,
343 cycles = gmx_cycles_read() - cycles;
346 const double dCycles = static_cast<double>(cycles);
347 if (options.cyclesPerPair)
349 fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n",
351 dCycles/options.numIterations*1e-6,
352 dCycles/(options.numIterations*numPairs),
353 dCycles/(options.numIterations*numUsefulPairs));
357 fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n",
359 dCycles/options.numIterations*1e-6,
360 options.numIterations*numPairs/dCycles,
361 options.numIterations*numUsefulPairs/dCycles);
366 void bench(const int sizeFactor,
367 const KernelBenchOptions &options)
369 // We don't want to call gmx_omp_nthreads_init(), so we init what we need
370 gmx_omp_nthreads_set(emntPairsearch, options.numThreads);
371 gmx_omp_nthreads_set(emntNonbonded, options.numThreads);
373 const gmx::BenchmarkSystem system(sizeFactor);
375 real minBoxSize = norm(system.box[XX]);
376 for (int dim = YY; dim < DIM; dim++)
378 minBoxSize = std::min(minBoxSize, norm(system.box[dim]));
380 if (options.pairlistCutoff > 0.5*minBoxSize)
382 gmx_fatal(FARGS, "The cut-off should be shorter than half the box size");
385 std::vector<KernelBenchOptions> optionsList;
388 KernelBenchOptions opt = options;
389 gmx::EnumerationWrapper<BenchMarkCoulomb> coulombIter;
390 for (auto coulombType : coulombIter)
392 opt.coulombType = coulombType;
393 for (int halfLJ = 0; halfLJ <= 1; halfLJ++)
395 opt.useHalfLJOptimization = (halfLJ == 1);
397 gmx::EnumerationWrapper<BenchMarkCombRule> combRuleIter;
398 for (auto combRule : combRuleIter)
400 if (combRule == BenchMarkCombRule::RuleNone)
404 opt.ljCombinationRule = combRule;
406 expandSimdOptionAndPushBack(opt, &optionsList);
413 expandSimdOptionAndPushBack(options, &optionsList);
415 GMX_RELEASE_ASSERT(!optionsList.empty(), "Expect at least on benchmark setup");
418 if (options.nbnxmSimd != BenchMarkKernels::SimdNo)
420 fprintf(stdout, "SIMD width: %d\n", GMX_SIMD_REAL_WIDTH);
423 fprintf(stdout, "System size: %zu atoms\n", system.coordinates.size());
424 fprintf(stdout, "Cut-off radius: %g nm\n", options.pairlistCutoff);
425 fprintf(stdout, "Number of threads: %d\n", options.numThreads);
426 fprintf(stdout, "Number of iterations: %d\n", options.numIterations);
427 fprintf(stdout, "Compute energies: %s\n",
428 options.computeVirialAndEnergy ? "yes" : "no");
429 if (options.coulombType != BenchMarkCoulomb::ReactionField)
431 fprintf(stdout, "Ewald excl. corr.: %s\n",
432 options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr ? "table" : "analytical");
436 if (options.numWarmupIterations > 0)
438 setupAndRunInstance(system, optionsList[0], true);
441 fprintf(stdout, "Coulomb LJ comb. SIMD Mcycles Mcycles/it. %s\n",
442 options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
443 fprintf(stdout, " total useful\n");
445 for (const auto &optionsInstance : optionsList)
447 setupAndRunInstance(system, optionsInstance, false);