Pass ArrayRefs to dispatchNonbondedKernel
[alexxy/gromacs.git] / src / gromacs / nbnxm / benchmark / bench_setup.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  * \brief
38  * This file defines functions for setting up kernel benchmarks
39  *
40  * \author Berk Hess <hess@kth.se>
41  * \ingroup module_nbnxm
42  */
43
44 #include "gmxpre.h"
45
46 #include "bench_setup.h"
47
48 #include <optional>
49
50 #include "gromacs/gmxlib/nrnb.h"
51 #include "gromacs/math/units.h"
52 #include "gromacs/math/vectypes.h"
53 #include "gromacs/mdlib/dispersioncorrection.h"
54 #include "gromacs/mdlib/force_flags.h"
55 #include "gromacs/mdlib/forcerec.h"
56 #include "gromacs/mdlib/gmx_omp_nthreads.h"
57 #include "gromacs/mdtypes/enerdata.h"
58 #include "gromacs/mdtypes/forcerec.h"
59 #include "gromacs/mdtypes/interaction_const.h"
60 #include "gromacs/mdtypes/mdatom.h"
61 #include "gromacs/mdtypes/simulation_workload.h"
62 #include "gromacs/nbnxm/atomdata.h"
63 #include "gromacs/nbnxm/gridset.h"
64 #include "gromacs/nbnxm/nbnxm.h"
65 #include "gromacs/nbnxm/nbnxm_simd.h"
66 #include "gromacs/nbnxm/pairlistset.h"
67 #include "gromacs/nbnxm/pairlistsets.h"
68 #include "gromacs/nbnxm/pairsearch.h"
69 #include "gromacs/pbcutil/ishift.h"
70 #include "gromacs/pbcutil/pbc.h"
71 #include "gromacs/simd/simd.h"
72 #include "gromacs/timing/cyclecounter.h"
73 #include "gromacs/utility/enumerationhelpers.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/logger.h"
76
77 #include "bench_system.h"
78
79 namespace Nbnxm
80 {
81
82 /*! \brief Checks the kernel setup
83  *
84  * Returns an error string when the kernel is not available.
85  */
86 static std::optional<std::string> checkKernelSetup(const KernelBenchOptions& options)
87 {
88     GMX_RELEASE_ASSERT(options.nbnxmSimd < BenchMarkKernels::Count
89                                && options.nbnxmSimd != BenchMarkKernels::SimdAuto,
90                        "Need a valid kernel SIMD type");
91
92     // Check SIMD support
93     if ((options.nbnxmSimd != BenchMarkKernels::SimdNo && !GMX_SIMD)
94 #ifndef GMX_NBNXN_SIMD_4XN
95         || options.nbnxmSimd == BenchMarkKernels::Simd4XM
96 #endif
97 #ifndef GMX_NBNXN_SIMD_2XNN
98         || options.nbnxmSimd == BenchMarkKernels::Simd2XMM
99 #endif
100     )
101     {
102         return "the requested SIMD kernel was not set up at configuration time";
103     }
104
105     if (options.reportTime && (0 > gmx_cycles_calibrate(1.0)))
106     {
107         return "the -time option is not supported on this system";
108     }
109
110     return {};
111 }
112
113 //! Helper to translate between the different enumeration values.
114 static KernelType translateBenchmarkEnum(const BenchMarkKernels& kernel)
115 {
116     int kernelInt = static_cast<int>(kernel);
117     return static_cast<KernelType>(kernelInt);
118 }
119
120 /*! \brief Returns the kernel setup
121  */
122 static KernelSetup getKernelSetup(const KernelBenchOptions& options)
123 {
124     auto messageWhenInvalid = checkKernelSetup(options);
125     GMX_RELEASE_ASSERT(!messageWhenInvalid, "Need valid options");
126
127     KernelSetup kernelSetup;
128
129     // The int enum options.nbnxnSimd is set up to match KernelType + 1
130     kernelSetup.kernelType = translateBenchmarkEnum(options.nbnxmSimd);
131     // The plain-C kernel does not support analytical ewald correction
132     if (kernelSetup.kernelType == KernelType::Cpu4x4_PlainC)
133     {
134         kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
135     }
136     else
137     {
138         kernelSetup.ewaldExclusionType = options.useTabulatedEwaldCorr ? EwaldExclusionType::Table
139                                                                        : EwaldExclusionType::Analytical;
140     }
141
142     return kernelSetup;
143 }
144
145 //! Return an interaction constants struct with members used in the benchmark set appropriately
146 static interaction_const_t setupInteractionConst(const KernelBenchOptions& options)
147
148 {
149     interaction_const_t ic;
150
151     ic.vdwtype      = VanDerWaalsType::Cut;
152     ic.vdw_modifier = InteractionModifiers::PotShift;
153     ic.rvdw         = options.pairlistCutoff;
154
155     ic.eeltype = (options.coulombType == BenchMarkCoulomb::Pme ? CoulombInteractionType::Pme
156                                                                : CoulombInteractionType::RF);
157     ic.coulomb_modifier = InteractionModifiers::PotShift;
158     ic.rcoulomb         = options.pairlistCutoff;
159
160     // Reaction-field with reactionFieldPermitivity=inf
161     // TODO: Replace by calc_rffac() after refactoring that
162     ic.reactionFieldCoefficient = 0.5 * std::pow(ic.rcoulomb, -3);
163     ic.reactionFieldShift = 1 / ic.rcoulomb + ic.reactionFieldCoefficient * ic.rcoulomb * ic.rcoulomb;
164
165     if (EEL_PME_EWALD(ic.eeltype))
166     {
167         // Ewald coefficients, we ignore the potential shift
168         GMX_RELEASE_ASSERT(options.ewaldcoeff_q > 0, "Ewald coefficient should be > 0");
169         ic.ewaldcoeff_q       = options.ewaldcoeff_q;
170         ic.coulombEwaldTables = std::make_unique<EwaldCorrectionTables>();
171         init_interaction_const_tables(nullptr, &ic, 0, 0);
172     }
173
174     return ic;
175 }
176
177 //! Sets up and returns a Nbnxm object for the given benchmark options and system
178 static std::unique_ptr<nonbonded_verlet_t> setupNbnxmForBenchInstance(const KernelBenchOptions& options,
179                                                                       const gmx::BenchmarkSystem& system)
180 {
181     const auto pinPolicy  = (options.useGpu ? gmx::PinningPolicy::PinnedIfSupported
182                                            : gmx::PinningPolicy::CannotBePinned);
183     const int  numThreads = options.numThreads;
184     // Note: the options and Nbnxm combination rule enums values should match
185     const int combinationRule = static_cast<int>(options.ljCombinationRule);
186
187     auto messageWhenInvalid = checkKernelSetup(options);
188     if (messageWhenInvalid)
189     {
190         gmx_fatal(FARGS, "Requested kernel is unavailable because %s.", messageWhenInvalid->c_str());
191     }
192     Nbnxm::KernelSetup kernelSetup = getKernelSetup(options);
193
194     PairlistParams pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
195
196     GridSet gridSet(
197             PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
198
199     auto pairlistSets = std::make_unique<PairlistSets>(pairlistParams, false, 0);
200
201     auto pairSearch = std::make_unique<PairSearch>(
202             PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
203
204     auto atomData = std::make_unique<nbnxn_atomdata_t>(pinPolicy,
205                                                        gmx::MDLogger(),
206                                                        kernelSetup.kernelType,
207                                                        combinationRule,
208                                                        system.numAtomTypes,
209                                                        system.nonbondedParameters,
210                                                        1,
211                                                        numThreads);
212
213     // Put everything together
214     auto nbv = std::make_unique<nonbonded_verlet_t>(
215             std::move(pairlistSets), std::move(pairSearch), std::move(atomData), kernelSetup, nullptr, nullptr);
216
217     t_nrnb nrnb;
218
219     GMX_RELEASE_ASSERT(!TRICLINIC(system.box), "Only rectangular unit-cells are supported here");
220     const rvec lowerCorner = { 0, 0, 0 };
221     const rvec upperCorner = { system.box[XX][XX], system.box[YY][YY], system.box[ZZ][ZZ] };
222
223     gmx::ArrayRef<const int> atomInfo;
224     if (options.useHalfLJOptimization)
225     {
226         atomInfo = system.atomInfoOxygenVdw;
227     }
228     else
229     {
230         atomInfo = system.atomInfoAllVdw;
231     }
232
233     const real atomDensity = system.coordinates.size() / det(system.box);
234
235     nbnxn_put_on_grid(nbv.get(),
236                       system.box,
237                       0,
238                       lowerCorner,
239                       upperCorner,
240                       nullptr,
241                       { 0, int(system.coordinates.size()) },
242                       atomDensity,
243                       atomInfo,
244                       system.coordinates,
245                       0,
246                       nullptr);
247
248     nbv->constructPairlist(gmx::InteractionLocality::Local, system.excls, 0, &nrnb);
249
250     nbv->setAtomProperties(system.atomTypes, system.charges, atomInfo);
251
252     return nbv;
253 }
254
255 //! Add the options instance to the list for all requested kernel SIMD types
256 static void expandSimdOptionAndPushBack(const KernelBenchOptions&        options,
257                                         std::vector<KernelBenchOptions>* optionsList)
258 {
259     if (options.nbnxmSimd == BenchMarkKernels::SimdAuto)
260     {
261         bool addedInstance = false;
262 #ifdef GMX_NBNXN_SIMD_4XN
263         optionsList->push_back(options);
264         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd4XM;
265         addedInstance                 = true;
266 #endif
267 #ifdef GMX_NBNXN_SIMD_2XNN
268         optionsList->push_back(options);
269         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd2XMM;
270         addedInstance                 = true;
271 #endif
272         if (!addedInstance)
273         {
274             optionsList->push_back(options);
275             optionsList->back().nbnxmSimd = BenchMarkKernels::SimdNo;
276         }
277     }
278     else
279     {
280         optionsList->push_back(options);
281     }
282 }
283
284 //! Sets up and runs the requested benchmark instance and prints the results
285 //
286 // When \p doWarmup is true runs the warmup iterations instead
287 // of the normal ones and does not print any results
288 static void setupAndRunInstance(const gmx::BenchmarkSystem& system,
289                                 const KernelBenchOptions&   options,
290                                 const bool                  doWarmup)
291 {
292     // Generate an, accurate, estimate of the number of non-zero pair interactions
293     const real atomDensity = system.coordinates.size() / det(system.box);
294     const real numPairsWithinCutoff =
295             atomDensity * 4.0 / 3.0 * M_PI * std::pow(options.pairlistCutoff, 3);
296     const real numUsefulPairs = system.coordinates.size() * 0.5 * (numPairsWithinCutoff + 1);
297
298     std::unique_ptr<nonbonded_verlet_t> nbv = setupNbnxmForBenchInstance(options, system);
299
300     // We set the interaction cut-off to the pairlist cut-off
301     interaction_const_t ic = setupInteractionConst(options);
302
303     t_nrnb nrnb = { 0 };
304
305     gmx_enerdata_t enerd(1, 0);
306
307     gmx::StepWorkload stepWork;
308     stepWork.computeForces = true;
309     if (options.computeVirialAndEnergy)
310     {
311         stepWork.computeVirial = true;
312         stepWork.computeEnergy = true;
313     }
314
315     const gmx::EnumerationArray<BenchMarkKernels, std::string> kernelNames = {
316         "auto", "no", "4xM", "2xMM"
317     };
318
319     const gmx::EnumerationArray<BenchMarkCombRule, std::string> combruleNames = { "geom.",
320                                                                                   "LB",
321                                                                                   "none" };
322
323     if (!doWarmup)
324     {
325         fprintf(stdout,
326                 "%-7s %-4s %-5s %-4s ",
327                 options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
328                 options.useHalfLJOptimization ? "half" : "all",
329                 combruleNames[options.ljCombinationRule].c_str(),
330                 kernelNames[options.nbnxmSimd].c_str());
331         if (!options.outputFile.empty())
332         {
333             fprintf(system.csv,
334                     "\"%d\",\"%zu\",\"%g\",\"%d\",\"%d\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%"
335                     "s\",",
336 #if GMX_SIMD
337                     (options.nbnxmSimd != BenchMarkKernels::SimdNo) ? GMX_SIMD_REAL_WIDTH : 0,
338 #else
339                     0,
340 #endif
341                     system.coordinates.size(),
342                     options.pairlistCutoff,
343                     options.numThreads,
344                     options.numIterations,
345                     options.computeVirialAndEnergy ? "yes" : "no",
346                     (options.coulombType != BenchMarkCoulomb::ReactionField)
347                             ? ((options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr)
348                                        ? "table"
349                                        : "analytical")
350                             : "",
351                     options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
352                     options.useHalfLJOptimization ? "half" : "all",
353                     combruleNames[options.ljCombinationRule].c_str(),
354                     kernelNames[options.nbnxmSimd].c_str());
355         }
356     }
357
358     // Run pre-iteration to avoid cache misses
359     for (int iter = 0; iter < options.numPreIterations; iter++)
360     {
361         nbv->dispatchNonbondedKernel(
362                 gmx::InteractionLocality::Local,
363                 ic,
364                 stepWork,
365                 enbvClearFYes,
366                 system.forceRec.shift_vec,
367                 enerd.grpp.energyGroupPairTerms[system.forceRec.haveBuckingham ? NonBondedEnergyTerms::BuckinghamSR
368                                                                                : NonBondedEnergyTerms::LJSR],
369                 enerd.grpp.energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR],
370                 &nrnb);
371     }
372
373     const int numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations);
374     const PairlistSet& pairlistSet = nbv->pairlistSets().pairlistSet(gmx::InteractionLocality::Local);
375     const gmx::index numPairs = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_;
376     gmx_cycles_t cycles = gmx_cycles_read();
377     for (int iter = 0; iter < numIterations; iter++)
378     {
379         // Run the kernel without force clearing
380         nbv->dispatchNonbondedKernel(
381                 gmx::InteractionLocality::Local,
382                 ic,
383                 stepWork,
384                 enbvClearFNo,
385                 system.forceRec.shift_vec,
386                 enerd.grpp.energyGroupPairTerms[system.forceRec.haveBuckingham ? NonBondedEnergyTerms::BuckinghamSR
387                                                                                : NonBondedEnergyTerms::LJSR],
388                 enerd.grpp.energyGroupPairTerms[NonBondedEnergyTerms::CoulombSR],
389                 &nrnb);
390     }
391     cycles = gmx_cycles_read() - cycles;
392     if (!doWarmup)
393     {
394         if (options.reportTime)
395         {
396             const double uSec = static_cast<double>(cycles) * gmx_cycles_calibrate(1.0) * 1.e6;
397             if (options.cyclesPerPair)
398             {
399                 fprintf(stdout,
400                         "%13.2f %13.3f %10.3f %10.3f\n",
401                         uSec,
402                         uSec / options.numIterations,
403                         uSec / (options.numIterations * numPairs),
404                         uSec / (options.numIterations * numUsefulPairs));
405                 if (!options.outputFile.empty())
406                 {
407                     fprintf(system.csv,
408                             "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n",
409                             uSec,
410                             uSec / options.numIterations,
411                             uSec / (options.numIterations * numPairs),
412                             uSec / (options.numIterations * numUsefulPairs));
413                 }
414             }
415             else
416             {
417                 fprintf(stdout,
418                         "%13.2f %13.3f %10.3f %10.3f\n",
419                         uSec,
420                         uSec / options.numIterations,
421                         options.numIterations * numPairs / uSec,
422                         options.numIterations * numUsefulPairs / uSec);
423                 if (!options.outputFile.empty())
424                 {
425                     fprintf(system.csv,
426                             "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n",
427                             uSec,
428                             uSec / options.numIterations,
429                             options.numIterations * numPairs / uSec,
430                             options.numIterations * numUsefulPairs / uSec);
431                 }
432             }
433         }
434         else
435         {
436             const double dCycles = static_cast<double>(cycles);
437             if (options.cyclesPerPair)
438             {
439                 fprintf(stdout,
440                         "%10.3f %10.4f %8.4f %8.4f\n",
441                         cycles * 1e-6,
442                         dCycles / options.numIterations * 1e-6,
443                         dCycles / (options.numIterations * numPairs),
444                         dCycles / (options.numIterations * numUsefulPairs));
445             }
446             else
447             {
448                 fprintf(stdout,
449                         "%10.3f %10.4f %8.4f %8.4f\n",
450                         dCycles * 1e-6,
451                         dCycles / options.numIterations * 1e-6,
452                         options.numIterations * numPairs / dCycles,
453                         options.numIterations * numUsefulPairs / dCycles);
454             }
455         }
456     }
457 }
458
459 void bench(const int sizeFactor, const KernelBenchOptions& options)
460 {
461     // We don't want to call gmx_omp_nthreads_init(), so we init what we need
462     gmx_omp_nthreads_set(ModuleMultiThread::Pairsearch, options.numThreads);
463     gmx_omp_nthreads_set(ModuleMultiThread::Nonbonded, options.numThreads);
464
465     const gmx::BenchmarkSystem system(sizeFactor, options.outputFile);
466
467     real minBoxSize = norm(system.box[XX]);
468     for (int dim = YY; dim < DIM; dim++)
469     {
470         minBoxSize = std::min(minBoxSize, norm(system.box[dim]));
471     }
472     if (options.pairlistCutoff > 0.5 * minBoxSize)
473     {
474         gmx_fatal(FARGS, "The cut-off should be shorter than half the box size");
475     }
476
477     std::vector<KernelBenchOptions> optionsList;
478     if (options.doAll)
479     {
480         KernelBenchOptions                        opt = options;
481         gmx::EnumerationWrapper<BenchMarkCoulomb> coulombIter;
482         for (auto coulombType : coulombIter)
483         {
484             opt.coulombType = coulombType;
485             for (int halfLJ = 0; halfLJ <= 1; halfLJ++)
486             {
487                 opt.useHalfLJOptimization = (halfLJ == 1);
488
489                 gmx::EnumerationWrapper<BenchMarkCombRule> combRuleIter;
490                 for (auto combRule : combRuleIter)
491                 {
492                     opt.ljCombinationRule = combRule;
493
494                     expandSimdOptionAndPushBack(opt, &optionsList);
495                 }
496             }
497         }
498     }
499     else
500     {
501         expandSimdOptionAndPushBack(options, &optionsList);
502     }
503     GMX_RELEASE_ASSERT(!optionsList.empty(), "Expect at least on benchmark setup");
504
505 #if GMX_SIMD
506     if (options.nbnxmSimd != BenchMarkKernels::SimdNo)
507     {
508         fprintf(stdout, "SIMD width:           %d\n", GMX_SIMD_REAL_WIDTH);
509     }
510 #endif
511     fprintf(stdout, "System size:          %zu atoms\n", system.coordinates.size());
512     fprintf(stdout, "Cut-off radius:       %g nm\n", options.pairlistCutoff);
513     fprintf(stdout, "Number of threads:    %d\n", options.numThreads);
514     fprintf(stdout, "Number of iterations: %d\n", options.numIterations);
515     fprintf(stdout, "Compute energies:     %s\n", options.computeVirialAndEnergy ? "yes" : "no");
516     if (options.coulombType != BenchMarkCoulomb::ReactionField)
517     {
518         fprintf(stdout,
519                 "Ewald excl. corr.:    %s\n",
520                 options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr
521                         ? "table"
522                         : "analytical");
523     }
524     printf("\n");
525
526     if (options.numWarmupIterations > 0)
527     {
528         setupAndRunInstance(system, optionsList[0], true);
529     }
530
531     if (options.reportTime)
532     {
533         fprintf(stdout,
534                 "Coulomb LJ   comb. SIMD       usec         usec/it.        %s\n",
535                 options.cyclesPerPair ? "usec/pair" : "pairs/usec");
536         if (!options.outputFile.empty())
537         {
538             fprintf(system.csv,
539                     "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
540                     "energy\",\"Ewald excl. "
541                     "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"usec\",\"usec/it\",\"total "
542                     "pairs/usec\",\"useful pairs/usec\"\n");
543         }
544         fprintf(stdout,
545                 "                                                        total      useful\n");
546     }
547     else
548     {
549         fprintf(stdout,
550                 "Coulomb LJ   comb. SIMD    Mcycles  Mcycles/it.   %s\n",
551                 options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
552         if (!options.outputFile.empty())
553         {
554             fprintf(system.csv,
555                     "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
556                     "energy\",\"Ewald excl. "
557                     "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"Mcycles\",\"Mcycles/"
558                     "it\",\"total "
559                     "total cycles/pair\",\"total cycles per useful pair\"\n");
560         }
561         fprintf(stdout, "                                                total    useful\n");
562     }
563
564     for (const auto& optionsInstance : optionsList)
565     {
566         setupAndRunInstance(system, optionsInstance, false);
567     }
568
569     if (!options.outputFile.empty())
570     {
571         fclose(system.csv);
572     }
573 }
574
575 } // namespace Nbnxm