Move M_PI definition to math/units.h
[alexxy/gromacs.git] / src / gromacs / nbnxm / benchmark / bench_setup.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  * \brief
38  * This file defines functions for setting up kernel benchmarks
39  *
40  * \author Berk Hess <hess@kth.se>
41  * \ingroup module_nbnxm
42  */
43
44 #include "gmxpre.h"
45
46 #include "bench_setup.h"
47
48 #include <optional>
49
50 #include "gromacs/gmxlib/nrnb.h"
51 #include "gromacs/math/units.h"
52 #include "gromacs/mdlib/dispersioncorrection.h"
53 #include "gromacs/mdlib/force_flags.h"
54 #include "gromacs/mdlib/forcerec.h"
55 #include "gromacs/mdlib/gmx_omp_nthreads.h"
56 #include "gromacs/mdtypes/enerdata.h"
57 #include "gromacs/mdtypes/forcerec.h"
58 #include "gromacs/mdtypes/interaction_const.h"
59 #include "gromacs/mdtypes/mdatom.h"
60 #include "gromacs/mdtypes/simulation_workload.h"
61 #include "gromacs/nbnxm/atomdata.h"
62 #include "gromacs/nbnxm/gridset.h"
63 #include "gromacs/nbnxm/nbnxm.h"
64 #include "gromacs/nbnxm/nbnxm_simd.h"
65 #include "gromacs/nbnxm/pairlistset.h"
66 #include "gromacs/nbnxm/pairlistsets.h"
67 #include "gromacs/nbnxm/pairsearch.h"
68 #include "gromacs/pbcutil/ishift.h"
69 #include "gromacs/pbcutil/pbc.h"
70 #include "gromacs/simd/simd.h"
71 #include "gromacs/timing/cyclecounter.h"
72 #include "gromacs/utility/enumerationhelpers.h"
73 #include "gromacs/utility/fatalerror.h"
74 #include "gromacs/utility/logger.h"
75
76 #include "bench_system.h"
77
78 namespace Nbnxm
79 {
80
81 /*! \brief Checks the kernel setup
82  *
83  * Returns an error string when the kernel is not available.
84  */
85 static std::optional<std::string> checkKernelSetup(const KernelBenchOptions& options)
86 {
87     GMX_RELEASE_ASSERT(options.nbnxmSimd < BenchMarkKernels::Count
88                                && options.nbnxmSimd != BenchMarkKernels::SimdAuto,
89                        "Need a valid kernel SIMD type");
90
91     // Check SIMD support
92     if ((options.nbnxmSimd != BenchMarkKernels::SimdNo && !GMX_SIMD)
93 #ifndef GMX_NBNXN_SIMD_4XN
94         || options.nbnxmSimd == BenchMarkKernels::Simd4XM
95 #endif
96 #ifndef GMX_NBNXN_SIMD_2XNN
97         || options.nbnxmSimd == BenchMarkKernels::Simd2XMM
98 #endif
99     )
100     {
101         return "the requested SIMD kernel was not set up at configuration time";
102     }
103
104     if (options.reportTime && (0 > gmx_cycles_calibrate(1.0)))
105     {
106         return "the -time option is not supported on this system";
107     }
108
109     return {};
110 }
111
112 //! Helper to translate between the different enumeration values.
113 static KernelType translateBenchmarkEnum(const BenchMarkKernels& kernel)
114 {
115     int kernelInt = static_cast<int>(kernel);
116     return static_cast<KernelType>(kernelInt);
117 }
118
119 /*! \brief Returns the kernel setup
120  */
121 static KernelSetup getKernelSetup(const KernelBenchOptions& options)
122 {
123     auto messageWhenInvalid = checkKernelSetup(options);
124     GMX_RELEASE_ASSERT(!messageWhenInvalid, "Need valid options");
125
126     KernelSetup kernelSetup;
127
128     // The int enum options.nbnxnSimd is set up to match KernelType + 1
129     kernelSetup.kernelType = translateBenchmarkEnum(options.nbnxmSimd);
130     // The plain-C kernel does not support analytical ewald correction
131     if (kernelSetup.kernelType == KernelType::Cpu4x4_PlainC)
132     {
133         kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
134     }
135     else
136     {
137         kernelSetup.ewaldExclusionType = options.useTabulatedEwaldCorr ? EwaldExclusionType::Table
138                                                                        : EwaldExclusionType::Analytical;
139     }
140
141     return kernelSetup;
142 }
143
144 //! Return an interaction constants struct with members used in the benchmark set appropriately
145 static interaction_const_t setupInteractionConst(const KernelBenchOptions& options)
146
147 {
148     interaction_const_t ic;
149
150     ic.vdwtype      = VanDerWaalsType::Cut;
151     ic.vdw_modifier = InteractionModifiers::PotShift;
152     ic.rvdw         = options.pairlistCutoff;
153
154     ic.eeltype = (options.coulombType == BenchMarkCoulomb::Pme ? CoulombInteractionType::Pme
155                                                                : CoulombInteractionType::RF);
156     ic.coulomb_modifier = InteractionModifiers::PotShift;
157     ic.rcoulomb         = options.pairlistCutoff;
158
159     // Reaction-field with reactionFieldPermitivity=inf
160     // TODO: Replace by calc_rffac() after refactoring that
161     ic.reactionFieldCoefficient = 0.5 * std::pow(ic.rcoulomb, -3);
162     ic.reactionFieldShift = 1 / ic.rcoulomb + ic.reactionFieldCoefficient * ic.rcoulomb * ic.rcoulomb;
163
164     if (EEL_PME_EWALD(ic.eeltype))
165     {
166         // Ewald coefficients, we ignore the potential shift
167         GMX_RELEASE_ASSERT(options.ewaldcoeff_q > 0, "Ewald coefficient should be > 0");
168         ic.ewaldcoeff_q       = options.ewaldcoeff_q;
169         ic.coulombEwaldTables = std::make_unique<EwaldCorrectionTables>();
170         init_interaction_const_tables(nullptr, &ic, 0, 0);
171     }
172
173     return ic;
174 }
175
176 //! Sets up and returns a Nbnxm object for the given benchmark options and system
177 static std::unique_ptr<nonbonded_verlet_t> setupNbnxmForBenchInstance(const KernelBenchOptions& options,
178                                                                       const gmx::BenchmarkSystem& system)
179 {
180     const auto pinPolicy  = (options.useGpu ? gmx::PinningPolicy::PinnedIfSupported
181                                            : gmx::PinningPolicy::CannotBePinned);
182     const int  numThreads = options.numThreads;
183     // Note: the options and Nbnxm combination rule enums values should match
184     const int combinationRule = static_cast<int>(options.ljCombinationRule);
185
186     auto messageWhenInvalid = checkKernelSetup(options);
187     if (messageWhenInvalid)
188     {
189         gmx_fatal(FARGS, "Requested kernel is unavailable because %s.", messageWhenInvalid->c_str());
190     }
191     Nbnxm::KernelSetup kernelSetup = getKernelSetup(options);
192
193     PairlistParams pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
194
195     GridSet gridSet(
196             PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
197
198     auto pairlistSets = std::make_unique<PairlistSets>(pairlistParams, false, 0);
199
200     auto pairSearch = std::make_unique<PairSearch>(
201             PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
202
203     auto atomData = std::make_unique<nbnxn_atomdata_t>(pinPolicy);
204
205     // Put everything together
206     auto nbv = std::make_unique<nonbonded_verlet_t>(
207             std::move(pairlistSets), std::move(pairSearch), std::move(atomData), kernelSetup, nullptr, nullptr);
208
209     nbnxn_atomdata_init(gmx::MDLogger(),
210                         nbv->nbat.get(),
211                         kernelSetup.kernelType,
212                         combinationRule,
213                         system.numAtomTypes,
214                         system.nonbondedParameters,
215                         1,
216                         numThreads);
217
218     t_nrnb nrnb;
219
220     GMX_RELEASE_ASSERT(!TRICLINIC(system.box), "Only rectangular unit-cells are supported here");
221     const rvec lowerCorner = { 0, 0, 0 };
222     const rvec upperCorner = { system.box[XX][XX], system.box[YY][YY], system.box[ZZ][ZZ] };
223
224     gmx::ArrayRef<const int> atomInfo;
225     if (options.useHalfLJOptimization)
226     {
227         atomInfo = system.atomInfoOxygenVdw;
228     }
229     else
230     {
231         atomInfo = system.atomInfoAllVdw;
232     }
233
234     const real atomDensity = system.coordinates.size() / det(system.box);
235
236     nbnxn_put_on_grid(nbv.get(),
237                       system.box,
238                       0,
239                       lowerCorner,
240                       upperCorner,
241                       nullptr,
242                       { 0, int(system.coordinates.size()) },
243                       atomDensity,
244                       atomInfo,
245                       system.coordinates,
246                       0,
247                       nullptr);
248
249     nbv->constructPairlist(gmx::InteractionLocality::Local, system.excls, 0, &nrnb);
250
251     nbv->setAtomProperties(system.atomTypes, system.charges, atomInfo);
252
253     return nbv;
254 }
255
256 //! Add the options instance to the list for all requested kernel SIMD types
257 static void expandSimdOptionAndPushBack(const KernelBenchOptions&        options,
258                                         std::vector<KernelBenchOptions>* optionsList)
259 {
260     if (options.nbnxmSimd == BenchMarkKernels::SimdAuto)
261     {
262         bool addedInstance = false;
263 #ifdef GMX_NBNXN_SIMD_4XN
264         optionsList->push_back(options);
265         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd4XM;
266         addedInstance                 = true;
267 #endif
268 #ifdef GMX_NBNXN_SIMD_2XNN
269         optionsList->push_back(options);
270         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd2XMM;
271         addedInstance                 = true;
272 #endif
273         if (!addedInstance)
274         {
275             optionsList->push_back(options);
276             optionsList->back().nbnxmSimd = BenchMarkKernels::SimdNo;
277         }
278     }
279     else
280     {
281         optionsList->push_back(options);
282     }
283 }
284
285 //! Sets up and runs the requested benchmark instance and prints the results
286 //
287 // When \p doWarmup is true runs the warmup iterations instead
288 // of the normal ones and does not print any results
289 static void setupAndRunInstance(const gmx::BenchmarkSystem& system,
290                                 const KernelBenchOptions&   options,
291                                 const bool                  doWarmup)
292 {
293     // Generate an, accurate, estimate of the number of non-zero pair interactions
294     const real atomDensity = system.coordinates.size() / det(system.box);
295     const real numPairsWithinCutoff =
296             atomDensity * 4.0 / 3.0 * M_PI * std::pow(options.pairlistCutoff, 3);
297     const real numUsefulPairs = system.coordinates.size() * 0.5 * (numPairsWithinCutoff + 1);
298
299     std::unique_ptr<nonbonded_verlet_t> nbv = setupNbnxmForBenchInstance(options, system);
300
301     // We set the interaction cut-off to the pairlist cut-off
302     interaction_const_t ic = setupInteractionConst(options);
303
304     t_nrnb nrnb = { 0 };
305
306     gmx_enerdata_t enerd(1, 0);
307
308     gmx::StepWorkload stepWork;
309     stepWork.computeForces = true;
310     if (options.computeVirialAndEnergy)
311     {
312         stepWork.computeVirial = true;
313         stepWork.computeEnergy = true;
314     }
315
316     const gmx::EnumerationArray<BenchMarkKernels, std::string> kernelNames = {
317         "auto", "no", "4xM", "2xMM"
318     };
319
320     const gmx::EnumerationArray<BenchMarkCombRule, std::string> combruleNames = { "geom.",
321                                                                                   "LB",
322                                                                                   "none" };
323
324     if (!doWarmup)
325     {
326         fprintf(stdout,
327                 "%-7s %-4s %-5s %-4s ",
328                 options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
329                 options.useHalfLJOptimization ? "half" : "all",
330                 combruleNames[options.ljCombinationRule].c_str(),
331                 kernelNames[options.nbnxmSimd].c_str());
332         if (!options.outputFile.empty())
333         {
334             fprintf(system.csv,
335                     "\"%d\",\"%zu\",\"%g\",\"%d\",\"%d\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%"
336                     "s\",",
337 #if GMX_SIMD
338                     (options.nbnxmSimd != BenchMarkKernels::SimdNo) ? GMX_SIMD_REAL_WIDTH : 0,
339 #else
340                     0,
341 #endif
342                     system.coordinates.size(),
343                     options.pairlistCutoff,
344                     options.numThreads,
345                     options.numIterations,
346                     options.computeVirialAndEnergy ? "yes" : "no",
347                     (options.coulombType != BenchMarkCoulomb::ReactionField)
348                             ? ((options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr)
349                                        ? "table"
350                                        : "analytical")
351                             : "",
352                     options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
353                     options.useHalfLJOptimization ? "half" : "all",
354                     combruleNames[options.ljCombinationRule].c_str(),
355                     kernelNames[options.nbnxmSimd].c_str());
356         }
357     }
358
359     // Run pre-iteration to avoid cache misses
360     for (int iter = 0; iter < options.numPreIterations; iter++)
361     {
362         nbv->dispatchNonbondedKernel(
363                 gmx::InteractionLocality::Local, ic, stepWork, enbvClearFYes, system.forceRec, &enerd, &nrnb);
364     }
365
366     const int numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations);
367     const PairlistSet& pairlistSet = nbv->pairlistSets().pairlistSet(gmx::InteractionLocality::Local);
368     const gmx::index numPairs = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_;
369     gmx_cycles_t cycles = gmx_cycles_read();
370     for (int iter = 0; iter < numIterations; iter++)
371     {
372         // Run the kernel without force clearing
373         nbv->dispatchNonbondedKernel(
374                 gmx::InteractionLocality::Local, ic, stepWork, enbvClearFNo, system.forceRec, &enerd, &nrnb);
375     }
376     cycles = gmx_cycles_read() - cycles;
377     if (!doWarmup)
378     {
379         if (options.reportTime)
380         {
381             const double uSec = static_cast<double>(cycles) * gmx_cycles_calibrate(1.0) * 1.e6;
382             if (options.cyclesPerPair)
383             {
384                 fprintf(stdout,
385                         "%13.2f %13.3f %10.3f %10.3f\n",
386                         uSec,
387                         uSec / options.numIterations,
388                         uSec / (options.numIterations * numPairs),
389                         uSec / (options.numIterations * numUsefulPairs));
390                 if (!options.outputFile.empty())
391                 {
392                     fprintf(system.csv,
393                             "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n",
394                             uSec,
395                             uSec / options.numIterations,
396                             uSec / (options.numIterations * numPairs),
397                             uSec / (options.numIterations * numUsefulPairs));
398                 }
399             }
400             else
401             {
402                 fprintf(stdout,
403                         "%13.2f %13.3f %10.3f %10.3f\n",
404                         uSec,
405                         uSec / options.numIterations,
406                         options.numIterations * numPairs / uSec,
407                         options.numIterations * numUsefulPairs / uSec);
408                 if (!options.outputFile.empty())
409                 {
410                     fprintf(system.csv,
411                             "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n",
412                             uSec,
413                             uSec / options.numIterations,
414                             options.numIterations * numPairs / uSec,
415                             options.numIterations * numUsefulPairs / uSec);
416                 }
417             }
418         }
419         else
420         {
421             const double dCycles = static_cast<double>(cycles);
422             if (options.cyclesPerPair)
423             {
424                 fprintf(stdout,
425                         "%10.3f %10.4f %8.4f %8.4f\n",
426                         cycles * 1e-6,
427                         dCycles / options.numIterations * 1e-6,
428                         dCycles / (options.numIterations * numPairs),
429                         dCycles / (options.numIterations * numUsefulPairs));
430             }
431             else
432             {
433                 fprintf(stdout,
434                         "%10.3f %10.4f %8.4f %8.4f\n",
435                         dCycles * 1e-6,
436                         dCycles / options.numIterations * 1e-6,
437                         options.numIterations * numPairs / dCycles,
438                         options.numIterations * numUsefulPairs / dCycles);
439             }
440         }
441     }
442 }
443
444 void bench(const int sizeFactor, const KernelBenchOptions& options)
445 {
446     // We don't want to call gmx_omp_nthreads_init(), so we init what we need
447     gmx_omp_nthreads_set(emntPairsearch, options.numThreads);
448     gmx_omp_nthreads_set(emntNonbonded, options.numThreads);
449
450     const gmx::BenchmarkSystem system(sizeFactor, options.outputFile);
451
452     real minBoxSize = norm(system.box[XX]);
453     for (int dim = YY; dim < DIM; dim++)
454     {
455         minBoxSize = std::min(minBoxSize, norm(system.box[dim]));
456     }
457     if (options.pairlistCutoff > 0.5 * minBoxSize)
458     {
459         gmx_fatal(FARGS, "The cut-off should be shorter than half the box size");
460     }
461
462     std::vector<KernelBenchOptions> optionsList;
463     if (options.doAll)
464     {
465         KernelBenchOptions                        opt = options;
466         gmx::EnumerationWrapper<BenchMarkCoulomb> coulombIter;
467         for (auto coulombType : coulombIter)
468         {
469             opt.coulombType = coulombType;
470             for (int halfLJ = 0; halfLJ <= 1; halfLJ++)
471             {
472                 opt.useHalfLJOptimization = (halfLJ == 1);
473
474                 gmx::EnumerationWrapper<BenchMarkCombRule> combRuleIter;
475                 for (auto combRule : combRuleIter)
476                 {
477                     opt.ljCombinationRule = combRule;
478
479                     expandSimdOptionAndPushBack(opt, &optionsList);
480                 }
481             }
482         }
483     }
484     else
485     {
486         expandSimdOptionAndPushBack(options, &optionsList);
487     }
488     GMX_RELEASE_ASSERT(!optionsList.empty(), "Expect at least on benchmark setup");
489
490 #if GMX_SIMD
491     if (options.nbnxmSimd != BenchMarkKernels::SimdNo)
492     {
493         fprintf(stdout, "SIMD width:           %d\n", GMX_SIMD_REAL_WIDTH);
494     }
495 #endif
496     fprintf(stdout, "System size:          %zu atoms\n", system.coordinates.size());
497     fprintf(stdout, "Cut-off radius:       %g nm\n", options.pairlistCutoff);
498     fprintf(stdout, "Number of threads:    %d\n", options.numThreads);
499     fprintf(stdout, "Number of iterations: %d\n", options.numIterations);
500     fprintf(stdout, "Compute energies:     %s\n", options.computeVirialAndEnergy ? "yes" : "no");
501     if (options.coulombType != BenchMarkCoulomb::ReactionField)
502     {
503         fprintf(stdout,
504                 "Ewald excl. corr.:    %s\n",
505                 options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr
506                         ? "table"
507                         : "analytical");
508     }
509     printf("\n");
510
511     if (options.numWarmupIterations > 0)
512     {
513         setupAndRunInstance(system, optionsList[0], true);
514     }
515
516     if (options.reportTime)
517     {
518         fprintf(stdout,
519                 "Coulomb LJ   comb. SIMD       usec         usec/it.        %s\n",
520                 options.cyclesPerPair ? "usec/pair" : "pairs/usec");
521         if (!options.outputFile.empty())
522         {
523             fprintf(system.csv,
524                     "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
525                     "energy\",\"Ewald excl. "
526                     "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"usec\",\"usec/it\",\"total "
527                     "pairs/usec\",\"useful pairs/usec\"\n");
528         }
529         fprintf(stdout,
530                 "                                                        total      useful\n");
531     }
532     else
533     {
534         fprintf(stdout,
535                 "Coulomb LJ   comb. SIMD    Mcycles  Mcycles/it.   %s\n",
536                 options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
537         if (!options.outputFile.empty())
538         {
539             fprintf(system.csv,
540                     "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
541                     "energy\",\"Ewald excl. "
542                     "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"Mcycles\",\"Mcycles/"
543                     "it\",\"total "
544                     "total cycles/pair\",\"total cycles per useful pair\"\n");
545         }
546         fprintf(stdout, "                                                total    useful\n");
547     }
548
549     for (const auto& optionsInstance : optionsList)
550     {
551         setupAndRunInstance(system, optionsInstance, false);
552     }
553
554     if (!options.outputFile.empty())
555     {
556         fclose(system.csv);
557     }
558 }
559
560 } // namespace Nbnxm