src/gromacs/nbnxm/benchmark/bench_setup.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * This file defines functions for setting up kernel benchmarks
  39  *
  40  * \author Berk Hess <hess@kth.se>
  41  * \ingroup module_nbnxm
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "bench_setup.h"
  47
  48 #include "gromacs/compat/optional.h"
  49 #include "gromacs/gmxlib/nrnb.h"
  50 #include "gromacs/mdlib/dispersioncorrection.h"
  51 #include "gromacs/mdlib/force_flags.h"
  52 #include "gromacs/mdlib/forcerec.h"
  53 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  54 #include "gromacs/mdtypes/enerdata.h"
  55 #include "gromacs/mdtypes/forcerec.h"
  56 #include "gromacs/mdtypes/interaction_const.h"
  57 #include "gromacs/mdtypes/mdatom.h"
  58 #include "gromacs/mdtypes/simulation_workload.h"
  59 #include "gromacs/nbnxm/atomdata.h"
  60 #include "gromacs/nbnxm/gridset.h"
  61 #include "gromacs/nbnxm/nbnxm.h"
  62 #include "gromacs/nbnxm/nbnxm_simd.h"
  63 #include "gromacs/nbnxm/pairlistset.h"
  64 #include "gromacs/nbnxm/pairlistsets.h"
  65 #include "gromacs/nbnxm/pairsearch.h"
  66 #include "gromacs/pbcutil/ishift.h"
  67 #include "gromacs/pbcutil/pbc.h"
  68 #include "gromacs/simd/simd.h"
  69 #include "gromacs/timing/cyclecounter.h"
  70 #include "gromacs/utility/enumerationhelpers.h"
  71 #include "gromacs/utility/fatalerror.h"
  72 #include "gromacs/utility/logger.h"
  73
  74 #include "bench_system.h"
  75
  76 namespace Nbnxm
  77 {
  78
  79 /*! \brief Checks the kernel setup
  80  *
  81  * Returns an error string when the kernel is not available.
  82  */
  83 static gmx::compat::optional<std::string> checkKernelSetup(const KernelBenchOptions& options)
  84 {
  85     GMX_RELEASE_ASSERT(options.nbnxmSimd < BenchMarkKernels::Count
  86                                && options.nbnxmSimd != BenchMarkKernels::SimdAuto,
  87                        "Need a valid kernel SIMD type");
  88
  89     // Check SIMD support
  90     if ((options.nbnxmSimd != BenchMarkKernels::SimdNo && !GMX_SIMD)
  91 #ifndef GMX_NBNXN_SIMD_4XN
  92         || options.nbnxmSimd == BenchMarkKernels::Simd4XM
  93 #endif
  94 #ifndef GMX_NBNXN_SIMD_2XNN
  95         || options.nbnxmSimd == BenchMarkKernels::Simd2XMM
  96 #endif
  97     )
  98     {
  99         return "the requested SIMD kernel was not set up at configuration time";
 100     }
 101
 102     return {};
 103 }
 104
 105 //! Helper to translate between the different enumeration values.
 106 static KernelType translateBenchmarkEnum(const BenchMarkKernels& kernel)
 107 {
 108     int kernelInt = static_cast<int>(kernel);
 109     return static_cast<KernelType>(kernelInt);
 110 }
 111
 112 /*! \brief Returns the kernel setup
 113  */
 114 static KernelSetup getKernelSetup(const KernelBenchOptions& options)
 115 {
 116     auto messageWhenInvalid = checkKernelSetup(options);
 117     GMX_RELEASE_ASSERT(!messageWhenInvalid, "Need valid options");
 118
 119     KernelSetup kernelSetup;
 120
 121     // The int enum options.nbnxnSimd is set up to match KernelType + 1
 122     kernelSetup.kernelType = translateBenchmarkEnum(options.nbnxmSimd);
 123     // The plain-C kernel does not support analytical ewald correction
 124     if (kernelSetup.kernelType == KernelType::Cpu4x4_PlainC)
 125     {
 126         kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
 127     }
 128     else
 129     {
 130         kernelSetup.ewaldExclusionType = options.useTabulatedEwaldCorr ? EwaldExclusionType::Table
 131                                                                        : EwaldExclusionType::Analytical;
 132     }
 133
 134     return kernelSetup;
 135 }
 136
 137 //! Return an interaction constants struct with members used in the benchmark set appropriately
 138 static interaction_const_t setupInteractionConst(const KernelBenchOptions& options)
 139
 140 {
 141     interaction_const_t ic;
 142
 143     ic.vdwtype      = evdwCUT;
 144     ic.vdw_modifier = eintmodPOTSHIFT;
 145     ic.rvdw         = options.pairlistCutoff;
 146
 147     ic.eeltype          = (options.coulombType == BenchMarkCoulomb::Pme ? eelPME : eelRF);
 148     ic.coulomb_modifier = eintmodPOTSHIFT;
 149     ic.rcoulomb         = options.pairlistCutoff;
 150
 151     // Reaction-field with epsilon_rf=inf
 152     // TODO: Replace by calc_rffac() after refactoring that
 153     ic.k_rf = 0.5 * std::pow(ic.rcoulomb, -3);
 154     ic.c_rf = 1 / ic.rcoulomb + ic.k_rf * ic.rcoulomb * ic.rcoulomb;
 155
 156     if (EEL_PME_EWALD(ic.eeltype))
 157     {
 158         // Ewald coefficients, we ignore the potential shift
 159         GMX_RELEASE_ASSERT(options.ewaldcoeff_q > 0, "Ewald coefficient should be > 0");
 160         ic.ewaldcoeff_q       = options.ewaldcoeff_q;
 161         ic.coulombEwaldTables = std::make_unique<EwaldCorrectionTables>();
 162         init_interaction_const_tables(nullptr, &ic);
 163     }
 164
 165     return ic;
 166 }
 167
 168 //! Sets up and returns a Nbnxm object for the given benchmark options and system
 169 static std::unique_ptr<nonbonded_verlet_t> setupNbnxmForBenchInstance(const KernelBenchOptions& options,
 170                                                                       const gmx::BenchmarkSystem& system)
 171 {
 172     const auto pinPolicy  = (options.useGpu ? gmx::PinningPolicy::PinnedIfSupported
 173                                            : gmx::PinningPolicy::CannotBePinned);
 174     const int  numThreads = options.numThreads;
 175     // Note: the options and Nbnxm combination rule enums values should match
 176     const int combinationRule = static_cast<int>(options.ljCombinationRule);
 177
 178     auto messageWhenInvalid = checkKernelSetup(options);
 179     if (messageWhenInvalid)
 180     {
 181         gmx_fatal(FARGS, "Requested kernel is unavailable because %s.", messageWhenInvalid->c_str());
 182     }
 183     Nbnxm::KernelSetup kernelSetup = getKernelSetup(options);
 184
 185     PairlistParams pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
 186
 187     GridSet gridSet(PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false,
 188                     numThreads, pinPolicy);
 189
 190     auto pairlistSets = std::make_unique<PairlistSets>(pairlistParams, false, 0);
 191
 192     auto pairSearch =
 193             std::make_unique<PairSearch>(PbcType::Xyz, false, nullptr, nullptr,
 194                                          pairlistParams.pairlistType, false, numThreads, pinPolicy);
 195
 196     auto atomData = std::make_unique<nbnxn_atomdata_t>(pinPolicy);
 197
 198     // Put everything together
 199     auto nbv = std::make_unique<nonbonded_verlet_t>(std::move(pairlistSets), std::move(pairSearch),
 200                                                     std::move(atomData), kernelSetup, nullptr, nullptr);
 201
 202     nbnxn_atomdata_init(gmx::MDLogger(), nbv->nbat.get(), kernelSetup.kernelType, combinationRule,
 203                         system.numAtomTypes, system.nonbondedParameters, 1, numThreads);
 204
 205     t_nrnb nrnb;
 206
 207     GMX_RELEASE_ASSERT(!TRICLINIC(system.box), "Only rectangular unit-cells are supported here");
 208     const rvec lowerCorner = { 0, 0, 0 };
 209     const rvec upperCorner = { system.box[XX][XX], system.box[YY][YY], system.box[ZZ][ZZ] };
 210
 211     gmx::ArrayRef<const int> atomInfo;
 212     if (options.useHalfLJOptimization)
 213     {
 214         atomInfo = system.atomInfoOxygenVdw;
 215     }
 216     else
 217     {
 218         atomInfo = system.atomInfoAllVdw;
 219     }
 220
 221     const real atomDensity = system.coordinates.size() / det(system.box);
 222
 223     nbnxn_put_on_grid(nbv.get(), system.box, 0, lowerCorner, upperCorner, nullptr,
 224                       { 0, int(system.coordinates.size()) }, atomDensity, atomInfo,
 225                       system.coordinates, 0, nullptr);
 226
 227     nbv->constructPairlist(gmx::InteractionLocality::Local, system.excls, 0, &nrnb);
 228
 229     t_mdatoms mdatoms;
 230     // We only use (read) the atom type and charge from mdatoms
 231     mdatoms.typeA   = const_cast<int*>(system.atomTypes.data());
 232     mdatoms.chargeA = const_cast<real*>(system.charges.data());
 233     nbv->setAtomProperties(mdatoms, atomInfo);
 234
 235     return nbv;
 236 }
 237
 238 //! Add the options instance to the list for all requested kernel SIMD types
 239 static void expandSimdOptionAndPushBack(const KernelBenchOptions&        options,
 240                                         std::vector<KernelBenchOptions>* optionsList)
 241 {
 242     if (options.nbnxmSimd == BenchMarkKernels::SimdAuto)
 243     {
 244         bool addedInstance = false;
 245 #ifdef GMX_NBNXN_SIMD_4XN
 246         optionsList->push_back(options);
 247         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd4XM;
 248         addedInstance                 = true;
 249 #endif
 250 #ifdef GMX_NBNXN_SIMD_2XNN
 251         optionsList->push_back(options);
 252         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd2XMM;
 253         addedInstance                 = true;
 254 #endif
 255         if (!addedInstance)
 256         {
 257             optionsList->push_back(options);
 258             optionsList->back().nbnxmSimd = BenchMarkKernels::SimdNo;
 259         }
 260     }
 261     else
 262     {
 263         optionsList->push_back(options);
 264     }
 265 }
 266
 267 //! Sets up and runs the requested benchmark instance and prints the results
 268 //
 269 // When \p doWarmup is true runs the warmup iterations instead
 270 // of the normal ones and does not print any results
 271 static void setupAndRunInstance(const gmx::BenchmarkSystem& system,
 272                                 const KernelBenchOptions&   options,
 273                                 const bool                  doWarmup)
 274 {
 275     // Generate an, accurate, estimate of the number of non-zero pair interactions
 276     const real atomDensity = system.coordinates.size() / det(system.box);
 277     const real numPairsWithinCutoff =
 278             atomDensity * 4.0 / 3.0 * M_PI * std::pow(options.pairlistCutoff, 3);
 279     const real numUsefulPairs = system.coordinates.size() * 0.5 * (numPairsWithinCutoff + 1);
 280
 281     std::unique_ptr<nonbonded_verlet_t> nbv = setupNbnxmForBenchInstance(options, system);
 282
 283     // We set the interaction cut-off to the pairlist cut-off
 284     interaction_const_t ic = setupInteractionConst(options);
 285
 286     t_nrnb nrnb = { 0 };
 287
 288     gmx_enerdata_t enerd(1, 0);
 289
 290     gmx::StepWorkload stepWork;
 291     stepWork.computeForces = true;
 292     if (options.computeVirialAndEnergy)
 293     {
 294         stepWork.computeVirial = true;
 295         stepWork.computeEnergy = true;
 296     }
 297
 298     const gmx::EnumerationArray<BenchMarkKernels, std::string> kernelNames = { "auto", "no", "4xM",
 299                                                                                "2xMM" };
 300
 301     const gmx::EnumerationArray<BenchMarkCombRule, std::string> combruleNames = { "geom.", "LB",
 302                                                                                   "none" };
 303
 304     if (!doWarmup)
 305     {
 306         fprintf(stdout, "%-7s %-4s %-5s %-4s ",
 307                 options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
 308                 options.useHalfLJOptimization ? "half" : "all",
 309                 combruleNames[options.ljCombinationRule].c_str(), kernelNames[options.nbnxmSimd].c_str());
 310     }
 311
 312     // Run pre-iteration to avoid cache misses
 313     for (int iter = 0; iter < options.numPreIterations; iter++)
 314     {
 315         nbv->dispatchNonbondedKernel(gmx::InteractionLocality::Local, ic, stepWork, enbvClearFYes,
 316                                      system.forceRec, &enerd, &nrnb);
 317     }
 318
 319     const int numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations);
 320     const PairlistSet& pairlistSet = nbv->pairlistSets().pairlistSet(gmx::InteractionLocality::Local);
 321     const gmx::index numPairs = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_;
 322     gmx_cycles_t cycles = gmx_cycles_read();
 323     for (int iter = 0; iter < numIterations; iter++)
 324     {
 325         // Run the kernel without force clearing
 326         nbv->dispatchNonbondedKernel(gmx::InteractionLocality::Local, ic, stepWork, enbvClearFNo,
 327                                      system.forceRec, &enerd, &nrnb);
 328     }
 329     cycles = gmx_cycles_read() - cycles;
 330     if (!doWarmup)
 331     {
 332         const double dCycles = static_cast<double>(cycles);
 333         if (options.cyclesPerPair)
 334         {
 335             fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", cycles * 1e-6,
 336                     dCycles / options.numIterations * 1e-6, dCycles / (options.numIterations * numPairs),
 337                     dCycles / (options.numIterations * numUsefulPairs));
 338         }
 339         else
 340         {
 341             fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", dCycles * 1e-6,
 342                     dCycles / options.numIterations * 1e-6, options.numIterations * numPairs / dCycles,
 343                     options.numIterations * numUsefulPairs / dCycles);
 344         }
 345     }
 346 }
 347
 348 void bench(const int sizeFactor, const KernelBenchOptions& options)
 349 {
 350     // We don't want to call gmx_omp_nthreads_init(), so we init what we need
 351     gmx_omp_nthreads_set(emntPairsearch, options.numThreads);
 352     gmx_omp_nthreads_set(emntNonbonded, options.numThreads);
 353
 354     const gmx::BenchmarkSystem system(sizeFactor);
 355
 356     real minBoxSize = norm(system.box[XX]);
 357     for (int dim = YY; dim < DIM; dim++)
 358     {
 359         minBoxSize = std::min(minBoxSize, norm(system.box[dim]));
 360     }
 361     if (options.pairlistCutoff > 0.5 * minBoxSize)
 362     {
 363         gmx_fatal(FARGS, "The cut-off should be shorter than half the box size");
 364     }
 365
 366     std::vector<KernelBenchOptions> optionsList;
 367     if (options.doAll)
 368     {
 369         KernelBenchOptions                        opt = options;
 370         gmx::EnumerationWrapper<BenchMarkCoulomb> coulombIter;
 371         for (auto coulombType : coulombIter)
 372         {
 373             opt.coulombType = coulombType;
 374             for (int halfLJ = 0; halfLJ <= 1; halfLJ++)
 375             {
 376                 opt.useHalfLJOptimization = (halfLJ == 1);
 377
 378                 gmx::EnumerationWrapper<BenchMarkCombRule> combRuleIter;
 379                 for (auto combRule : combRuleIter)
 380                 {
 381                     if (combRule == BenchMarkCombRule::RuleNone)
 382                     {
 383                         continue;
 384                     }
 385                     opt.ljCombinationRule = combRule;
 386
 387                     expandSimdOptionAndPushBack(opt, &optionsList);
 388                 }
 389             }
 390         }
 391     }
 392     else
 393     {
 394         expandSimdOptionAndPushBack(options, &optionsList);
 395     }
 396     GMX_RELEASE_ASSERT(!optionsList.empty(), "Expect at least on benchmark setup");
 397
 398 #if GMX_SIMD
 399     if (options.nbnxmSimd != BenchMarkKernels::SimdNo)
 400     {
 401         fprintf(stdout, "SIMD width:           %d\n", GMX_SIMD_REAL_WIDTH);
 402     }
 403 #endif
 404     fprintf(stdout, "System size:          %zu atoms\n", system.coordinates.size());
 405     fprintf(stdout, "Cut-off radius:       %g nm\n", options.pairlistCutoff);
 406     fprintf(stdout, "Number of threads:    %d\n", options.numThreads);
 407     fprintf(stdout, "Number of iterations: %d\n", options.numIterations);
 408     fprintf(stdout, "Compute energies:     %s\n", options.computeVirialAndEnergy ? "yes" : "no");
 409     if (options.coulombType != BenchMarkCoulomb::ReactionField)
 410     {
 411         fprintf(stdout, "Ewald excl. corr.:    %s\n",
 412                 options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr
 413                         ? "table"
 414                         : "analytical");
 415     }
 416     printf("\n");
 417
 418     if (options.numWarmupIterations > 0)
 419     {
 420         setupAndRunInstance(system, optionsList[0], true);
 421     }
 422
 423     fprintf(stdout, "Coulomb LJ   comb. SIMD    Mcycles  Mcycles/it.   %s\n",
 424             options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
 425     fprintf(stdout, "                                                total    useful\n");
 426
 427     for (const auto& optionsInstance : optionsList)
 428     {
 429         setupAndRunInstance(system, optionsInstance, false);
 430     }
 431 }
 432
 433 } // namespace Nbnxm