src/gromacs/nbnxm/benchmark/bench_setup.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * This file defines functions for setting up kernel benchmarks
  39  *
  40  * \author Berk Hess <hess@kth.se>
  41  * \ingroup module_nbnxm
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "bench_setup.h"
  47
  48 #include "gromacs/compat/optional.h"
  49 #include "gromacs/gmxlib/nrnb.h"
  50 #include "gromacs/mdlib/dispersioncorrection.h"
  51 #include "gromacs/mdlib/force_flags.h"
  52 #include "gromacs/mdlib/forcerec.h"
  53 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  54 #include "gromacs/mdtypes/enerdata.h"
  55 #include "gromacs/mdtypes/forcerec.h"
  56 #include "gromacs/mdtypes/interaction_const.h"
  57 #include "gromacs/mdtypes/mdatom.h"
  58 #include "gromacs/mdtypes/simulation_workload.h"
  59 #include "gromacs/nbnxm/atomdata.h"
  60 #include "gromacs/nbnxm/gridset.h"
  61 #include "gromacs/nbnxm/nbnxm.h"
  62 #include "gromacs/nbnxm/nbnxm_simd.h"
  63 #include "gromacs/nbnxm/pairlistset.h"
  64 #include "gromacs/nbnxm/pairlistsets.h"
  65 #include "gromacs/nbnxm/pairsearch.h"
  66 #include "gromacs/pbcutil/ishift.h"
  67 #include "gromacs/pbcutil/pbc.h"
  68 #include "gromacs/simd/simd.h"
  69 #include "gromacs/timing/cyclecounter.h"
  70 #include "gromacs/utility/enumerationhelpers.h"
  71 #include "gromacs/utility/fatalerror.h"
  72 #include "gromacs/utility/logger.h"
  73
  74 #include "bench_system.h"
  75
  76 namespace Nbnxm
  77 {
  78
  79 /*! \brief Checks the kernel setup
  80  *
  81  * Returns an error string when the kernel is not available.
  82  */
  83 static gmx::compat::optional<std::string> checkKernelSetup(const KernelBenchOptions &options)
  84 {
  85     GMX_RELEASE_ASSERT(options.nbnxmSimd < BenchMarkKernels::Count &&
  86                        options.nbnxmSimd != BenchMarkKernels::SimdAuto, "Need a valid kernel SIMD type");
  87
  88     // Check SIMD support
  89     if ((options.nbnxmSimd != BenchMarkKernels::SimdNo && !GMX_SIMD)
  90 #ifndef GMX_NBNXN_SIMD_4XN
  91         || options.nbnxmSimd == BenchMarkKernels::Simd4XM
  92 #endif
  93 #ifndef GMX_NBNXN_SIMD_2XNN
  94         || options.nbnxmSimd == BenchMarkKernels::Simd2XMM
  95 #endif
  96         )
  97     {
  98         return "the requested SIMD kernel was not set up at configuration time";
  99     }
 100
 101     return {};
 102 }
 103
 104 //! Helper to translate between the different enumeration values.
 105 static KernelType translateBenchmarkEnum(const BenchMarkKernels &kernel)
 106 {
 107     int kernelInt = static_cast<int>(kernel);
 108     return static_cast<KernelType>(kernelInt);
 109 }
 110
 111 /*! \brief Returns the kernel setup
 112  */
 113 static KernelSetup getKernelSetup(const KernelBenchOptions &options)
 114 {
 115     auto messageWhenInvalid = checkKernelSetup(options);
 116     GMX_RELEASE_ASSERT(!messageWhenInvalid, "Need valid options");
 117
 118     KernelSetup kernelSetup;
 119
 120     //The int enum options.nbnxnSimd is set up to match KernelType + 1
 121     kernelSetup.kernelType         = translateBenchmarkEnum(options.nbnxmSimd);
 122     // The plain-C kernel does not support analytical ewald correction
 123     if (kernelSetup.kernelType == KernelType::Cpu4x4_PlainC)
 124     {
 125         kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
 126     }
 127     else
 128     {
 129         kernelSetup.ewaldExclusionType = options.useTabulatedEwaldCorr ? EwaldExclusionType::Table : EwaldExclusionType::Analytical;
 130     }
 131
 132     return kernelSetup;
 133 }
 134
 135 //! Return an interaction constants struct with members used in the benchmark set appropriately
 136 static interaction_const_t setupInteractionConst(const KernelBenchOptions &options)
 137
 138 {
 139     interaction_const_t ic;
 140
 141     ic.vdwtype          = evdwCUT;
 142     ic.vdw_modifier     = eintmodPOTSHIFT;
 143     ic.rvdw             = options.pairlistCutoff;
 144
 145     ic.eeltype          = (options.coulombType == BenchMarkCoulomb::Pme ? eelPME : eelRF);
 146     ic.coulomb_modifier = eintmodPOTSHIFT;
 147     ic.rcoulomb         = options.pairlistCutoff;
 148
 149     // Reaction-field with epsilon_rf=inf
 150     // TODO: Replace by calc_rffac() after refactoring that
 151     ic.k_rf             = 0.5*std::pow(ic.rcoulomb, -3);
 152     ic.c_rf             = 1/ic.rcoulomb + ic.k_rf*ic.rcoulomb*ic.rcoulomb;
 153
 154     if (EEL_PME_EWALD(ic.eeltype))
 155     {
 156         // Ewald coefficients, we ignore the potential shift
 157         GMX_RELEASE_ASSERT(options.ewaldcoeff_q > 0, "Ewald coefficient should be > 0");
 158         ic.ewaldcoeff_q       = options.ewaldcoeff_q;
 159         ic.coulombEwaldTables = std::make_unique<EwaldCorrectionTables>();
 160         init_interaction_const_tables(nullptr, &ic);
 161     }
 162
 163     return ic;
 164 }
 165
 166 //! Sets up and returns a Nbnxm object for the given benchmark options and system
 167 static std::unique_ptr<nonbonded_verlet_t>
 168 setupNbnxmForBenchInstance(const KernelBenchOptions   &options,
 169                            const gmx::BenchmarkSystem &system)
 170 {
 171     const auto         pinPolicy       = (options.useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
 172     const int          numThreads      = options.numThreads;
 173     // Note: the options and Nbnxm combination rule enums values should match
 174     const int          combinationRule = static_cast<int>(options.ljCombinationRule);
 175
 176     auto               messageWhenInvalid = checkKernelSetup(options);
 177     if (messageWhenInvalid)
 178     {
 179         gmx_fatal(FARGS, "Requested kernel is unavailable because %s.",
 180                   messageWhenInvalid->c_str());
 181     }
 182     Nbnxm::KernelSetup kernelSetup = getKernelSetup(options);
 183
 184     PairlistParams     pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
 185
 186     GridSet            gridSet(epbcXYZ, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
 187
 188     auto               pairlistSets = std::make_unique<PairlistSets>(pairlistParams, false, 0);
 189
 190     auto               pairSearch   = std::make_unique<PairSearch>(epbcXYZ, false, nullptr, nullptr,
 191                                                                    pairlistParams.pairlistType,
 192                                                                    false, numThreads, pinPolicy);
 193
 194     auto atomData     = std::make_unique<nbnxn_atomdata_t>(pinPolicy);
 195
 196     // Put everything together
 197     auto nbv = std::make_unique<nonbonded_verlet_t>(std::move(pairlistSets),
 198                                                     std::move(pairSearch),
 199                                                     std::move(atomData),
 200                                                     kernelSetup,
 201                                                     nullptr,
 202                                                     nullptr);
 203
 204     nbnxn_atomdata_init(gmx::MDLogger(),
 205                         nbv->nbat.get(), kernelSetup.kernelType,
 206                         combinationRule, system.numAtomTypes, system.nonbondedParameters.data(),
 207                         1, numThreads);
 208
 209     t_nrnb nrnb;
 210
 211     GMX_RELEASE_ASSERT(!TRICLINIC(system.box), "Only rectangular unit-cells are supported here");
 212     const rvec               lowerCorner = { 0, 0, 0 };
 213     const rvec               upperCorner = {
 214         system.box[XX][XX],
 215         system.box[YY][YY],
 216         system.box[ZZ][ZZ]
 217     };
 218
 219     gmx::ArrayRef<const int> atomInfo;
 220     if (options.useHalfLJOptimization)
 221     {
 222         atomInfo = system.atomInfoOxygenVdw;
 223     }
 224     else
 225     {
 226         atomInfo = system.atomInfoAllVdw;
 227     }
 228
 229     const real atomDensity = system.coordinates.size()/det(system.box);
 230
 231     nbnxn_put_on_grid(nbv.get(),
 232                       system.box, 0, lowerCorner, upperCorner,
 233                       nullptr, { 0, int(system.coordinates.size()) }, atomDensity,
 234                       atomInfo, system.coordinates,
 235                       0, nullptr);
 236
 237     nbv->constructPairlist(Nbnxm::InteractionLocality::Local,
 238                            &system.excls, 0, &nrnb);
 239
 240     t_mdatoms mdatoms;
 241     // We only use (read) the atom type and charge from mdatoms
 242     mdatoms.typeA   = const_cast<int *>(system.atomTypes.data());
 243     mdatoms.chargeA = const_cast<real *>(system.charges.data());
 244     nbv->setAtomProperties(mdatoms, atomInfo);
 245
 246     return nbv;
 247 }
 248
 249 //! Add the options instance to the list for all requested kernel SIMD types
 250 static void
 251 expandSimdOptionAndPushBack(const KernelBenchOptions        &options,
 252                             std::vector<KernelBenchOptions> *optionsList)
 253 {
 254     if (options.nbnxmSimd == BenchMarkKernels::SimdAuto)
 255     {
 256         bool addedInstance = false;
 257 #ifdef GMX_NBNXN_SIMD_4XN
 258         optionsList->push_back(options);
 259         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd4XM;
 260         addedInstance = true;
 261 #endif
 262 #ifdef GMX_NBNXN_SIMD_2XNN
 263         optionsList->push_back(options);
 264         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd2XMM;
 265         addedInstance = true;
 266 #endif
 267         if (!addedInstance)
 268         {
 269             optionsList->push_back(options);
 270             optionsList->back().nbnxmSimd = BenchMarkKernels::SimdNo;
 271         }
 272     }
 273     else
 274     {
 275         optionsList->push_back(options);
 276     }
 277 }
 278
 279 //! Sets up and runs the requested benchmark instance and prints the results
 280 //
 281 // When \p doWarmup is true runs the warmup iterations instead
 282 // of the normal ones and does not print any results
 283 static void setupAndRunInstance(const gmx::BenchmarkSystem &system,
 284                                 const KernelBenchOptions   &options,
 285                                 const bool                  doWarmup)
 286 {
 287     // Generate an, accurate, estimate of the number of non-zero pair interactions
 288     const real                          atomDensity          = system.coordinates.size()/det(system.box);
 289     const real                          numPairsWithinCutoff = atomDensity*4.0/3.0*M_PI*std::pow(options.pairlistCutoff, 3);
 290     const real                          numUsefulPairs       = system.coordinates.size()*0.5*(numPairsWithinCutoff + 1);
 291
 292     std::unique_ptr<nonbonded_verlet_t> nbv = setupNbnxmForBenchInstance(options, system);
 293
 294     // We set the interaction cut-off to the pairlist cut-off
 295     interaction_const_t   ic = setupInteractionConst(options);
 296
 297     t_nrnb                nrnb = { 0 };
 298
 299     gmx_enerdata_t        enerd(1, 0);
 300
 301     gmx::StepWorkload     stepWork;
 302     stepWork.computeForces = true;
 303     if (options.computeVirialAndEnergy)
 304     {
 305         stepWork.computeVirial = true;
 306         stepWork.computeEnergy = true;
 307     }
 308
 309     const gmx::EnumerationArray<BenchMarkKernels, std::string>  kernelNames = { "auto", "no", "4xM", "2xMM" };
 310
 311     const gmx::EnumerationArray<BenchMarkCombRule, std::string> combruleNames = { "geom.", "LB", "none" };
 312
 313     if (!doWarmup)
 314     {
 315         fprintf(stdout, "%-7s %-4s %-5s %-4s ",
 316                 options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
 317                 options.useHalfLJOptimization ? "half" : "all",
 318                 combruleNames[options.ljCombinationRule].c_str(),
 319                 kernelNames[options.nbnxmSimd].c_str());
 320     }
 321
 322     // Run pre-iteration to avoid cache misses
 323     for (int iter = 0; iter < options.numPreIterations; iter++)
 324     {
 325         nbv->dispatchNonbondedKernel(InteractionLocality::Local,
 326                                      ic, stepWork, enbvClearFYes, system.forceRec,
 327                                      &enerd,
 328                                      &nrnb);
 329     }
 330
 331     const int          numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations);
 332     const PairlistSet &pairlistSet   = nbv->pairlistSets().pairlistSet(InteractionLocality::Local);
 333     const gmx::index   numPairs      = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_;
 334     gmx_cycles_t       cycles        = gmx_cycles_read();
 335     for (int iter = 0; iter < numIterations; iter++)
 336     {
 337         // Run the kernel without force clearing
 338         nbv->dispatchNonbondedKernel(InteractionLocality::Local,
 339                                      ic, stepWork, enbvClearFNo, system.forceRec,
 340                                      &enerd,
 341                                      &nrnb);
 342     }
 343     cycles = gmx_cycles_read() - cycles;
 344     if (!doWarmup)
 345     {
 346         const double dCycles = static_cast<double>(cycles);
 347         if (options.cyclesPerPair)
 348         {
 349             fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n",
 350                     cycles*1e-6,
 351                     dCycles/options.numIterations*1e-6,
 352                     dCycles/(options.numIterations*numPairs),
 353                     dCycles/(options.numIterations*numUsefulPairs));
 354         }
 355         else
 356         {
 357             fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n",
 358                     dCycles*1e-6,
 359                     dCycles/options.numIterations*1e-6,
 360                     options.numIterations*numPairs/dCycles,
 361                     options.numIterations*numUsefulPairs/dCycles);
 362         }
 363     }
 364 }
 365
 366 void bench(const int                 sizeFactor,
 367            const KernelBenchOptions &options)
 368 {
 369     // We don't want to call gmx_omp_nthreads_init(), so we init what we need
 370     gmx_omp_nthreads_set(emntPairsearch, options.numThreads);
 371     gmx_omp_nthreads_set(emntNonbonded, options.numThreads);
 372
 373     const gmx::BenchmarkSystem system(sizeFactor);
 374
 375     real                       minBoxSize = norm(system.box[XX]);
 376     for (int dim = YY; dim < DIM; dim++)
 377     {
 378         minBoxSize = std::min(minBoxSize, norm(system.box[dim]));
 379     }
 380     if (options.pairlistCutoff > 0.5*minBoxSize)
 381     {
 382         gmx_fatal(FARGS, "The cut-off should be shorter than half the box size");
 383     }
 384
 385     std::vector<KernelBenchOptions> optionsList;
 386     if (options.doAll)
 387     {
 388         KernelBenchOptions opt = options;
 389         gmx::EnumerationWrapper<BenchMarkCoulomb> coulombIter;
 390         for (auto coulombType : coulombIter)
 391         {
 392             opt.coulombType = coulombType;
 393             for (int halfLJ = 0; halfLJ <= 1; halfLJ++)
 394             {
 395                 opt.useHalfLJOptimization = (halfLJ == 1);
 396
 397                 gmx::EnumerationWrapper<BenchMarkCombRule> combRuleIter;
 398                 for (auto combRule : combRuleIter)
 399                 {
 400                     if (combRule == BenchMarkCombRule::RuleNone)
 401                     {
 402                         continue;
 403                     }
 404                     opt.ljCombinationRule = combRule;
 405
 406                     expandSimdOptionAndPushBack(opt, &optionsList);
 407                 }
 408             }
 409         }
 410     }
 411     else
 412     {
 413         expandSimdOptionAndPushBack(options, &optionsList);
 414     }
 415     GMX_RELEASE_ASSERT(!optionsList.empty(), "Expect at least on benchmark setup");
 416
 417 #if GMX_SIMD
 418     if (options.nbnxmSimd != BenchMarkKernels::SimdNo)
 419     {
 420         fprintf(stdout, "SIMD width:           %d\n", GMX_SIMD_REAL_WIDTH);
 421     }
 422 #endif
 423     fprintf(stdout, "System size:          %zu atoms\n", system.coordinates.size());
 424     fprintf(stdout, "Cut-off radius:       %g nm\n", options.pairlistCutoff);
 425     fprintf(stdout, "Number of threads:    %d\n", options.numThreads);
 426     fprintf(stdout, "Number of iterations: %d\n", options.numIterations);
 427     fprintf(stdout, "Compute energies:     %s\n",
 428             options.computeVirialAndEnergy ? "yes" : "no");
 429     if (options.coulombType != BenchMarkCoulomb::ReactionField)
 430     {
 431         fprintf(stdout, "Ewald excl. corr.:    %s\n",
 432                 options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr ? "table" : "analytical");
 433     }
 434     printf("\n");
 435
 436     if (options.numWarmupIterations > 0)
 437     {
 438         setupAndRunInstance(system, optionsList[0], true);
 439     }
 440
 441     fprintf(stdout, "Coulomb LJ   comb. SIMD    Mcycles  Mcycles/it.   %s\n",
 442             options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
 443     fprintf(stdout, "                                                total    useful\n");
 444
 445     for (const auto &optionsInstance : optionsList)
 446     {
 447         setupAndRunInstance(system, optionsInstance, false);
 448     }
 449 }
 450
 451 } // namespace Nbnxm