src/gromacs/nbnxm/benchmark/bench_setup.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * This file defines functions for setting up kernel benchmarks
  39  *
  40  * \author Berk Hess <hess@kth.se>
  41  * \ingroup module_nbnxm
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "bench_setup.h"
  47
  48 #include <optional>
  49
  50 #include "gromacs/gmxlib/nrnb.h"
  51 #include "gromacs/mdlib/dispersioncorrection.h"
  52 #include "gromacs/mdlib/force_flags.h"
  53 #include "gromacs/mdlib/forcerec.h"
  54 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  55 #include "gromacs/mdtypes/enerdata.h"
  56 #include "gromacs/mdtypes/forcerec.h"
  57 #include "gromacs/mdtypes/interaction_const.h"
  58 #include "gromacs/mdtypes/mdatom.h"
  59 #include "gromacs/mdtypes/simulation_workload.h"
  60 #include "gromacs/nbnxm/atomdata.h"
  61 #include "gromacs/nbnxm/gridset.h"
  62 #include "gromacs/nbnxm/nbnxm.h"
  63 #include "gromacs/nbnxm/nbnxm_simd.h"
  64 #include "gromacs/nbnxm/pairlistset.h"
  65 #include "gromacs/nbnxm/pairlistsets.h"
  66 #include "gromacs/nbnxm/pairsearch.h"
  67 #include "gromacs/pbcutil/ishift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/simd/simd.h"
  70 #include "gromacs/timing/cyclecounter.h"
  71 #include "gromacs/utility/enumerationhelpers.h"
  72 #include "gromacs/utility/fatalerror.h"
  73 #include "gromacs/utility/logger.h"
  74
  75 #include "bench_system.h"
  76
  77 namespace Nbnxm
  78 {
  79
  80 /*! \brief Checks the kernel setup
  81  *
  82  * Returns an error string when the kernel is not available.
  83  */
  84 static std::optional<std::string> checkKernelSetup(const KernelBenchOptions& options)
  85 {
  86     GMX_RELEASE_ASSERT(options.nbnxmSimd < BenchMarkKernels::Count
  87                                && options.nbnxmSimd != BenchMarkKernels::SimdAuto,
  88                        "Need a valid kernel SIMD type");
  89
  90     // Check SIMD support
  91     if ((options.nbnxmSimd != BenchMarkKernels::SimdNo && !GMX_SIMD)
  92 #ifndef GMX_NBNXN_SIMD_4XN
  93         || options.nbnxmSimd == BenchMarkKernels::Simd4XM
  94 #endif
  95 #ifndef GMX_NBNXN_SIMD_2XNN
  96         || options.nbnxmSimd == BenchMarkKernels::Simd2XMM
  97 #endif
  98     )
  99     {
 100         return "the requested SIMD kernel was not set up at configuration time";
 101     }
 102
 103     if (options.reportTime && (0 > gmx_cycles_calibrate(1.0)))
 104     {
 105         return "the -time option is not supported on this system";
 106     }
 107
 108     return {};
 109 }
 110
 111 //! Helper to translate between the different enumeration values.
 112 static KernelType translateBenchmarkEnum(const BenchMarkKernels& kernel)
 113 {
 114     int kernelInt = static_cast<int>(kernel);
 115     return static_cast<KernelType>(kernelInt);
 116 }
 117
 118 /*! \brief Returns the kernel setup
 119  */
 120 static KernelSetup getKernelSetup(const KernelBenchOptions& options)
 121 {
 122     auto messageWhenInvalid = checkKernelSetup(options);
 123     GMX_RELEASE_ASSERT(!messageWhenInvalid, "Need valid options");
 124
 125     KernelSetup kernelSetup;
 126
 127     // The int enum options.nbnxnSimd is set up to match KernelType + 1
 128     kernelSetup.kernelType = translateBenchmarkEnum(options.nbnxmSimd);
 129     // The plain-C kernel does not support analytical ewald correction
 130     if (kernelSetup.kernelType == KernelType::Cpu4x4_PlainC)
 131     {
 132         kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
 133     }
 134     else
 135     {
 136         kernelSetup.ewaldExclusionType = options.useTabulatedEwaldCorr ? EwaldExclusionType::Table
 137                                                                        : EwaldExclusionType::Analytical;
 138     }
 139
 140     return kernelSetup;
 141 }
 142
 143 //! Return an interaction constants struct with members used in the benchmark set appropriately
 144 static interaction_const_t setupInteractionConst(const KernelBenchOptions& options)
 145
 146 {
 147     interaction_const_t ic;
 148
 149     ic.vdwtype      = evdwCUT;
 150     ic.vdw_modifier = eintmodPOTSHIFT;
 151     ic.rvdw         = options.pairlistCutoff;
 152
 153     ic.eeltype          = (options.coulombType == BenchMarkCoulomb::Pme ? eelPME : eelRF);
 154     ic.coulomb_modifier = eintmodPOTSHIFT;
 155     ic.rcoulomb         = options.pairlistCutoff;
 156
 157     // Reaction-field with epsilon_rf=inf
 158     // TODO: Replace by calc_rffac() after refactoring that
 159     ic.k_rf = 0.5 * std::pow(ic.rcoulomb, -3);
 160     ic.c_rf = 1 / ic.rcoulomb + ic.k_rf * ic.rcoulomb * ic.rcoulomb;
 161
 162     if (EEL_PME_EWALD(ic.eeltype))
 163     {
 164         // Ewald coefficients, we ignore the potential shift
 165         GMX_RELEASE_ASSERT(options.ewaldcoeff_q > 0, "Ewald coefficient should be > 0");
 166         ic.ewaldcoeff_q       = options.ewaldcoeff_q;
 167         ic.coulombEwaldTables = std::make_unique<EwaldCorrectionTables>();
 168         init_interaction_const_tables(nullptr, &ic, 0);
 169     }
 170
 171     return ic;
 172 }
 173
 174 //! Sets up and returns a Nbnxm object for the given benchmark options and system
 175 static std::unique_ptr<nonbonded_verlet_t> setupNbnxmForBenchInstance(const KernelBenchOptions& options,
 176                                                                       const gmx::BenchmarkSystem& system)
 177 {
 178     const auto pinPolicy  = (options.useGpu ? gmx::PinningPolicy::PinnedIfSupported
 179                                            : gmx::PinningPolicy::CannotBePinned);
 180     const int  numThreads = options.numThreads;
 181     // Note: the options and Nbnxm combination rule enums values should match
 182     const int combinationRule = static_cast<int>(options.ljCombinationRule);
 183
 184     auto messageWhenInvalid = checkKernelSetup(options);
 185     if (messageWhenInvalid)
 186     {
 187         gmx_fatal(FARGS, "Requested kernel is unavailable because %s.", messageWhenInvalid->c_str());
 188     }
 189     Nbnxm::KernelSetup kernelSetup = getKernelSetup(options);
 190
 191     PairlistParams pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
 192
 193     GridSet gridSet(
 194             PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
 195
 196     auto pairlistSets = std::make_unique<PairlistSets>(pairlistParams, false, 0);
 197
 198     auto pairSearch = std::make_unique<PairSearch>(
 199             PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false, numThreads, pinPolicy);
 200
 201     auto atomData = std::make_unique<nbnxn_atomdata_t>(pinPolicy);
 202
 203     // Put everything together
 204     auto nbv = std::make_unique<nonbonded_verlet_t>(
 205             std::move(pairlistSets), std::move(pairSearch), std::move(atomData), kernelSetup, nullptr, nullptr);
 206
 207     nbnxn_atomdata_init(gmx::MDLogger(),
 208                         nbv->nbat.get(),
 209                         kernelSetup.kernelType,
 210                         combinationRule,
 211                         system.numAtomTypes,
 212                         system.nonbondedParameters,
 213                         1,
 214                         numThreads);
 215
 216     t_nrnb nrnb;
 217
 218     GMX_RELEASE_ASSERT(!TRICLINIC(system.box), "Only rectangular unit-cells are supported here");
 219     const rvec lowerCorner = { 0, 0, 0 };
 220     const rvec upperCorner = { system.box[XX][XX], system.box[YY][YY], system.box[ZZ][ZZ] };
 221
 222     gmx::ArrayRef<const int> atomInfo;
 223     if (options.useHalfLJOptimization)
 224     {
 225         atomInfo = system.atomInfoOxygenVdw;
 226     }
 227     else
 228     {
 229         atomInfo = system.atomInfoAllVdw;
 230     }
 231
 232     const real atomDensity = system.coordinates.size() / det(system.box);
 233
 234     nbnxn_put_on_grid(nbv.get(),
 235                       system.box,
 236                       0,
 237                       lowerCorner,
 238                       upperCorner,
 239                       nullptr,
 240                       { 0, int(system.coordinates.size()) },
 241                       atomDensity,
 242                       atomInfo,
 243                       system.coordinates,
 244                       0,
 245                       nullptr);
 246
 247     nbv->constructPairlist(gmx::InteractionLocality::Local, system.excls, 0, &nrnb);
 248
 249     nbv->setAtomProperties(system.atomTypes, system.charges, atomInfo);
 250
 251     return nbv;
 252 }
 253
 254 //! Add the options instance to the list for all requested kernel SIMD types
 255 static void expandSimdOptionAndPushBack(const KernelBenchOptions&        options,
 256                                         std::vector<KernelBenchOptions>* optionsList)
 257 {
 258     if (options.nbnxmSimd == BenchMarkKernels::SimdAuto)
 259     {
 260         bool addedInstance = false;
 261 #ifdef GMX_NBNXN_SIMD_4XN
 262         optionsList->push_back(options);
 263         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd4XM;
 264         addedInstance                 = true;
 265 #endif
 266 #ifdef GMX_NBNXN_SIMD_2XNN
 267         optionsList->push_back(options);
 268         optionsList->back().nbnxmSimd = BenchMarkKernels::Simd2XMM;
 269         addedInstance                 = true;
 270 #endif
 271         if (!addedInstance)
 272         {
 273             optionsList->push_back(options);
 274             optionsList->back().nbnxmSimd = BenchMarkKernels::SimdNo;
 275         }
 276     }
 277     else
 278     {
 279         optionsList->push_back(options);
 280     }
 281 }
 282
 283 //! Sets up and runs the requested benchmark instance and prints the results
 284 //
 285 // When \p doWarmup is true runs the warmup iterations instead
 286 // of the normal ones and does not print any results
 287 static void setupAndRunInstance(const gmx::BenchmarkSystem& system,
 288                                 const KernelBenchOptions&   options,
 289                                 const bool                  doWarmup)
 290 {
 291     // Generate an, accurate, estimate of the number of non-zero pair interactions
 292     const real atomDensity = system.coordinates.size() / det(system.box);
 293     const real numPairsWithinCutoff =
 294             atomDensity * 4.0 / 3.0 * M_PI * std::pow(options.pairlistCutoff, 3);
 295     const real numUsefulPairs = system.coordinates.size() * 0.5 * (numPairsWithinCutoff + 1);
 296
 297     std::unique_ptr<nonbonded_verlet_t> nbv = setupNbnxmForBenchInstance(options, system);
 298
 299     // We set the interaction cut-off to the pairlist cut-off
 300     interaction_const_t ic = setupInteractionConst(options);
 301
 302     t_nrnb nrnb = { 0 };
 303
 304     gmx_enerdata_t enerd(1, 0);
 305
 306     gmx::StepWorkload stepWork;
 307     stepWork.computeForces = true;
 308     if (options.computeVirialAndEnergy)
 309     {
 310         stepWork.computeVirial = true;
 311         stepWork.computeEnergy = true;
 312     }
 313
 314     const gmx::EnumerationArray<BenchMarkKernels, std::string> kernelNames = {
 315         "auto", "no", "4xM", "2xMM"
 316     };
 317
 318     const gmx::EnumerationArray<BenchMarkCombRule, std::string> combruleNames = { "geom.",
 319                                                                                   "LB",
 320                                                                                   "none" };
 321
 322     if (!doWarmup)
 323     {
 324         fprintf(stdout,
 325                 "%-7s %-4s %-5s %-4s ",
 326                 options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
 327                 options.useHalfLJOptimization ? "half" : "all",
 328                 combruleNames[options.ljCombinationRule].c_str(),
 329                 kernelNames[options.nbnxmSimd].c_str());
 330         if (!options.outputFile.empty())
 331         {
 332             fprintf(system.csv,
 333                     "\"%d\",\"%zu\",\"%g\",\"%d\",\"%d\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%"
 334                     "s\",",
 335 #if GMX_SIMD
 336                     (options.nbnxmSimd != BenchMarkKernels::SimdNo) ? GMX_SIMD_REAL_WIDTH : 0,
 337 #else
 338                     0,
 339 #endif
 340                     system.coordinates.size(),
 341                     options.pairlistCutoff,
 342                     options.numThreads,
 343                     options.numIterations,
 344                     options.computeVirialAndEnergy ? "yes" : "no",
 345                     (options.coulombType != BenchMarkCoulomb::ReactionField)
 346                             ? ((options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr)
 347                                        ? "table"
 348                                        : "analytical")
 349                             : "",
 350                     options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
 351                     options.useHalfLJOptimization ? "half" : "all",
 352                     combruleNames[options.ljCombinationRule].c_str(),
 353                     kernelNames[options.nbnxmSimd].c_str());
 354         }
 355     }
 356
 357     // Run pre-iteration to avoid cache misses
 358     for (int iter = 0; iter < options.numPreIterations; iter++)
 359     {
 360         nbv->dispatchNonbondedKernel(
 361                 gmx::InteractionLocality::Local, ic, stepWork, enbvClearFYes, system.forceRec, &enerd, &nrnb);
 362     }
 363
 364     const int numIterations = (doWarmup ? options.numWarmupIterations : options.numIterations);
 365     const PairlistSet& pairlistSet = nbv->pairlistSets().pairlistSet(gmx::InteractionLocality::Local);
 366     const gmx::index numPairs = pairlistSet.natpair_ljq_ + pairlistSet.natpair_lj_ + pairlistSet.natpair_q_;
 367     gmx_cycles_t cycles = gmx_cycles_read();
 368     for (int iter = 0; iter < numIterations; iter++)
 369     {
 370         // Run the kernel without force clearing
 371         nbv->dispatchNonbondedKernel(
 372                 gmx::InteractionLocality::Local, ic, stepWork, enbvClearFNo, system.forceRec, &enerd, &nrnb);
 373     }
 374     cycles = gmx_cycles_read() - cycles;
 375     if (!doWarmup)
 376     {
 377         if (options.reportTime)
 378         {
 379             const double uSec = static_cast<double>(cycles) * gmx_cycles_calibrate(1.0) * 1.e6;
 380             if (options.cyclesPerPair)
 381             {
 382                 fprintf(stdout,
 383                         "%13.2f %13.3f %10.3f %10.3f\n",
 384                         uSec,
 385                         uSec / options.numIterations,
 386                         uSec / (options.numIterations * numPairs),
 387                         uSec / (options.numIterations * numUsefulPairs));
 388                 if (!options.outputFile.empty())
 389                 {
 390                     fprintf(system.csv,
 391                             "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n",
 392                             uSec,
 393                             uSec / options.numIterations,
 394                             uSec / (options.numIterations * numPairs),
 395                             uSec / (options.numIterations * numUsefulPairs));
 396                 }
 397             }
 398             else
 399             {
 400                 fprintf(stdout,
 401                         "%13.2f %13.3f %10.3f %10.3f\n",
 402                         uSec,
 403                         uSec / options.numIterations,
 404                         options.numIterations * numPairs / uSec,
 405                         options.numIterations * numUsefulPairs / uSec);
 406                 if (!options.outputFile.empty())
 407                 {
 408                     fprintf(system.csv,
 409                             "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n",
 410                             uSec,
 411                             uSec / options.numIterations,
 412                             options.numIterations * numPairs / uSec,
 413                             options.numIterations * numUsefulPairs / uSec);
 414                 }
 415             }
 416         }
 417         else
 418         {
 419             const double dCycles = static_cast<double>(cycles);
 420             if (options.cyclesPerPair)
 421             {
 422                 fprintf(stdout,
 423                         "%10.3f %10.4f %8.4f %8.4f\n",
 424                         cycles * 1e-6,
 425                         dCycles / options.numIterations * 1e-6,
 426                         dCycles / (options.numIterations * numPairs),
 427                         dCycles / (options.numIterations * numUsefulPairs));
 428             }
 429             else
 430             {
 431                 fprintf(stdout,
 432                         "%10.3f %10.4f %8.4f %8.4f\n",
 433                         dCycles * 1e-6,
 434                         dCycles / options.numIterations * 1e-6,
 435                         options.numIterations * numPairs / dCycles,
 436                         options.numIterations * numUsefulPairs / dCycles);
 437             }
 438         }
 439     }
 440 }
 441
 442 void bench(const int sizeFactor, const KernelBenchOptions& options)
 443 {
 444     // We don't want to call gmx_omp_nthreads_init(), so we init what we need
 445     gmx_omp_nthreads_set(emntPairsearch, options.numThreads);
 446     gmx_omp_nthreads_set(emntNonbonded, options.numThreads);
 447
 448     const gmx::BenchmarkSystem system(sizeFactor, options.outputFile);
 449
 450     real minBoxSize = norm(system.box[XX]);
 451     for (int dim = YY; dim < DIM; dim++)
 452     {
 453         minBoxSize = std::min(minBoxSize, norm(system.box[dim]));
 454     }
 455     if (options.pairlistCutoff > 0.5 * minBoxSize)
 456     {
 457         gmx_fatal(FARGS, "The cut-off should be shorter than half the box size");
 458     }
 459
 460     std::vector<KernelBenchOptions> optionsList;
 461     if (options.doAll)
 462     {
 463         KernelBenchOptions                        opt = options;
 464         gmx::EnumerationWrapper<BenchMarkCoulomb> coulombIter;
 465         for (auto coulombType : coulombIter)
 466         {
 467             opt.coulombType = coulombType;
 468             for (int halfLJ = 0; halfLJ <= 1; halfLJ++)
 469             {
 470                 opt.useHalfLJOptimization = (halfLJ == 1);
 471
 472                 gmx::EnumerationWrapper<BenchMarkCombRule> combRuleIter;
 473                 for (auto combRule : combRuleIter)
 474                 {
 475                     opt.ljCombinationRule = combRule;
 476
 477                     expandSimdOptionAndPushBack(opt, &optionsList);
 478                 }
 479             }
 480         }
 481     }
 482     else
 483     {
 484         expandSimdOptionAndPushBack(options, &optionsList);
 485     }
 486     GMX_RELEASE_ASSERT(!optionsList.empty(), "Expect at least on benchmark setup");
 487
 488 #if GMX_SIMD
 489     if (options.nbnxmSimd != BenchMarkKernels::SimdNo)
 490     {
 491         fprintf(stdout, "SIMD width:           %d\n", GMX_SIMD_REAL_WIDTH);
 492     }
 493 #endif
 494     fprintf(stdout, "System size:          %zu atoms\n", system.coordinates.size());
 495     fprintf(stdout, "Cut-off radius:       %g nm\n", options.pairlistCutoff);
 496     fprintf(stdout, "Number of threads:    %d\n", options.numThreads);
 497     fprintf(stdout, "Number of iterations: %d\n", options.numIterations);
 498     fprintf(stdout, "Compute energies:     %s\n", options.computeVirialAndEnergy ? "yes" : "no");
 499     if (options.coulombType != BenchMarkCoulomb::ReactionField)
 500     {
 501         fprintf(stdout,
 502                 "Ewald excl. corr.:    %s\n",
 503                 options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr
 504                         ? "table"
 505                         : "analytical");
 506     }
 507     printf("\n");
 508
 509     if (options.numWarmupIterations > 0)
 510     {
 511         setupAndRunInstance(system, optionsList[0], true);
 512     }
 513
 514     if (options.reportTime)
 515     {
 516         fprintf(stdout,
 517                 "Coulomb LJ   comb. SIMD       usec         usec/it.        %s\n",
 518                 options.cyclesPerPair ? "usec/pair" : "pairs/usec");
 519         if (!options.outputFile.empty())
 520         {
 521             fprintf(system.csv,
 522                     "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
 523                     "energy\",\"Ewald excl. "
 524                     "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"usec\",\"usec/it\",\"total "
 525                     "pairs/usec\",\"useful pairs/usec\"\n");
 526         }
 527         fprintf(stdout,
 528                 "                                                        total      useful\n");
 529     }
 530     else
 531     {
 532         fprintf(stdout,
 533                 "Coulomb LJ   comb. SIMD    Mcycles  Mcycles/it.   %s\n",
 534                 options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
 535         if (!options.outputFile.empty())
 536         {
 537             fprintf(system.csv,
 538                     "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
 539                     "energy\",\"Ewald excl. "
 540                     "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"Mcycles\",\"Mcycles/"
 541                     "it\",\"total "
 542                     "total cycles/pair\",\"total cycles per useful pair\"\n");
 543         }
 544         fprintf(stdout, "                                                total    useful\n");
 545     }
 546
 547     for (const auto& optionsInstance : optionsList)
 548     {
 549         setupAndRunInstance(system, optionsInstance, false);
 550     }
 551
 552     if (!options.outputFile.empty())
 553     {
 554         fclose(system.csv);
 555     }
 556 }
 557
 558 } // namespace Nbnxm