Add helper functions for setting up Nbnxm gpu object in nblib
[alexxy/gromacs.git] / api / nblib / nbnxmsetuphelpers.cpp
index a1b4c9c22fee217ebce22f652bdc884a75a72a7f..c876482b97f3b30b9072d4554e173bbd661d6170 100644 (file)
@@ -86,7 +86,7 @@ Nbnxm::KernelType translateBenchmarkEnum(const SimdKernels& kernel)
     return static_cast<Nbnxm::KernelType>(kernelInt);
 }
 
-void checkKernelSetup(const SimdKernels nbnxmSimd)
+void checkKernelSetupSimd(const SimdKernels nbnxmSimd)
 {
     if (nbnxmSimd >= SimdKernels::Count || nbnxmSimd == SimdKernels::SimdAuto)
     {
@@ -106,14 +106,14 @@ void checkKernelSetup(const SimdKernels nbnxmSimd)
     }
 }
 
-Nbnxm::KernelSetup createKernelSetupCPU(const NBKernelOptions& options)
+Nbnxm::KernelSetup createKernelSetupCPU(const SimdKernels nbnxmSimd, const bool useTabulatedEwaldCorr)
 {
-    checkKernelSetup(options.nbnxmSimd);
+    checkKernelSetupSimd(nbnxmSimd);
 
     Nbnxm::KernelSetup kernelSetup;
 
     // The int enum options.nbnxnSimd is set up to match Nbnxm::KernelType + 1
-    kernelSetup.kernelType = translateBenchmarkEnum(options.nbnxmSimd);
+    kernelSetup.kernelType = translateBenchmarkEnum(nbnxmSimd);
 
     // The plain-C kernel does not support analytical ewald correction
     if (kernelSetup.kernelType == Nbnxm::KernelType::Cpu4x4_PlainC)
@@ -122,16 +122,24 @@ Nbnxm::KernelSetup createKernelSetupCPU(const NBKernelOptions& options)
     }
     else
     {
-        kernelSetup.ewaldExclusionType = options.useTabulatedEwaldCorr
-                                                 ? Nbnxm::EwaldExclusionType::Table
-                                                 : Nbnxm::EwaldExclusionType::Analytical;
+        kernelSetup.ewaldExclusionType = useTabulatedEwaldCorr ? Nbnxm::EwaldExclusionType::Table
+                                                               : Nbnxm::EwaldExclusionType::Analytical;
     }
 
     return kernelSetup;
 }
 
-std::vector<int64_t> createParticleInfoAllVdv(const size_t numParticles)
+Nbnxm::KernelSetup createKernelSetupGPU(const bool useTabulatedEwaldCorr)
+{
+    Nbnxm::KernelSetup kernelSetup;
+    kernelSetup.kernelType         = Nbnxm::KernelType::Gpu8x8x8;
+    kernelSetup.ewaldExclusionType = useTabulatedEwaldCorr ? Nbnxm::EwaldExclusionType::Table
+                                                           : Nbnxm::EwaldExclusionType::Analytical;
+
+    return kernelSetup;
+}
 
+std::vector<int64_t> createParticleInfoAllVdw(const size_t numParticles)
 {
     std::vector<int64_t> particleInfoAllVdw(numParticles);
     for (size_t particleI = 0; particleI < numParticles; particleI++)
@@ -169,7 +177,7 @@ std::vector<real> createNonBondedParameters(const std::vector<ParticleType>& par
     return nonbondedParameters;
 }
 
-gmx::StepWorkload createStepWorkload([[maybe_unused]] const NBKernelOptions& options)
+gmx::StepWorkload createStepWorkload()
 {
     gmx::StepWorkload stepWorkload;
     stepWorkload.computeForces          = true;
@@ -180,6 +188,29 @@ gmx::StepWorkload createStepWorkload([[maybe_unused]] const NBKernelOptions& opt
     return stepWorkload;
 }
 
+static gmx::SimulationWorkload createSimulationWorkload()
+{
+    gmx::SimulationWorkload simulationWork;
+    simulationWork.computeNonbonded = true;
+    return simulationWork;
+}
+
+gmx::SimulationWorkload createSimulationWorkloadGpu()
+{
+    gmx::SimulationWorkload simulationWork = createSimulationWorkload();
+
+    simulationWork.useGpuNonbonded = true;
+    simulationWork.useGpuUpdate    = false;
+
+    return simulationWork;
+}
+
+std::shared_ptr<gmx::DeviceStreamManager> createDeviceStreamManager(const DeviceInformation& deviceInfo,
+                                                                    const gmx::SimulationWorkload& simulationWorkload)
+{
+    return std::make_shared<gmx::DeviceStreamManager>(deviceInfo, false, simulationWorkload, false);
+}
+
 real ewaldCoeff(const real ewald_rtol, const real pairlistCutoff)
 {
     return calc_ewaldcoeff_q(pairlistCutoff, ewald_rtol);
@@ -255,7 +286,8 @@ std::unique_ptr<nonbonded_verlet_t> createNbnxmCPU(const size_t              num
     // Note: the options and Nbnxm combination rule enums values should match
     const int combinationRule = static_cast<int>(options.ljCombinationRule);
 
-    Nbnxm::KernelSetup kernelSetup = createKernelSetupCPU(options);
+    Nbnxm::KernelSetup kernelSetup =
+            createKernelSetupCPU(options.nbnxmSimd, options.useTabulatedEwaldCorr);
 
     PairlistParams pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
 
@@ -280,6 +312,53 @@ std::unique_ptr<nonbonded_verlet_t> createNbnxmCPU(const size_t              num
     return nbv;
 }
 
+std::unique_ptr<nonbonded_verlet_t> createNbnxmGPU(const size_t               numParticleTypes,
+                                                   const NBKernelOptions&     options,
+                                                   const std::vector<real>&   nonbondedParameters,
+                                                   const interaction_const_t& interactionConst,
+                                                   const gmx::DeviceStreamManager& deviceStreamManager)
+{
+    const auto pinPolicy       = gmx::PinningPolicy::PinnedIfSupported;
+    const int  combinationRule = static_cast<int>(options.ljCombinationRule);
+
+    Nbnxm::KernelSetup kernelSetup = createKernelSetupGPU(options.useTabulatedEwaldCorr);
+
+    PairlistParams pairlistParams(kernelSetup.kernelType, false, options.pairlistCutoff, false);
+
+
+    // nbnxn_atomdata is always initialized with 1 thread if the GPU is used
+    constexpr int numThreadsInit = 1;
+    // multiple energy groups are not supported on the GPU
+    constexpr int numEnergyGroups = 1;
+    auto          atomData        = std::make_unique<nbnxn_atomdata_t>(pinPolicy,
+                                                       gmx::MDLogger(),
+                                                       kernelSetup.kernelType,
+                                                       combinationRule,
+                                                       numParticleTypes,
+                                                       nonbondedParameters,
+                                                       numEnergyGroups,
+                                                       numThreadsInit);
+
+    NbnxmGpu* nbnxmGpu = Nbnxm::gpu_init(
+            deviceStreamManager, &interactionConst, pairlistParams, atomData.get(), false);
+
+    // minimum iList count for GPU balancing
+    int iListCount = Nbnxm::gpu_min_ci_balanced(nbnxmGpu);
+
+    auto pairlistSets = std::make_unique<PairlistSets>(pairlistParams, false, iListCount);
+    auto pairSearch   = std::make_unique<PairSearch>(
+            PbcType::Xyz, false, nullptr, nullptr, pairlistParams.pairlistType, false, options.numOpenMPThreads, pinPolicy);
+
+    // Put everything together
+    auto nbv = std::make_unique<nonbonded_verlet_t>(
+            std::move(pairlistSets), std::move(pairSearch), std::move(atomData), kernelSetup, nbnxmGpu, nullptr);
+
+    // Some paramters must be copied to NbnxmGpu to have a fully constructed nonbonded_verlet_t
+    Nbnxm::gpu_init_atomdata(nbv->gpu_nbv, nbv->nbat.get());
+
+    return nbv;
+}
+
 void setGmxNonBondedNThreads(int numThreads)
 {
     gmx_omp_nthreads_set(ModuleMultiThread::Pairsearch, numThreads);