Refactor PME tests for better usability
[alexxy/gromacs.git] src/gromacs/ewald/tests/pmetestcommon.cpp
index eb1e170411bbf090582852ed283fe65824919143..8589d21beac4357cc274a16e043ddbbe4c1b07a6 100644
@@ -1,7 +1,8 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020 by the GROMACS development team.
+ * Copyright (c) 2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -69,6 +70,7 @@
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/logger.h"
 #include "gromacs/utility/stringutil.h"
+#include "gromacs/ewald/pme_coordinate_receiver_gpu.h"
 
 #include "testutils/test_hardware_environment.h"
 #include "testutils/testasserts.h"
@@ -80,6 +82,15 @@ namespace gmx
 namespace test
 {
 
+//! A couple of valid inputs for boxes.
+const std::map<std::string, Matrix3x3> c_inputBoxes = {
+    { "rect", { { 8.0F, 0.0F, 0.0F, 0.0F, 3.4F, 0.0F, 0.0F, 0.0F, 2.0F } } },
+    { "tric", { { 7.0F, 0.0F, 0.0F, 0.0F, 4.1F, 0.0F, 3.5F, 2.0F, 12.2F } } },
+};
+
+//! Valid PME orders for testing
+std::vector<int> c_inputPmeOrders{ 3, 4, 5 };
+
 bool pmeSupportsInputForMode(const gmx_hw_info_t& hwinfo, const t_inputrec* inputRec, CodePath mode)
 {
     bool implemented;
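For reference, a minimal sketch (not part of this patch) of how the shared inputs added above might be combined into test parameters; the helper name enumerateBoxOrderCombinations and its use are illustrative assumptions only.

// Illustrative sketch only: pairing every input box name with every PME order,
// e.g. to drive a parameterized test over the shared inputs defined above.
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, int>> enumerateBoxOrderCombinations()
{
    std::vector<std::pair<std::string, int>> combinations;
    for (const auto& boxEntry : c_inputBoxes) // boxEntry.first is the box name ("rect"/"tric")
    {
        for (const int pmeOrder : c_inputPmeOrders)
        {
            combinations.emplace_back(boxEntry.first, pmeOrder);
        }
    }
    return combinations;
}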
@@ -120,9 +131,21 @@ PmeSafePointer pmeInitWrapper(const t_inputrec*    inputRec,
     const auto     runMode       = (mode == CodePath::CPU) ? PmeRunMode::CPU : PmeRunMode::Mixed;
     t_commrec      dummyCommrec  = { 0 };
     NumPmeDomains  numPmeDomains = { 1, 1 };
-    gmx_pme_t* pmeDataRaw = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true,
-                                         ewaldCoeff_q, ewaldCoeff_lj, 1, runMode, nullptr,
-                                         deviceContext, deviceStream, pmeGpuProgram, dummyLogger);
+    gmx_pme_t*     pmeDataRaw    = gmx_pme_init(&dummyCommrec,
+                                         numPmeDomains,
+                                         inputRec,
+                                         false,
+                                         false,
+                                         true,
+                                         ewaldCoeff_q,
+                                         ewaldCoeff_lj,
+                                         1,
+                                         runMode,
+                                         nullptr,
+                                         deviceContext,
+                                         deviceStream,
+                                         pmeGpuProgram,
+                                         dummyLogger);
     PmeSafePointer pme(pmeDataRaw); // taking ownership
 
     // TODO get rid of this with proper matrix type
@@ -166,8 +189,8 @@ std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme
     // TODO: Pin the host buffer and use async memory copies
     // TODO: Special constructor for PME-only rank / PME-tests is used here. There should be a mechanism to
     //       restrict one from using other constructor here.
-    return std::make_unique<StatePropagatorDataGpu>(deviceStream, *deviceContext, GpuApiCallBehavior::Sync,
-                                                    pme_gpu_get_block_size(&pme), nullptr);
+    return std::make_unique<StatePropagatorDataGpu>(
+            deviceStream, *deviceContext, GpuApiCallBehavior::Sync, pme_gpu_get_block_size(&pme), nullptr);
 }
 
 //! PME initialization with atom data
@@ -178,7 +201,7 @@ void pmeInitAtoms(gmx_pme_t*               pme,
                   const ChargesVector&     charges)
 {
     const index atomCount = coordinates.size();
-    GMX_RELEASE_ASSERT(atomCount == charges.ssize(), "Mismatch in atom data");
+    GMX_RELEASE_ASSERT(atomCount == gmx::ssize(charges), "Mismatch in atom data");
     PmeAtomComm* atc = nullptr;
 
     switch (mode)
@@ -187,7 +210,7 @@ void pmeInitAtoms(gmx_pme_t*               pme,
             atc              = &(pme->atc[0]);
             atc->x           = coordinates;
             atc->coefficient = charges;
-            gmx_pme_reinit_atoms(pme, atomCount, charges.data(), nullptr);
+            gmx_pme_reinit_atoms(pme, atomCount, charges, {});
             /* With decomposition there would be more boilerplate atc code here, e.g. do_redist_pos_coeffs */
             break;
 
@@ -196,11 +219,11 @@ void pmeInitAtoms(gmx_pme_t*               pme,
             atc = &(pme->atc[0]);
             // We need to set atc->n for passing the size in the tests
             atc->setNumAtoms(atomCount);
-            gmx_pme_reinit_atoms(pme, atomCount, charges.data(), nullptr);
+            gmx_pme_reinit_atoms(pme, atomCount, charges, {});
 
             stateGpu->reinit(atomCount, atomCount);
             stateGpu->copyCoordinatesToGpu(arrayRefFromArray(coordinates.data(), coordinates.size()),
-                                           gmx::AtomLocality::All);
+                                           gmx::AtomLocality::Local);
             pme_gpu_set_kernelparam_coordinates(pme->gpu, stateGpu->getCoordinates());
 
             break;
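A side note on the call pattern above: gmx_pme_reinit_atoms is now passed the charge container directly, with an empty brace-init replacing the former nullptr. A hedged sketch of that call shape in isolation follows; reinitWithCharges and chargesA are placeholders, and the reading of the last argument as "no B-state charges" is an assumption, not stated in the patch.

// Illustrative only (not part of this patch): the new gmx_pme_reinit_atoms call shape.
// chargesA stands in for any contiguous container of per-atom charges; {} is assumed
// to mean "no B-state charges", mirroring the nullptr it replaces above.
#include <vector>

void reinitWithCharges(gmx_pme_t* pme, const std::vector<real>& chargesA)
{
    gmx_pme_reinit_atoms(pme, gmx::ssize(chargesA), chargesA, {});
}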
@@ -227,8 +250,8 @@ static void pmeGetRealGridSizesInternal(const gmx_pme_t* pme,
     switch (mode)
     {
         case CodePath::CPU:
-            gmx_parallel_3dfft_real_limits(pme->pfft_setup[gridIndex], gridSize, gridOffsetUnused,
-                                           paddedGridSize);
+            gmx_parallel_3dfft_real_limits(
+                    pme->pfft_setup[gridIndex], gridSize, gridOffsetUnused, paddedGridSize);
             break;
 
         case CodePath::GPU:
@@ -253,8 +276,8 @@ static void pmeGetComplexGridSizesInternal(const gmx_pme_t* pme,
 {
     const size_t gridIndex = 0;
     IVec         gridOffsetUnused, complexOrderUnused;
-    gmx_parallel_3dfft_complex_limits(pme->pfft_setup[gridIndex], complexOrderUnused, gridSize,
-                                      gridOffsetUnused, paddedGridSize); // TODO: what about YZX ordering?
+    gmx_parallel_3dfft_complex_limits(
+            pme->pfft_setup[gridIndex], complexOrderUnused, gridSize, gridOffsetUnused, paddedGridSize); // TODO: what about YZX ordering?
 }
 
 //! Getting the PME grid memory buffer and its sizes - template definition
@@ -266,7 +289,7 @@ static void pmeGetGridAndSizesInternal(const gmx_pme_t* /*unused*/,
                                        IVec& /*unused*/)       //NOLINT(google-runtime-references)
 {
     GMX_THROW(InternalError("Deleted function call"));
-    // explicitly deleting general template does not compile in clang/icc, see https://llvm.org/bugs/show_bug.cgi?id=17537
+    // explicitly deleting general template does not compile in clang, see https://llvm.org/bugs/show_bug.cgi?id=17537
 }
 
 //! Getting the PME real grid memory buffer and its sizes
@@ -305,9 +328,14 @@ void pmePerformSplineAndSpread(gmx_pme_t* pme,
     switch (mode)
     {
         case CodePath::CPU:
-            spread_on_grid(pme, atc, &pme->pmegrid[gridIndex], computeSplines, spreadCharges,
+            spread_on_grid(pme,
+                           atc,
+                           &pme->pmegrid[gridIndex],
+                           computeSplines,
+                           spreadCharges,
                            fftgrid != nullptr ? fftgrid[gridIndex] : nullptr,
-                           computeSplinesForZeroCharges, gridIndex);
+                           computeSplinesForZeroCharges,
+                           gridIndex);
             if (spreadCharges && !pme->bUseThreads)
             {
                 wrap_periodic_pmegrid(pme, pmegrid);
@@ -324,7 +352,18 @@ void pmePerformSplineAndSpread(gmx_pme_t* pme,
             const real lambdaQ = 1.0;
             // no synchronization needed as x is transferred in the PME stream
             GpuEventSynchronizer* xReadyOnDevice = nullptr;
-            pme_gpu_spread(pme->gpu, xReadyOnDevice, fftgrid, computeSplines, spreadCharges, lambdaQ);
+
+            bool                           useGpuDirectComm         = false;
+            gmx::PmeCoordinateReceiverGpu* pmeCoordinateReceiverGpu = nullptr;
+
+            pme_gpu_spread(pme->gpu,
+                           xReadyOnDevice,
+                           fftgrid,
+                           computeSplines,
+                           spreadCharges,
+                           lambdaQ,
+                           useGpuDirectComm,
+                           pmeCoordinateReceiverGpu);
         }
         break;
 #endif
@@ -381,8 +420,13 @@ void pmePerformSolve(const gmx_pme_t*  pme,
                     break;
 
                 case PmeSolveAlgorithm::LennardJones:
-                    solve_pme_lj_yzx(pme, &h_grid, useLorentzBerthelot, cellVolume,
-                                     computeEnergyAndVirial, pme->nthread, threadIndex);
+                    solve_pme_lj_yzx(pme,
+                                     &h_grid,
+                                     useLorentzBerthelot,
+                                     cellVolume,
+                                     computeEnergyAndVirial,
+                                     pme->nthread,
+                                     threadIndex);
                     break;
 
                 default: GMX_THROW(InternalError("Test not implemented for this mode"));
@@ -642,7 +686,7 @@ void pmeSetGridLineIndices(gmx_pme_t* pme, CodePath mode, const GridLineIndicesV
 {
     PmeAtomComm* atc       = &(pme->atc[0]);
     const index  atomCount = atc->numAtoms();
-    GMX_RELEASE_ASSERT(atomCount == gridLineIndices.ssize(), "Mismatch in gridline indices size");
+    GMX_RELEASE_ASSERT(atomCount == ssize(gridLineIndices), "Mismatch in gridline indices size");
 
     IVec paddedGridSizeUnused, gridSize(0, 0, 0);
     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSizeUnused);
@@ -659,7 +703,8 @@ void pmeSetGridLineIndices(gmx_pme_t* pme, CodePath mode, const GridLineIndicesV
     switch (mode)
     {
         case CodePath::GPU:
-            memcpy(pme_gpu_staging(pme->gpu).h_gridlineIndices, gridLineIndices.data(),
+            memcpy(pme_gpu_staging(pme->gpu).h_gridlineIndices,
+                   gridLineIndices.data(),
                    atomCount * sizeof(gridLineIndices[0]));
             break;
 
@@ -705,8 +750,7 @@ static void pmeSetGridInternal(const gmx_pme_t*                        pme,
     {
         case CodePath::GPU: // intentional absence of break, the grid will be copied from the host buffer in testing mode
         case CodePath::CPU:
-            std::memset(grid, 0,
-                        paddedGridSize[XX] * paddedGridSize[YY] * paddedGridSize[ZZ] * sizeof(ValueType));
+            std::memset(grid, 0, paddedGridSize[XX] * paddedGridSize[YY] * paddedGridSize[ZZ] * sizeof(ValueType));
             for (const auto& gridValue : gridValues)
             {
                 for (int i = 0; i < DIM; i++)
@@ -775,11 +819,15 @@ GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t* pme, CodePath mode)
     switch (mode)
     {
         case CodePath::GPU:
-            gridLineIndices = arrayRefFromArray(
-                    reinterpret_cast<IVec*>(pme_gpu_staging(pme->gpu).h_gridlineIndices), atomCount);
-            break;
+        {
+            auto* gridlineIndicesAsIVec =
+                    reinterpret_cast<IVec*>(pme_gpu_staging(pme->gpu).h_gridlineIndices);
+            ArrayRef<IVec> gridlineIndicesArrayRef = arrayRefFromArray(gridlineIndicesAsIVec, atomCount);
+            gridLineIndices = { gridlineIndicesArrayRef.begin(), gridlineIndicesArrayRef.end() };
+        }
+        break;
 
-        case CodePath::CPU: gridLineIndices = atc->idx; break;
+        case CodePath::CPU: gridLineIndices = { atc->idx.begin(), atc->idx.end() }; break;
 
         default: GMX_THROW(InternalError("Test not implemented for this mode"));
     }
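Both branches above now build gridLineIndices by iterator-range copy rather than assigning a view, which suggests the result is (or behaves like) an owning vector; that is an inference from the patch, not stated in it. A minimal sketch of the idiom, with copyToOwningVector as a hypothetical helper:

// Illustrative helper (not part of this patch): copying a non-owning view into an owning
// vector, so the copied gridline indices stay valid independently of the PME buffers.
#include <vector>

std::vector<IVec> copyToOwningVector(ArrayRef<const IVec> view)
{
    return std::vector<IVec>(view.begin(), view.end()); // same iterator-range copy as above
}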
@@ -875,21 +923,10 @@ PmeOutput pmeGetReciprocalEnergyAndVirial(const gmx_pme_t* pme, CodePath mode, P
     return output;
 }
 
-const char* codePathToString(CodePath codePath)
-{
-    switch (codePath)
-    {
-        case CodePath::CPU: return "CPU";
-        case CodePath::GPU: return "GPU";
-        default: GMX_THROW(NotImplementedError("This CodePath should support codePathToString"));
-    }
-}
-
 PmeTestHardwareContext::PmeTestHardwareContext() : codePath_(CodePath::CPU) {}
 
 PmeTestHardwareContext::PmeTestHardwareContext(TestDevice* testDevice) :
-    codePath_(CodePath::CPU),
-    testDevice_(testDevice)
+    codePath_(CodePath::GPU), testDevice_(testDevice)
 {
     setActiveDevice(testDevice_->deviceInfo());
     pmeGpuProgram_ = buildPmeGpuProgram(testDevice_->deviceContext());