src/gromacs/ewald/tests/pmetestcommon.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief
  37  * Implements common routines for PME tests.
  38  *
  39  * \author Aleksei Iupinov <a.yupinov@gmail.com>
  40  * \ingroup module_ewald
  41  */
  42 #include "gmxpre.h"
  43
  44 #include "pmetestcommon.h"
  45
  46 #include <cstring>
  47
  48 #include <algorithm>
  49
  50 #include "gromacs/domdec/domdec.h"
  51 #include "gromacs/ewald/pme_gather.h"
  52 #include "gromacs/ewald/pme_gpu_internal.h"
  53 #include "gromacs/ewald/pme_grid.h"
  54 #include "gromacs/ewald/pme_internal.h"
  55 #include "gromacs/ewald/pme_redistribute.h"
  56 #include "gromacs/ewald/pme_solve.h"
  57 #include "gromacs/ewald/pme_spread.h"
  58 #include "gromacs/fft/parallel_3dfft.h"
  59 #include "gromacs/gpu_utils/gpu_utils.h"
  60 #include "gromacs/math/invertmatrix.h"
  61 #include "gromacs/mdtypes/commrec.h"
  62 #include "gromacs/pbcutil/pbc.h"
  63 #include "gromacs/topology/topology.h"
  64 #include "gromacs/utility/exceptions.h"
  65 #include "gromacs/utility/gmxassert.h"
  66 #include "gromacs/utility/logger.h"
  67 #include "gromacs/utility/stringutil.h"
  68
  69 #include "testutils/testasserts.h"
  70
  71 namespace gmx
  72 {
  73 namespace test
  74 {
  75
  76 bool pmeSupportsInputForMode(const gmx_hw_info_t &hwinfo,
  77                              const t_inputrec    *inputRec,
  78                              CodePath             mode)
  79 {
  80     bool       implemented;
  81     gmx_mtop_t mtop;
  82     switch (mode)
  83     {
  84         case CodePath::CPU:
  85             implemented = true;
  86             break;
  87
  88         case CodePath::GPU:
  89             implemented = (pme_gpu_supports_build(nullptr) &&
  90                            pme_gpu_supports_hardware(hwinfo, nullptr) &&
  91                            pme_gpu_supports_input(*inputRec, mtop, nullptr));
  92             break;
  93
  94         default:
  95             GMX_THROW(InternalError("Test not implemented for this mode"));
  96     }
  97     return implemented;
  98 }
  99
 100 uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder)
 101 {
 102     /* Arbitrary ulp tolerance for sine/cosine implementation. It's
 103      * hard to know what to pick without testing lots of
 104      * implementations. */
 105     const uint64_t sineUlps = 10;
 106     return 4 * (splineOrder - 2) + 2 * sineUlps * splineOrder;
 107 }
 108
 109 //! PME initialization
 110 PmeSafePointer pmeInitWrapper(const t_inputrec         *inputRec,
 111                               const CodePath            mode,
 112                               const gmx_device_info_t  *gpuInfo,
 113                               PmeGpuProgramHandle       pmeGpuProgram,
 114                               const Matrix3x3          &box,
 115                               const real                ewaldCoeff_q,
 116                               const real                ewaldCoeff_lj)
 117 {
 118     const MDLogger dummyLogger;
 119     const auto     runMode       = (mode == CodePath::CPU) ? PmeRunMode::CPU : PmeRunMode::Mixed;
 120     t_commrec      dummyCommrec  = {0};
 121     NumPmeDomains  numPmeDomains = { 1, 1 };
 122     gmx_pme_t     *pmeDataRaw    = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true,
 123                                                 ewaldCoeff_q, ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, pmeGpuProgram, dummyLogger);
 124     PmeSafePointer pme(pmeDataRaw); // taking ownership
 125
 126     // TODO get rid of this with proper matrix type
 127     matrix boxTemp;
 128     for (int i = 0; i < DIM; i++)
 129     {
 130         for (int j = 0; j < DIM; j++)
 131         {
 132             boxTemp[i][j] = box[i * DIM + j];
 133         }
 134     }
 135     const char *boxError = check_box(-1, boxTemp);
 136     GMX_RELEASE_ASSERT(boxError == nullptr, boxError);
 137
 138     switch (mode)
 139     {
 140         case CodePath::CPU:
 141             invertBoxMatrix(boxTemp, pme->recipbox);
 142             break;
 143
 144         case CodePath::GPU:
 145             pme_gpu_set_testing(pme->gpu, true);
 146             pme_gpu_update_input_box(pme->gpu, boxTemp);
 147             break;
 148
 149         default:
 150             GMX_THROW(InternalError("Test not implemented for this mode"));
 151     }
 152
 153     return pme;
 154 }
 155
 156 //! Simple PME initialization based on input, no atom data
 157 PmeSafePointer pmeInitEmpty(const t_inputrec         *inputRec,
 158                             const CodePath            mode,
 159                             const gmx_device_info_t  *gpuInfo,
 160                             PmeGpuProgramHandle       pmeGpuProgram,
 161                             const Matrix3x3          &box,
 162                             real                      ewaldCoeff_q,
 163                             real                      ewaldCoeff_lj
 164                             )
 165 {
 166     return pmeInitWrapper(inputRec, mode, gpuInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
 167     // hiding the fact that PME actually needs to know the number of atoms in advance
 168 }
 169
 170 //! Make a GPU state-propagator manager
 171 StatePropagatorDataGpu
 172 makeStatePropagatorDataGpu(const gmx_pme_t &pme)
 173 {
 174     // TODO: Pin the host buffer and use async memory copies
 175     return StatePropagatorDataGpu(pme_gpu_get_device_stream(&pme),
 176                                   pme_gpu_get_device_context(&pme),
 177                                   GpuApiCallBehavior::Sync,
 178                                   pme_gpu_get_padding_size(&pme));
 179 }
 180
 181 //! PME initialization with atom data
 182 void pmeInitAtoms(gmx_pme_t               *pme,
 183                   StatePropagatorDataGpu  *stateGpu,
 184                   const CodePath           mode,
 185                   const CoordinatesVector &coordinates,
 186                   const ChargesVector     &charges)
 187 {
 188     const index  atomCount = coordinates.size();
 189     GMX_RELEASE_ASSERT(atomCount == charges.ssize(), "Mismatch in atom data");
 190     PmeAtomComm *atc = nullptr;
 191
 192     switch (mode)
 193     {
 194         case CodePath::CPU:
 195             atc              = &(pme->atc[0]);
 196             atc->x           = coordinates;
 197             atc->coefficient = charges;
 198             gmx_pme_reinit_atoms(pme, atomCount, charges.data());
 199             /* With decomposition there would be more boilerplate atc code here, e.g. do_redist_pos_coeffs */
 200             break;
 201
 202         case CodePath::GPU:
 203             // TODO: Avoid use of atc in the GPU code path
 204             atc              = &(pme->atc[0]);
 205             // We need to set atc->n for passing the size in the tests
 206             atc->setNumAtoms(atomCount);
 207             gmx_pme_reinit_atoms(pme, atomCount, charges.data());
 208
 209             stateGpu->reinit(atomCount, atomCount);
 210             stateGpu->copyCoordinatesToGpu(arrayRefFromArray(coordinates.data(), coordinates.size()), gmx::StatePropagatorDataGpu::AtomLocality::All);
 211             pme_gpu_set_kernelparam_coordinates(pme->gpu, stateGpu->getCoordinates());
 212
 213             break;
 214
 215         default:
 216             GMX_THROW(InternalError("Test not implemented for this mode"));
 217     }
 218 }
 219
 220 //! Getting local PME real grid pointer for test I/O
 221 static real *pmeGetRealGridInternal(const gmx_pme_t *pme)
 222 {
 223     const size_t gridIndex = 0;
 224     return pme->fftgrid[gridIndex];
 225 }
 226
 227 //! Getting local PME real grid dimensions
 228 static void pmeGetRealGridSizesInternal(const gmx_pme_t      *pme,
 229                                         CodePath              mode,
 230                                         IVec                 &gridSize,       //NOLINT(google-runtime-references)
 231                                         IVec                 &paddedGridSize) //NOLINT(google-runtime-references)
 232 {
 233     const size_t gridIndex = 0;
 234     IVec         gridOffsetUnused;
 235     switch (mode)
 236     {
 237         case CodePath::CPU:
 238             gmx_parallel_3dfft_real_limits(pme->pfft_setup[gridIndex], gridSize, gridOffsetUnused, paddedGridSize);
 239             break;
 240
 241         case CodePath::GPU:
 242             pme_gpu_get_real_grid_sizes(pme->gpu, &gridSize, &paddedGridSize);
 243             break;
 244
 245         default:
 246             GMX_THROW(InternalError("Test not implemented for this mode"));
 247     }
 248 }
 249
 250 //! Getting local PME complex grid pointer for test I/O
 251 static t_complex *pmeGetComplexGridInternal(const gmx_pme_t *pme)
 252 {
 253     const size_t gridIndex = 0;
 254     return pme->cfftgrid[gridIndex];
 255 }
 256
 257 //! Getting local PME complex grid dimensions
 258 static void pmeGetComplexGridSizesInternal(const gmx_pme_t      *pme,
 259                                            IVec                 &gridSize,       //NOLINT(google-runtime-references)
 260                                            IVec                 &paddedGridSize) //NOLINT(google-runtime-references)
 261 {
 262     const size_t gridIndex = 0;
 263     IVec         gridOffsetUnused, complexOrderUnused;
 264     gmx_parallel_3dfft_complex_limits(pme->pfft_setup[gridIndex], complexOrderUnused, gridSize, gridOffsetUnused, paddedGridSize); //TODO: what about YZX ordering?
 265 }
 266
 267 //! Getting the PME grid memory buffer and its sizes - template definition
 268 template<typename ValueType> static void pmeGetGridAndSizesInternal(const gmx_pme_t * /*unused*/, CodePath /*unused*/, ValueType * & /*unused*/, IVec & /*unused*/, IVec & /*unused*/) //NOLINT(google-runtime-references)
 269 {
 270     GMX_THROW(InternalError("Deleted function call"));
 271     // explicitly deleting general template does not compile in clang/icc, see https://llvm.org/bugs/show_bug.cgi?id=17537
 272 }
 273
 274 //! Getting the PME real grid memory buffer and its sizes
 275 template<> void pmeGetGridAndSizesInternal<real>(const gmx_pme_t *pme, CodePath mode, real * &grid, IVec &gridSize, IVec &paddedGridSize)
 276 {
 277     grid = pmeGetRealGridInternal(pme);
 278     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSize);
 279 }
 280
 281 //! Getting the PME complex grid memory buffer and its sizes
 282 template<> void pmeGetGridAndSizesInternal<t_complex>(const gmx_pme_t *pme, CodePath /*unused*/, t_complex * &grid, IVec &gridSize, IVec &paddedGridSize)
 283 {
 284     grid = pmeGetComplexGridInternal(pme);
 285     pmeGetComplexGridSizesInternal(pme, gridSize, paddedGridSize);
 286 }
 287
 288 //! PME spline calculation and charge spreading
 289 void pmePerformSplineAndSpread(gmx_pme_t *pme, CodePath mode, // TODO const qualifiers elsewhere
 290                                bool computeSplines, bool spreadCharges)
 291 {
 292     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 293     PmeAtomComm    *atc                          = &(pme->atc[0]);
 294     const size_t    gridIndex                    = 0;
 295     const bool      computeSplinesForZeroCharges = true;
 296     real           *fftgrid                      = spreadCharges ? pme->fftgrid[gridIndex] : nullptr;
 297     real           *pmegrid                      = pme->pmegrid[gridIndex].grid.grid;
 298
 299     switch (mode)
 300     {
 301         case CodePath::CPU:
 302             spread_on_grid(pme, atc, &pme->pmegrid[gridIndex], computeSplines, spreadCharges,
 303                            fftgrid, computeSplinesForZeroCharges, gridIndex);
 304             if (spreadCharges && !pme->bUseThreads)
 305             {
 306                 wrap_periodic_pmegrid(pme, pmegrid);
 307                 copy_pmegrid_to_fftgrid(pme, pmegrid, fftgrid, gridIndex);
 308             }
 309             break;
 310
 311         case CodePath::GPU:
 312             pme_gpu_spread(pme->gpu, gridIndex, fftgrid, computeSplines, spreadCharges);
 313             break;
 314
 315         default:
 316             GMX_THROW(InternalError("Test not implemented for this mode"));
 317     }
 318 }
 319
 320 //! Getting the internal spline data buffer pointer
 321 static real *pmeGetSplineDataInternal(const gmx_pme_t *pme, PmeSplineDataType type, int dimIndex)
 322 {
 323     GMX_ASSERT((0 <= dimIndex) && (dimIndex < DIM), "Invalid dimension index");
 324     const PmeAtomComm    *atc          = &(pme->atc[0]);
 325     const size_t          threadIndex  = 0;
 326     real                 *splineBuffer = nullptr;
 327     switch (type)
 328     {
 329         case PmeSplineDataType::Values:
 330             splineBuffer = atc->spline[threadIndex].theta.coefficients[dimIndex];
 331             break;
 332
 333         case PmeSplineDataType::Derivatives:
 334             splineBuffer = atc->spline[threadIndex].dtheta.coefficients[dimIndex];
 335             break;
 336
 337         default:
 338             GMX_THROW(InternalError("Unknown spline data type"));
 339     }
 340     return splineBuffer;
 341 }
 342
 343 //! PME solving
 344 void pmePerformSolve(const gmx_pme_t *pme, CodePath mode,
 345                      PmeSolveAlgorithm method, real cellVolume,
 346                      GridOrdering gridOrdering, bool computeEnergyAndVirial)
 347 {
 348     t_complex      *h_grid                 = pmeGetComplexGridInternal(pme);
 349     const bool      useLorentzBerthelot    = false;
 350     const size_t    threadIndex            = 0;
 351     switch (mode)
 352     {
 353         case CodePath::CPU:
 354             if (gridOrdering != GridOrdering::YZX)
 355             {
 356                 GMX_THROW(InternalError("Test not implemented for this mode"));
 357             }
 358             switch (method)
 359             {
 360                 case PmeSolveAlgorithm::Coulomb:
 361                     solve_pme_yzx(pme, h_grid, cellVolume,
 362                                   computeEnergyAndVirial, pme->nthread, threadIndex);
 363                     break;
 364
 365                 case PmeSolveAlgorithm::LennardJones:
 366                     solve_pme_lj_yzx(pme, &h_grid, useLorentzBerthelot,
 367                                      cellVolume, computeEnergyAndVirial, pme->nthread, threadIndex);
 368                     break;
 369
 370                 default:
 371                     GMX_THROW(InternalError("Test not implemented for this mode"));
 372             }
 373             break;
 374
 375         case CodePath::GPU:
 376             switch (method)
 377             {
 378                 case PmeSolveAlgorithm::Coulomb:
 379                     pme_gpu_solve(pme->gpu, h_grid, gridOrdering, computeEnergyAndVirial);
 380                     break;
 381
 382                 default:
 383                     GMX_THROW(InternalError("Test not implemented for this mode"));
 384             }
 385             break;
 386
 387         default:
 388             GMX_THROW(InternalError("Test not implemented for this mode"));
 389     }
 390 }
 391
 392 //! PME force gathering
 393 void pmePerformGather(gmx_pme_t *pme, CodePath mode,
 394                       PmeForceOutputHandling inputTreatment, ForcesVector &forces)
 395 {
 396     PmeAtomComm    *atc                     = &(pme->atc[0]);
 397     const index     atomCount               = atc->numAtoms();
 398     GMX_RELEASE_ASSERT(forces.ssize() == atomCount, "Invalid force buffer size");
 399     const bool      forceReductionWithInput = (inputTreatment == PmeForceOutputHandling::ReduceWithInput);
 400     const real      scale                   = 1.0;
 401     const size_t    threadIndex             = 0;
 402     const size_t    gridIndex               = 0;
 403     real           *pmegrid                 = pme->pmegrid[gridIndex].grid.grid;
 404     real           *fftgrid                 = pme->fftgrid[gridIndex];
 405
 406     switch (mode)
 407     {
 408         case CodePath::CPU:
 409             atc->f = forces;
 410             if (atc->nthread == 1)
 411             {
 412                 // something which is normally done in serial spline computation (make_thread_local_ind())
 413                 atc->spline[threadIndex].n = atomCount;
 414             }
 415             copy_fftgrid_to_pmegrid(pme, fftgrid, pmegrid, gridIndex, pme->nthread, threadIndex);
 416             unwrap_periodic_pmegrid(pme, pmegrid);
 417             gather_f_bsplines(pme, pmegrid, !forceReductionWithInput, atc, &atc->spline[threadIndex], scale);
 418             break;
 419
 420         case CodePath::GPU:
 421         {
 422             // Variable initialization needs a non-switch scope
 423             PmeOutput output = pme_gpu_getOutput(*pme, GMX_PME_CALC_F);
 424             GMX_ASSERT(forces.size() == output.forces_.size(), "Size of force buffers did not match");
 425             if (forceReductionWithInput)
 426             {
 427                 std::copy(std::begin(forces), std::end(forces), std::begin(output.forces_));
 428             }
 429             pme_gpu_gather(pme->gpu, inputTreatment, reinterpret_cast<float *>(fftgrid));
 430             std::copy(std::begin(output.forces_), std::end(output.forces_), std::begin(forces));
 431         }
 432         break;
 433
 434         default:
 435             GMX_THROW(InternalError("Test not implemented for this mode"));
 436     }
 437 }
 438
 439 //! PME test finalization before fetching the outputs
 440 void pmeFinalizeTest(const gmx_pme_t *pme, CodePath mode)
 441 {
 442     switch (mode)
 443     {
 444         case CodePath::CPU:
 445             break;
 446
 447         case CodePath::GPU:
 448             pme_gpu_synchronize(pme->gpu);
 449             break;
 450
 451         default:
 452             GMX_THROW(InternalError("Test not implemented for this mode"));
 453     }
 454 }
 455
 456 //! Setting atom spline values/derivatives to be used in spread/gather
 457 void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
 458                       const SplineParamsDimVector &splineValues, PmeSplineDataType type, int dimIndex)
 459 {
 460     const PmeAtomComm    *atc         = &(pme->atc[0]);
 461     const index           atomCount   = atc->numAtoms();
 462     const index           pmeOrder    = pme->pme_order;
 463     const index           dimSize     = pmeOrder * atomCount;
 464     GMX_RELEASE_ASSERT(dimSize == splineValues.ssize(), "Mismatch in spline data");
 465     real                 *splineBuffer = pmeGetSplineDataInternal(pme, type, dimIndex);
 466
 467     switch (mode)
 468     {
 469         case CodePath::CPU:
 470             std::copy(splineValues.begin(), splineValues.end(), splineBuffer);
 471             break;
 472
 473         case CodePath::GPU:
 474             std::copy(splineValues.begin(), splineValues.end(), splineBuffer);
 475             pme_gpu_transform_spline_atom_data(pme->gpu, atc, type, dimIndex, PmeLayoutTransform::HostToGpu);
 476             break;
 477
 478         default:
 479             GMX_THROW(InternalError("Test not implemented for this mode"));
 480     }
 481 }
 482
 483 //! Setting gridline indices to be used in spread/gather
 484 void pmeSetGridLineIndices(gmx_pme_t *pme, CodePath mode,
 485                            const GridLineIndicesVector &gridLineIndices)
 486 {
 487     PmeAtomComm                *atc         = &(pme->atc[0]);
 488     const index                 atomCount   = atc->numAtoms();
 489     GMX_RELEASE_ASSERT(atomCount == gridLineIndices.ssize(), "Mismatch in gridline indices size");
 490
 491     IVec paddedGridSizeUnused, gridSize(0, 0, 0);
 492     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSizeUnused);
 493
 494     for (const auto &index : gridLineIndices)
 495     {
 496         for (int i = 0; i < DIM; i++)
 497         {
 498             GMX_RELEASE_ASSERT((0 <= index[i]) && (index[i] < gridSize[i]), "Invalid gridline index");
 499         }
 500     }
 501
 502     switch (mode)
 503     {
 504         case CodePath::GPU:
 505             memcpy(pme->gpu->staging.h_gridlineIndices, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0]));
 506             break;
 507
 508         case CodePath::CPU:
 509             atc->idx.resize(gridLineIndices.size());
 510             std::copy(gridLineIndices.begin(), gridLineIndices.end(), atc->idx.begin());
 511             break;
 512         default:
 513             GMX_THROW(InternalError("Test not implemented for this mode"));
 514     }
 515 }
 516
 517 //! Getting plain index into the complex 3d grid
 518 inline size_t pmeGetGridPlainIndexInternal(const IVec &index, const IVec &paddedGridSize, GridOrdering gridOrdering)
 519 {
 520     size_t result;
 521     switch (gridOrdering)
 522     {
 523         case GridOrdering::YZX:
 524             result = (index[YY] * paddedGridSize[ZZ] + index[ZZ]) * paddedGridSize[XX] + index[XX];
 525             break;
 526
 527         case GridOrdering::XYZ:
 528             result = (index[XX] * paddedGridSize[YY] + index[YY]) * paddedGridSize[ZZ] + index[ZZ];
 529             break;
 530
 531         default:
 532             GMX_THROW(InternalError("Test not implemented for this mode"));
 533     }
 534     return result;
 535 }
 536
 537 //! Setting real or complex grid
 538 template<typename ValueType>
 539 static void pmeSetGridInternal(const gmx_pme_t *pme, CodePath mode,
 540                                GridOrdering gridOrdering,
 541                                const SparseGridValuesInput<ValueType> &gridValues)
 542 {
 543     IVec       gridSize(0, 0, 0), paddedGridSize(0, 0, 0);
 544     ValueType *grid;
 545     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
 546
 547     switch (mode)
 548     {
 549         case CodePath::GPU: // intentional absence of break, the grid will be copied from the host buffer in testing mode
 550         case CodePath::CPU:
 551             std::memset(grid, 0, paddedGridSize[XX] * paddedGridSize[YY] * paddedGridSize[ZZ] * sizeof(ValueType));
 552             for (const auto &gridValue : gridValues)
 553             {
 554                 for (int i = 0; i < DIM; i++)
 555                 {
 556                     GMX_RELEASE_ASSERT((0 <= gridValue.first[i]) && (gridValue.first[i] < gridSize[i]), "Invalid grid value index");
 557                 }
 558                 const size_t gridValueIndex = pmeGetGridPlainIndexInternal(gridValue.first, paddedGridSize, gridOrdering);
 559                 grid[gridValueIndex] = gridValue.second;
 560             }
 561             break;
 562
 563         default:
 564             GMX_THROW(InternalError("Test not implemented for this mode"));
 565     }
 566 }
 567
 568 //! Setting real grid to be used in gather
 569 void pmeSetRealGrid(const gmx_pme_t *pme, CodePath mode,
 570                     const SparseRealGridValuesInput &gridValues)
 571 {
 572     pmeSetGridInternal<real>(pme, mode, GridOrdering::XYZ, gridValues);
 573 }
 574
 575 //! Setting complex grid to be used in solve
 576 void pmeSetComplexGrid(const gmx_pme_t *pme, CodePath mode,
 577                        GridOrdering gridOrdering,
 578                        const SparseComplexGridValuesInput &gridValues)
 579 {
 580     pmeSetGridInternal<t_complex>(pme, mode, gridOrdering, gridValues);
 581 }
 582
 583 //! Getting the single dimension's spline values or derivatives
 584 SplineParamsDimVector pmeGetSplineData(const gmx_pme_t *pme, CodePath mode,
 585                                        PmeSplineDataType type, int dimIndex)
 586 {
 587     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 588     const PmeAtomComm       *atc         = &(pme->atc[0]);
 589     const size_t             atomCount   = atc->numAtoms();
 590     const size_t             pmeOrder    = pme->pme_order;
 591     const size_t             dimSize     = pmeOrder * atomCount;
 592
 593     real                    *sourceBuffer = pmeGetSplineDataInternal(pme, type, dimIndex);
 594     SplineParamsDimVector    result;
 595     switch (mode)
 596     {
 597         case CodePath::GPU:
 598             pme_gpu_transform_spline_atom_data(pme->gpu, atc, type, dimIndex, PmeLayoutTransform::GpuToHost);
 599             result = arrayRefFromArray(sourceBuffer, dimSize);
 600             break;
 601
 602         case CodePath::CPU:
 603             result = arrayRefFromArray(sourceBuffer, dimSize);
 604             break;
 605
 606         default:
 607             GMX_THROW(InternalError("Test not implemented for this mode"));
 608     }
 609     return result;
 610 }
 611
 612 //! Getting the gridline indices
 613 GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t *pme, CodePath mode)
 614 {
 615     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 616     const PmeAtomComm    *atc         = &(pme->atc[0]);
 617     const size_t          atomCount   = atc->numAtoms();
 618
 619     GridLineIndicesVector gridLineIndices;
 620     switch (mode)
 621     {
 622         case CodePath::GPU:
 623             gridLineIndices = arrayRefFromArray(reinterpret_cast<IVec *>(pme->gpu->staging.h_gridlineIndices), atomCount);
 624             break;
 625
 626         case CodePath::CPU:
 627             gridLineIndices = atc->idx;
 628             break;
 629
 630         default:
 631             GMX_THROW(InternalError("Test not implemented for this mode"));
 632     }
 633     return gridLineIndices;
 634 }
 635
 636 //! Getting real or complex grid - only non zero values
 637 template<typename ValueType>
 638 static SparseGridValuesOutput<ValueType> pmeGetGridInternal(const gmx_pme_t *pme, CodePath mode, GridOrdering gridOrdering)
 639 {
 640     IVec       gridSize(0, 0, 0), paddedGridSize(0, 0, 0);
 641     ValueType *grid;
 642     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
 643     SparseGridValuesOutput<ValueType> gridValues;
 644     switch (mode)
 645     {
 646         case CodePath::GPU: // intentional absence of break
 647         case CodePath::CPU:
 648             gridValues.clear();
 649             for (int ix = 0; ix < gridSize[XX]; ix++)
 650             {
 651                 for (int iy = 0; iy < gridSize[YY]; iy++)
 652                 {
 653                     for (int iz = 0; iz < gridSize[ZZ]; iz++)
 654                     {
 655                         IVec            temp(ix, iy, iz);
 656                         const size_t    gridValueIndex = pmeGetGridPlainIndexInternal(temp, paddedGridSize, gridOrdering);
 657                         const ValueType value          = grid[gridValueIndex];
 658                         if (value != ValueType {})
 659                         {
 660                             auto key = formatString("Cell %d %d %d", ix, iy, iz);
 661                             gridValues[key] = value;
 662                         }
 663                     }
 664                 }
 665             }
 666             break;
 667
 668         default:
 669             GMX_THROW(InternalError("Test not implemented for this mode"));
 670     }
 671     return gridValues;
 672 }
 673
 674 //! Getting the real grid (spreading output of pmePerformSplineAndSpread())
 675 SparseRealGridValuesOutput pmeGetRealGrid(const gmx_pme_t *pme, CodePath mode)
 676 {
 677     return pmeGetGridInternal<real>(pme, mode, GridOrdering::XYZ);
 678 }
 679
 680 //! Getting the complex grid output of pmePerformSolve()
 681 SparseComplexGridValuesOutput pmeGetComplexGrid(const gmx_pme_t *pme, CodePath mode,
 682                                                 GridOrdering gridOrdering)
 683 {
 684     return pmeGetGridInternal<t_complex>(pme, mode, gridOrdering);
 685 }
 686
 687 //! Getting the reciprocal energy and virial
 688 PmeOutput pmeGetReciprocalEnergyAndVirial(const gmx_pme_t *pme, CodePath mode,
 689                                           PmeSolveAlgorithm method)
 690 {
 691     PmeOutput output;
 692     switch (mode)
 693     {
 694         case CodePath::CPU:
 695             switch (method)
 696             {
 697                 case PmeSolveAlgorithm::Coulomb:
 698                     get_pme_ener_vir_q(pme->solve_work, pme->nthread, &output);
 699                     break;
 700
 701                 case PmeSolveAlgorithm::LennardJones:
 702                     get_pme_ener_vir_lj(pme->solve_work, pme->nthread, &output);
 703                     break;
 704
 705                 default:
 706                     GMX_THROW(InternalError("Test not implemented for this mode"));
 707             }
 708             break;
 709         case CodePath::GPU:
 710             switch (method)
 711             {
 712                 case PmeSolveAlgorithm::Coulomb:
 713                     pme_gpu_getEnergyAndVirial(*pme, &output);
 714                     break;
 715
 716                 default:
 717                     GMX_THROW(InternalError("Test not implemented for this mode"));
 718             }
 719             break;
 720
 721         default:
 722             GMX_THROW(InternalError("Test not implemented for this mode"));
 723     }
 724     return output;
 725 }
 726
 727 }  // namespace test
 728 }  // namespace gmx