src/gromacs/ewald/tests/pmetestcommon.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief
  37  * Implements common routines for PME tests.
  38  *
  39  * \author Aleksei Iupinov <a.yupinov@gmail.com>
  40  * \ingroup module_ewald
  41  */
  42 #include "gmxpre.h"
  43
  44 #include "pmetestcommon.h"
  45
  46 #include <cstring>
  47
  48 #include "gromacs/ewald/pme-gather.h"
  49 #include "gromacs/ewald/pme-gpu-internal.h"
  50 #include "gromacs/ewald/pme-grid.h"
  51 #include "gromacs/ewald/pme-internal.h"
  52 #include "gromacs/ewald/pme-solve.h"
  53 #include "gromacs/ewald/pme-spread.h"
  54 #include "gromacs/fft/parallel_3dfft.h"
  55 #include "gromacs/gpu_utils/gpu_utils.h"
  56 #include "gromacs/math/invertmatrix.h"
  57 #include "gromacs/mdtypes/commrec.h"
  58 #include "gromacs/pbcutil/pbc.h"
  59 #include "gromacs/utility/exceptions.h"
  60 #include "gromacs/utility/gmxassert.h"
  61 #include "gromacs/utility/logger.h"
  62 #include "gromacs/utility/stringutil.h"
  63
  64 #include "testutils/testasserts.h"
  65
  66 namespace gmx
  67 {
  68 namespace test
  69 {
  70
  71 bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
  72 {
  73     bool implemented;
  74     switch (mode)
  75     {
  76         case CodePath::CPU:
  77             implemented = true;
  78             break;
  79
  80         case CodePath::CUDA:
  81             implemented = pme_gpu_supports_input(inputRec, nullptr);
  82             break;
  83
  84         default:
  85             GMX_THROW(InternalError("Test not implemented for this mode"));
  86     }
  87     return implemented;
  88 }
  89
  90 FloatingPointTolerance getSplineTolerance(gmx_int64_t toleranceUlps)
  91 {
  92     /* Double precision is more affected by error propagation, as moduli are always computed in double. */
  93     return relativeToleranceAsPrecisionDependentUlp(1.0, toleranceUlps, 2 * toleranceUlps);
  94 }
  95
  96 //! PME initialization - internal
  97 static PmeSafePointer pmeInitInternal(const t_inputrec         *inputRec,
  98                                       CodePath                  mode,
  99                                       gmx_device_info_t        *gpuInfo,
 100                                       size_t                    atomCount,
 101                                       const Matrix3x3          &box,
 102                                       real                      ewaldCoeff_q = 1.0f,
 103                                       real                      ewaldCoeff_lj = 1.0f
 104                                       )
 105 {
 106     const MDLogger dummyLogger;
 107     if (gpuInfo)
 108     {
 109         init_gpu(dummyLogger, gpuInfo);
 110     }
 111     const auto     runMode       = (mode == CodePath::CPU) ? PmeRunMode::CPU : PmeRunMode::GPU;
 112     t_commrec      dummyCommrec  = {0};
 113     gmx_pme_t     *pmeDataRaw    = gmx_pme_init(&dummyCommrec, 1, 1, inputRec, atomCount, false, false, true,
 114                                                 ewaldCoeff_q, ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, dummyLogger);
 115     PmeSafePointer pme(pmeDataRaw); // taking ownership
 116
 117     // TODO get rid of this with proper matrix type
 118     matrix boxTemp;
 119     for (int i = 0; i < DIM; i++)
 120     {
 121         for (int j = 0; j < DIM; j++)
 122         {
 123             boxTemp[i][j] = box[i * DIM + j];
 124         }
 125     }
 126     const char *boxError = check_box(-1, boxTemp);
 127     GMX_RELEASE_ASSERT(boxError == nullptr, boxError);
 128
 129     switch (mode)
 130     {
 131         case CodePath::CPU:
 132             invertBoxMatrix(boxTemp, pme->recipbox);
 133             break;
 134
 135         case CodePath::CUDA:
 136             pme_gpu_set_testing(pme->gpu, true);
 137             pme_gpu_update_input_box(pme->gpu, boxTemp);
 138             break;
 139
 140         default:
 141             GMX_THROW(InternalError("Test not implemented for this mode"));
 142     }
 143
 144     return pme;
 145 }
 146
 147 //! Simple PME initialization based on input, no atom data
 148 PmeSafePointer pmeInitEmpty(const t_inputrec         *inputRec,
 149                             CodePath                  mode,
 150                             gmx_device_info_t        *gpuInfo,
 151                             const Matrix3x3          &box,
 152                             real                      ewaldCoeff_q,
 153                             real                      ewaldCoeff_lj
 154                             )
 155 {
 156     return pmeInitInternal(inputRec, mode, gpuInfo, 0, box, ewaldCoeff_q, ewaldCoeff_lj);
 157     // hiding the fact that PME actually needs to know the number of atoms in advance
 158 }
 159
 160 //! PME initialization with atom data
 161 PmeSafePointer pmeInitAtoms(const t_inputrec         *inputRec,
 162                             CodePath                  mode,
 163                             gmx_device_info_t        *gpuInfo,
 164                             const CoordinatesVector  &coordinates,
 165                             const ChargesVector      &charges,
 166                             const Matrix3x3          &box
 167                             )
 168 {
 169     const size_t    atomCount = coordinates.size();
 170     GMX_RELEASE_ASSERT(atomCount == charges.size(), "Mismatch in atom data");
 171     PmeSafePointer  pmeSafe = pmeInitInternal(inputRec, mode, gpuInfo, atomCount, box);
 172     pme_atomcomm_t *atc     = nullptr;
 173
 174     switch (mode)
 175     {
 176         case CodePath::CPU:
 177             atc              = &(pmeSafe->atc[0]);
 178             atc->x           = const_cast<rvec *>(as_rvec_array(coordinates.data()));
 179             atc->coefficient = const_cast<real *>(charges.data());
 180             /* With decomposition there would be more boilerplate atc code here, e.g. do_redist_pos_coeffs */
 181             break;
 182
 183         case CodePath::CUDA:
 184             gmx_pme_reinit_atoms(pmeSafe.get(), atomCount, charges.data());
 185             pme_gpu_copy_input_coordinates(pmeSafe->gpu, as_rvec_array(coordinates.data()));
 186             break;
 187
 188         default:
 189             GMX_THROW(InternalError("Test not implemented for this mode"));
 190     }
 191
 192     return pmeSafe;
 193 }
 194
 195 //! Getting local PME real grid pointer for test I/O
 196 static real *pmeGetRealGridInternal(const gmx_pme_t *pme)
 197 {
 198     const size_t gridIndex = 0;
 199     return pme->fftgrid[gridIndex];
 200 }
 201
 202 //! Getting local PME real grid dimensions
 203 static void pmeGetRealGridSizesInternal(const gmx_pme_t      *pme,
 204                                         CodePath              mode,
 205                                         IVec                 &gridSize,
 206                                         IVec                 &paddedGridSize)
 207 {
 208     const size_t gridIndex = 0;
 209     IVec         gridOffsetUnused;
 210     switch (mode)
 211     {
 212         case CodePath::CPU:
 213             gmx_parallel_3dfft_real_limits(pme->pfft_setup[gridIndex], gridSize, gridOffsetUnused, paddedGridSize);
 214             break;
 215
 216         case CodePath::CUDA:
 217             pme_gpu_get_real_grid_sizes(pme->gpu, &gridSize, &paddedGridSize);
 218             break;
 219
 220         default:
 221             GMX_THROW(InternalError("Test not implemented for this mode"));
 222     }
 223 }
 224
 225 //! Getting local PME complex grid pointer for test I/O
 226 static t_complex *pmeGetComplexGridInternal(const gmx_pme_t *pme)
 227 {
 228     const size_t gridIndex = 0;
 229     return pme->cfftgrid[gridIndex];
 230 }
 231
 232 //! Getting local PME complex grid dimensions
 233 static void pmeGetComplexGridSizesInternal(const gmx_pme_t      *pme,
 234                                            IVec                 &gridSize,
 235                                            IVec                 &paddedGridSize)
 236 {
 237     const size_t gridIndex = 0;
 238     IVec         gridOffsetUnused, complexOrderUnused;
 239     gmx_parallel_3dfft_complex_limits(pme->pfft_setup[gridIndex], complexOrderUnused, gridSize, gridOffsetUnused, paddedGridSize); //TODO: what about YZX ordering?
 240 }
 241
 242 //! Getting the PME grid memory buffer and its sizes - template definition
 243 template<typename ValueType> static void pmeGetGridAndSizesInternal(const gmx_pme_t *, CodePath, ValueType * &, IVec &, IVec &)
 244 {
 245     GMX_THROW(InternalError("Deleted function call"));
 246     // explicitly deleting general template does not compile in clang/icc, see https://llvm.org/bugs/show_bug.cgi?id=17537
 247 }
 248
 249 //! Getting the PME real grid memory buffer and its sizes
 250 template<> void pmeGetGridAndSizesInternal<real>(const gmx_pme_t *pme, CodePath mode, real * &grid, IVec &gridSize, IVec &paddedGridSize)
 251 {
 252     grid = pmeGetRealGridInternal(pme);
 253     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSize);
 254 }
 255
 256 //! Getting the PME complex grid memory buffer and its sizes
 257 template<> void pmeGetGridAndSizesInternal<t_complex>(const gmx_pme_t *pme, CodePath, t_complex * &grid, IVec &gridSize, IVec &paddedGridSize)
 258 {
 259     grid = pmeGetComplexGridInternal(pme);
 260     pmeGetComplexGridSizesInternal(pme, gridSize, paddedGridSize);
 261 }
 262
 263 //! PME spline calculation and charge spreading
 264 void pmePerformSplineAndSpread(gmx_pme_t *pme, CodePath mode, // TODO const qualifiers elsewhere
 265                                bool computeSplines, bool spreadCharges)
 266 {
 267     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 268     pme_atomcomm_t *atc                          = &(pme->atc[0]);
 269     const size_t    gridIndex                    = 0;
 270     const bool      computeSplinesForZeroCharges = true;
 271     real           *fftgrid                      = spreadCharges ? pme->fftgrid[gridIndex] : nullptr;
 272     real           *pmegrid                      = pme->pmegrid[gridIndex].grid.grid;
 273
 274     switch (mode)
 275     {
 276         case CodePath::CPU:
 277             spread_on_grid(pme, atc, &pme->pmegrid[gridIndex], computeSplines, spreadCharges,
 278                            fftgrid, computeSplinesForZeroCharges, gridIndex);
 279             if (spreadCharges && !pme->bUseThreads)
 280             {
 281                 wrap_periodic_pmegrid(pme, pmegrid);
 282                 copy_pmegrid_to_fftgrid(pme, pmegrid, fftgrid, gridIndex);
 283             }
 284             break;
 285
 286         case CodePath::CUDA:
 287             pme_gpu_spread(pme->gpu, gridIndex, fftgrid, computeSplines, spreadCharges);
 288             break;
 289
 290         default:
 291             GMX_THROW(InternalError("Test not implemented for this mode"));
 292     }
 293 }
 294
 295 //! Getting the internal spline data buffer pointer
 296 static real *pmeGetSplineDataInternal(const gmx_pme_t *pme, PmeSplineDataType type, int dimIndex)
 297 {
 298     GMX_ASSERT((0 <= dimIndex) && (dimIndex < DIM), "Invalid dimension index");
 299     const pme_atomcomm_t *atc          = &(pme->atc[0]);
 300     const size_t          threadIndex  = 0;
 301     real                 *splineBuffer = nullptr;
 302     switch (type)
 303     {
 304         case PmeSplineDataType::Values:
 305             splineBuffer = atc->spline[threadIndex].theta[dimIndex];
 306             break;
 307
 308         case PmeSplineDataType::Derivatives:
 309             splineBuffer = atc->spline[threadIndex].dtheta[dimIndex];
 310             break;
 311
 312         default:
 313             GMX_THROW(InternalError("Unknown spline data type"));
 314     }
 315     return splineBuffer;
 316 }
 317
 318 //! PME solving
 319 void pmePerformSolve(const gmx_pme_t *pme, CodePath mode,
 320                      PmeSolveAlgorithm method, real cellVolume,
 321                      GridOrdering gridOrdering, bool computeEnergyAndVirial)
 322 {
 323     t_complex      *h_grid                 = pmeGetComplexGridInternal(pme);
 324     const bool      useLorentzBerthelot    = false;
 325     const size_t    threadIndex            = 0;
 326     switch (mode)
 327     {
 328         case CodePath::CPU:
 329             if (gridOrdering != GridOrdering::YZX)
 330             {
 331                 GMX_THROW(InternalError("Test not implemented for this mode"));
 332             }
 333             switch (method)
 334             {
 335                 case PmeSolveAlgorithm::Coulomb:
 336                     solve_pme_yzx(pme, h_grid, cellVolume,
 337                                   computeEnergyAndVirial, pme->nthread, threadIndex);
 338                     break;
 339
 340                 case PmeSolveAlgorithm::LennardJones:
 341                     solve_pme_lj_yzx(pme, &h_grid, useLorentzBerthelot,
 342                                      cellVolume, computeEnergyAndVirial, pme->nthread, threadIndex);
 343                     break;
 344
 345                 default:
 346                     GMX_THROW(InternalError("Test not implemented for this mode"));
 347             }
 348             break;
 349
 350         case CodePath::CUDA:
 351             switch (method)
 352             {
 353                 case PmeSolveAlgorithm::Coulomb:
 354                     pme_gpu_solve(pme->gpu, h_grid, gridOrdering, computeEnergyAndVirial);
 355                     break;
 356
 357                 default:
 358                     GMX_THROW(InternalError("Test not implemented for this mode"));
 359             }
 360             break;
 361
 362         default:
 363             GMX_THROW(InternalError("Test not implemented for this mode"));
 364     }
 365 }
 366
 367 //! PME force gathering
 368 void pmePerformGather(gmx_pme_t *pme, CodePath mode,
 369                       PmeForceOutputHandling inputTreatment, ForcesVector &forces)
 370 {
 371     pme_atomcomm_t *atc                     = &(pme->atc[0]);
 372     const size_t    atomCount               = atc->n;
 373     GMX_RELEASE_ASSERT(forces.size() == atomCount, "Invalid force buffer size");
 374     const bool      forceReductionWithInput = (inputTreatment == PmeForceOutputHandling::ReduceWithInput);
 375     const real      scale                   = 1.0;
 376     const size_t    threadIndex             = 0;
 377     const size_t    gridIndex               = 0;
 378     real           *pmegrid                 = pme->pmegrid[gridIndex].grid.grid;
 379     real           *fftgrid                 = pme->fftgrid[gridIndex];
 380
 381     switch (mode)
 382     {
 383         case CodePath::CPU:
 384             atc->f = as_rvec_array(forces.begin());
 385             if (atc->nthread == 1)
 386             {
 387                 // something which is normally done in serial spline computation (make_thread_local_ind())
 388                 atc->spline[threadIndex].n = atomCount;
 389             }
 390             copy_fftgrid_to_pmegrid(pme, fftgrid, pmegrid, gridIndex, pme->nthread, threadIndex);
 391             unwrap_periodic_pmegrid(pme, pmegrid);
 392             gather_f_bsplines(pme, pmegrid, !forceReductionWithInput, atc, &atc->spline[threadIndex], scale);
 393             break;
 394
 395         case CodePath::CUDA:
 396         {
 397             // Variable initialization needs a non-switch scope
 398             auto stagingForces = pme_gpu_get_forces(pme->gpu);
 399             GMX_ASSERT(forces.size() == stagingForces.size(), "Size of force buffers did not match");
 400             if (forceReductionWithInput)
 401             {
 402                 for (size_t i = 0; i != forces.size(); ++i)
 403                 {
 404                     stagingForces[i] = forces[i];
 405                 }
 406             }
 407             pme_gpu_gather(pme->gpu, inputTreatment, reinterpret_cast<float *>(fftgrid));
 408             for (size_t i = 0; i != forces.size(); ++i)
 409             {
 410                 forces[i] = stagingForces[i];
 411             }
 412         }
 413         break;
 414
 415         default:
 416             GMX_THROW(InternalError("Test not implemented for this mode"));
 417     }
 418 }
 419
 420 //! PME test finalization before fetching the outputs
 421 void pmeFinalizeTest(const gmx_pme_t *pme, CodePath mode)
 422 {
 423     switch (mode)
 424     {
 425         case CodePath::CPU:
 426             break;
 427
 428         case CodePath::CUDA:
 429             pme_gpu_synchronize(pme->gpu);
 430             break;
 431
 432         default:
 433             GMX_THROW(InternalError("Test not implemented for this mode"));
 434     }
 435 }
 436
 437 //! Setting atom spline values/derivatives to be used in spread/gather
 438 void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
 439                       const SplineParamsDimVector &splineValues, PmeSplineDataType type, int dimIndex)
 440 {
 441     const pme_atomcomm_t *atc         = &(pme->atc[0]);
 442     const size_t          atomCount   = atc->n;
 443     const size_t          pmeOrder    = pme->pme_order;
 444     const size_t          dimSize     = pmeOrder * atomCount;
 445     GMX_RELEASE_ASSERT(dimSize == splineValues.size(), "Mismatch in spline data");
 446     real                 *splineBuffer = pmeGetSplineDataInternal(pme, type, dimIndex);
 447
 448     switch (mode)
 449     {
 450         case CodePath::CPU:
 451             std::copy(splineValues.begin(), splineValues.end(), splineBuffer);
 452             break;
 453
 454         case CodePath::CUDA:
 455             std::copy(splineValues.begin(), splineValues.end(), splineBuffer);
 456             pme_gpu_transform_spline_atom_data(pme->gpu, atc, type, dimIndex, PmeLayoutTransform::HostToGpu);
 457             break;
 458
 459         default:
 460             GMX_THROW(InternalError("Test not implemented for this mode"));
 461     }
 462 }
 463
 464 //! Setting gridline indices to be used in spread/gather
 465 void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
 466                            const GridLineIndicesVector &gridLineIndices)
 467 {
 468     const pme_atomcomm_t       *atc         = &(pme->atc[0]);
 469     const size_t                atomCount   = atc->n;
 470     GMX_RELEASE_ASSERT(atomCount == gridLineIndices.size(), "Mismatch in gridline indices size");
 471
 472     IVec paddedGridSizeUnused, gridSize;
 473     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSizeUnused);
 474
 475     for (const auto &index : gridLineIndices)
 476     {
 477         for (int i = 0; i < DIM; i++)
 478         {
 479             GMX_RELEASE_ASSERT((0 <= index[i]) && (index[i] < gridSize[i]), "Invalid gridline index");
 480         }
 481     }
 482
 483     switch (mode)
 484     {
 485         case CodePath::CUDA:
 486             memcpy(pme->gpu->staging.h_gridlineIndices, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0]));
 487             break;
 488
 489         case CodePath::CPU:
 490             // incompatible IVec and ivec assignment?
 491             //std::copy(gridLineIndices.begin(), gridLineIndices.end(), atc->idx);
 492             memcpy(atc->idx, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0]));
 493             break;
 494
 495         default:
 496             GMX_THROW(InternalError("Test not implemented for this mode"));
 497     }
 498 }
 499
 500 //! Getting plain index into the complex 3d grid
 501 inline size_t pmeGetGridPlainIndexInternal(const IVec &index, const IVec &paddedGridSize, GridOrdering gridOrdering)
 502 {
 503     size_t result;
 504     switch (gridOrdering)
 505     {
 506         case GridOrdering::YZX:
 507             result = (index[YY] * paddedGridSize[ZZ] + index[ZZ]) * paddedGridSize[XX] + index[XX];
 508             break;
 509
 510         case GridOrdering::XYZ:
 511             result = (index[XX] * paddedGridSize[YY] + index[YY]) * paddedGridSize[ZZ] + index[ZZ];
 512             break;
 513
 514         default:
 515             GMX_THROW(InternalError("Test not implemented for this mode"));
 516     }
 517     return result;
 518 }
 519
 520 //! Setting real or complex grid
 521 template<typename ValueType>
 522 static void pmeSetGridInternal(const gmx_pme_t *pme, CodePath mode,
 523                                GridOrdering gridOrdering,
 524                                const SparseGridValuesInput<ValueType> &gridValues)
 525 {
 526     IVec       gridSize, paddedGridSize;
 527     ValueType *grid;
 528     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
 529
 530     switch (mode)
 531     {
 532         case CodePath::CUDA: // intentional absence of break, the grid will be copied from the host buffer in testing mode
 533         case CodePath::CPU:
 534             std::memset(grid, 0, paddedGridSize[XX] * paddedGridSize[YY] * paddedGridSize[ZZ] * sizeof(ValueType));
 535             for (const auto &gridValue : gridValues)
 536             {
 537                 for (int i = 0; i < DIM; i++)
 538                 {
 539                     GMX_RELEASE_ASSERT((0 <= gridValue.first[i]) && (gridValue.first[i] < gridSize[i]), "Invalid grid value index");
 540                 }
 541                 const size_t gridValueIndex = pmeGetGridPlainIndexInternal(gridValue.first, paddedGridSize, gridOrdering);
 542                 grid[gridValueIndex] = gridValue.second;
 543             }
 544             break;
 545
 546         default:
 547             GMX_THROW(InternalError("Test not implemented for this mode"));
 548     }
 549 }
 550
 551 //! Setting real grid to be used in gather
 552 void pmeSetRealGrid(const gmx_pme_t *pme, CodePath mode,
 553                     const SparseRealGridValuesInput &gridValues)
 554 {
 555     pmeSetGridInternal<real>(pme, mode, GridOrdering::XYZ, gridValues);
 556 }
 557
 558 //! Setting complex grid to be used in solve
 559 void pmeSetComplexGrid(const gmx_pme_t *pme, CodePath mode,
 560                        GridOrdering gridOrdering,
 561                        const SparseComplexGridValuesInput &gridValues)
 562 {
 563     pmeSetGridInternal<t_complex>(pme, mode, gridOrdering, gridValues);
 564 }
 565
 566 //! Getting the single dimension's spline values or derivatives
 567 SplineParamsDimVector pmeGetSplineData(const gmx_pme_t *pme, CodePath mode,
 568                                        PmeSplineDataType type, int dimIndex)
 569 {
 570     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 571     const pme_atomcomm_t    *atc         = &(pme->atc[0]);
 572     const size_t             atomCount   = atc->n;
 573     const size_t             pmeOrder    = pme->pme_order;
 574     const size_t             dimSize     = pmeOrder * atomCount;
 575
 576     real                    *sourceBuffer = pmeGetSplineDataInternal(pme, type, dimIndex);
 577     SplineParamsDimVector    result;
 578     switch (mode)
 579     {
 580         case CodePath::CUDA:
 581             pme_gpu_transform_spline_atom_data(pme->gpu, atc, type, dimIndex, PmeLayoutTransform::GpuToHost);
 582         // fallthrough
 583
 584         case CodePath::CPU:
 585             result = arrayRefFromArray(sourceBuffer, dimSize);
 586             break;
 587
 588         default:
 589             GMX_THROW(InternalError("Test not implemented for this mode"));
 590     }
 591     return result;
 592 }
 593
 594 //! Getting the gridline indices
 595 GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t *pme, CodePath mode)
 596 {
 597     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 598     const pme_atomcomm_t *atc         = &(pme->atc[0]);
 599     const size_t          atomCount   = atc->n;
 600
 601     GridLineIndicesVector gridLineIndices;
 602     switch (mode)
 603     {
 604         case CodePath::CUDA:
 605             gridLineIndices = arrayRefFromArray(reinterpret_cast<IVec *>(pme->gpu->staging.h_gridlineIndices), atomCount);
 606             break;
 607
 608         case CodePath::CPU:
 609             gridLineIndices = arrayRefFromArray(reinterpret_cast<IVec *>(atc->idx), atomCount);
 610             break;
 611
 612         default:
 613             GMX_THROW(InternalError("Test not implemented for this mode"));
 614     }
 615     return gridLineIndices;
 616 }
 617
 618 //! Getting real or complex grid - only non zero values
 619 template<typename ValueType>
 620 static SparseGridValuesOutput<ValueType> pmeGetGridInternal(const gmx_pme_t *pme, CodePath mode, GridOrdering gridOrdering)
 621 {
 622     IVec       gridSize, paddedGridSize;
 623     ValueType *grid;
 624     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
 625     SparseGridValuesOutput<ValueType> gridValues;
 626     switch (mode)
 627     {
 628         case CodePath::CUDA: // intentional absence of break
 629         case CodePath::CPU:
 630             gridValues.clear();
 631             for (int ix = 0; ix < gridSize[XX]; ix++)
 632             {
 633                 for (int iy = 0; iy < gridSize[YY]; iy++)
 634                 {
 635                     for (int iz = 0; iz < gridSize[ZZ]; iz++)
 636                     {
 637                         IVec            temp(ix, iy, iz);
 638                         const size_t    gridValueIndex = pmeGetGridPlainIndexInternal(temp, paddedGridSize, gridOrdering);
 639                         const ValueType value          = grid[gridValueIndex];
 640                         if (value != ValueType {})
 641                         {
 642                             auto key = formatString("Cell %d %d %d", ix, iy, iz);
 643                             gridValues[key] = value;
 644                         }
 645                     }
 646                 }
 647             }
 648             break;
 649
 650         default:
 651             GMX_THROW(InternalError("Test not implemented for this mode"));
 652     }
 653     return gridValues;
 654 }
 655
 656 //! Getting the real grid (spreading output of pmePerformSplineAndSpread())
 657 SparseRealGridValuesOutput pmeGetRealGrid(const gmx_pme_t *pme, CodePath mode)
 658 {
 659     return pmeGetGridInternal<real>(pme, mode, GridOrdering::XYZ);
 660 }
 661
 662 //! Getting the complex grid output of pmePerformSolve()
 663 SparseComplexGridValuesOutput pmeGetComplexGrid(const gmx_pme_t *pme, CodePath mode,
 664                                                 GridOrdering gridOrdering)
 665 {
 666     return pmeGetGridInternal<t_complex>(pme, mode, gridOrdering);
 667 }
 668
 669 //! Getting the reciprocal energy and virial
 670 PmeSolveOutput pmeGetReciprocalEnergyAndVirial(const gmx_pme_t *pme, CodePath mode,
 671                                                PmeSolveAlgorithm method)
 672 {
 673     real      energy = 0.0f;
 674     Matrix3x3 virial;
 675     matrix    virialTemp; //TODO get rid of
 676     switch (mode)
 677     {
 678         case CodePath::CPU:
 679             switch (method)
 680             {
 681                 case PmeSolveAlgorithm::Coulomb:
 682                     get_pme_ener_vir_q(pme->solve_work, pme->nthread, &energy, virialTemp);
 683                     break;
 684
 685                 case PmeSolveAlgorithm::LennardJones:
 686                     get_pme_ener_vir_lj(pme->solve_work, pme->nthread, &energy, virialTemp);
 687                     break;
 688
 689                 default:
 690                     GMX_THROW(InternalError("Test not implemented for this mode"));
 691             }
 692             break;
 693         case CodePath::CUDA:
 694             switch (method)
 695             {
 696                 case PmeSolveAlgorithm::Coulomb:
 697                     pme_gpu_get_energy_virial(pme->gpu, &energy, virialTemp);
 698                     break;
 699
 700                 default:
 701                     GMX_THROW(InternalError("Test not implemented for this mode"));
 702             }
 703             break;
 704
 705         default:
 706             GMX_THROW(InternalError("Test not implemented for this mode"));
 707     }
 708     for (int i = 0; i < DIM; i++)
 709     {
 710         for (int j = 0; j < DIM; j++)
 711         {
 712             virial[i * DIM + j] = virialTemp[i][j];
 713         }
 714     }
 715     return std::make_tuple(energy, virial);
 716 }
 717
 718 }
 719 }