src/gromacs/ewald/tests/pmetestcommon.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  * \brief
  37  * Implements common routines for PME tests.
  38  *
  39  * \author Aleksei Iupinov <a.yupinov@gmail.com>
  40  * \ingroup module_ewald
  41  */
  42 #include "gmxpre.h"
  43
  44 #include "pmetestcommon.h"
  45
  46 #include <cstring>
  47
  48 #include "gromacs/domdec/domdec.h"
  49 #include "gromacs/ewald/pme-gather.h"
  50 #include "gromacs/ewald/pme-gpu-internal.h"
  51 #include "gromacs/ewald/pme-grid.h"
  52 #include "gromacs/ewald/pme-internal.h"
  53 #include "gromacs/ewald/pme-solve.h"
  54 #include "gromacs/ewald/pme-spread.h"
  55 #include "gromacs/fft/parallel_3dfft.h"
  56 #include "gromacs/gpu_utils/gpu_utils.h"
  57 #include "gromacs/math/invertmatrix.h"
  58 #include "gromacs/mdtypes/commrec.h"
  59 #include "gromacs/pbcutil/pbc.h"
  60 #include "gromacs/topology/topology.h"
  61 #include "gromacs/utility/exceptions.h"
  62 #include "gromacs/utility/gmxassert.h"
  63 #include "gromacs/utility/logger.h"
  64 #include "gromacs/utility/stringutil.h"
  65
  66 #include "testutils/testasserts.h"
  67
  68 namespace gmx
  69 {
  70 namespace test
  71 {
  72
  73 bool pmeSupportsInputForMode(const t_inputrec *inputRec, CodePath mode)
  74 {
  75     bool       implemented;
  76     gmx_mtop_t mtop;
  77     switch (mode)
  78     {
  79         case CodePath::CPU:
  80             implemented = true;
  81             break;
  82
  83         case CodePath::CUDA:
  84             implemented = (pme_gpu_supports_build(nullptr) &&
  85                            pme_gpu_supports_input(*inputRec, mtop, nullptr));
  86             break;
  87
  88         default:
  89             GMX_THROW(InternalError("Test not implemented for this mode"));
  90     }
  91     return implemented;
  92 }
  93
  94 uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder)
  95 {
  96     /* Arbitrary ulp tolerance for sine/cosine implementation. It's
  97      * hard to know what to pick without testing lots of
  98      * implementations. */
  99     const uint64_t sineUlps = 10;
 100     return 4 * (splineOrder - 2) + 2 * sineUlps * splineOrder;
 101 }
 102
 103 //! PME initialization - internal
 104 static PmeSafePointer pmeInitInternal(const t_inputrec         *inputRec,
 105                                       CodePath                  mode,
 106                                       const gmx_device_info_t  *gpuInfo,
 107                                       PmeGpuProgramHandle       pmeGpuProgram,
 108                                       size_t                    atomCount,
 109                                       const Matrix3x3          &box,
 110                                       real                      ewaldCoeff_q = 1.0f,
 111                                       real                      ewaldCoeff_lj = 1.0f
 112                                       )
 113 {
 114     const MDLogger dummyLogger;
 115     const auto     runMode       = (mode == CodePath::CPU) ? PmeRunMode::CPU : PmeRunMode::GPU;
 116     t_commrec      dummyCommrec  = {0};
 117     NumPmeDomains  numPmeDomains = { 1, 1 };
 118     gmx_pme_t     *pmeDataRaw    = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, atomCount, false, false, true,
 119                                                 ewaldCoeff_q, ewaldCoeff_lj, 1, runMode, nullptr, gpuInfo, pmeGpuProgram, dummyLogger);
 120     PmeSafePointer pme(pmeDataRaw); // taking ownership
 121
 122     // TODO get rid of this with proper matrix type
 123     matrix boxTemp;
 124     for (int i = 0; i < DIM; i++)
 125     {
 126         for (int j = 0; j < DIM; j++)
 127         {
 128             boxTemp[i][j] = box[i * DIM + j];
 129         }
 130     }
 131     const char *boxError = check_box(-1, boxTemp);
 132     GMX_RELEASE_ASSERT(boxError == nullptr, boxError);
 133
 134     switch (mode)
 135     {
 136         case CodePath::CPU:
 137             invertBoxMatrix(boxTemp, pme->recipbox);
 138             break;
 139
 140         case CodePath::CUDA:
 141             pme_gpu_set_testing(pme->gpu, true);
 142             pme_gpu_update_input_box(pme->gpu, boxTemp);
 143             break;
 144
 145         default:
 146             GMX_THROW(InternalError("Test not implemented for this mode"));
 147     }
 148
 149     return pme;
 150 }
 151
 152 //! Simple PME initialization based on input, no atom data
 153 PmeSafePointer pmeInitEmpty(const t_inputrec         *inputRec,
 154                             CodePath                  mode,
 155                             const gmx_device_info_t  *gpuInfo,
 156                             PmeGpuProgramHandle       pmeGpuProgram,
 157                             const Matrix3x3          &box,
 158                             real                      ewaldCoeff_q,
 159                             real                      ewaldCoeff_lj
 160                             )
 161 {
 162     return pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, 0, box, ewaldCoeff_q, ewaldCoeff_lj);
 163     // hiding the fact that PME actually needs to know the number of atoms in advance
 164 }
 165
 166 //! PME initialization with atom data
 167 PmeSafePointer pmeInitAtoms(const t_inputrec         *inputRec,
 168                             CodePath                  mode,
 169                             const gmx_device_info_t  *gpuInfo,
 170                             PmeGpuProgramHandle       pmeGpuProgram,
 171                             const CoordinatesVector  &coordinates,
 172                             const ChargesVector      &charges,
 173                             const Matrix3x3          &box
 174                             )
 175 {
 176     const index     atomCount = coordinates.size();
 177     GMX_RELEASE_ASSERT(atomCount == charges.size(), "Mismatch in atom data");
 178     PmeSafePointer  pmeSafe = pmeInitInternal(inputRec, mode, gpuInfo, pmeGpuProgram, atomCount, box);
 179     pme_atomcomm_t *atc     = nullptr;
 180
 181     switch (mode)
 182     {
 183         case CodePath::CPU:
 184             atc              = &(pmeSafe->atc[0]);
 185             atc->x           = const_cast<rvec *>(as_rvec_array(coordinates.data()));
 186             atc->coefficient = const_cast<real *>(charges.data());
 187             /* With decomposition there would be more boilerplate atc code here, e.g. do_redist_pos_coeffs */
 188             break;
 189
 190         case CodePath::CUDA:
 191             gmx_pme_reinit_atoms(pmeSafe.get(), atomCount, charges.data());
 192             pme_gpu_copy_input_coordinates(pmeSafe->gpu, as_rvec_array(coordinates.data()));
 193             break;
 194
 195         default:
 196             GMX_THROW(InternalError("Test not implemented for this mode"));
 197     }
 198
 199     return pmeSafe;
 200 }
 201
 202 //! Getting local PME real grid pointer for test I/O
 203 static real *pmeGetRealGridInternal(const gmx_pme_t *pme)
 204 {
 205     const size_t gridIndex = 0;
 206     return pme->fftgrid[gridIndex];
 207 }
 208
 209 //! Getting local PME real grid dimensions
 210 static void pmeGetRealGridSizesInternal(const gmx_pme_t      *pme,
 211                                         CodePath              mode,
 212                                         IVec                 &gridSize,       //NOLINT(google-runtime-references)
 213                                         IVec                 &paddedGridSize) //NOLINT(google-runtime-references)
 214 {
 215     const size_t gridIndex = 0;
 216     IVec         gridOffsetUnused;
 217     switch (mode)
 218     {
 219         case CodePath::CPU:
 220             gmx_parallel_3dfft_real_limits(pme->pfft_setup[gridIndex], gridSize, gridOffsetUnused, paddedGridSize);
 221             break;
 222
 223         case CodePath::CUDA:
 224             pme_gpu_get_real_grid_sizes(pme->gpu, &gridSize, &paddedGridSize);
 225             break;
 226
 227         default:
 228             GMX_THROW(InternalError("Test not implemented for this mode"));
 229     }
 230 }
 231
 232 //! Getting local PME complex grid pointer for test I/O
 233 static t_complex *pmeGetComplexGridInternal(const gmx_pme_t *pme)
 234 {
 235     const size_t gridIndex = 0;
 236     return pme->cfftgrid[gridIndex];
 237 }
 238
 239 //! Getting local PME complex grid dimensions
 240 static void pmeGetComplexGridSizesInternal(const gmx_pme_t      *pme,
 241                                            IVec                 &gridSize,       //NOLINT(google-runtime-references)
 242                                            IVec                 &paddedGridSize) //NOLINT(google-runtime-references)
 243 {
 244     const size_t gridIndex = 0;
 245     IVec         gridOffsetUnused, complexOrderUnused;
 246     gmx_parallel_3dfft_complex_limits(pme->pfft_setup[gridIndex], complexOrderUnused, gridSize, gridOffsetUnused, paddedGridSize); //TODO: what about YZX ordering?
 247 }
 248
 249 //! Getting the PME grid memory buffer and its sizes - template definition
 250 template<typename ValueType> static void pmeGetGridAndSizesInternal(const gmx_pme_t * /*unused*/, CodePath /*unused*/, ValueType * & /*unused*/, IVec & /*unused*/, IVec & /*unused*/) //NOLINT(google-runtime-references)
 251 {
 252     GMX_THROW(InternalError("Deleted function call"));
 253     // explicitly deleting general template does not compile in clang/icc, see https://llvm.org/bugs/show_bug.cgi?id=17537
 254 }
 255
 256 //! Getting the PME real grid memory buffer and its sizes
 257 template<> void pmeGetGridAndSizesInternal<real>(const gmx_pme_t *pme, CodePath mode, real * &grid, IVec &gridSize, IVec &paddedGridSize)
 258 {
 259     grid = pmeGetRealGridInternal(pme);
 260     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSize);
 261 }
 262
 263 //! Getting the PME complex grid memory buffer and its sizes
 264 template<> void pmeGetGridAndSizesInternal<t_complex>(const gmx_pme_t *pme, CodePath /*unused*/, t_complex * &grid, IVec &gridSize, IVec &paddedGridSize)
 265 {
 266     grid = pmeGetComplexGridInternal(pme);
 267     pmeGetComplexGridSizesInternal(pme, gridSize, paddedGridSize);
 268 }
 269
 270 //! PME spline calculation and charge spreading
 271 void pmePerformSplineAndSpread(gmx_pme_t *pme, CodePath mode, // TODO const qualifiers elsewhere
 272                                bool computeSplines, bool spreadCharges)
 273 {
 274     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 275     pme_atomcomm_t *atc                          = &(pme->atc[0]);
 276     const size_t    gridIndex                    = 0;
 277     const bool      computeSplinesForZeroCharges = true;
 278     real           *fftgrid                      = spreadCharges ? pme->fftgrid[gridIndex] : nullptr;
 279     real           *pmegrid                      = pme->pmegrid[gridIndex].grid.grid;
 280
 281     switch (mode)
 282     {
 283         case CodePath::CPU:
 284             spread_on_grid(pme, atc, &pme->pmegrid[gridIndex], computeSplines, spreadCharges,
 285                            fftgrid, computeSplinesForZeroCharges, gridIndex);
 286             if (spreadCharges && !pme->bUseThreads)
 287             {
 288                 wrap_periodic_pmegrid(pme, pmegrid);
 289                 copy_pmegrid_to_fftgrid(pme, pmegrid, fftgrid, gridIndex);
 290             }
 291             break;
 292
 293         case CodePath::CUDA:
 294             pme_gpu_spread(pme->gpu, gridIndex, fftgrid, computeSplines, spreadCharges);
 295             break;
 296
 297         default:
 298             GMX_THROW(InternalError("Test not implemented for this mode"));
 299     }
 300 }
 301
 302 //! Getting the internal spline data buffer pointer
 303 static real *pmeGetSplineDataInternal(const gmx_pme_t *pme, PmeSplineDataType type, int dimIndex)
 304 {
 305     GMX_ASSERT((0 <= dimIndex) && (dimIndex < DIM), "Invalid dimension index");
 306     const pme_atomcomm_t *atc          = &(pme->atc[0]);
 307     const size_t          threadIndex  = 0;
 308     real                 *splineBuffer = nullptr;
 309     switch (type)
 310     {
 311         case PmeSplineDataType::Values:
 312             splineBuffer = atc->spline[threadIndex].theta[dimIndex];
 313             break;
 314
 315         case PmeSplineDataType::Derivatives:
 316             splineBuffer = atc->spline[threadIndex].dtheta[dimIndex];
 317             break;
 318
 319         default:
 320             GMX_THROW(InternalError("Unknown spline data type"));
 321     }
 322     return splineBuffer;
 323 }
 324
 325 //! PME solving
 326 void pmePerformSolve(const gmx_pme_t *pme, CodePath mode,
 327                      PmeSolveAlgorithm method, real cellVolume,
 328                      GridOrdering gridOrdering, bool computeEnergyAndVirial)
 329 {
 330     t_complex      *h_grid                 = pmeGetComplexGridInternal(pme);
 331     const bool      useLorentzBerthelot    = false;
 332     const size_t    threadIndex            = 0;
 333     switch (mode)
 334     {
 335         case CodePath::CPU:
 336             if (gridOrdering != GridOrdering::YZX)
 337             {
 338                 GMX_THROW(InternalError("Test not implemented for this mode"));
 339             }
 340             switch (method)
 341             {
 342                 case PmeSolveAlgorithm::Coulomb:
 343                     solve_pme_yzx(pme, h_grid, cellVolume,
 344                                   computeEnergyAndVirial, pme->nthread, threadIndex);
 345                     break;
 346
 347                 case PmeSolveAlgorithm::LennardJones:
 348                     solve_pme_lj_yzx(pme, &h_grid, useLorentzBerthelot,
 349                                      cellVolume, computeEnergyAndVirial, pme->nthread, threadIndex);
 350                     break;
 351
 352                 default:
 353                     GMX_THROW(InternalError("Test not implemented for this mode"));
 354             }
 355             break;
 356
 357         case CodePath::CUDA:
 358             switch (method)
 359             {
 360                 case PmeSolveAlgorithm::Coulomb:
 361                     pme_gpu_solve(pme->gpu, h_grid, gridOrdering, computeEnergyAndVirial);
 362                     break;
 363
 364                 default:
 365                     GMX_THROW(InternalError("Test not implemented for this mode"));
 366             }
 367             break;
 368
 369         default:
 370             GMX_THROW(InternalError("Test not implemented for this mode"));
 371     }
 372 }
 373
 374 //! PME force gathering
 375 void pmePerformGather(gmx_pme_t *pme, CodePath mode,
 376                       PmeForceOutputHandling inputTreatment, ForcesVector &forces)
 377 {
 378     pme_atomcomm_t *atc                     = &(pme->atc[0]);
 379     const index     atomCount               = atc->n;
 380     GMX_RELEASE_ASSERT(forces.size() == atomCount, "Invalid force buffer size");
 381     const bool      forceReductionWithInput = (inputTreatment == PmeForceOutputHandling::ReduceWithInput);
 382     const real      scale                   = 1.0;
 383     const size_t    threadIndex             = 0;
 384     const size_t    gridIndex               = 0;
 385     real           *pmegrid                 = pme->pmegrid[gridIndex].grid.grid;
 386     real           *fftgrid                 = pme->fftgrid[gridIndex];
 387
 388     switch (mode)
 389     {
 390         case CodePath::CPU:
 391             atc->f = as_rvec_array(forces.begin());
 392             if (atc->nthread == 1)
 393             {
 394                 // something which is normally done in serial spline computation (make_thread_local_ind())
 395                 atc->spline[threadIndex].n = atomCount;
 396             }
 397             copy_fftgrid_to_pmegrid(pme, fftgrid, pmegrid, gridIndex, pme->nthread, threadIndex);
 398             unwrap_periodic_pmegrid(pme, pmegrid);
 399             gather_f_bsplines(pme, pmegrid, !forceReductionWithInput, atc, &atc->spline[threadIndex], scale);
 400             break;
 401
 402         case CodePath::CUDA:
 403         {
 404             // Variable initialization needs a non-switch scope
 405             auto stagingForces = pme_gpu_get_forces(pme->gpu);
 406             GMX_ASSERT(forces.size() == stagingForces.size(), "Size of force buffers did not match");
 407             if (forceReductionWithInput)
 408             {
 409                 for (index i = 0; i != forces.size(); ++i)
 410                 {
 411                     stagingForces[i] = forces[i];
 412                 }
 413             }
 414             pme_gpu_gather(pme->gpu, inputTreatment, reinterpret_cast<float *>(fftgrid));
 415             for (index i = 0; i != forces.size(); ++i)
 416             {
 417                 forces[i] = stagingForces[i];
 418             }
 419         }
 420         break;
 421
 422         default:
 423             GMX_THROW(InternalError("Test not implemented for this mode"));
 424     }
 425 }
 426
 427 //! PME test finalization before fetching the outputs
 428 void pmeFinalizeTest(const gmx_pme_t *pme, CodePath mode)
 429 {
 430     switch (mode)
 431     {
 432         case CodePath::CPU:
 433             break;
 434
 435         case CodePath::CUDA:
 436             pme_gpu_synchronize(pme->gpu);
 437             break;
 438
 439         default:
 440             GMX_THROW(InternalError("Test not implemented for this mode"));
 441     }
 442 }
 443
 444 //! Setting atom spline values/derivatives to be used in spread/gather
 445 void pmeSetSplineData(const gmx_pme_t *pme, CodePath mode,
 446                       const SplineParamsDimVector &splineValues, PmeSplineDataType type, int dimIndex)
 447 {
 448     const pme_atomcomm_t *atc         = &(pme->atc[0]);
 449     const index           atomCount   = atc->n;
 450     const index           pmeOrder    = pme->pme_order;
 451     const index           dimSize     = pmeOrder * atomCount;
 452     GMX_RELEASE_ASSERT(dimSize == splineValues.size(), "Mismatch in spline data");
 453     real                 *splineBuffer = pmeGetSplineDataInternal(pme, type, dimIndex);
 454
 455     switch (mode)
 456     {
 457         case CodePath::CPU:
 458             std::copy(splineValues.begin(), splineValues.end(), splineBuffer);
 459             break;
 460
 461         case CodePath::CUDA:
 462             std::copy(splineValues.begin(), splineValues.end(), splineBuffer);
 463             pme_gpu_transform_spline_atom_data(pme->gpu, atc, type, dimIndex, PmeLayoutTransform::HostToGpu);
 464             break;
 465
 466         default:
 467             GMX_THROW(InternalError("Test not implemented for this mode"));
 468     }
 469 }
 470
 471 //! Setting gridline indices to be used in spread/gather
 472 void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
 473                            const GridLineIndicesVector &gridLineIndices)
 474 {
 475     const pme_atomcomm_t       *atc         = &(pme->atc[0]);
 476     const index                 atomCount   = atc->n;
 477     GMX_RELEASE_ASSERT(atomCount == gridLineIndices.size(), "Mismatch in gridline indices size");
 478
 479     IVec paddedGridSizeUnused, gridSize(0, 0, 0);
 480     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSizeUnused);
 481
 482     for (const auto &index : gridLineIndices)
 483     {
 484         for (int i = 0; i < DIM; i++)
 485         {
 486             GMX_RELEASE_ASSERT((0 <= index[i]) && (index[i] < gridSize[i]), "Invalid gridline index");
 487         }
 488     }
 489
 490     switch (mode)
 491     {
 492         case CodePath::CUDA:
 493             memcpy(pme->gpu->staging.h_gridlineIndices, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0]));
 494             break;
 495
 496         case CodePath::CPU:
 497             // incompatible IVec and ivec assignment?
 498             //std::copy(gridLineIndices.begin(), gridLineIndices.end(), atc->idx);
 499             memcpy(atc->idx, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0]));
 500             break;
 501
 502         default:
 503             GMX_THROW(InternalError("Test not implemented for this mode"));
 504     }
 505 }
 506
 507 //! Getting plain index into the complex 3d grid
 508 inline size_t pmeGetGridPlainIndexInternal(const IVec &index, const IVec &paddedGridSize, GridOrdering gridOrdering)
 509 {
 510     size_t result;
 511     switch (gridOrdering)
 512     {
 513         case GridOrdering::YZX:
 514             result = (index[YY] * paddedGridSize[ZZ] + index[ZZ]) * paddedGridSize[XX] + index[XX];
 515             break;
 516
 517         case GridOrdering::XYZ:
 518             result = (index[XX] * paddedGridSize[YY] + index[YY]) * paddedGridSize[ZZ] + index[ZZ];
 519             break;
 520
 521         default:
 522             GMX_THROW(InternalError("Test not implemented for this mode"));
 523     }
 524     return result;
 525 }
 526
 527 //! Setting real or complex grid
 528 template<typename ValueType>
 529 static void pmeSetGridInternal(const gmx_pme_t *pme, CodePath mode,
 530                                GridOrdering gridOrdering,
 531                                const SparseGridValuesInput<ValueType> &gridValues)
 532 {
 533     IVec       gridSize(0, 0, 0), paddedGridSize(0, 0, 0);
 534     ValueType *grid;
 535     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
 536
 537     switch (mode)
 538     {
 539         case CodePath::CUDA: // intentional absence of break, the grid will be copied from the host buffer in testing mode
 540         case CodePath::CPU:
 541             std::memset(grid, 0, paddedGridSize[XX] * paddedGridSize[YY] * paddedGridSize[ZZ] * sizeof(ValueType));
 542             for (const auto &gridValue : gridValues)
 543             {
 544                 for (int i = 0; i < DIM; i++)
 545                 {
 546                     GMX_RELEASE_ASSERT((0 <= gridValue.first[i]) && (gridValue.first[i] < gridSize[i]), "Invalid grid value index");
 547                 }
 548                 const size_t gridValueIndex = pmeGetGridPlainIndexInternal(gridValue.first, paddedGridSize, gridOrdering);
 549                 grid[gridValueIndex] = gridValue.second;
 550             }
 551             break;
 552
 553         default:
 554             GMX_THROW(InternalError("Test not implemented for this mode"));
 555     }
 556 }
 557
 558 //! Setting real grid to be used in gather
 559 void pmeSetRealGrid(const gmx_pme_t *pme, CodePath mode,
 560                     const SparseRealGridValuesInput &gridValues)
 561 {
 562     pmeSetGridInternal<real>(pme, mode, GridOrdering::XYZ, gridValues);
 563 }
 564
 565 //! Setting complex grid to be used in solve
 566 void pmeSetComplexGrid(const gmx_pme_t *pme, CodePath mode,
 567                        GridOrdering gridOrdering,
 568                        const SparseComplexGridValuesInput &gridValues)
 569 {
 570     pmeSetGridInternal<t_complex>(pme, mode, gridOrdering, gridValues);
 571 }
 572
 573 //! Getting the single dimension's spline values or derivatives
 574 SplineParamsDimVector pmeGetSplineData(const gmx_pme_t *pme, CodePath mode,
 575                                        PmeSplineDataType type, int dimIndex)
 576 {
 577     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 578     const pme_atomcomm_t    *atc         = &(pme->atc[0]);
 579     const size_t             atomCount   = atc->n;
 580     const size_t             pmeOrder    = pme->pme_order;
 581     const size_t             dimSize     = pmeOrder * atomCount;
 582
 583     real                    *sourceBuffer = pmeGetSplineDataInternal(pme, type, dimIndex);
 584     SplineParamsDimVector    result;
 585     switch (mode)
 586     {
 587         case CodePath::CUDA:
 588             pme_gpu_transform_spline_atom_data(pme->gpu, atc, type, dimIndex, PmeLayoutTransform::GpuToHost);
 589             result = arrayRefFromArray(sourceBuffer, dimSize);
 590             break;
 591
 592         case CodePath::CPU:
 593             result = arrayRefFromArray(sourceBuffer, dimSize);
 594             break;
 595
 596         default:
 597             GMX_THROW(InternalError("Test not implemented for this mode"));
 598     }
 599     return result;
 600 }
 601
 602 //! Getting the gridline indices
 603 GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t *pme, CodePath mode)
 604 {
 605     GMX_RELEASE_ASSERT(pme != nullptr, "PME data is not initialized");
 606     const pme_atomcomm_t *atc         = &(pme->atc[0]);
 607     const size_t          atomCount   = atc->n;
 608
 609     GridLineIndicesVector gridLineIndices;
 610     switch (mode)
 611     {
 612         case CodePath::CUDA:
 613             gridLineIndices = arrayRefFromArray(reinterpret_cast<IVec *>(pme->gpu->staging.h_gridlineIndices), atomCount);
 614             break;
 615
 616         case CodePath::CPU:
 617             gridLineIndices = arrayRefFromArray(reinterpret_cast<IVec *>(atc->idx), atomCount);
 618             break;
 619
 620         default:
 621             GMX_THROW(InternalError("Test not implemented for this mode"));
 622     }
 623     return gridLineIndices;
 624 }
 625
 626 //! Getting real or complex grid - only non zero values
 627 template<typename ValueType>
 628 static SparseGridValuesOutput<ValueType> pmeGetGridInternal(const gmx_pme_t *pme, CodePath mode, GridOrdering gridOrdering)
 629 {
 630     IVec       gridSize(0, 0, 0), paddedGridSize(0, 0, 0);
 631     ValueType *grid;
 632     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
 633     SparseGridValuesOutput<ValueType> gridValues;
 634     switch (mode)
 635     {
 636         case CodePath::CUDA: // intentional absence of break
 637         case CodePath::CPU:
 638             gridValues.clear();
 639             for (int ix = 0; ix < gridSize[XX]; ix++)
 640             {
 641                 for (int iy = 0; iy < gridSize[YY]; iy++)
 642                 {
 643                     for (int iz = 0; iz < gridSize[ZZ]; iz++)
 644                     {
 645                         IVec            temp(ix, iy, iz);
 646                         const size_t    gridValueIndex = pmeGetGridPlainIndexInternal(temp, paddedGridSize, gridOrdering);
 647                         const ValueType value          = grid[gridValueIndex];
 648                         if (value != ValueType {})
 649                         {
 650                             auto key = formatString("Cell %d %d %d", ix, iy, iz);
 651                             gridValues[key] = value;
 652                         }
 653                     }
 654                 }
 655             }
 656             break;
 657
 658         default:
 659             GMX_THROW(InternalError("Test not implemented for this mode"));
 660     }
 661     return gridValues;
 662 }
 663
 664 //! Getting the real grid (spreading output of pmePerformSplineAndSpread())
 665 SparseRealGridValuesOutput pmeGetRealGrid(const gmx_pme_t *pme, CodePath mode)
 666 {
 667     return pmeGetGridInternal<real>(pme, mode, GridOrdering::XYZ);
 668 }
 669
 670 //! Getting the complex grid output of pmePerformSolve()
 671 SparseComplexGridValuesOutput pmeGetComplexGrid(const gmx_pme_t *pme, CodePath mode,
 672                                                 GridOrdering gridOrdering)
 673 {
 674     return pmeGetGridInternal<t_complex>(pme, mode, gridOrdering);
 675 }
 676
 677 //! Getting the reciprocal energy and virial
 678 PmeSolveOutput pmeGetReciprocalEnergyAndVirial(const gmx_pme_t *pme, CodePath mode,
 679                                                PmeSolveAlgorithm method)
 680 {
 681     real      energy = 0.0f;
 682     Matrix3x3 virial;
 683     matrix    virialTemp = {{0}}; //TODO get rid of
 684     switch (mode)
 685     {
 686         case CodePath::CPU:
 687             switch (method)
 688             {
 689                 case PmeSolveAlgorithm::Coulomb:
 690                     get_pme_ener_vir_q(pme->solve_work, pme->nthread, &energy, virialTemp);
 691                     break;
 692
 693                 case PmeSolveAlgorithm::LennardJones:
 694                     get_pme_ener_vir_lj(pme->solve_work, pme->nthread, &energy, virialTemp);
 695                     break;
 696
 697                 default:
 698                     GMX_THROW(InternalError("Test not implemented for this mode"));
 699             }
 700             break;
 701         case CodePath::CUDA:
 702             switch (method)
 703             {
 704                 case PmeSolveAlgorithm::Coulomb:
 705                     pme_gpu_get_energy_virial(pme->gpu, &energy, virialTemp);
 706                     break;
 707
 708                 default:
 709                     GMX_THROW(InternalError("Test not implemented for this mode"));
 710             }
 711             break;
 712
 713         default:
 714             GMX_THROW(InternalError("Test not implemented for this mode"));
 715     }
 716     for (int i = 0; i < DIM; i++)
 717     {
 718         for (int j = 0; j < DIM; j++)
 719         {
 720             virial[i * DIM + j] = virialTemp[i][j];
 721         }
 722     }
 723     return std::make_tuple(energy, virial);
 724 }
 725
 726 }  // namespace test
 727 }  // namespace gmx