/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_EWALD_PME_GATHER_H
#define GMX_EWALD_PME_GATHER_H
+#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-#include "pme_internal.h"
+class PmeAtomComm;
+struct gmx_pme_t;
+struct splinedata_t;
void gather_f_bsplines(const struct gmx_pme_t* pme,
const real* grid,
return c_pmeAtomDataBlockSize;
}
-int pme_gpu_get_atoms_per_warp(const PmeGpu* pmeGpu)
-{
- if (pmeGpu->settings.useOrderThreadsPerAtom)
- {
- return pmeGpu->programHandle_->impl_->warpSize / c_pmeSpreadGatherThreadsPerAtom4ThPerAtom;
- }
- else
- {
- return pmeGpu->programHandle_->impl_->warpSize / c_pmeSpreadGatherThreadsPerAtom;
- }
-}
-
void pme_gpu_synchronize(const PmeGpu* pmeGpu)
{
pmeGpu->archSpecific->pmeStream_.synchronize();
pmeGpu->archSpecific->fftSetup.resize(0);
}
-int getSplineParamFullIndex(int order, int splineIndex, int dimIndex, int atomIndex, int atomsPerWarp)
-{
- if (order != c_pmeGpuOrder)
- {
- throw order;
- }
- constexpr int fixedOrder = c_pmeGpuOrder;
- GMX_UNUSED_VALUE(fixedOrder);
-
- const int atomWarpIndex = atomIndex % atomsPerWarp;
- const int warpIndex = atomIndex / atomsPerWarp;
- int indexBase, result;
- switch (atomsPerWarp)
- {
- case 1:
- indexBase = getSplineParamIndexBase<fixedOrder, 1>(warpIndex, atomWarpIndex);
- result = getSplineParamIndex<fixedOrder, 1>(indexBase, dimIndex, splineIndex);
- break;
-
- case 2:
- indexBase = getSplineParamIndexBase<fixedOrder, 2>(warpIndex, atomWarpIndex);
- result = getSplineParamIndex<fixedOrder, 2>(indexBase, dimIndex, splineIndex);
- break;
-
- case 4:
- indexBase = getSplineParamIndexBase<fixedOrder, 4>(warpIndex, atomWarpIndex);
- result = getSplineParamIndex<fixedOrder, 4>(indexBase, dimIndex, splineIndex);
- break;
-
- case 8:
- indexBase = getSplineParamIndexBase<fixedOrder, 8>(warpIndex, atomWarpIndex);
- result = getSplineParamIndex<fixedOrder, 8>(indexBase, dimIndex, splineIndex);
- break;
-
- default:
- GMX_THROW(gmx::NotImplementedError(
- gmx::formatString("Test function call not unrolled for atomsPerWarp = %d in "
- "getSplineParamFullIndex",
- atomsPerWarp)));
- }
- return result;
-}
-
void pme_gpu_getEnergyAndVirial(const gmx_pme_t& pme, PmeOutput* output)
{
const PmeGpu* pmeGpu = pme.gpu;
kernelParamsPtr->constants.elFactor = ONE_4PI_EPS0 / pmeGpu->common->epsilon_r;
}
-void pme_gpu_transform_spline_atom_data(const PmeGpu* pmeGpu,
- const PmeAtomComm* atc,
- PmeSplineDataType type,
- int dimIndex,
- PmeLayoutTransform transform)
-{
- // The GPU atom spline data is laid out in a different way currently than the CPU one.
- // This function converts the data from GPU to CPU layout (in the host memory).
- // It is only intended for testing purposes so far.
- // Ideally we should use similar layouts on CPU and GPU if we care about mixed modes and their
- // performance (e.g. spreading on GPU, gathering on CPU).
- GMX_RELEASE_ASSERT(atc->nthread == 1, "Only the serial PME data layout is supported");
- const uintmax_t threadIndex = 0;
- const auto atomCount = pme_gpu_get_kernel_params_base_ptr(pmeGpu)->atoms.nAtoms;
- const auto atomsPerWarp = pme_gpu_get_atoms_per_warp(pmeGpu);
- const auto pmeOrder = pmeGpu->common->pme_order;
- GMX_ASSERT(pmeOrder == c_pmeGpuOrder, "Only PME order 4 is implemented");
-
- real* cpuSplineBuffer;
- float* h_splineBuffer;
- switch (type)
- {
- case PmeSplineDataType::Values:
- cpuSplineBuffer = atc->spline[threadIndex].theta.coefficients[dimIndex];
- h_splineBuffer = pmeGpu->staging.h_theta;
- break;
-
- case PmeSplineDataType::Derivatives:
- cpuSplineBuffer = atc->spline[threadIndex].dtheta.coefficients[dimIndex];
- h_splineBuffer = pmeGpu->staging.h_dtheta;
- break;
-
- default: GMX_THROW(gmx::InternalError("Unknown spline data type"));
- }
-
- for (auto atomIndex = 0; atomIndex < atomCount; atomIndex++)
- {
- for (auto orderIndex = 0; orderIndex < pmeOrder; orderIndex++)
- {
- const auto gpuValueIndex =
- getSplineParamFullIndex(pmeOrder, orderIndex, dimIndex, atomIndex, atomsPerWarp);
- const auto cpuValueIndex = atomIndex * pmeOrder + orderIndex;
- GMX_ASSERT(cpuValueIndex < atomCount * pmeOrder,
- "Atom spline data index out of bounds (while transforming GPU data layout "
- "for host)");
- switch (transform)
- {
- case PmeLayoutTransform::GpuToHost:
- cpuSplineBuffer[cpuValueIndex] = h_splineBuffer[gpuValueIndex];
- break;
-
- case PmeLayoutTransform::HostToGpu:
- h_splineBuffer[gpuValueIndex] = cpuSplineBuffer[cpuValueIndex];
- break;
-
- default: GMX_THROW(gmx::InternalError("Unknown layout transform"));
- }
- }
- }
-}
-
void pme_gpu_get_real_grid_sizes(const PmeGpu* pmeGpu, gmx::IVec* gridSize, gmx::IVec* paddedGridSize)
{
GMX_ASSERT(gridSize != nullptr, "");
{
cellsPerBlock = (gridLineSize + blocksPerGridLine - 1) / blocksPerGridLine;
}
- const int warpSize = pmeGpu->programHandle_->impl_->warpSize;
+ const int warpSize = pmeGpu->programHandle_->warpSize();
const int blockSize = (cellsPerBlock + warpSize - 1) / warpSize * warpSize;
static_assert(GMX_GPU != GMX_GPU_CUDA || c_solveMaxWarpsPerBlock / 2 >= 4,
*/
int pme_gpu_get_atom_data_block_size();
-/*! \libinternal \brief
- * Returns the number of atoms per chunk in the atom spline theta/dtheta data layout.
- *
- * \param[in] pmeGpu The PME GPU structure.
- * \returns Number of atoms in a single GPU atom spline data chunk.
- */
-int pme_gpu_get_atoms_per_warp(const PmeGpu* pmeGpu);
-
/*! \libinternal \brief
* Synchronizes the current computation, waiting for the GPU kernels/transfers to finish.
*
*/
void pme_gpu_finish_computation(const PmeGpu* pmeGpu);
-//! A binary enum for spline data layout transformation
-enum class PmeLayoutTransform
-{
- GpuToHost,
- HostToGpu
-};
-
-/*! \libinternal \brief
- * Rearranges the atom spline data between the GPU and host layouts.
- * Only used for test purposes so far, likely to be horribly slow.
- *
- * \param[in] pmeGpu The PME GPU structure.
- * \param[out] atc The PME CPU atom data structure (with a single-threaded layout).
- * \param[in] type The spline data type (values or derivatives).
- * \param[in] dimIndex Dimension index.
- * \param[in] transform Layout transform type
- */
-GPU_FUNC_QUALIFIER void pme_gpu_transform_spline_atom_data(const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu),
- const PmeAtomComm* GPU_FUNC_ARGUMENT(atc),
- PmeSplineDataType GPU_FUNC_ARGUMENT(type),
- int GPU_FUNC_ARGUMENT(dimIndex),
- PmeLayoutTransform GPU_FUNC_ARGUMENT(transform)) GPU_FUNC_TERM;
-
-/*! \libinternal \brief
- * Gets a unique index to an element in a spline parameter buffer (theta/dtheta),
- * which is laid out for GPU spread/gather kernels. The index is wrt the execution block,
- * in range(0, atomsPerBlock * order * DIM).
- * This is a wrapper, only used in unit tests.
- * \param[in] order PME order
- * \param[in] splineIndex Spline contribution index (from 0 to \p order - 1)
- * \param[in] dimIndex Dimension index (from 0 to 2)
- * \param[in] atomIndex Atom index wrt the block.
- * \param[in] atomsPerWarp Number of atoms processed by a warp.
- *
- * \returns Index into theta or dtheta array using GPU layout.
- */
-int getSplineParamFullIndex(int order, int splineIndex, int dimIndex, int atomIndex, int atomsPerWarp);
-
/*! \libinternal \brief
* Get the normal/padded grid dimensions of the real-space PME grid on GPU. Only used in tests.
*
PmeGpuProgram::~PmeGpuProgram() = default;
+int PmeGpuProgram::warpSize() const
+{
+ return impl_->warpSize();
+}
+
PmeGpuProgramStorage buildPmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext)
{
return std::make_unique<PmeGpuProgram>(deviceInfo, deviceContext);
/*! \libinternal \file
* \brief
- * Declares PmeGpuProgram, which wrap arounds PmeGpuProgramImpl
- * to store permanent PME GPU context-derived data,
- * such as (compiled) kernel handles.
+ * Declares PmeGpuProgram
+ * to store data derived from the GPU context or devices for
+ * PME, such as (compiled) kernel handles and the warp sizes
+ * they work with.
*
* \author Aleksei Iupinov <a.yupinov@gmail.com>
* \ingroup module_ewald
struct PmeGpuProgramImpl;
struct DeviceInformation;
+/*! \libinternal
+ * \brief Stores PME data derived from the GPU context or devices.
+ *
+ * This includes compiled kernel handles and the warp sizes they
+ * work with.
+ */
class PmeGpuProgram
{
public:
+ //! Constructor
explicit PmeGpuProgram(const DeviceInformation& deviceInfo, const DeviceContext& deviceContext);
~PmeGpuProgram();
- // TODO: design getters for information inside, if needed for PME, and make this private?
+ //! Return the warp size for which the kernels were compiled
+ int warpSize() const;
+
+ // TODO: design more getters for information inside, if needed for PME, and make this private?
+    //! Pointer to the private implementation (pimpl); public pending the TODO above
std::unique_ptr<PmeGpuProgramImpl> impl_;
};
PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceInformation& /* deviceInfo */,
const DeviceContext& deviceContext) :
deviceContext_(deviceContext),
- warpSize(0),
+ warpSize_(0),
spreadWorkGroupSize(0),
gatherWorkGroupSize(0),
solveMaxWorkGroupSize(0)
deviceContext_(deviceContext)
{
// kernel parameters
- warpSize = warp_size;
+ warpSize_ = warp_size;
spreadWorkGroupSize = c_spreadMaxThreadsPerBlock;
solveMaxWorkGroupSize = c_solveMaxThreadsPerBlock;
gatherWorkGroupSize = c_gatherMaxThreadsPerBlock;
* For CUDA, this is a static value that comes from gromacs/gpu_utils/cuda_arch_utils.cuh;
* for OpenCL, we have to query it dynamically.
*/
- size_t warpSize;
+ size_t warpSize_;
//@{
/**
~PmeGpuProgramImpl();
GMX_DISALLOW_COPY_AND_ASSIGN(PmeGpuProgramImpl);
+ //! Return the warp size for which the kernels were compiled
+ int warpSize() const { return warpSize_; }
+
private:
// Compiles kernels, if supported. Called by the constructor.
void compileKernels(const DeviceInformation& deviceInfo);
deviceContext_(deviceContext)
{
// kernel parameters
- warpSize = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
+ warpSize_ = gmx::ocl::getDeviceWarpSize(deviceContext_.context(), deviceInfo.oclDeviceId);
// TODO: for Intel ideally we'd want to set these based on the compiler warp size
// but given that we've done no tuning for Intel iGPU, this is as good as anything.
- spreadWorkGroupSize = std::min(c_spreadMaxWarpsPerBlock * warpSize, deviceInfo.maxWorkGroupSize);
- solveMaxWorkGroupSize = std::min(c_solveMaxWarpsPerBlock * warpSize, deviceInfo.maxWorkGroupSize);
- gatherWorkGroupSize = std::min(c_gatherMaxWarpsPerBlock * warpSize, deviceInfo.maxWorkGroupSize);
+ spreadWorkGroupSize = std::min(c_spreadMaxWarpsPerBlock * warpSize_, deviceInfo.maxWorkGroupSize);
+ solveMaxWorkGroupSize = std::min(c_solveMaxWarpsPerBlock * warpSize_, deviceInfo.maxWorkGroupSize);
+ gatherWorkGroupSize = std::min(c_gatherMaxWarpsPerBlock * warpSize_, deviceInfo.maxWorkGroupSize);
compileKernels(deviceInfo);
}
"-DDIM=%d -DXX=%d -DYY=%d -DZZ=%d "
// decomposition parameter placeholders
"-DwrapX=true -DwrapY=true ",
- warpSize, c_pmeGpuOrder, c_pmeSpreadGatherThreadsPerAtom,
+ warpSize_, c_pmeGpuOrder, c_pmeSpreadGatherThreadsPerAtom,
static_cast<float>(c_pmeMaxUnitcellShift), static_cast<int>(c_skipNeutralAtoms),
c_virialAndEnergyCount, spreadWorkGroupSize, solveMaxWorkGroupSize,
gatherWorkGroupSize, DIM, XX, YY, ZZ);
pmegathertest.cpp
pmesolvetest.cpp
pmesplinespreadtest.cpp
- pmetestcommon.cpp
testhardwarecontexts.cpp
+ GPU_CPP_SOURCE_FILES
+ pmetestcommon.cpp
)
#include "testutils/testasserts.h"
#include "pmetestcommon.h"
+#include "testhardwarecontexts.h"
namespace gmx
{
#include "testutils/testasserts.h"
#include "pmetestcommon.h"
+#include "testhardwarecontexts.h"
namespace gmx
{
#include "testutils/testasserts.h"
#include "pmetestcommon.h"
+#include "testhardwarecontexts.h"
namespace gmx
{
#include "testutils/testasserts.h"
#include "pmetestcommon.h"
+#include "testhardwarecontexts.h"
namespace gmx
{
#include "gromacs/domdec/domdec.h"
#include "gromacs/ewald/pme_gather.h"
+#include "gromacs/ewald/pme_gpu_calculate_splines.h"
+#include "gromacs/ewald/pme_gpu_constants.h"
#include "gromacs/ewald/pme_gpu_internal.h"
#include "gromacs/ewald/pme_gpu_staging.h"
#include "gromacs/ewald/pme_grid.h"
#include "testutils/testasserts.h"
+#include "testhardwarecontexts.h"
+
namespace gmx
{
namespace test
const DeviceInformation* deviceInfo,
const PmeGpuProgram* pmeGpuProgram,
const Matrix3x3& box,
- real ewaldCoeff_q,
- real ewaldCoeff_lj)
+ const real ewaldCoeff_q,
+ const real ewaldCoeff_lj)
{
return pmeInitWrapper(inputRec, mode, deviceInfo, pmeGpuProgram, box, ewaldCoeff_q, ewaldCoeff_lj);
// hiding the fact that PME actually needs to know the number of atoms in advance
}
+PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec)
+{
+ const Matrix3x3 defaultBox = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } };
+ return pmeInitWrapper(inputRec, CodePath::CPU, nullptr, nullptr, defaultBox, 0.0F, 0.0F);
+}
+
//! Make a GPU state-propagator manager
std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme,
const DeviceContext& deviceContext)
}
}
+//! A binary enum for spline data layout transformation
+enum class PmeLayoutTransform
+{
+ GpuToHost,
+ HostToGpu
+};
+
+/*! \brief Gets a unique index to an element in a spline parameter buffer.
+ *
+ * These theta/dtheta buffers are laid out for GPU spread/gather
+ * kernels. The index is wrt the execution block, in range(0,
+ * atomsPerBlock * order * DIM).
+ *
+ * This is a wrapper, only used in unit tests.
+ * \param[in] order PME order
+ * \param[in] splineIndex Spline contribution index (from 0 to \p order - 1)
+ * \param[in] dimIndex Dimension index (from 0 to 2)
+ * \param[in] atomIndex Atom index wrt the block.
+ * \param[in] atomsPerWarp Number of atoms processed by a warp.
+ *
+ * \returns Index into theta or dtheta array using GPU layout.
+ */
+static int getSplineParamFullIndex(int order, int splineIndex, int dimIndex, int atomIndex, int atomsPerWarp)
+{
+ if (order != c_pmeGpuOrder)
+ {
+ throw order;
+ }
+ constexpr int fixedOrder = c_pmeGpuOrder;
+ GMX_UNUSED_VALUE(fixedOrder);
+
+ const int atomWarpIndex = atomIndex % atomsPerWarp;
+ const int warpIndex = atomIndex / atomsPerWarp;
+ int indexBase, result;
+ switch (atomsPerWarp)
+ {
+ case 1:
+ indexBase = getSplineParamIndexBase<fixedOrder, 1>(warpIndex, atomWarpIndex);
+ result = getSplineParamIndex<fixedOrder, 1>(indexBase, dimIndex, splineIndex);
+ break;
+
+ case 2:
+ indexBase = getSplineParamIndexBase<fixedOrder, 2>(warpIndex, atomWarpIndex);
+ result = getSplineParamIndex<fixedOrder, 2>(indexBase, dimIndex, splineIndex);
+ break;
+
+ case 4:
+ indexBase = getSplineParamIndexBase<fixedOrder, 4>(warpIndex, atomWarpIndex);
+ result = getSplineParamIndex<fixedOrder, 4>(indexBase, dimIndex, splineIndex);
+ break;
+
+ case 8:
+ indexBase = getSplineParamIndexBase<fixedOrder, 8>(warpIndex, atomWarpIndex);
+ result = getSplineParamIndex<fixedOrder, 8>(indexBase, dimIndex, splineIndex);
+ break;
+
+ default:
+ GMX_THROW(NotImplementedError(
+ formatString("Test function call not unrolled for atomsPerWarp = %d in "
+ "getSplineParamFullIndex",
+ atomsPerWarp)));
+ }
+ return result;
+}
+
+/*! \brief Returns the number of atoms per warp. */
+static int pme_gpu_get_atoms_per_warp(const PmeGpu* pmeGpu)
+{
+ if (pmeGpu->settings.useOrderThreadsPerAtom)
+ {
+ return pmeGpu->programHandle_->warpSize() / c_pmeSpreadGatherThreadsPerAtom4ThPerAtom;
+ }
+ else
+ {
+ return pmeGpu->programHandle_->warpSize() / c_pmeSpreadGatherThreadsPerAtom;
+ }
+}
+
+/*! \brief Rearranges the atom spline data between the GPU and host layouts.
+ * Only used for test purposes so far, likely to be horribly slow.
+ *
+ * \param[in] pmeGpu The PME GPU structure.
+ * \param[out] atc The PME CPU atom data structure (with a single-threaded layout).
+ * \param[in] type The spline data type (values or derivatives).
+ * \param[in] dimIndex Dimension index.
+ * \param[in] transform Layout transform type
+ */
+static void pme_gpu_transform_spline_atom_data(const PmeGpu* pmeGpu,
+ const PmeAtomComm* atc,
+ PmeSplineDataType type,
+ int dimIndex,
+ PmeLayoutTransform transform)
+{
+ // The GPU atom spline data is laid out in a different way currently than the CPU one.
+ // This function converts the data from GPU to CPU layout (in the host memory).
+ // It is only intended for testing purposes so far.
+ // Ideally we should use similar layouts on CPU and GPU if we care about mixed modes and their
+ // performance (e.g. spreading on GPU, gathering on CPU).
+ GMX_RELEASE_ASSERT(atc->nthread == 1, "Only the serial PME data layout is supported");
+ const uintmax_t threadIndex = 0;
+ const auto atomCount = atc->numAtoms();
+ const auto atomsPerWarp = pme_gpu_get_atoms_per_warp(pmeGpu);
+ const auto pmeOrder = pmeGpu->common->pme_order;
+ GMX_ASSERT(pmeOrder == c_pmeGpuOrder, "Only PME order 4 is implemented");
+
+ real* cpuSplineBuffer;
+ float* h_splineBuffer;
+ switch (type)
+ {
+ case PmeSplineDataType::Values:
+ cpuSplineBuffer = atc->spline[threadIndex].theta.coefficients[dimIndex];
+ h_splineBuffer = pmeGpu->staging.h_theta;
+ break;
+
+ case PmeSplineDataType::Derivatives:
+ cpuSplineBuffer = atc->spline[threadIndex].dtheta.coefficients[dimIndex];
+ h_splineBuffer = pmeGpu->staging.h_dtheta;
+ break;
+
+ default: GMX_THROW(InternalError("Unknown spline data type"));
+ }
+
+ for (auto atomIndex = 0; atomIndex < atomCount; atomIndex++)
+ {
+ for (auto orderIndex = 0; orderIndex < pmeOrder; orderIndex++)
+ {
+ const auto gpuValueIndex =
+ getSplineParamFullIndex(pmeOrder, orderIndex, dimIndex, atomIndex, atomsPerWarp);
+ const auto cpuValueIndex = atomIndex * pmeOrder + orderIndex;
+ GMX_ASSERT(cpuValueIndex < atomCount * pmeOrder,
+ "Atom spline data index out of bounds (while transforming GPU data layout "
+ "for host)");
+ switch (transform)
+ {
+ case PmeLayoutTransform::GpuToHost:
+ cpuSplineBuffer[cpuValueIndex] = h_splineBuffer[gpuValueIndex];
+ break;
+
+ case PmeLayoutTransform::HostToGpu:
+ h_splineBuffer[gpuValueIndex] = cpuSplineBuffer[cpuValueIndex];
+ break;
+
+ default: GMX_THROW(InternalError("Unknown layout transform"));
+ }
+ }
+ }
+}
+
//! Setting atom spline values/derivatives to be used in spread/gather
void pmeSetSplineData(const gmx_pme_t* pme,
CodePath mode,
#include <map>
#include <vector>
-#include <gtest/gtest.h>
-
#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_gpu_internal.h"
#include "gromacs/math/gmxcomplex.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/unique_cptr.h"
-#include "testhardwarecontexts.h"
-
namespace gmx
{
namespace test
{
+// Forward declaration
+enum class CodePath;
+
// Convenience typedefs
//! A safe pointer type for PME.
typedef gmx::unique_cptr<gmx_pme_t, gmx_pme_destroy> PmeSafePointer;
real ewaldCoeff_lj = 1.0F);
//! Simple PME initialization (no atom data)
PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec,
- CodePath mode = CodePath::CPU,
- const DeviceInformation* deviceInfo = nullptr,
- const PmeGpuProgram* pmeGpuProgram = nullptr,
- const Matrix3x3& box = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } },
- real ewaldCoeff_q = 0.0F,
- real ewaldCoeff_lj = 0.0F);
+ CodePath mode,
+ const DeviceInformation* deviceInfo,
+ const PmeGpuProgram* pmeGpuProgram,
+ const Matrix3x3& box,
+ real ewaldCoeff_q,
+ real ewaldCoeff_lj);
+//! Simple PME initialization based on inputrec only
+PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec);
//! Make a GPU state-propagator manager
std::unique_ptr<StatePropagatorDataGpu> makeStatePropagatorDataGpu(const gmx_pme_t& pme,
const DeviceContext& deviceContext);
const SplineParamsDimVector& splineValues,
PmeSplineDataType type,
int dimIndex);
+
//! Setting gridline indices be used in spread/gather
void pmeSetGridLineIndices(gmx_pme_t* pme, CodePath mode, const GridLineIndicesVector& gridLineIndices);
//! Setting real grid to be used in gather