const auto runMode = (mode == CodePath::CPU) ? PmeRunMode::CPU : PmeRunMode::Mixed;
t_commrec dummyCommrec = { 0 };
NumPmeDomains numPmeDomains = { 1, 1 };
- gmx_pme_t* pmeDataRaw = gmx_pme_init(&dummyCommrec, numPmeDomains, inputRec, false, false, true,
- ewaldCoeff_q, ewaldCoeff_lj, 1, runMode, nullptr,
- deviceContext, deviceStream, pmeGpuProgram, dummyLogger);
+ gmx_pme_t* pmeDataRaw = gmx_pme_init(&dummyCommrec,
+ numPmeDomains,
+ inputRec,
+ false,
+ false,
+ true,
+ ewaldCoeff_q,
+ ewaldCoeff_lj,
+ 1,
+ runMode,
+ nullptr,
+ deviceContext,
+ deviceStream,
+ pmeGpuProgram,
+ dummyLogger);
PmeSafePointer pme(pmeDataRaw); // taking ownership
// TODO get rid of this with proper matrix type
// TODO: Pin the host buffer and use async memory copies
// TODO: The special constructor for PME-only ranks / PME tests is used here. There should be a
// mechanism that prevents the other constructor from being used here.
- return std::make_unique<StatePropagatorDataGpu>(deviceStream, *deviceContext, GpuApiCallBehavior::Sync,
- pme_gpu_get_block_size(&pme), nullptr);
+ return std::make_unique<StatePropagatorDataGpu>(
+ deviceStream, *deviceContext, GpuApiCallBehavior::Sync, pme_gpu_get_block_size(&pme), nullptr);
}
//! PME initialization with atom data
atc = &(pme->atc[0]);
atc->x = coordinates;
atc->coefficient = charges;
- gmx_pme_reinit_atoms(pme, atomCount, charges.data(), nullptr);
+ gmx_pme_reinit_atoms(pme, atomCount, charges, {});
/* With decomposition there would be more boilerplate atc code here, e.g. do_redist_pos_coeffs */
break;
atc = &(pme->atc[0]);
// We need to set atc->n for passing the size in the tests
atc->setNumAtoms(atomCount);
- gmx_pme_reinit_atoms(pme, atomCount, charges.data(), nullptr);
+ gmx_pme_reinit_atoms(pme, atomCount, charges, {});
stateGpu->reinit(atomCount, atomCount);
stateGpu->copyCoordinatesToGpu(arrayRefFromArray(coordinates.data(), coordinates.size()),
- gmx::AtomLocality::All);
+ gmx::AtomLocality::Local);
pme_gpu_set_kernelparam_coordinates(pme->gpu, stateGpu->getCoordinates());
break;
switch (mode)
{
case CodePath::CPU:
- gmx_parallel_3dfft_real_limits(pme->pfft_setup[gridIndex], gridSize, gridOffsetUnused,
- paddedGridSize);
+ gmx_parallel_3dfft_real_limits(
+ pme->pfft_setup[gridIndex], gridSize, gridOffsetUnused, paddedGridSize);
break;
case CodePath::GPU:
{
const size_t gridIndex = 0;
IVec gridOffsetUnused, complexOrderUnused;
- gmx_parallel_3dfft_complex_limits(pme->pfft_setup[gridIndex], complexOrderUnused, gridSize,
- gridOffsetUnused, paddedGridSize); // TODO: what about YZX ordering?
+ gmx_parallel_3dfft_complex_limits(
+ pme->pfft_setup[gridIndex], complexOrderUnused, gridSize, gridOffsetUnused, paddedGridSize); // TODO: what about YZX ordering?
}
//! Getting the PME grid memory buffer and its sizes - template definition
IVec& /*unused*/) //NOLINT(google-runtime-references)
{
GMX_THROW(InternalError("Deleted function call"));
- // explicitly deleting general template does not compile in clang/icc, see https://llvm.org/bugs/show_bug.cgi?id=17537
+ // explicitly deleting general template does not compile in clang, see https://llvm.org/bugs/show_bug.cgi?id=17537
}
//! Getting the PME real grid memory buffer and its sizes
switch (mode)
{
case CodePath::CPU:
- spread_on_grid(pme, atc, &pme->pmegrid[gridIndex], computeSplines, spreadCharges,
+ spread_on_grid(pme,
+ atc,
+ &pme->pmegrid[gridIndex],
+ computeSplines,
+ spreadCharges,
fftgrid != nullptr ? fftgrid[gridIndex] : nullptr,
- computeSplinesForZeroCharges, gridIndex);
+ computeSplinesForZeroCharges,
+ gridIndex);
if (spreadCharges && !pme->bUseThreads)
{
wrap_periodic_pmegrid(pme, pmegrid);
break;
case PmeSolveAlgorithm::LennardJones:
- solve_pme_lj_yzx(pme, &h_grid, useLorentzBerthelot, cellVolume,
- computeEnergyAndVirial, pme->nthread, threadIndex);
+ solve_pme_lj_yzx(pme,
+ &h_grid,
+ useLorentzBerthelot,
+ cellVolume,
+ computeEnergyAndVirial,
+ pme->nthread,
+ threadIndex);
break;
default: GMX_THROW(InternalError("Test not implemented for this mode"));
switch (mode)
{
case CodePath::GPU:
- memcpy(pme_gpu_staging(pme->gpu).h_gridlineIndices, gridLineIndices.data(),
+ memcpy(pme_gpu_staging(pme->gpu).h_gridlineIndices,
+ gridLineIndices.data(),
atomCount * sizeof(gridLineIndices[0]));
break;
{
case CodePath::GPU: // intentional absence of break, the grid will be copied from the host buffer in testing mode
case CodePath::CPU:
- std::memset(grid, 0,
- paddedGridSize[XX] * paddedGridSize[YY] * paddedGridSize[ZZ] * sizeof(ValueType));
+ std::memset(grid, 0, paddedGridSize[XX] * paddedGridSize[YY] * paddedGridSize[ZZ] * sizeof(ValueType));
for (const auto& gridValue : gridValues)
{
for (int i = 0; i < DIM; i++)
//! Default constructor: sets the code path of this context to CodePath::CPU
PmeTestHardwareContext::PmeTestHardwareContext() :
    codePath_(CodePath::CPU)
{
}
PmeTestHardwareContext::PmeTestHardwareContext(TestDevice* testDevice) :
- codePath_(CodePath::GPU),
- testDevice_(testDevice)
+ codePath_(CodePath::GPU), testDevice_(testDevice)
{
setActiveDevice(testDevice_->deviceInfo());
pmeGpuProgram_ = buildPmeGpuProgram(testDevice_->deviceContext());