/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/domdec/ga2la.h"
#include "gromacs/domdec/localatomsetmanager.h"
#include "gromacs/domdec/mdsetup.h"
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/imd/imd.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h"
-#include "pme_internal.h"
-
static void make_dft_mod(real* mod, const double* data, int splineOrder, int ndata)
{
for (int i = 0; i < ndata; i++)
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_EWALD_CALCULATE_SPLINE_MODULI_H
#define GMX_EWALD_CALCULATE_SPLINE_MODULI_H
-#include "pme_internal.h"
+#include "spline_vectors.h"
/* Calculate plain SPME B-spline interpolation */
void make_bspline_moduli(splinevec bsp_mod, int nx, int ny, int nz, int order);
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
-#include "pme_internal.h"
-
/* There's nothing special to do here if just masses are perturbed,
* but if either charge or type is perturbed then the implementation
* requires that B states are defined for both charge and type, and
PmeRunMode runMode,
PmeGpu* pmeGpu,
const gmx_device_info_t* gpuInfo,
- PmeGpuProgramHandle pmeGpuProgram,
+ const PmeGpuProgram* pmeGpuProgram,
const gmx::MDLogger& /*mdlog*/)
{
int use_threads, sum_use_threads, i;
pme->atc.emplace_back(pme->mpi_comm_d[1], pme->nthread, pme->pme_order, secondDimIndex, doSpread);
}
- if (pme_gpu_active(pme.get()))
+ // Initial check of validity of the input for running on the GPU
+ if (pme->runMode != PmeRunMode::CPU)
{
- if (!pme->gpu)
+ std::string errorString;
+ bool canRunOnGpu = pme_gpu_check_restrictions(pme.get(), &errorString);
+ if (!canRunOnGpu)
{
- // Initial check of validity of the data
- std::string errorString;
- bool canRunOnGpu = pme_gpu_check_restrictions(pme.get(), &errorString);
- if (!canRunOnGpu)
- {
- GMX_THROW(gmx::NotImplementedError(errorString));
- }
+ GMX_THROW(gmx::NotImplementedError(errorString));
}
-
- pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
}
+ pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
pme_init_all_work(&pme->solve_work, pme->nthread, pme->nkx);
destroy_pme_spline_work(pme->spline_work);
- if (pme_gpu_active(pme) && pme->gpu)
+ if (pme->gpu != nullptr)
{
pme_gpu_destroy(pme->gpu);
}
void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* charges)
{
- if (pme_gpu_active(pme))
+ if (pme->gpu != nullptr)
{
pme_gpu_reinit_atoms(pme->gpu, numAtoms, charges);
}
// TODO: set the charges here as well
}
}
+
+bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size)
+{
+ return (pme.nkx == grid_size[XX] && pme.nky == grid_size[YY] && pme.nkz == grid_size[ZZ]);
+}
#include "gromacs/utility/real.h"
struct gmx_hw_info_t;
-struct interaction_const_t;
struct t_commrec;
-struct t_forcerec;
struct t_inputrec;
struct t_nrnb;
struct PmeGpu;
enum class GpuTaskCompletion;
class PmeGpuProgram;
class GpuEventSynchronizer;
-//! Convenience name.
-using PmeGpuProgramHandle = const PmeGpuProgram*;
namespace gmx
{
-class PmePpCommGpu;
class ForceWithVirial;
class MDLogger;
enum class PinningPolicy : int;
/*! \brief Return the smallest allowed PME grid size for \p pmeOrder */
int minimalPmeGridSize(int pmeOrder);
+//! Return whether the grid of \c pme is identical to \c grid_size.
+bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size);
+
/*! \brief Check restrictions on pme_order and the PME grid nkx,nky,nkz.
*
* With errorsAreFatal=true, an exception or fatal error is generated
* \returns Pointer to newly allocated and initialized PME data.
*
* \todo We should evolve something like a \c GpuManager that holds \c
- * gmx_device_info_t * and \c PmeGpuProgramHandle and perhaps other
+ * gmx_device_info_t * and \c PmeGpuProgram* and perhaps other
* related things whose lifetime can/should exceed that of a task (or
* perhaps task manager). See Redmine #2522.
*/
PmeRunMode runMode,
PmeGpu* pmeGpu,
const gmx_device_info_t* gpuInfo,
- PmeGpuProgramHandle pmeGpuProgram,
+ const PmeGpuProgram* pmeGpuProgram,
const gmx::MDLogger& mdlog);
+/*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from
+ * pme_src. This is only called when the PME cut-off/grid size changes.
+ */
+void gmx_pme_reinit(gmx_pme_t** pmedata,
+ const t_commrec* cr,
+ gmx_pme_t* pme_src,
+ const t_inputrec* ir,
+ const ivec grid_size,
+ real ewaldcoeff_q,
+ real ewaldcoeff_lj);
+
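+/* Illustrative use during PME tuning (a sketch; the real loop lives in the
+ * load-balancing code): when no existing PME structure matches the new grid,
+ * reinitialize one from the current structure.
+ *
+ *   if (!gmx_pme_grid_matches(*pme, grid_size))
+ *   {
+ *       gmx_pme_reinit(&pmedata, cr, pme_src, ir, grid_size, ewaldcoeff_q, ewaldcoeff_lj);
+ *   }
+ */
+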
/*! \brief Destroys the PME data structure.*/
void gmx_pme_destroy(gmx_pme_t* pme);
real* dvdlambda_lj,
int flags);
-/*! \brief Called on the nodes that do PME exclusively */
-int gmx_pmeonly(struct gmx_pme_t* pme,
- const t_commrec* cr,
- t_nrnb* mynrnb,
- gmx_wallcycle* wcycle,
- gmx_walltime_accounting_t walltime_accounting,
- t_inputrec* ir,
- PmeRunMode runMode);
-
/*! \brief Calculate the PME grid energy V for n charges.
*
* The potential (found in \p pme) must have been found already with a
*/
void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q, real* V);
-/*! \brief Send the charges and maxshift to out PME-only node. */
-void gmx_pme_send_parameters(const t_commrec* cr,
- const interaction_const_t* ic,
- gmx_bool bFreeEnergy_q,
- gmx_bool bFreeEnergy_lj,
- real* chargeA,
- real* chargeB,
- real* sqrt_c6A,
- real* sqrt_c6B,
- real* sigmaA,
- real* sigmaB,
- int maxshift_x,
- int maxshift_y);
-
-/*! \brief Send the coordinates to our PME-only node and request a PME calculation */
-void gmx_pme_send_coordinates(t_forcerec* fr,
- const t_commrec* cr,
- const matrix box,
- const rvec* x,
- real lambda_q,
- real lambda_lj,
- gmx_bool bEnerVir,
- int64_t step,
- bool useGpuPmePpComms,
- bool reinitGpuPmePpComms,
- bool sendCoordinatesFromGpu,
- GpuEventSynchronizer* coordinatesReadyOnDeviceEvent,
- gmx_wallcycle* wcycle);
-
-/*! \brief Tell our PME-only node to finish */
-void gmx_pme_send_finish(const t_commrec* cr);
-
-/*! \brief Tell our PME-only node to reset all cycle and flop counters */
-void gmx_pme_send_resetcounters(const t_commrec* cr, int64_t step);
-
-/*! \brief PP nodes receive the long range forces from the PME nodes */
-void gmx_pme_receive_f(gmx::PmePpCommGpu* pmePpCommGpu,
- const t_commrec* cr,
- gmx::ForceWithVirial* forceWithVirial,
- real* energy_q,
- real* energy_lj,
- real* dvdlambda_q,
- real* dvdlambda_lj,
- bool useGpuPmePpComms,
- bool receivePmeForceToGpu,
- float* pme_cycles);
-
/*! \brief
* This function updates the local atom data on GPU after DD (charges, coordinates, etc.).
* TODO: it should update the PME CPU atom data as well.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_PMECOORDINATERECEIVERGPU_H
#define GMX_PMECOORDINATERECEIVERGPU_H
-#include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme_force_sender_gpu.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/utility/classhelpers.h"
#include "gromacs/utility/gmxmpi.h"
+struct PpRanks;
+
namespace gmx
{
+template<typename>
+class ArrayRef;
+
class PmeCoordinateReceiverGpu
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "config.h"
#include "gromacs/ewald/pme_coordinate_receiver_gpu.h"
+#include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/gmxassert.h"
#if GMX_GPU != GMX_GPU_CUDA
PmeCoordinateReceiverGpu::~PmeCoordinateReceiverGpu() = default;
/*! \brief Init PME-PP GPU communication stub */
-void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> /* d_x */)
+void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> /* d_x */)
{
GMX_ASSERT(false,
"A CPU stub for PME-PP GPU communication initialization was called instead of the "
#include "config.h"
-#include <assert.h>
-#include <stdio.h>
-
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_force_sender_gpu.h"
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
#include "gromacs/utility/gmxmpi.h"
PmeCoordinateReceiverGpu::Impl::~Impl() = default;
-void PmeCoordinateReceiverGpu::Impl::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x)
+void PmeCoordinateReceiverGpu::Impl::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x)
{
int ind_start = 0;
PmeCoordinateReceiverGpu::~PmeCoordinateReceiverGpu() = default;
-void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x)
+void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x)
{
impl_->sendCoordinateBufferAddressToPpRanks(d_x);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#define GMX_PMECOORDINATERECEIVERGPU_IMPL_H
#include "gromacs/ewald/pme_coordinate_receiver_gpu.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/utility/arrayref.h"
+
+class GpuEventSynchronizer;
namespace gmx
{
* send coordinates buffer address to PP rank
* \param[in] d_x coordinates buffer in GPU memory
*/
- void sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x);
+ void sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x);
/*! \brief
* launch receive of coordinate data from PP rank
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/stringutil.h"
#include "pme_gpu_internal.h"
+#include "pme_gpu_settings.h"
+#include "pme_gpu_timings.h"
+#include "pme_gpu_types_host.h"
#include "pme_grid.h"
#include "pme_internal.h"
#include "pme_solve.h"
+/*! \brief
+ * Finds out if PME is currently running on GPU.
+ *
+ * \todo The GPU module should not be constructed (or at least called)
+ * when it is not active, so there should be no need to check whether
+ * it is active. An assertion that this is true makes sense.
+ *
+ * \param[in] pme The PME structure.
+ * \returns True if PME runs on GPU currently, false otherwise.
+ */
+static inline bool pme_gpu_active(const gmx_pme_t* pme)
+{
+ return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
+}
+
void pme_gpu_reset_timings(const gmx_pme_t* pme)
{
if (pme_gpu_active(pme))
gmx_wallcycle_t wcycle)
{
GMX_ASSERT(gridIndex == 0, "Only single grid supported");
- if (pme_gpu_performs_FFT(pme->gpu))
+ if (pme_gpu_settings(pme->gpu).performGPUFFT)
{
wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
- if (!pme_gpu_performs_solve(pmeGpu))
+ if (!pme_gpu_settings(pmeGpu).performGPUSolve)
{
// TODO remove code duplication and add test coverage
matrix scaledBox;
void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle)
{
- PmeGpu* pmeGpu = pme->gpu;
- const bool computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
- const bool performBackFFT = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
+ PmeGpu* pmeGpu = pme->gpu;
+ const auto& settings = pmeGpu->settings;
+ const bool computeEnergyAndVirial = (settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
+ const bool performBackFFT = (settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
const unsigned int gridIndex = 0;
t_complex* cfftgrid = pme->cfftgrid[gridIndex];
- if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
+ if (settings.currentFlags & GMX_PME_SPREAD)
{
- if (!pme_gpu_performs_FFT(pmeGpu))
+ if (!settings.performGPUFFT)
{
wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD);
pme_gpu_sync_spread_grid(pme->gpu);
try
{
- if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE)
+ if (settings.currentFlags & GMX_PME_SOLVE)
{
/* do R2C 3D-FFT */
parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle);
/* solve in k-space for our local cells */
- if (pme_gpu_performs_solve(pmeGpu))
+ if (settings.performGPUSolve)
{
- const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ;
+ // TODO grid ordering should be set up at pme init time.
+ const auto gridOrdering =
+ settings.useDecomposition ? GridOrdering::YZX : GridOrdering::XYZ;
wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial);
{
GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
- if (!pme_gpu_performs_gather(pme->gpu))
+ if (!pme_gpu_settings(pme->gpu).performGPUGather)
{
return;
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
complexGridSizePadded[i] = kernelParamsPtr->grid.complexGridSizePadded[i];
}
- GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
+ GMX_RELEASE_ASSERT(!pme_gpu_settings(pmeGpu).useDecomposition,
+ "FFT decomposition not implemented");
const int complexGridSizePaddedTotal =
complexGridSizePadded[XX] * complexGridSizePadded[YY] * complexGridSizePadded[ZZ];
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
// Extracting all the data from PME GPU
std::array<size_t, DIM> realGridSize, realGridSizePadded, complexGridSizePadded;
- GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
+ GMX_RELEASE_ASSERT(!pme_gpu_settings(pmeGpu).useDecomposition,
+ "FFT decomposition not implemented");
PmeGpuKernelParamsBase* kernelParamsPtr = pmeGpu->kernelParams.get();
for (int i = 0; i < DIM; i++)
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
pmeGpu->settings.transferKind, nullptr);
}
-void pme_gpu_realloc_coordinates(const PmeGpu* pmeGpu)
+void pme_gpu_realloc_coordinates(PmeGpu* pmeGpu)
{
const size_t newCoordinatesSize = pmeGpu->nAtomsAlloc * DIM;
GMX_ASSERT(newCoordinatesSize > 0, "Bad number of atoms in PME GPU");
freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coordinates);
}
-void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu* pmeGpu, const float* h_coefficients)
+void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients)
{
GMX_ASSERT(h_coefficients, "Bad host-side charge buffer in PME GPU");
const size_t newCoefficientsSize = pmeGpu->nAtomsAlloc;
void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu)
{
- if (pme_gpu_performs_FFT(pmeGpu))
+ if (pme_gpu_settings(pmeGpu).performGPUFFT)
{
pmeGpu->archSpecific->fftSetup.resize(0);
for (int i = 0; i < pmeGpu->common->ngrids; i++)
// on the else branch
if (haveComputedEnergyAndVirial)
{
- if (pme_gpu_performs_solve(pmeGpu))
+ if (pme_gpu_settings(pmeGpu).performGPUSolve)
{
pme_gpu_getEnergyAndVirial(pme, &output);
}
kernelParamsPtr->grid.complexGridSizePadded[i] = kernelParamsPtr->grid.realGridSize[i];
}
/* FFT: n real elements correspond to (n / 2 + 1) complex elements in minor dimension */
- if (!pme_gpu_performs_FFT(pmeGpu))
+ if (!pme_gpu_settings(pmeGpu).performGPUFFT)
{
// This allows for GPU spreading grid and CPU fftgrid to have the same layout, so that we can copy the data directly
kernelParamsPtr->grid.realGridSizePadded[ZZ] =
* \param[in,out] gpuInfo The GPU information structure.
* \param[in] pmeGpuProgram The handle to the program/kernel data created outside (e.g. in unit tests/runner)
*/
-static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProgramHandle pmeGpuProgram)
+static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
{
pme->gpu = new PmeGpu();
PmeGpu* pmeGpu = pme->gpu;
/* These settings are set here for the whole run; dynamic ones are set in pme_gpu_reinit() */
/* A convenience variable. */
- pmeGpu->settings.useDecomposition = (pme->nnodes == 1);
+ pmeGpu->settings.useDecomposition = (pme->nnodes != 1);
/* TODO: CPU gather with GPU spread is broken due to different theta/dtheta layout. */
pmeGpu->settings.performGPUGather = true;
// By default GPU-side reduction is off (explicitly set here for tests, otherwise reset per-step)
}
}
-void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProgramHandle pmeGpuProgram)
+void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
{
- if (!pme_gpu_active(pme))
+ GMX_ASSERT(pme != nullptr, "Need valid PME object");
+ if (pme->runMode == PmeRunMode::CPU)
{
+ GMX_ASSERT(pme->gpu == nullptr, "Should not have PME GPU object");
return;
}
}
/* GPU FFT will only get used for a single rank.*/
pme->gpu->settings.performGPUFFT =
- (pme->gpu->common->runMode == PmeRunMode::GPU) && !pme_gpu_uses_dd(pme->gpu);
+ (pme->gpu->common->runMode == PmeRunMode::GPU) && !pme->gpu->settings.useDecomposition;
pme->gpu->settings.performGPUSolve = (pme->gpu->common->runMode == PmeRunMode::GPU);
/* Reinit active timers */
}
}
+/*! \internal \brief
+ * Returns raw timing event from the corresponding GpuRegionTimer (if timings are enabled).
+ * In CUDA, the result can be a nullptr stub, per the GpuRegionTimer implementation.
+ *
+ * \param[in] pmeGpu The PME GPU data structure.
+ * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
+ */
+static CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId)
+{
+ CommandEvent* timingEvent = nullptr;
+ if (pme_gpu_timings_enabled(pmeGpu))
+ {
+ GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
+ "Wrong PME GPU timing event index");
+ timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent();
+ }
+ return timingEvent;
+}
+
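+/* Typical use of the fetched event (an illustrative sketch based on the
+ * spread/gather launch code elsewhere in this patch): open the timing region,
+ * hand the raw event to the kernel launcher, then close the region.
+ *
+ *   pme_gpu_start_timing(pmeGpu, timingId);
+ *   CommandEvent* timingEvent = pme_gpu_fetch_timing_event(pmeGpu, timingId);
+ *   launchGpuKernel(kernelPtr, config, timingEvent, "PME spline/spread", kernelArgs);
+ *   pme_gpu_stop_timing(pmeGpu, timingId);
+ */
+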
void pme_gpu_3dfft(const PmeGpu* pmeGpu, gmx_fft_direction dir, int grid_index)
{
int timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? gtPME_FFT_R2C : gtPME_FFT_C2R;
// The event is only needed with CUDA on PP+PME ranks: separate PME ranks, unit tests,
// and OpenCL all use a single stream (hence xReadyOnDevice == nullptr).
GMX_ASSERT(xReadyOnDevice != nullptr || (GMX_GPU != GMX_GPU_CUDA)
- || pmeGpu->common->isRankPmeOnly || pme_gpu_is_testing(pmeGpu),
+ || pmeGpu->common->isRankPmeOnly || pme_gpu_settings(pmeGpu).copyAllOutputs,
"Need a valid coordinate synchronizer on PP+PME ranks with CUDA.");
if (xReadyOnDevice)
{
launchGpuKernel(kernelPtr, config, timingEvent, "PME spline/spread", kernelArgs);
pme_gpu_stop_timing(pmeGpu, timingId);
- const bool copyBackGrid =
- spreadCharges && (pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_FFT(pmeGpu));
+ const auto& settings = pmeGpu->settings;
+ const bool copyBackGrid = spreadCharges && (!settings.performGPUFFT || settings.copyAllOutputs);
if (copyBackGrid)
{
pme_gpu_copy_output_spread_grid(pmeGpu, h_grid);
}
const bool copyBackAtomData =
- computeSplines && (pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_gather(pmeGpu));
+ computeSplines && (!settings.performGPUGather || settings.copyAllOutputs);
if (copyBackAtomData)
{
pme_gpu_copy_output_spread_atom_data(pmeGpu);
void pme_gpu_solve(const PmeGpu* pmeGpu, t_complex* h_grid, GridOrdering gridOrdering, bool computeEnergyAndVirial)
{
- const bool copyInputAndOutputGrid = pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_FFT(pmeGpu);
+ const auto& settings = pmeGpu->settings;
+ const bool copyInputAndOutputGrid = !settings.performGPUFFT || settings.copyAllOutputs;
auto* kernelParamsPtr = pmeGpu->kernelParams.get();
pme_gpu_copy_input_forces(pmeGpu);
}
- if (!pme_gpu_performs_FFT(pmeGpu) || pme_gpu_is_testing(pmeGpu))
+ const auto& settings = pmeGpu->settings;
+ if (!settings.performGPUFFT || settings.copyAllOutputs)
{
pme_gpu_copy_input_gather_grid(pmeGpu, const_cast<float*>(h_grid));
}
- if (pme_gpu_is_testing(pmeGpu))
+ if (settings.copyAllOutputs)
{
pme_gpu_copy_input_gather_atom_data(pmeGpu);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_EWALD_PME_GPU_INTERNAL_H
#define GMX_EWALD_PME_GPU_INTERNAL_H
-#include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
+#include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
#include "gromacs/utility/arrayref.h"
-#include "pme_gpu_types_host.h" // for the inline functions accessing PmeGpu members
+#include "pme_gpu_types_host.h"
+#include "pme_output.h"
+class GpuEventSynchronizer;
+struct gmx_device_info_t;
struct gmx_hw_info_t;
struct gmx_gpu_opt_t;
struct gmx_pme_t; // only used in pme_gpu_reinit
-struct gmx_wallclock_gpu_pme_t;
+struct gmx_wallcycle;
class PmeAtomComm;
+enum class PmeForceOutputHandling;
+struct PmeGpu;
+class PmeGpuProgram;
+struct PmeGpuStaging;
+struct PmeGpuSettings;
struct t_complex;
namespace gmx
*
* Needs to be called on every DD step/in the beginning.
*/
-void pme_gpu_realloc_coordinates(const PmeGpu* pmeGpu);
+void pme_gpu_realloc_coordinates(PmeGpu* pmeGpu);
/*! \libinternal \brief
* Frees the coordinates on the GPU.
* Does not need to be done for every PME computation, only whenever the local charges change.
* (So, in the beginning of the run, or on DD step).
*/
-void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu* pmeGpu, const float* h_coefficients);
+void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients);
/*! \libinternal \brief
* Frees the charges/coefficients on the GPU.
*/
void pme_gpu_destroy_3dfft(const PmeGpu* pmeGpu);
-/* Several GPU event-based timing functions that live in pme_gpu_timings.cpp */
-
-/*! \libinternal \brief
- * Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation.
- *
- * \param[in] pmeGpu The PME GPU structure.
- */
-void pme_gpu_update_timings(const PmeGpu* pmeGpu);
-
-/*! \libinternal \brief
- * Updates the internal list of active PME GPU stages (if timings are enabled).
- *
- * \param[in] pmeGpu The PME GPU data structure.
- */
-void pme_gpu_reinit_timings(const PmeGpu* pmeGpu);
-
-/*! \brief
- * Resets the PME GPU timings. To be called at the reset MD step.
- *
- * \param[in] pmeGpu The PME GPU structure.
- */
-void pme_gpu_reset_timings(const PmeGpu* pmeGpu);
-
-/*! \libinternal \brief
- * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end.
- *
- * \param[in] pmeGpu The PME GPU structure.
- * \param[in] timings The gmx_wallclock_gpu_pme_t structure.
- */
-void pme_gpu_get_timings(const PmeGpu* pmeGpu, gmx_wallclock_gpu_pme_t* timings);
-
/* The PME stages themselves */
/*! \libinternal \brief
GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(
const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
-/* The inlined convenience PME GPU status getters */
-
/*! \libinternal \brief
- * Tells if PME runs on multiple GPUs with the decomposition.
+ * Returns the PME GPU settings
*
* \param[in] pmeGpu The PME GPU structure.
- * \returns True if PME runs on multiple GPUs, false otherwise.
+ * \returns The settings for PME on GPU
*/
-inline bool pme_gpu_uses_dd(const PmeGpu* pmeGpu)
+inline const PmeGpuSettings& pme_gpu_settings(const PmeGpu* pmeGpu)
{
- return !pmeGpu->settings.useDecomposition;
+ return pmeGpu->settings;
}
/*! \libinternal \brief
- * Tells if PME performs the gathering stage on GPU.
+ * Returns the PME GPU staging object
*
* \param[in] pmeGpu The PME GPU structure.
- * \returns True if the gathering is performed on GPU, false otherwise.
+ * \returns The staging object for PME on GPU
*/
-inline bool pme_gpu_performs_gather(const PmeGpu* pmeGpu)
+inline const PmeGpuStaging& pme_gpu_staging(const PmeGpu* pmeGpu)
{
- return pmeGpu->settings.performGPUGather;
+ return pmeGpu->staging;
}
/*! \libinternal \brief
- * Tells if PME performs the FFT stages on GPU.
+ * Sets whether the PME module is running in testing mode
*
* \param[in] pmeGpu The PME GPU structure.
- * \returns True if FFT is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_FFT(const PmeGpu* pmeGpu)
-{
- return pmeGpu->settings.performGPUFFT;
-}
-
-/*! \libinternal \brief
- * Tells if PME performs the grid (un-)wrapping on GPU.
- *
- * \param[in] pmeGpu The PME GPU structure.
- * \returns True if (un-)wrapping is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_wrapping(const PmeGpu* pmeGpu)
-{
- return pmeGpu->settings.useDecomposition;
-}
-
-/*! \libinternal \brief
- * Tells if PME performs the grid solving on GPU.
- *
- * \param[in] pmeGpu The PME GPU structure.
- * \returns True if solving is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_solve(const PmeGpu* pmeGpu)
-{
- return pmeGpu->settings.performGPUSolve;
-}
-
-/*! \libinternal \brief
- * Enables or disables the testing mode.
- * Testing mode only implies copying all the outputs, even the intermediate ones, to the host,
- * and also makes the copies synchronous.
- *
- * \param[in] pmeGpu The PME GPU structure.
- * \param[in] testing Should the testing mode be enabled, or disabled.
+ * \param[in] testing Whether testing mode is on.
*/
inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
{
}
}
-/*! \libinternal \brief
- * Tells if PME is in the testing mode.
- *
- * \param[in] pmeGpu The PME GPU structure.
- * \returns true if testing mode is enabled, false otherwise.
- */
-inline bool pme_gpu_is_testing(const PmeGpu* pmeGpu)
-{
- return pmeGpu->settings.copyAllOutputs;
-}
-
/* A block of C++ functions that live in pme_gpu_internal.cpp */
/*! \libinternal \brief
*/
GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
const gmx_device_info_t* GPU_FUNC_ARGUMENT(gpuInfo),
- PmeGpuProgramHandle GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
+ const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
/*! \libinternal \brief
* Destroys the PME GPU data at the end of the run.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*/
using PmeGpuProgramStorage = std::unique_ptr<PmeGpuProgram>;
-/*! \brief This is a handle for passing references to PME GPU program data.
- * TODO: it should be a const reference, but for that the PmeGpu types need to be C++
- */
-using PmeGpuProgramHandle = const PmeGpuProgram*;
-
/*! \brief
* Factory function used to build persistent PME GPU program for the device at once.
*/
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines the PME GPU settings data structures.
+ * \todo Some renaming/refactoring, which does not impair the performance:
+ * -- PmeGpuSettings -> PmeGpuTasks
+ *
+ * \author Aleksei Iupinov <a.yupinov@gmail.com>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_GPU_SETTINGS_H
+#define GMX_EWALD_PME_GPU_SETTINGS_H
+
+#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
+
+/*! \internal \brief
+ * The PME GPU settings structure, included in the main PME GPU structure by value.
+ */
+struct PmeGpuSettings
+{
+ /* Permanent settings set on initialization */
+ /*! \brief A boolean which tells if the solving is performed on GPU. Currently always true */
+ bool performGPUSolve;
+ /*! \brief A boolean which tells if the gathering is performed on GPU. Currently always true */
+ bool performGPUGather;
+ /*! \brief A boolean which tells if the FFT is performed on GPU. Currently true for a single MPI rank. */
+ bool performGPUFFT;
+ /*! \brief A convenience boolean which tells if PME decomposition is used. */
+ bool useDecomposition;
+ /*! \brief True if PME forces are reduced on-GPU, false if reduction is done on the CPU;
+ * in the former case transfer does not need to happen.
+ *
+ * Note that this flag may change per-step.
+ */
+ bool useGpuForceReduction;
+
+ /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the
+ * host. Only intended to be used by the test framework.
+ */
+ bool copyAllOutputs;
+ /*! \brief An enum which tells whether most PME GPU D2H/H2D data transfers should be synchronous. */
+ GpuApiCallBehavior transferKind;
+ /*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
+ int currentFlags;
+ /*! \brief
+ * Currently only supported by CUDA.
+ * Controls if we should use order (i.e. 4) threads per atom for the GPU
+ * or order*order (i.e. 16) threads per atom.
+ */
+ bool useOrderThreadsPerAtom;
+ /*! \brief
+ * Currently only supported by CUDA.
+ * Controls if we should recalculate the splines in the gather or
+ * save the values in the spread and reload in the gather.
+ */
+ bool recalculateSplines;
+};
+
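+/* How these fields are typically filled (a sketch condensed from
+ * pme_gpu_init() and pme_gpu_reinit() in this patch; the authoritative logic
+ * lives there):
+ *
+ *   settings.useDecomposition = (pme->nnodes != 1);
+ *   settings.performGPUFFT    = (runMode == PmeRunMode::GPU) && !settings.useDecomposition;
+ *   settings.performGPUSolve  = (runMode == PmeRunMode::GPU);
+ *   settings.performGPUGather = true; // CPU gather with GPU spread uses a different spline layout
+ */
+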
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines the host-side PME GPU data structures.
+ * \todo Some renaming/refactoring, which does not impair the performance:
+ * -- bringing the function names up to guidelines
+ * -- PmeGpuSettings -> PmeGpuTasks
+ * -- refining GPU notation application (#2053)
+ * -- renaming coefficients to charges (?)
+ *
+ * \author Aleksei Iupinov <a.yupinov@gmail.com>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_GPU_STAGING_H
+#define GMX_EWALD_PME_GPU_STAGING_H
+
+#include <vector>
+
+#include "gromacs/gpu_utils/hostallocator.h"
+#include "gromacs/math/vectypes.h"
+
+/*! \internal \brief
+ * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value.
+ * Buffers are managed by the PME GPU module.
+ */
+struct PmeGpuStaging
+{
+ //! Host-side force buffer
+ gmx::PaddedHostVector<gmx::RVec> h_forces;
+
+ /*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */
+ float* h_virialAndEnergy;
+ /*! \brief B-spline values intermediate host-side buffer. */
+ float* h_splineModuli;
+
+ /*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */
+ float* h_theta;
+ /*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */
+ float* h_dtheta;
+ /*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */
+ int* h_gridlineIndices;
+};
+
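+/* Ownership sketch (illustrative; the resize call below is a hypothetical
+ * call site, not a quote from this patch): h_forces is a reusable pinned-host
+ * vector that the PME GPU module resizes on (re)initialization, while the raw
+ * pointers are pinned-host allocations managed by pme_gpu_realloc_* and
+ * pme_gpu_free_* pairs in pme_gpu_internal.cpp.
+ *
+ *   pmeGpu->staging.h_forces.resizeWithPadding(pmeGpu->nAtomsAlloc);
+ */
+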
+#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "pme_gpu_types_host.h"
#include "pme_gpu_types_host_impl.h"
-/*! \brief
- * Tells if CUDA-based performance tracking is enabled for PME.
- *
- * \param[in] pmeGpu The PME GPU data structure.
- * \returns True if timings are enabled, false otherwise.
- */
-inline bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu)
+bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu)
{
return pmeGpu->archSpecific->useTiming;
}
}
}
-CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId)
-{
- CommandEvent* timingEvent = nullptr;
- if (pme_gpu_timings_enabled(pmeGpu))
- {
- GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
- "Wrong PME GPU timing event index");
- timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent();
- }
- return timingEvent;
-}
-
void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId)
{
if (pme_gpu_timings_enabled(pmeGpu))
{
pmeGpu->archSpecific->activeTimers.clear();
pmeGpu->archSpecific->activeTimers.insert(gtPME_SPLINEANDSPREAD);
+ const auto& settings = pme_gpu_settings(pmeGpu);
// TODO: no separate gtPME_SPLINE and gtPME_SPREAD as they are not used currently
- if (pme_gpu_performs_FFT(pmeGpu))
+ if (settings.performGPUFFT)
{
pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_C2R);
pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_R2C);
}
- if (pme_gpu_performs_solve(pmeGpu))
+ if (settings.performGPUSolve)
{
pmeGpu->archSpecific->activeTimers.insert(gtPME_SOLVE);
}
- if (pme_gpu_performs_gather(pmeGpu))
+ if (settings.performGPUGather)
{
pmeGpu->archSpecific->activeTimers.insert(gtPME_GATHER);
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_EWALD_PME_GPU_TIMINGS_H
#define GMX_EWALD_PME_GPU_TIMINGS_H
-#include "config.h"
-
-#if GMX_GPU == GMX_GPU_CUDA
-# include "gromacs/gpu_utils/gputraits.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
-# include "gromacs/gpu_utils/gputraits_ocl.h"
-#endif
+#include <cstddef>
+struct gmx_wallclock_gpu_pme_t;
struct PmeGpu;
/*! \libinternal \brief
void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
/*! \libinternal \brief
- * Returns raw timing event from the corresponding GpuRegionTimer (if timings are enabled).
- * In CUDA result can be nullptr stub, per GpuRegionTimer implementation.
+ * Stops timing the given PME GPU stage during a single computation (if timings are enabled).
*
* \param[in] pmeGpu The PME GPU data structure.
* \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
*/
-CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId);
+void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
+
+/*! \brief
+ * Tells if CUDA-based performance tracking is enabled for PME.
+ *
+ * \param[in] pmeGpu The PME GPU data structure.
+ * \returns True if timings are enabled, false otherwise.
+ */
+bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu);
/*! \libinternal \brief
- * Stops timing the certain PME GPU stage during a single computation (if timings are enabled).
+ * Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation.
+ *
+ * \param[in] pmeGpu The PME GPU structure.
+ */
+void pme_gpu_update_timings(const PmeGpu* pmeGpu);
+
+/*! \libinternal \brief
+ * Updates the internal list of active PME GPU stages (if timings are enabled).
*
* \param[in] pmeGpu The PME GPU data structure.
- * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
*/
-void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
+void pme_gpu_reinit_timings(const PmeGpu* pmeGpu);
+
+/*! \brief
+ * Resets the PME GPU timings. To be called at the reset MD step.
+ *
+ * \param[in] pmeGpu The PME GPU structure.
+ */
+void pme_gpu_reset_timings(const PmeGpu* pmeGpu);
+
+/*! \libinternal \brief
+ * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end.
+ *
+ * \param[in] pmeGpu The PME GPU structure.
+ * \param[in] timings The gmx_wallclock_gpu_pme_t structure.
+ */
+void pme_gpu_get_timings(const PmeGpu* pmeGpu, gmx_wallclock_gpu_pme_t* timings);
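+
+/* Timing lifecycle sketch (condensed from the declarations above; an
+ * illustrative ordering, not a verbatim call site):
+ *
+ *   pme_gpu_reinit_timings(pmeGpu);        // at (re)initialization
+ *   pme_gpu_start_timing(pmeGpu, gtPME_SPLINEANDSPREAD);
+ *   // ... launch the stage's kernel ...
+ *   pme_gpu_stop_timing(pmeGpu, gtPME_SPLINEANDSPREAD);
+ *   pme_gpu_update_timings(pmeGpu);        // at the end of every computation
+ *   pme_gpu_get_timings(pmeGpu, timings);  // once, at the end of the run
+ */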
#endif
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
/*! \brief Electrostatics coefficient = ONE_4PI_EPS0 / pme->epsilon_r */
float elFactor;
- /*! \brief Virial and energy GPU array. Size is PME_GPU_ENERGY_AND_VIRIAL_COUNT (7) floats.
+ /*! \brief Virial and energy GPU array. Size is c_virialAndEnergyCount (7) floats.
* The element order is virxx, viryy, virzz, virxy, virxz, viryz, energy. */
HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_virialAndEnergy;
};
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_gpu_program.h"
#include "gromacs/gpu_utils/clfftinitializer.h"
-#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/math/vectypes.h"
+#include "pme_gpu_settings.h"
+#include "pme_gpu_staging.h"
+
+namespace gmx
+{
+class PmeDeviceBuffers;
+} // namespace gmx
+
#if GMX_GPU != GMX_GPU_NONE
struct PmeGpuSpecific;
#else
struct gmx_device_info_t;
-/*! \internal \brief
- * The PME GPU settings structure, included in the main PME GPU structure by value.
- */
-struct PmeGpuSettings
-{
- /* Permanent settings set on initialization */
- /*! \brief A boolean which tells if the solving is performed on GPU. Currently always true */
- bool performGPUSolve;
- /*! \brief A boolean which tells if the gathering is performed on GPU. Currently always true */
- bool performGPUGather;
- /*! \brief A boolean which tells if the FFT is performed on GPU. Currently true for a single MPI rank. */
- bool performGPUFFT;
- /*! \brief A convenience boolean which tells if PME decomposition is used. */
- bool useDecomposition;
- /*! \brief True if PME forces are reduced on-GPU, false if reduction is done on the CPU;
- * in the former case transfer does not need to happen.
- *
- * Note that this flag may change per-step.
- */
- bool useGpuForceReduction;
-
- /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the
- * host. Only intended to be used by the test framework.
- */
- bool copyAllOutputs;
- /*! \brief An enum which tells whether most PME GPU D2H/H2D data transfers should be synchronous. */
- GpuApiCallBehavior transferKind;
- /*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
- int currentFlags;
- /*! \brief
- * Currently only supported by CUDA.
- * Controls if we should use order (i.e. 4) threads per atom for the GPU
- * or order*order (i.e. 16) threads per atom.
- */
- bool useOrderThreadsPerAtom;
- /*! \brief
- * Currently only supported by CUDA.
- * Controls if we should recalculate the splines in the gather or
- * save the values in the spread and reload in the gather.
- */
- bool recalculateSplines;
-};
-
-// TODO There's little value in computing the Coulomb and LJ virial
-// separately, so we should simplify that.
-// TODO The matrices might be best as a view, but not currently
-// possible. Use mdspan?
-struct PmeOutput
-{
- gmx::ArrayRef<gmx::RVec> forces_; //!< Host staging area for PME forces
- bool haveForceOutput_ =
- false; //!< True if forces have been staged other false (when forces are reduced on the GPU).
- real coulombEnergy_ = 0; //!< Host staging area for PME coulomb energy
- matrix coulombVirial_ = { { 0 } }; //!< Host staging area for PME coulomb virial contributions
- real lennardJonesEnergy_ = 0; //!< Host staging area for PME LJ energy
- matrix lennardJonesVirial_ = { { 0 } }; //!< Host staging area for PME LJ virial contributions
-};
-
-/*! \internal \brief
- * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value.
- * Buffers are managed by the PME GPU module.
- */
-struct PmeGpuStaging
-{
- //! Host-side force buffer
- gmx::PaddedHostVector<gmx::RVec> h_forces;
-
- /*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */
- float* h_virialAndEnergy;
- /*! \brief B-spline values intermediate host-side buffer. */
- float* h_splineModuli;
-
- /*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */
- float* h_theta;
- /*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */
- float* h_dtheta;
- /*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */
- int* h_gridlineIndices;
-};
-
/*! \internal \brief
* The PME GPU structure for all the data copied directly from the CPU PME structure.
* The copying is done when the CPU PME structure is already (re-)initialized
std::shared_ptr<PmeShared> common; // TODO: make the CPU structure use the same type
//! A handle to the program created by buildPmeGpuProgram()
- PmeGpuProgramHandle programHandle_;
+ const PmeGpuProgram* programHandle_;
//! Handle that ensures the clFFT library has been initialized once per process.
std::unique_ptr<gmx::ClfftInitializer> initializedClfftLibrary_;
#include "config.h"
+#include <vector>
+
#include "gromacs/math/gmxcomplex.h"
+#include "gromacs/utility/alignedallocator.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/defaultinitializationallocator.h"
#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-#include "pme_gpu_types_host.h"
+#include "spline_vectors.h"
//! A repeat of typedef from parallel_3dfft.h
typedef struct gmx_parallel_3dfft* gmx_parallel_3dfft_t;
struct t_inputrec;
struct PmeGpu;
+enum class PmeRunMode;
+
//@{
//! Grid indices for A state for charge and Lennard-Jones C6
#define PME_GRID_QA 0
*/
#define PME_ORDER_MAX 12
-/*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from
- * pme_src. This is only called when the PME cut-off/grid size changes.
- */
-void gmx_pme_reinit(struct gmx_pme_t** pmedata,
- const t_commrec* cr,
- struct gmx_pme_t* pme_src,
- const t_inputrec* ir,
- const ivec grid_size,
- real ewaldcoeff_q,
- real ewaldcoeff_lj);
-
/* Temporary suppression until these structs become opaque and don't live in
* a header that is included by other headers. Also, until then I have no
FastVector<int> i;
};
-/*! \brief Helper typedef for spline vectors */
-typedef real* splinevec[DIM];
-
/*! \internal
* \brief Coefficients for theta or dtheta
*/
//! @endcond
-/*! \brief
- * Finds out if PME is currently running on GPU.
- * TODO: should this be removed eventually?
- *
- * \param[in] pme The PME structure.
- * \returns True if PME runs on GPU currently, false otherwise.
- */
-inline bool pme_gpu_active(const gmx_pme_t* pme)
-{
- return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
-}
-
-/*! \brief Tell our PME-only node to switch to a new grid size */
-void gmx_pme_send_switchgrid(const t_commrec* cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj);
-
#endif
#include "gromacs/utility/strconvert.h"
#include "pme_internal.h"
+#include "pme_pp.h"
/*! \brief Parameters and settings for one PP-PME setup */
struct pme_setup_t
#include "gmxpre.h"
+#include "pme_only.h"
+
#include "config.h"
#include <cassert>
#include "gromacs/utility/smalloc.h"
#include "pme_gpu_internal.h"
-#include "pme_internal.h"
+#include "pme_output.h"
#include "pme_pp_communication.h"
/*! \brief environment variable to enable GPU P2P communication */
for (auto& pme : *pmedata)
{
GMX_ASSERT(pme, "Bad PME tuning list element pointer");
- if (pme->nkx == grid_size[XX] && pme->nky == grid_size[YY] && pme->nkz == grid_size[ZZ])
+ if (gmx_pme_grid_matches(*pme, grid_size))
{
/* Here we have found an existing PME data structure that suits us.
* However, in the GPU case, we have to reinitialize it - there's only one GPU structure.
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * running on an MPI rank doing only PME long-ranged work.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_ONLY_H
+#define GMX_EWALD_PME_ONLY_H
+
+#include <string>
+
+#include "gromacs/timing/walltime_accounting.h"
+
+struct t_commrec;
+struct t_inputrec;
+struct t_nrnb;
+struct gmx_pme_t;
+struct gmx_wallcycle;
+
+enum class PmeRunMode;
+
+/*! \brief Called on the nodes that do PME exclusively */
+int gmx_pmeonly(gmx_pme_t* pme,
+ const t_commrec* cr,
+ t_nrnb* mynrnb,
+ gmx_wallcycle* wcycle,
+ gmx_walltime_accounting_t walltime_accounting,
+ t_inputrec* ir,
+ PmeRunMode runMode);
+
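+/* Illustrative call-site sketch (assuming the usual mdrun duty split; the
+ * exact call site lives in the runner, not in this module): a rank that
+ * carries only the PME duty enters this loop for the whole run.
+ *
+ *   if (thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP))
+ *   {
+ *       gmx_pmeonly(pme, cr, nrnb, wcycle, walltime_accounting, inputrec, runMode);
+ *   }
+ */
+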
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines a struct useful for transferring the PME output
+ * values.
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_OUTPUT_H
+#define GMX_EWALD_PME_OUTPUT_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/arrayref.h"
+
+// TODO There's little value in computing the Coulomb and LJ virial
+// separately, so we should simplify that.
+// TODO The matrices might be best as a view, but not currently
+// possible. Use mdspan?
+struct PmeOutput
+{
+ //! Host staging area for PME forces
+ gmx::ArrayRef<gmx::RVec> forces_;
+ //! True if forces have been staged, false otherwise (when forces are reduced on the GPU).
+ bool haveForceOutput_ = false;
+ //! Host staging area for the PME Coulomb energy
+ real coulombEnergy_ = 0;
+ //! Host staging area for the PME Coulomb virial contributions
+ matrix coulombVirial_ = { { 0 } };
+ //! Host staging area for the PME LJ energy
+ real lennardJonesEnergy_ = 0;
+ //! Host staging area for the PME LJ virial contributions
+ matrix lennardJonesVirial_ = { { 0 } };
+};
+
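+/* Semantics sketch: a default-constructed PmeOutput carries zeroed energies
+ * and virials and no staged forces; producers set haveForceOutput_ only when
+ * forces were actually copied into the host staging area.
+ *
+ *   PmeOutput output;
+ *   GMX_ASSERT(!output.haveForceOutput_, "No forces staged by default");
+ */
+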
+#endif
#include "gmxpre.h"
+#include "pme_pp.h"
+
#include "config.h"
#include <cstdio>
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"
-#include "pme_internal.h"
#include "pme_pp_communication.h"
/*! \brief Block to wait for communication to PME ranks to complete
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * managing the PP side of communication with PME-only ranks.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_PP_H
+#define GMX_EWALD_PME_PP_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/basedefinitions.h"
+
+struct gmx_wallcycle;
+struct interaction_const_t;
+struct t_commrec;
+struct t_forcerec;
+
+class GpuEventSynchronizer;
+
+namespace gmx
+{
+class ForceWithVirial;
+class PmePpCommGpu;
+} // namespace gmx
+
+/*! \brief Send the charges and maxshift to our PME-only node. */
+void gmx_pme_send_parameters(const t_commrec* cr,
+ const interaction_const_t* ic,
+ gmx_bool bFreeEnergy_q,
+ gmx_bool bFreeEnergy_lj,
+ real* chargeA,
+ real* chargeB,
+ real* sqrt_c6A,
+ real* sqrt_c6B,
+ real* sigmaA,
+ real* sigmaB,
+ int maxshift_x,
+ int maxshift_y);
+
+/*! \brief Send the coordinates to our PME-only node and request a PME calculation */
+void gmx_pme_send_coordinates(t_forcerec* fr,
+ const t_commrec* cr,
+ const matrix box,
+ const rvec* x,
+ real lambda_q,
+ real lambda_lj,
+ gmx_bool bEnerVir,
+ int64_t step,
+ bool useGpuPmePpComms,
+ bool reinitGpuPmePpComms,
+ bool sendCoordinatesFromGpu,
+ GpuEventSynchronizer* coordinatesReadyOnDeviceEvent,
+ gmx_wallcycle* wcycle);
+
+/*! \brief Tell our PME-only node to finish */
+void gmx_pme_send_finish(const t_commrec* cr);
+
+/*! \brief Tell our PME-only node to reset all cycle and flop counters */
+void gmx_pme_send_resetcounters(const t_commrec* cr, int64_t step);
+
+/*! \brief PP nodes receive the long-range forces from the PME nodes */
+void gmx_pme_receive_f(gmx::PmePpCommGpu* pmePpCommGpu,
+ const t_commrec* cr,
+ gmx::ForceWithVirial* forceWithVirial,
+ real* energy_q,
+ real* energy_lj,
+ real* dvdlambda_q,
+ real* dvdlambda_lj,
+ bool useGpuPmePpComms,
+ bool receivePmeForceToGpu,
+ float* pme_cycles);
+
+/*! \brief Tell our PME-only node to switch to a new grid size */
+void gmx_pme_send_switchgrid(const t_commrec* cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj);
+
+#endif
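Taken together, the declarations above define the per-step protocol a PP rank follows: send coordinates, overlap local work, then collect the long-range forces. A rough sketch, assuming cr, fr, box, x, the lambdas, step, wcycle, and the output variables already exist in the caller, with all GPU communication paths disabled:

// Sketch of one step on a PP rank (all names besides the gmx_pme_* calls are
// assumed to be set up by the caller; GPU PME-PP communication disabled).
gmx_pme_send_coordinates(fr, cr, box, x, lambda_q, lambda_lj,
                         /* bEnerVir */ TRUE, step,
                         /* useGpuPmePpComms */ false,
                         /* reinitGpuPmePpComms */ false,
                         /* sendCoordinatesFromGpu */ false,
                         /* coordinatesReadyOnDeviceEvent */ nullptr, wcycle);

// ... compute short-range forces locally while the PME-only rank works ...

float pmeCycles = 0;
gmx_pme_receive_f(/* pmePpCommGpu */ nullptr, cr, &forceWithVirial,
                  &energy_q, &energy_lj, &dvdlambda_q, &dvdlambda_lj,
                  /* useGpuPmePpComms */ false,
                  /* receivePmeForceToGpu */ false, &pmeCycles);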
#include "gromacs/utility/smalloc.h"
#include "pme_internal.h"
+#include "pme_output.h"
#if GMX_SIMD_HAVE_REAL
/* Turn on arbitrary width SIMD intrinsics for PME solve */
#include "pme_internal.h"
#include "pme_simd.h"
#include "pme_spline_work.h"
+#include "spline_vectors.h"
/* TODO consider split of pme-spline from this file */
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief This file declares a helper type for spline vectors
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_SPLINE_VECTORS_H
+#define GMX_EWALD_SPLINE_VECTORS_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/real.h"
+
+/*! \brief Helper typedef for spline vectors */
+typedef real* splinevec[DIM];
+
+#endif
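To make the typedef concrete: a splinevec is simply three real* pointers, one per Cartesian dimension. A hypothetical allocation sketch (the function name and grid sizes are illustrative only):

// Illustrative only: allocate one moduli array per dimension into a splinevec.
// snew() from gromacs/utility/smalloc.h allocates zero-initialized memory.
void allocateSplineModuli(splinevec bsp_mod, int nx, int ny, int nz)
{
    snew(bsp_mod[XX], nx);
    snew(bsp_mod[YY], ny);
    snew(bsp_mod[ZZ], nz);
}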
#include "gromacs/domdec/domdec.h"
#include "gromacs/ewald/pme_gather.h"
#include "gromacs/ewald/pme_gpu_internal.h"
+#include "gromacs/ewald/pme_gpu_staging.h"
#include "gromacs/ewald/pme_grid.h"
#include "gromacs/ewald/pme_internal.h"
#include "gromacs/ewald/pme_redistribute.h"
PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec,
const CodePath mode,
const gmx_device_info_t* gpuInfo,
- PmeGpuProgramHandle pmeGpuProgram,
+ const PmeGpuProgram* pmeGpuProgram,
const Matrix3x3& box,
const real ewaldCoeff_q,
const real ewaldCoeff_lj)
PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec,
const CodePath mode,
const gmx_device_info_t* gpuInfo,
- PmeGpuProgramHandle pmeGpuProgram,
+ const PmeGpuProgram* pmeGpuProgram,
const Matrix3x3& box,
real ewaldCoeff_q,
real ewaldCoeff_lj)
switch (mode)
{
case CodePath::GPU:
- memcpy(pme->gpu->staging.h_gridlineIndices, gridLineIndices.data(),
+ memcpy(pme_gpu_staging(pme->gpu).h_gridlineIndices, gridLineIndices.data(),
atomCount * sizeof(gridLineIndices[0]));
break;
{
case CodePath::GPU:
gridLineIndices = arrayRefFromArray(
- reinterpret_cast<IVec*>(pme->gpu->staging.h_gridlineIndices), atomCount);
+ reinterpret_cast<IVec*>(pme_gpu_staging(pme->gpu).h_gridlineIndices), atomCount);
break;
case CodePath::CPU: gridLineIndices = atc->idx; break;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec,
CodePath mode,
const gmx_device_info_t* gpuInfo,
- PmeGpuProgramHandle pmeGpuProgram,
+ const PmeGpuProgram* pmeGpuProgram,
const Matrix3x3& box,
real ewaldCoeff_q = 1.0F,
real ewaldCoeff_lj = 1.0F);
PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec,
CodePath mode = CodePath::CPU,
const gmx_device_info_t* gpuInfo = nullptr,
- PmeGpuProgramHandle pmeGpuProgram = nullptr,
+ const PmeGpuProgram* pmeGpuProgram = nullptr,
const Matrix3x3& box = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } },
real ewaldCoeff_q = 0.0F,
real ewaldCoeff_lj = 0.0F);
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
//! Returns the device info pointer
const gmx_device_info_t* getDeviceInfo() const { return deviceInfo_; }
//! Returns the persistent PME GPU kernels
- PmeGpuProgramHandle getPmeGpuProgram() const { return program_.get(); }
+ const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); }
//! Constructs the context
TestHardwareContext(CodePath codePath, const char* description, const gmx_device_info_t* deviceInfo) :
codePath_(codePath),
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/domdec/domdec.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/mdrunutility/printtime.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/ewald/pme_pp_comm_gpu.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
#include "gromacs/domdec/mdsetup.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/fileio/trxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/domdec/mdsetup.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/fileio/trxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/domdec/mdsetup.h"
#include "gromacs/domdec/partition.h"
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/fileio/confio.h"
#include "gromacs/fileio/mtxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/domdec/mdsetup.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/fileio/trxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/domdec/localatomsetmanager.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/ewald/ewald_utils.h"
-#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_gpu_program.h"
+#include "gromacs/ewald/pme_only.h"
#include "gromacs/ewald/pme_pp_comm_gpu.h"
#include "gromacs/fileio/checkpoint.h"
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/domdec/domdec.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
for (const auto& mode : runModes)
{
+ SCOPED_TRACE("mdrun " + joinStrings(mode.second, " "));
auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
if (modeTargetsGpus && !s_hasCompatibleGpus)
{