From 97c8dcb67407357cb3092b8e1cb32939d94a2f1b Mon Sep 17 00:00:00 2001
From: Mark Abraham
Date: Sun, 5 Jan 2020 22:16:52 +0100
Subject: [PATCH] Clean up ewald module internals

This prepares for future refactoring by starting to break up the
cross-dependence on many internal headers.

Note that useDecomposition was set (and then used) with the opposite
logic, which was a benign problem that is now fixed.

Change-Id: Ibb7f0ceb82d644374bfd396b38b9611cb8d7fd62
---
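As a reader's aid, a condensed view of the inverted-flag fix mentioned above.
The double negation in the old getter is what made the bug benign; both
snippets paraphrase code appearing later in this patch:

    // Old: flag stored inverted, but the getter inverted it back, so
    // callers still saw the right value.
    pmeGpu->settings.useDecomposition = (pme->nnodes == 1); // actually "is single rank"
    inline bool pme_gpu_uses_dd(const PmeGpu* pmeGpu)
    {
        return !pmeGpu->settings.useDecomposition;
    }

    // New: flag stored directly and read directly via pme_gpu_settings().
    pmeGpu->settings.useDecomposition = (pme->nnodes != 1);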
 src/gromacs/domdec/partition.cpp              |   4 +-
 src/gromacs/ewald/calculate_spline_moduli.cpp |   2 -
 src/gromacs/ewald/calculate_spline_moduli.h   |   4 +-
 src/gromacs/ewald/long_range_correction.cpp   |   2 -
 src/gromacs/ewald/pme.cpp                     |  29 +++--
 src/gromacs/ewald/pme.h                       |  79 +++---------
 .../ewald/pme_coordinate_receiver_gpu.h       |  10 +-
 .../pme_coordinate_receiver_gpu_impl.cpp      |   6 +-
 .../ewald/pme_coordinate_receiver_gpu_impl.cu |   9 +-
 .../ewald/pme_coordinate_receiver_gpu_impl.h  |   8 +-
 src/gromacs/ewald/pme_gpu.cpp                 |  45 +++++--
 src/gromacs/ewald/pme_gpu_3dfft.cu            |   5 +-
 src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp       |   5 +-
 src/gromacs/ewald/pme_gpu_internal.cpp        |  59 ++++++---
 src/gromacs/ewald/pme_gpu_internal.h          | 122 ++++--------------
 src/gromacs/ewald/pme_gpu_program.h           |   7 +-
 src/gromacs/ewald/pme_gpu_settings.h          |  94 ++++++++++++++
 src/gromacs/ewald/pme_gpu_staging.h           |  79 ++++++++++++
 src/gromacs/ewald/pme_gpu_timings.cpp         |  29 +----
 src/gromacs/ewald/pme_gpu_timings.h           |  51 ++++++--
 src/gromacs/ewald/pme_gpu_types.h             |   4 +-
 src/gromacs/ewald/pme_gpu_types_host.h        |  93 ++------------
 src/gromacs/ewald/pme_internal.h              |  38 +-----
 src/gromacs/ewald/pme_load_balancing.cpp      |   1 +
 src/gromacs/ewald/pme_only.cpp                |   6 +-
 src/gromacs/ewald/pme_only.h                  |  69 ++++++++++
 src/gromacs/ewald/pme_output.h                |  70 ++++++++++
 src/gromacs/ewald/pme_pp.cpp                  |   3 +-
 src/gromacs/ewald/pme_pp.h                    | 114 ++++++++++++++++
 src/gromacs/ewald/pme_solve.cpp               |   1 +
 src/gromacs/ewald/pme_spread.cpp              |   1 +
 src/gromacs/ewald/spline_vectors.h            |  53 ++++++++
 src/gromacs/ewald/tests/pmetestcommon.cpp     |   9 +-
 src/gromacs/ewald/tests/pmetestcommon.h       |   6 +-
 .../ewald/tests/testhardwarecontexts.h        |   4 +-
 src/gromacs/mdlib/resethandler.cpp            |   3 +-
 src/gromacs/mdlib/sim_util.cpp                |   1 +
 src/gromacs/mdrun/md.cpp                      |   2 +-
 src/gromacs/mdrun/mimic.cpp                   |   2 +-
 src/gromacs/mdrun/minimize.cpp                |   2 +-
 src/gromacs/mdrun/rerun.cpp                   |   2 +-
 src/gromacs/mdrun/runner.cpp                  |   2 +-
 .../modularsimulator/modularsimulator.cpp     |   1 +
 src/programs/mdrun/tests/pmetest.cpp          |   3 +-
 44 files changed, 732 insertions(+), 407 deletions(-)
 create mode 100644 src/gromacs/ewald/pme_gpu_settings.h
 create mode 100644 src/gromacs/ewald/pme_gpu_staging.h
 create mode 100644 src/gromacs/ewald/pme_only.h
 create mode 100644 src/gromacs/ewald/pme_output.h
 create mode 100644 src/gromacs/ewald/pme_pp.h
 create mode 100644 src/gromacs/ewald/spline_vectors.h

diff --git a/src/gromacs/domdec/partition.cpp b/src/gromacs/domdec/partition.cpp
index 487514edae..6cd6d56e25 100644
--- a/src/gromacs/domdec/partition.cpp
+++ b/src/gromacs/domdec/partition.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -60,7 +60,7 @@
 #include "gromacs/domdec/ga2la.h"
 #include "gromacs/domdec/localatomsetmanager.h"
 #include "gromacs/domdec/mdsetup.h"
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
 #include "gromacs/gmxlib/network.h"
 #include "gromacs/gmxlib/nrnb.h"
 #include "gromacs/imd/imd.h"
diff --git a/src/gromacs/ewald/calculate_spline_moduli.cpp b/src/gromacs/ewald/calculate_spline_moduli.cpp
index 5f3dbf9960..09dc3998fb 100644
--- a/src/gromacs/ewald/calculate_spline_moduli.cpp
+++ b/src/gromacs/ewald/calculate_spline_moduli.cpp
@@ -49,8 +49,6 @@
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/smalloc.h"
 
-#include "pme_internal.h"
-
 static void make_dft_mod(real* mod, const double* data, int splineOrder, int ndata)
 {
     for (int i = 0; i < ndata; i++)
diff --git a/src/gromacs/ewald/calculate_spline_moduli.h b/src/gromacs/ewald/calculate_spline_moduli.h
index 24c8fef6df..abdf4223f3 100644
--- a/src/gromacs/ewald/calculate_spline_moduli.h
+++ b/src/gromacs/ewald/calculate_spline_moduli.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -37,7 +37,7 @@
 #ifndef GMX_EWALD_CALCULATE_SPLINE_MODULI_H
 #define GMX_EWALD_CALCULATE_SPLINE_MODULI_H
 
-#include "pme_internal.h"
+#include "spline_vectors.h"
 
 /* Calculate plain SPME B-spline interpolation */
 void make_bspline_moduli(splinevec bsp_mod, int nx, int ny, int nz, int order);
diff --git a/src/gromacs/ewald/long_range_correction.cpp b/src/gromacs/ewald/long_range_correction.cpp
index 8e2a15f23a..532be50cd9 100644
--- a/src/gromacs/ewald/long_range_correction.cpp
+++ b/src/gromacs/ewald/long_range_correction.cpp
@@ -53,8 +53,6 @@
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxassert.h"
 
-#include "pme_internal.h"
-
 /* There's nothing special to do here if just masses are perturbed,
  * but if either charge or type is perturbed then the implementation
  * requires that B states are defined for both charge and type, and
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp
index a4b3cae165..fbb59bdf92 100644
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -571,7 +571,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
                         PmeRunMode runMode,
                         PmeGpu* pmeGpu,
                         const gmx_device_info_t* gpuInfo,
-                        PmeGpuProgramHandle pmeGpuProgram,
+                        const PmeGpuProgram* pmeGpuProgram,
                         const gmx::MDLogger& /*mdlog*/)
 {
     int use_threads, sum_use_threads, i;
@@ -873,21 +873,17 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
         pme->atc.emplace_back(pme->mpi_comm_d[1], pme->nthread, pme->pme_order, secondDimIndex, doSpread);
     }
 
-    if (pme_gpu_active(pme.get()))
+    // Initial check of validity of the input for running on the GPU
+    if (pme->runMode != PmeRunMode::CPU)
     {
-        if (!pme->gpu)
+        std::string errorString;
+        bool        canRunOnGpu = pme_gpu_check_restrictions(pme.get(), &errorString);
+        if (!canRunOnGpu)
         {
-            // Initial check of validity of the data
-            std::string errorString;
-            bool canRunOnGpu = pme_gpu_check_restrictions(pme.get(), &errorString);
-            if (!canRunOnGpu)
-            {
-                GMX_THROW(gmx::NotImplementedError(errorString));
-            }
+            GMX_THROW(gmx::NotImplementedError(errorString));
         }
-
-        pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
     }
+    pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
 
     pme_init_all_work(&pme->solve_work, pme->nthread, pme->nkx);
@@ -1715,7 +1711,7 @@ void gmx_pme_destroy(gmx_pme_t* pme)
 
     destroy_pme_spline_work(pme->spline_work);
 
-    if (pme_gpu_active(pme) && pme->gpu)
+    if (pme->gpu != nullptr)
     {
         pme_gpu_destroy(pme->gpu);
     }
@@ -1725,7 +1721,7 @@ void gmx_pme_destroy(gmx_pme_t* pme)
 
 void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* charges)
 {
-    if (pme_gpu_active(pme))
+    if (pme->gpu != nullptr)
    {
         pme_gpu_reinit_atoms(pme->gpu, numAtoms, charges);
     }
@@ -1735,3 +1731,8 @@ void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* charge
         // TODO: set the charges here as well
     }
 }
+
+bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size)
+{
+    return (pme.nkx == grid_size[XX] && pme.nky == grid_size[YY] && pme.nkz == grid_size[ZZ]);
+}
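The new gmx_pme_grid_matches() query lets callers such as gmx_pmeonly_switch()
(changed later in this patch) avoid reaching into gmx_pme_t fields. A sketch of
the intended usage; findMatchingSetup() is illustrative, not part of the patch:

    // Illustrative only: scan cached PME setups for one whose grid matches.
    gmx_pme_t* findMatchingSetup(gmx::ArrayRef<gmx_pme_t*> pmedata, const ivec gridSize)
    {
        for (gmx_pme_t* pme : pmedata)
        {
            if (gmx_pme_grid_matches(*pme, gridSize))
            {
                return pme; // reuse this setup instead of reinitializing
            }
        }
        return nullptr;
    }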
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h
index 171f50b71d..4a2f159a56 100644
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -60,9 +60,7 @@
 #include "gromacs/utility/real.h"
 
 struct gmx_hw_info_t;
-struct interaction_const_t;
 struct t_commrec;
-struct t_forcerec;
 struct t_inputrec;
 struct t_nrnb;
 struct PmeGpu;
@@ -77,12 +75,9 @@ struct NumPmeDomains;
 
 enum class GpuTaskCompletion;
 class PmeGpuProgram;
 class GpuEventSynchronizer;
-//! Convenience name.
-using PmeGpuProgramHandle = const PmeGpuProgram*;
 
 namespace gmx
 {
-class PmePpCommGpu;
 class ForceWithVirial;
 class MDLogger;
 enum class PinningPolicy : int;
@@ -116,6 +111,9 @@ enum class PmeForceOutputHandling
 /*! \brief Return the smallest allowed PME grid size for \p pmeOrder */
 int minimalPmeGridSize(int pmeOrder);
 
+//! Return whether the grid of \c pme is identical to \c grid_size.
+bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size);
+
 /*! \brief Check restrictions on pme_order and the PME grid nkx,nky,nkz.
 *
 * With errorsAreFatal=true, an exception or fatal error is generated
@@ -141,7 +139,7 @@ bool gmx_pme_check_restrictions(int pme_order,
 * \returns  Pointer to newly allocated and initialized PME data.
 *
 * \todo We should evolve something like a \c GpuManager that holds \c
- * gmx_device_info_t * and \c PmeGpuProgramHandle and perhaps other
+ * gmx_device_info_t * and \c PmeGpuProgram* and perhaps other
 * related things whose lifetime can/should exceed that of a task (or
 * perhaps task manager). See Redmine #2522.
 */
@@ -157,9 +155,20 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
                         PmeRunMode runMode,
                         PmeGpu* pmeGpu,
                         const gmx_device_info_t* gpuInfo,
-                        PmeGpuProgramHandle pmeGpuProgram,
+                        const PmeGpuProgram* pmeGpuProgram,
                         const gmx::MDLogger& mdlog);
 
+/*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from
 + * pme_src. This is only called when the PME cut-off/grid size changes.
 + */
+void gmx_pme_reinit(gmx_pme_t** pmedata,
+                    const t_commrec* cr,
+                    gmx_pme_t* pme_src,
+                    const t_inputrec* ir,
+                    const ivec grid_size,
+                    real ewaldcoeff_q,
+                    real ewaldcoeff_lj);
+
 /*! \brief Destroys the PME data structure.*/
 void gmx_pme_destroy(gmx_pme_t* pme);
 
@@ -214,15 +223,6 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                real* dvdlambda_lj,
                int flags);
 
-/*! \brief Called on the nodes that do PME exclusively */
-int gmx_pmeonly(struct gmx_pme_t* pme,
-                const t_commrec* cr,
-                t_nrnb* mynrnb,
-                gmx_wallcycle* wcycle,
-                gmx_walltime_accounting_t walltime_accounting,
-                t_inputrec* ir,
-                PmeRunMode runMode);
-
 /*! \brief Calculate the PME grid energy V for n charges.
  *
  * The potential (found in \p pme) must have been found already with a
@@ -233,53 +233,6 @@ int gmx_pmeonly(struct gmx_pme_t* pme,
  */
 void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q, real* V);
 
-/*! \brief Send the charges and maxshift to out PME-only node. */
-void gmx_pme_send_parameters(const t_commrec* cr,
-                             const interaction_const_t* ic,
-                             gmx_bool bFreeEnergy_q,
-                             gmx_bool bFreeEnergy_lj,
-                             real* chargeA,
-                             real* chargeB,
-                             real* sqrt_c6A,
-                             real* sqrt_c6B,
-                             real* sigmaA,
-                             real* sigmaB,
-                             int maxshift_x,
-                             int maxshift_y);
-
-/*! \brief Send the coordinates to our PME-only node and request a PME calculation */
-void gmx_pme_send_coordinates(t_forcerec* fr,
-                              const t_commrec* cr,
-                              const matrix box,
-                              const rvec* x,
-                              real lambda_q,
-                              real lambda_lj,
-                              gmx_bool bEnerVir,
-                              int64_t step,
-                              bool useGpuPmePpComms,
-                              bool reinitGpuPmePpComms,
-                              bool sendCoordinatesFromGpu,
-                              GpuEventSynchronizer* coordinatesReadyOnDeviceEvent,
-                              gmx_wallcycle* wcycle);
-
-/*! \brief Tell our PME-only node to finish */
-void gmx_pme_send_finish(const t_commrec* cr);
-
-/*! \brief Tell our PME-only node to reset all cycle and flop counters */
-void gmx_pme_send_resetcounters(const t_commrec* cr, int64_t step);
-
-/*! \brief PP nodes receive the long range forces from the PME nodes */
-void gmx_pme_receive_f(gmx::PmePpCommGpu* pmePpCommGpu,
-                       const t_commrec* cr,
-                       gmx::ForceWithVirial* forceWithVirial,
-                       real* energy_q,
-                       real* energy_lj,
-                       real* dvdlambda_q,
-                       real* dvdlambda_lj,
-                       bool useGpuPmePpComms,
-                       bool receivePmeForceToGpu,
-                       float* pme_cycles);
-
 /*! \brief
  * This function updates the local atom data on GPU after DD (charges, coordinates, etc.).
  * TODO: it should update the PME CPU atom data as well.
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu.h b/src/gromacs/ewald/pme_coordinate_receiver_gpu.h
index a4a608b5a2..ae49098251 100644
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu.h
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -42,14 +42,18 @@
 #ifndef GMX_PMECOORDINATERECEIVERGPU_H
 #define GMX_PMECOORDINATERECEIVERGPU_H
 
-#include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme_force_sender_gpu.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/utility/classhelpers.h"
 #include "gromacs/utility/gmxmpi.h"
 
+struct PpRanks;
+
 namespace gmx
 {
 
+template<typename>
+class ArrayRef;
+
 class PmeCoordinateReceiverGpu
 {
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp
index 213a88c59e..02ddd447a4 100644
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -48,6 +48,8 @@
 
 #include "config.h"
 
 #include "gromacs/ewald/pme_coordinate_receiver_gpu.h"
+#include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/gmxassert.h"
 
 #if GMX_GPU != GMX_GPU_CUDA
 
@@ -73,7 +75,7 @@ PmeCoordinateReceiverGpu::PmeCoordinateReceiverGpu(void* /* pmeStream */,
 PmeCoordinateReceiverGpu::~PmeCoordinateReceiverGpu() = default;
 
 /*!\brief init PME-PP GPU communication stub */
-void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> /* d_x */)
+void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> /* d_x */)
 {
     GMX_ASSERT(false,
                "A CPU stub for PME-PP GPU communication initialization was called instead of the "
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu
index 88d7af569b..e2000e6e54 100644
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu
@@ -47,10 +47,7 @@
 
 #include "config.h"
 
-#include <assert.h>
-#include <stdio.h>
-
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_force_sender_gpu.h"
 #include "gromacs/gpu_utils/cudautils.cuh"
 #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
 #include "gromacs/utility/gmxmpi.h"
@@ -72,7 +69,7 @@ PmeCoordinateReceiverGpu::Impl::Impl(void* pmeStream, MPI_Comm comm, gmx::ArrayR
 
 PmeCoordinateReceiverGpu::Impl::~Impl() = default;
 
-void PmeCoordinateReceiverGpu::Impl::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x)
+void PmeCoordinateReceiverGpu::Impl::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x)
 {
     int ind_start = 0;
@@ -134,7 +131,7 @@ PmeCoordinateReceiverGpu::PmeCoordinateReceiverGpu(void* pmeStr
 
 PmeCoordinateReceiverGpu::~PmeCoordinateReceiverGpu() = default;
 
-void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x)
+void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x)
 {
     impl_->sendCoordinateBufferAddressToPpRanks(d_x);
 }
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
index 84a554725b..281985b0b0 100644
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -44,7 +44,9 @@
 #define GMX_PMECOORDINATERECEIVERGPU_IMPL_H
 
 #include "gromacs/ewald/pme_coordinate_receiver_gpu.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/utility/arrayref.h"
+
+class GpuEventSynchronizer;
 
 namespace gmx
 {
@@ -67,7 +69,7 @@ public:
      * send coordinates buffer address to PP rank
      * \param[in] d_x   coordinates buffer in GPU memory
     */
-    void sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x);
+    void sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x);
     /*! \brief
      * launch receive of coordinate data from PP rank
diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp
index 31467da6da..1bbdaa7de7 100644
--- a/src/gromacs/ewald/pme_gpu.cpp
+++ b/src/gromacs/ewald/pme_gpu.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -60,10 +60,28 @@
 #include "gromacs/utility/stringutil.h"
 
 #include "pme_gpu_internal.h"
+#include "pme_gpu_settings.h"
+#include "pme_gpu_timings.h"
+#include "pme_gpu_types_host.h"
 #include "pme_grid.h"
 #include "pme_internal.h"
 #include "pme_solve.h"
 
+/*! \brief
+ * Finds out if PME is currently running on GPU.
+ *
+ * \todo The GPU module should not be constructed (or at least called)
+ * when it is not active, so there should be no need to check whether
+ * it is active. An assertion that this is true makes sense.
+ *
+ * \param[in] pme            The PME structure.
+ * \returns                  True if PME runs on GPU currently, false otherwise.
+ */
+static inline bool pme_gpu_active(const gmx_pme_t* pme)
+{
+    return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
+}
+
 void pme_gpu_reset_timings(const gmx_pme_t* pme)
 {
     if (pme_gpu_active(pme))
@@ -107,7 +125,7 @@ void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t* pme,
                                                gmx_wallcycle_t wcycle)
 {
     GMX_ASSERT(gridIndex == 0, "Only single grid supported");
-    if (pme_gpu_performs_FFT(pme->gpu))
+    if (pme_gpu_settings(pme->gpu).performGPUFFT)
     {
         wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
         wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
@@ -163,7 +181,7 @@ void pme_gpu_prepare_computation(gmx_pme_t* pme,
     wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
     wallcycle_stop(wcycle, ewcLAUNCH_GPU);
 
-    if (!pme_gpu_performs_solve(pmeGpu))
+    if (!pme_gpu_settings(pmeGpu).performGPUSolve)
     {
         // TODO remove code duplication and add test coverage
         matrix scaledBox;
@@ -199,15 +217,16 @@ void pme_gpu_launch_spread(gmx_pme_t* pme, GpuEventSynchronizer* xReadyOnDevice,
 
 void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle)
 {
-    PmeGpu* pmeGpu = pme->gpu;
-    const bool computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
-    const bool performBackFFT = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
+    PmeGpu*     pmeGpu   = pme->gpu;
+    const auto& settings = pmeGpu->settings;
+    const bool  computeEnergyAndVirial = (settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
+    const bool  performBackFFT = (settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
     const unsigned int gridIndex = 0;
     t_complex*         cfftgrid  = pme->cfftgrid[gridIndex];
 
-    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
+    if (settings.currentFlags & GMX_PME_SPREAD)
     {
-        if (!pme_gpu_performs_FFT(pmeGpu))
+        if (!settings.performGPUFFT)
         {
             wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD);
             pme_gpu_sync_spread_grid(pme->gpu);
@@ -217,15 +236,17 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle)
 
     try
     {
-        if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE)
+        if (settings.currentFlags & GMX_PME_SOLVE)
         {
             /* do R2C 3D-FFT */
             parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle);
 
             /* solve in k-space for our local cells */
-            if (pme_gpu_performs_solve(pmeGpu))
+            if (settings.performGPUSolve)
             {
-                const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ;
+                // TODO grid ordering should be set up at pme init time.
+                const auto gridOrdering =
+                        settings.useDecomposition ? GridOrdering::YZX : GridOrdering::XYZ;
                 wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
                 wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
                 pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial);
@@ -257,7 +278,7 @@ void pme_gpu_launch_gather(const gmx_pme_t* pme, gmx_wallcycle gmx_unused* wcycl
 {
     GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
 
-    if (!pme_gpu_performs_gather(pme->gpu))
+    if (!pme_gpu_settings(pme->gpu).performGPUGather)
     {
         return;
     }
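The pattern used throughout this file: instead of one free-function predicate
per flag, call sites now fetch the settings struct once. Roughly, as a
paraphrase of call sites in this patch:

    // Before: a predicate per setting, each hiding a field access.
    if (pme_gpu_performs_FFT(pmeGpu)) { /* launch GPU FFT */ }

    // After: one accessor; cache the reference when several flags are read.
    const PmeGpuSettings& settings = pme_gpu_settings(pmeGpu);
    if (settings.performGPUFFT) { /* launch GPU FFT */ }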
diff --git a/src/gromacs/ewald/pme_gpu_3dfft.cu b/src/gromacs/ewald/pme_gpu_3dfft.cu
index 16bde23684..2b30dcdef1 100644
--- a/src/gromacs/ewald/pme_gpu_3dfft.cu
+++ b/src/gromacs/ewald/pme_gpu_3dfft.cu
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -71,7 +71,8 @@ GpuParallel3dFft::GpuParallel3dFft(const PmeGpu* pmeGpu)
         complexGridSizePadded[i] = kernelParamsPtr->grid.complexGridSizePadded[i];
     }
 
-    GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
+    GMX_RELEASE_ASSERT(!pme_gpu_settings(pmeGpu).useDecomposition,
+                       "FFT decomposition not implemented");
 
     const int complexGridSizePaddedTotal =
             complexGridSizePadded[XX] * complexGridSizePadded[YY] * complexGridSizePadded[ZZ];
diff --git a/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp b/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp
index cd0a18e0a5..2b14dc7567 100644
--- a/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp
+++ b/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -68,7 +68,8 @@ GpuParallel3dFft::GpuParallel3dFft(const PmeGpu* pmeGpu)
     // Extracting all the data from PME GPU
     std::array<size_t, DIM> realGridSize, realGridSizePadded, complexGridSizePadded;
 
-    GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
+    GMX_RELEASE_ASSERT(!pme_gpu_settings(pmeGpu).useDecomposition,
+                       "FFT decomposition not implemented");
     PmeGpuKernelParamsBase* kernelParamsPtr = pmeGpu->kernelParams.get();
     for (int i = 0; i < DIM; i++)
     {
diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp
index a7d6e1e963..39973556be 100644
--- a/src/gromacs/ewald/pme_gpu_internal.cpp
+++ b/src/gromacs/ewald/pme_gpu_internal.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -231,7 +231,7 @@ void pme_gpu_copy_output_forces(PmeGpu* pmeGpu)
                        pmeGpu->settings.transferKind, nullptr);
 }
 
-void pme_gpu_realloc_coordinates(const PmeGpu* pmeGpu)
+void pme_gpu_realloc_coordinates(PmeGpu* pmeGpu)
 {
     const size_t newCoordinatesSize = pmeGpu->nAtomsAlloc * DIM;
     GMX_ASSERT(newCoordinatesSize > 0, "Bad number of atoms in PME GPU");
@@ -255,7 +255,7 @@ void pme_gpu_free_coordinates(const PmeGpu* pmeGpu)
     freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coordinates);
 }
 
-void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu* pmeGpu, const float* h_coefficients)
+void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients)
 {
     GMX_ASSERT(h_coefficients, "Bad host-side charge buffer in PME GPU");
     const size_t newCoefficientsSize = pmeGpu->nAtomsAlloc;
@@ -587,7 +587,7 @@ void pme_gpu_destroy_specific(const PmeGpu* pmeGpu)
 
 void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu)
 {
-    if (pme_gpu_performs_FFT(pmeGpu))
+    if (pme_gpu_settings(pmeGpu).performGPUFFT)
     {
         pmeGpu->archSpecific->fftSetup.resize(0);
         for (int i = 0; i < pmeGpu->common->ngrids; i++)
@@ -694,7 +694,7 @@ PmeOutput pme_gpu_getOutput(const gmx_pme_t& pme, const int flags)
     // on the else branch
     if (haveComputedEnergyAndVirial)
     {
-        if (pme_gpu_performs_solve(pmeGpu))
+        if (pme_gpu_settings(pmeGpu).performGPUSolve)
         {
             pme_gpu_getEnergyAndVirial(pme, &output);
         }
@@ -755,7 +755,7 @@ static void pme_gpu_reinit_grids(PmeGpu* pmeGpu)
         kernelParamsPtr->grid.complexGridSizePadded[i] = kernelParamsPtr->grid.realGridSize[i];
     }
     /* FFT: n real elements correspond to (n / 2 + 1) complex elements in minor dimension */
-    if (!pme_gpu_performs_FFT(pmeGpu))
+    if (!pme_gpu_settings(pmeGpu).performGPUFFT)
     {
         // This allows for GPU spreading grid and CPU fftgrid to have the same layout, so that we can copy the data directly
         kernelParamsPtr->grid.realGridSizePadded[ZZ] =
@@ -846,7 +846,7 @@ static void pme_gpu_select_best_performing_pme_spreadgather_kernels(PmeGpu* pmeG
  * \param[in,out] gpuInfo        The GPU information structure.
 * \param[in]     pmeGpuProgram  The handle to the program/kernel data created outside (e.g. in unit tests/runner)
 */
-static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProgramHandle pmeGpuProgram)
+static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
 {
     pme->gpu       = new PmeGpu();
     PmeGpu* pmeGpu = pme->gpu;
@@ -855,7 +855,7 @@ static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGp
     /* These settings are set here for the whole run; dynamic ones are set in pme_gpu_reinit() */
 
     /* A convenience variable. */
-    pmeGpu->settings.useDecomposition = (pme->nnodes == 1);
+    pmeGpu->settings.useDecomposition = (pme->nnodes != 1);
     /* TODO: CPU gather with GPU spread is broken due to different theta/dtheta layout. */
     pmeGpu->settings.performGPUGather = true;
     // By default GPU-side reduction is off (explicitly set here for tests, otherwise reset per-step)
@@ -954,10 +954,12 @@ void pme_gpu_get_real_grid_sizes(const PmeGpu* pmeGpu, gmx::IVec* gridSize, gmx:
     }
 }
 
-void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProgramHandle pmeGpuProgram)
+void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
 {
-    if (!pme_gpu_active(pme))
+    GMX_ASSERT(pme != nullptr, "Need valid PME object");
+    if (pme->runMode == PmeRunMode::CPU)
     {
+        GMX_ASSERT(pme->gpu == nullptr, "Should not have PME GPU object");
         return;
     }
 
@@ -973,7 +975,7 @@ void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProg
     }
     /* GPU FFT will only get used for a single rank.*/
     pme->gpu->settings.performGPUFFT =
-            (pme->gpu->common->runMode == PmeRunMode::GPU) && !pme_gpu_uses_dd(pme->gpu);
+            (pme->gpu->common->runMode == PmeRunMode::GPU) && !pme->gpu->settings.useDecomposition;
     pme->gpu->settings.performGPUSolve = (pme->gpu->common->runMode == PmeRunMode::GPU);
 
     /* Reinit active timers */
@@ -1037,6 +1039,25 @@ void pme_gpu_reinit_atoms(PmeGpu* pmeGpu, const int nAtoms, const real* charges)
     }
 }
 
+/*! \internal \brief
+ * Returns raw timing event from the corresponding GpuRegionTimer (if timings are enabled).
+ * In CUDA result can be nullptr stub, per GpuRegionTimer implementation.
+ *
+ * \param[in] pmeGpu         The PME GPU data structure.
+ * \param[in] PMEStageId     The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
+ */
+static CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId)
+{
+    CommandEvent* timingEvent = nullptr;
+    if (pme_gpu_timings_enabled(pmeGpu))
+    {
+        GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
+                   "Wrong PME GPU timing event index");
+        timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent();
+    }
+    return timingEvent;
+}
+
 void pme_gpu_3dfft(const PmeGpu* pmeGpu, gmx_fft_direction dir, int grid_index)
 {
     int timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? gtPME_FFT_R2C : gtPME_FFT_C2R;
@@ -1206,7 +1227,7 @@ void pme_gpu_spread(const PmeGpu* pmeGpu,
     // only needed with CUDA on PP+PME ranks, not on separate PME ranks, in unit tests
     // nor in OpenCL as these cases use a single stream (hence xReadyOnDevice == nullptr).
     GMX_ASSERT(xReadyOnDevice != nullptr || (GMX_GPU != GMX_GPU_CUDA)
-                       || pmeGpu->common->isRankPmeOnly || pme_gpu_is_testing(pmeGpu),
+                       || pmeGpu->common->isRankPmeOnly || pme_gpu_settings(pmeGpu).copyAllOutputs,
                "Need a valid coordinate synchronizer on PP+PME ranks with CUDA.");
     if (xReadyOnDevice)
     {
@@ -1265,14 +1286,14 @@ void pme_gpu_spread(const PmeGpu* pmeGpu,
     launchGpuKernel(kernelPtr, config, timingEvent, "PME spline/spread", kernelArgs);
     pme_gpu_stop_timing(pmeGpu, timingId);
 
-    const bool copyBackGrid =
-            spreadCharges && (pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_FFT(pmeGpu));
+    const auto& settings     = pmeGpu->settings;
+    const bool  copyBackGrid = spreadCharges && (!settings.performGPUFFT || settings.copyAllOutputs);
     if (copyBackGrid)
     {
         pme_gpu_copy_output_spread_grid(pmeGpu, h_grid);
     }
     const bool copyBackAtomData =
-            computeSplines && (pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_gather(pmeGpu));
+            computeSplines && (!settings.performGPUGather || settings.copyAllOutputs);
     if (copyBackAtomData)
     {
         pme_gpu_copy_output_spread_atom_data(pmeGpu);
@@ -1281,7 +1302,8 @@ void pme_gpu_spread(const PmeGpu* pmeGpu,
 
 void pme_gpu_solve(const PmeGpu* pmeGpu, t_complex* h_grid, GridOrdering gridOrdering, bool computeEnergyAndVirial)
 {
-    const bool copyInputAndOutputGrid = pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_FFT(pmeGpu);
+    const auto& settings               = pmeGpu->settings;
+    const bool  copyInputAndOutputGrid = !settings.performGPUFFT || settings.copyAllOutputs;
 
     auto* kernelParamsPtr = pmeGpu->kernelParams.get();
 
@@ -1441,12 +1463,13 @@ void pme_gpu_gather(PmeGpu* pmeGpu, PmeForceOutputHandling forceTreatment, const
         pme_gpu_copy_input_forces(pmeGpu);
     }
 
-    if (!pme_gpu_performs_FFT(pmeGpu) || pme_gpu_is_testing(pmeGpu))
+    const auto& settings = pmeGpu->settings;
+    if (!settings.performGPUFFT || settings.copyAllOutputs)
     {
         pme_gpu_copy_input_gather_grid(pmeGpu, const_cast<float*>(h_grid));
     }
 
-    if (pme_gpu_is_testing(pmeGpu))
+    if (settings.copyAllOutputs)
     {
         pme_gpu_copy_input_gather_atom_data(pmeGpu);
     }
diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h
index 443b97a60e..f010d15456 100644
--- a/src/gromacs/ewald/pme_gpu_internal.h
+++ b/src/gromacs/ewald/pme_gpu_internal.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -46,17 +46,26 @@
 #ifndef GMX_EWALD_PME_GPU_INTERNAL_H
 #define GMX_EWALD_PME_GPU_INTERNAL_H
 
-#include "gromacs/fft/fft.h"              // for the gmx_fft_direction enum
+#include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
 #include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
 #include "gromacs/utility/arrayref.h"
 
-#include "pme_gpu_types_host.h" // for the inline functions accessing PmeGpu members
+#include "pme_gpu_types_host.h"
+#include "pme_output.h"
 
+class GpuEventSynchronizer;
+struct gmx_device_info_t;
 struct gmx_hw_info_t;
 struct gmx_gpu_opt_t;
 struct gmx_pme_t; // only used in pme_gpu_reinit
-struct gmx_wallclock_gpu_pme_t;
+struct gmx_wallcycle;
 class PmeAtomComm;
+enum class PmeForceOutputHandling;
+struct PmeGpu;
+class PmeGpuProgram;
+struct PmeGpuStaging;
+struct PmeGpuSettings;
 struct t_complex;
 
 namespace gmx
@@ -183,7 +192,7 @@ bool pme_gpu_stream_query(const PmeGpu* pmeGpu);
 *
 * Needs to be called on every DD step/in the beginning.
 */
-void pme_gpu_realloc_coordinates(const PmeGpu* pmeGpu);
+void pme_gpu_realloc_coordinates(PmeGpu* pmeGpu);
 
 /*! \libinternal \brief
  * Frees the coordinates on the GPU.
@@ -202,7 +211,7 @@ void pme_gpu_free_coordinates(const PmeGpu* pmeGpu);
 * Does not need to be done for every PME computation, only whenever the local charges change.
 * (So, in the beginning of the run, or on DD step).
 */
-void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu* pmeGpu, const float* h_coefficients);
+void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients);
 
 /*! \libinternal \brief
  * Frees the charges/coefficients on the GPU.
@@ -342,37 +351,6 @@ void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu);
 */
 void pme_gpu_destroy_3dfft(const PmeGpu* pmeGpu);
 
-/* Several GPU event-based timing functions that live in pme_gpu_timings.cpp */
-
-/*! \libinternal \brief
- * Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- */
-void pme_gpu_update_timings(const PmeGpu* pmeGpu);
-
-/*! \libinternal \brief
- * Updates the internal list of active PME GPU stages (if timings are enabled).
- *
- * \param[in] pmeGpu         The PME GPU data structure.
- */
-void pme_gpu_reinit_timings(const PmeGpu* pmeGpu);
-
-/*! \brief
- * Resets the PME GPU timings. To be called at the reset MD step.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- */
-void pme_gpu_reset_timings(const PmeGpu* pmeGpu);
-
-/*! \libinternal \brief
- * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \param[in] timings        The gmx_wallclock_gpu_pme_t structure.
- */
-void pme_gpu_get_timings(const PmeGpu* pmeGpu, gmx_wallclock_gpu_pme_t* timings);
-
 /* The PME stages themselves */
 
 /*! \libinternal \brief
@@ -470,70 +448,33 @@ GPU_FUNC_QUALIFIER void* pme_gpu_get_context(const PmeGpu* GPU_FUNC_ARGUMENT(pme
 GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(
         const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
 
-/* The inlined convenience PME GPU status getters */
-
 /*! \libinternal \brief
- * Tells if PME runs on multiple GPUs with the decomposition.
+ * Returns the PME GPU settings
  *
  * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if PME runs on multiple GPUs, false otherwise.
+ * \returns                  The settings for PME on GPU
 */
-inline bool pme_gpu_uses_dd(const PmeGpu* pmeGpu)
+inline const PmeGpuSettings& pme_gpu_settings(const PmeGpu* pmeGpu)
 {
-    return !pmeGpu->settings.useDecomposition;
+    return pmeGpu->settings;
 }
 
 /*! \libinternal \brief
- * Tells if PME performs the gathering stage on GPU.
+ * Returns the PME GPU staging object
  *
  * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if the gathering is performed on GPU, false otherwise.
+ * \returns                  The staging object for PME on GPU
 */
-inline bool pme_gpu_performs_gather(const PmeGpu* pmeGpu)
+inline const PmeGpuStaging& pme_gpu_staging(const PmeGpu* pmeGpu)
 {
-    return pmeGpu->settings.performGPUGather;
+    return pmeGpu->staging;
 }
 
 /*! \libinternal \brief
- * Tells if PME performs the FFT stages on GPU.
+ * Sets whether the PME module is running in testing mode
  *
  * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if FFT is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_FFT(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.performGPUFFT;
-}
-
-/*! \libinternal \brief
- * Tells if PME performs the grid (un-)wrapping on GPU.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if (un-)wrapping is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_wrapping(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.useDecomposition;
-}
-
-/*! \libinternal \brief
- * Tells if PME performs the grid solving on GPU.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if solving is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_solve(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.performGPUSolve;
-}
-
-/*! \libinternal \brief
- * Enables or disables the testing mode.
- * Testing mode only implies copying all the outputs, even the intermediate ones, to the host,
- * and also makes the copies synchronous.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \param[in] testing        Should the testing mode be enabled, or disabled.
+ * \param[in] testing        Whether testing mode is on.
 */
 inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
 {
@@ -544,17 +485,6 @@ inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
     }
 }
 
-/*! \libinternal \brief
- * Tells if PME is in the testing mode.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  true if testing mode is enabled, false otherwise.
- */
-inline bool pme_gpu_is_testing(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.copyAllOutputs;
-}
-
 /* A block of C++ functions that live in pme_gpu_internal.cpp */
 
 /*! \libinternal \brief
@@ -661,7 +591,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUM
 */
 GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
                                        const gmx_device_info_t* GPU_FUNC_ARGUMENT(gpuInfo),
-                                       PmeGpuProgramHandle GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
+                                       const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
 
 /*! \libinternal \brief
  * Destroys the PME GPU data at the end of the run.
diff --git a/src/gromacs/ewald/pme_gpu_program.h b/src/gromacs/ewald/pme_gpu_program.h
index 3045feb973..e9e084bf1e 100644
--- a/src/gromacs/ewald/pme_gpu_program.h
+++ b/src/gromacs/ewald/pme_gpu_program.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -66,11 +66,6 @@ public:
 */
 using PmeGpuProgramStorage = std::unique_ptr<PmeGpuProgram>;
 
-/*! \brief This is a handle for passing references to PME GPU program data.
- * TODO: it should be a const reference, but for that the PmeGpu types need to be C++
- */
-using PmeGpuProgramHandle = const PmeGpuProgram*;
-
 /*! \brief
  * Factory function used to build persistent PME GPU program for the device at once.
 */
diff --git a/src/gromacs/ewald/pme_gpu_settings.h b/src/gromacs/ewald/pme_gpu_settings.h
new file mode 100644
index 0000000000..a848e7efa5
--- /dev/null
+++ b/src/gromacs/ewald/pme_gpu_settings.h
@@ -0,0 +1,94 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines the PME GPU settings data structures.
+ * \todo Some renaming/refactoring, which does not impair performance:
+ * -- PmeGpuSettings -> PmeGpuTasks
+ *
+ * \author Aleksei Iupinov
+ * \author Mark Abraham
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_GPU_SETTINGS_H
+#define GMX_EWALD_PME_GPU_SETTINGS_H
+
+#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
+
+/*! \internal \brief
+ * The PME GPU settings structure, included in the main PME GPU structure by value.
+ */
+struct PmeGpuSettings
+{
+    /* Permanent settings set on initialization */
+    /*! \brief A boolean which tells if the solving is performed on GPU. Currently always true */
+    bool performGPUSolve;
+    /*! \brief A boolean which tells if the gathering is performed on GPU. Currently always true */
+    bool performGPUGather;
+    /*! \brief A boolean which tells if the FFT is performed on GPU. Currently true for a single MPI rank. */
+    bool performGPUFFT;
+    /*! \brief A convenience boolean which tells if PME decomposition is used. */
+    bool useDecomposition;
+    /*! \brief True if PME forces are reduced on-GPU, false if reduction is done on the CPU;
+     * in the former case transfer does not need to happen.
+     *
+     * Note that this flag may change per-step.
+     */
+    bool useGpuForceReduction;
+
+    /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the
+     * host. Only intended to be used by the test framework.
+     */
+    bool copyAllOutputs;
+    /*! \brief An enum which tells whether most PME GPU D2H/H2D data transfers should be synchronous. */
+    GpuApiCallBehavior transferKind;
+    /*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
+    int currentFlags;
+    /*! \brief
+     * Currently only supported by CUDA.
+     * Controls if we should use order (i.e. 4) threads per atom for the GPU
+     * or order*order (i.e. 16) threads per atom.
+     */
+    bool useOrderThreadsPerAtom;
+    /*! \brief
+     * Currently only supported by CUDA.
+     * Controls if we should recalculate the splines in the gather or
+     * save the values in the spread and reload in the gather.
+     */
+    bool recalculateSplines;
+};
+
+#endif
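How these fields relate at initialization, condensed from pme_gpu_init() and
pme_gpu_reinit() earlier in this patch (runMode here stands in for
pme->gpu->common->runMode):

    PmeGpuSettings& settings  = pmeGpu->settings;
    settings.useDecomposition = (pme->nnodes != 1);
    settings.performGPUFFT    = (runMode == PmeRunMode::GPU) && !settings.useDecomposition;
    settings.performGPUSolve  = (runMode == PmeRunMode::GPU);
    settings.performGPUGather = true; // CPU gather with GPU spread is currently broken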
diff --git a/src/gromacs/ewald/pme_gpu_staging.h b/src/gromacs/ewald/pme_gpu_staging.h
new file mode 100644
index 0000000000..4a95c0cda8
--- /dev/null
+++ b/src/gromacs/ewald/pme_gpu_staging.h
@@ -0,0 +1,79 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines the PME GPU staging data structures.
+ * \todo Some renaming/refactoring, which does not impair performance:
+ * -- bringing the function names up to guidelines
+ * -- PmeGpuSettings -> PmeGpuTasks
+ * -- refining GPU notation application (#2053)
+ * -- renaming coefficients to charges (?)
+ *
+ * \author Aleksei Iupinov
+ * \author Mark Abraham
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_GPU_STAGING_H
+#define GMX_EWALD_PME_GPU_STAGING_H
+
+#include <vector>
+
+#include "gromacs/gpu_utils/hostallocator.h"
+#include "gromacs/math/vectypes.h"
+
+/*! \internal \brief
+ * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value.
+ * Buffers are managed by the PME GPU module.
+ */
+struct PmeGpuStaging
+{
+    //! Host-side force buffer
+    gmx::PaddedHostVector<gmx::RVec> h_forces;
+
+    /*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */
+    float* h_virialAndEnergy;
+    /*! \brief B-spline values intermediate host-side buffer. */
+    float* h_splineModuli;
+
+    /*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */
+    float* h_theta;
+    /*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */
+    float* h_dtheta;
+    /*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */
+    int* h_gridlineIndices;
+};
+
+#endif
diff --git a/src/gromacs/ewald/pme_gpu_timings.cpp b/src/gromacs/ewald/pme_gpu_timings.cpp
index d725cd968a..3680631c57 100644
--- a/src/gromacs/ewald/pme_gpu_timings.cpp
+++ b/src/gromacs/ewald/pme_gpu_timings.cpp
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -50,13 +50,7 @@
 #include "pme_gpu_types_host.h"
 #include "pme_gpu_types_host_impl.h"
 
-/*! \brief
- * Tells if CUDA-based performance tracking is enabled for PME.
- *
- * \param[in] pmeGpu         The PME GPU data structure.
- * \returns                  True if timings are enabled, false otherwise.
- */
-inline bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu)
+bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu)
 {
     return pmeGpu->archSpecific->useTiming;
 }
@@ -71,18 +65,6 @@ void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId)
     }
 }
 
-CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId)
-{
-    CommandEvent* timingEvent = nullptr;
-    if (pme_gpu_timings_enabled(pmeGpu))
-    {
-        GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
-                   "Wrong PME GPU timing event index");
-        timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent();
-    }
-    return timingEvent;
-}
-
 void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId)
 {
     if (pme_gpu_timings_enabled(pmeGpu))
@@ -123,17 +105,18 @@ void pme_gpu_reinit_timings(const PmeGpu* pmeGpu)
     {
         pmeGpu->archSpecific->activeTimers.clear();
         pmeGpu->archSpecific->activeTimers.insert(gtPME_SPLINEANDSPREAD);
+        const auto& settings = pme_gpu_settings(pmeGpu);
         // TODO: no separate gtPME_SPLINE and gtPME_SPREAD as they are not used currently
-        if (pme_gpu_performs_FFT(pmeGpu))
+        if (settings.performGPUFFT)
         {
             pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_C2R);
             pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_R2C);
         }
-        if (pme_gpu_performs_solve(pmeGpu))
+        if (settings.performGPUSolve)
        {
             pmeGpu->archSpecific->activeTimers.insert(gtPME_SOLVE);
        }
-        if (pme_gpu_performs_gather(pmeGpu))
+        if (settings.performGPUGather)
        {
            pmeGpu->archSpecific->activeTimers.insert(gtPME_GATHER);
        }
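After this patch, the timing API is consumed from inside the ewald module in
the following shape (compare pme_gpu_spread() in pme_gpu_internal.cpp above;
the kernel launch details are elided):

    pme_gpu_start_timing(pmeGpu, gtPME_SPLINEANDSPREAD);
    CommandEvent* timingEvent = pme_gpu_fetch_timing_event(pmeGpu, gtPME_SPLINEANDSPREAD);
    // ... enqueue work, passing timingEvent so the backend can record it ...
    launchGpuKernel(kernelPtr, config, timingEvent, "PME spline/spread", kernelArgs);
    pme_gpu_stop_timing(pmeGpu, gtPME_SPLINEANDSPREAD);

Note that pme_gpu_fetch_timing_event() is now a file-static helper in
pme_gpu_internal.cpp, so only start/stop and the query functions remain in the
public header.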
diff --git a/src/gromacs/ewald/pme_gpu_timings.h b/src/gromacs/ewald/pme_gpu_timings.h
index b2d09e21f7..f7c222b6b2 100644
--- a/src/gromacs/ewald/pme_gpu_timings.h
+++ b/src/gromacs/ewald/pme_gpu_timings.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
 *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -43,14 +43,9 @@
 #ifndef GMX_EWALD_PME_GPU_TIMINGS_H
 #define GMX_EWALD_PME_GPU_TIMINGS_H
 
-#include "config.h"
-
-#if GMX_GPU == GMX_GPU_CUDA
-#    include "gromacs/gpu_utils/gputraits.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
-#    include "gromacs/gpu_utils/gputraits_ocl.h"
-#endif
+#include <stddef.h>
 
+struct gmx_wallclock_gpu_pme_t;
 struct PmeGpu;
 
 /*! \libinternal \brief
@@ -62,20 +57,48 @@ struct PmeGpu;
 void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
 
 /*! \libinternal \brief
- * Returns raw timing event from the corresponding GpuRegionTimer (if timings are enabled).
- * In CUDA result can be nullptr stub, per GpuRegionTimer implementation.
+ * Stops timing the given PME GPU stage during a single computation (if timings are enabled).
  *
  * \param[in] pmeGpu         The PME GPU data structure.
  * \param[in] PMEStageId     The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
 */
-CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId);
+void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
+
+/*! \brief
+ * Tells if GPU-based performance tracking is enabled for PME.
+ *
+ * \param[in] pmeGpu         The PME GPU data structure.
+ * \returns                  True if timings are enabled, false otherwise.
+ */
+bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu);
 
 /*!
\libinternal \brief - * Stops timing the certain PME GPU stage during a single computation (if timings are enabled). + * Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation. + * + * \param[in] pmeGpu The PME GPU structure. + */ +void pme_gpu_update_timings(const PmeGpu* pmeGpu); + +/*! \libinternal \brief + * Updates the internal list of active PME GPU stages (if timings are enabled). * * \param[in] pmeGpu The PME GPU data structure. - * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h */ -void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId); +void pme_gpu_reinit_timings(const PmeGpu* pmeGpu); + +/*! \brief + * Resets the PME GPU timings. To be called at the reset MD step. + * + * \param[in] pmeGpu The PME GPU structure. + */ +void pme_gpu_reset_timings(const PmeGpu* pmeGpu); + +/*! \libinternal \brief + * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end. + * + * \param[in] pmeGpu The PME GPU structure. + * \param[in] timings The gmx_wallclock_gpu_pme_t structure. + */ +void pme_gpu_get_timings(const PmeGpu* pmeGpu, gmx_wallclock_gpu_pme_t* timings); #endif diff --git a/src/gromacs/ewald/pme_gpu_types.h b/src/gromacs/ewald/pme_gpu_types.h index 3749d5748b..be501f2cfa 100644 --- a/src/gromacs/ewald/pme_gpu_types.h +++ b/src/gromacs/ewald/pme_gpu_types.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -97,7 +97,7 @@ struct PmeGpuConstParams { /*! \brief Electrostatics coefficient = ONE_4PI_EPS0 / pme->epsilon_r */ float elFactor; - /*! \brief Virial and energy GPU array. Size is PME_GPU_ENERGY_AND_VIRIAL_COUNT (7) floats. + /*! \brief Virial and energy GPU array. Size is c_virialAndEnergyCount (7) floats. * The element order is virxx, viryy, virzz, virxy, virxz, viryz, energy. */ HIDE_FROM_OPENCL_COMPILER(DeviceBuffer) d_virialAndEnergy; }; diff --git a/src/gromacs/ewald/pme_gpu_types_host.h b/src/gromacs/ewald/pme_gpu_types_host.h index 21c77f94b3..45745c9a19 100644 --- a/src/gromacs/ewald/pme_gpu_types_host.h +++ b/src/gromacs/ewald/pme_gpu_types_host.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -56,10 +56,17 @@
 #include "gromacs/ewald/pme.h"
 #include "gromacs/ewald/pme_gpu_program.h"
 #include "gromacs/gpu_utils/clfftinitializer.h"
-#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
 #include "gromacs/gpu_utils/hostallocator.h"
 #include "gromacs/math/vectypes.h"
 
+#include "pme_gpu_settings.h"
+#include "pme_gpu_staging.h"
+
+namespace gmx
+{
+class PmeDeviceBuffers;
+} // namespace gmx
+
 #if GMX_GPU != GMX_GPU_NONE
 struct PmeGpuSpecific;
 #else
@@ -82,86 +89,6 @@ typedef int PmeGpuKernelParams;
 
 struct gmx_device_info_t;
 
-/*! \internal \brief
- * The PME GPU settings structure, included in the main PME GPU structure by value.
- */
-struct PmeGpuSettings
-{
-    /* Permanent settings set on initialization */
-    /*! \brief A boolean which tells if the solving is performed on GPU. Currently always true */
-    bool performGPUSolve;
-    /*! \brief A boolean which tells if the gathering is performed on GPU. Currently always true */
-    bool performGPUGather;
-    /*! \brief A boolean which tells if the FFT is performed on GPU. Currently true for a single MPI rank. */
-    bool performGPUFFT;
-    /*! \brief A convenience boolean which tells if PME decomposition is used. */
-    bool useDecomposition;
-    /*! \brief True if PME forces are reduced on-GPU, false if reduction is done on the CPU;
-     * in the former case transfer does not need to happen.
-     *
-     * Note that this flag may change per-step.
-     */
-    bool useGpuForceReduction;
-
-    /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the
-     * host. Only intended to be used by the test framework.
-     */
-    bool copyAllOutputs;
-    /*! \brief An enum which tells whether most PME GPU D2H/H2D data transfers should be synchronous. */
-    GpuApiCallBehavior transferKind;
-    /*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
-    int currentFlags;
-    /*! \brief
-     * Currently only supported by CUDA.
-     * Controls if we should use order (i.e. 4) threads per atom for the GPU
-     * or order*order (i.e. 16) threads per atom.
-     */
-    bool useOrderThreadsPerAtom;
-    /*! \brief
-     * Currently only supported by CUDA.
-     * Controls if we should recalculate the splines in the gather or
-     * save the values in the spread and reload in the gather.
-     */
-    bool recalculateSplines;
-};
-
-// TODO There's little value in computing the Coulomb and LJ virial
-// separately, so we should simplify that.
-// TODO The matrices might be best as a view, but not currently
-// possible. Use mdspan?
-struct PmeOutput
-{
-    gmx::ArrayRef<gmx::RVec> forces_; //!< Host staging area for PME forces
-    bool haveForceOutput_ =
-            false; //!< True if forces have been staged other false (when forces are reduced on the GPU).
-    real   coulombEnergy_ = 0;         //!< Host staging area for PME coulomb energy
-    matrix coulombVirial_ = { { 0 } }; //!< Host staging area for PME coulomb virial contributions
-    real   lennardJonesEnergy_ = 0;    //!< Host staging area for PME LJ energy
-    matrix lennardJonesVirial_ = { { 0 } }; //!< Host staging area for PME LJ virial contributions
-};
-
-/*! \internal \brief
- * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value.
- * Buffers are managed by the PME GPU module.
- */
-struct PmeGpuStaging
-{
-    //! Host-side force buffer
-    gmx::PaddedHostVector<gmx::RVec> h_forces;
-
-    /*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */
-    float* h_virialAndEnergy;
-    /*! \brief B-spline values intermediate host-side buffer. */
*/ - float* h_splineModuli; - - /*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */ - float* h_theta; - /*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */ - float* h_dtheta; - /*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */ - int* h_gridlineIndices; -}; - /*! \internal \brief * The PME GPU structure for all the data copied directly from the CPU PME structure. * The copying is done when the CPU PME structure is already (re-)initialized @@ -211,7 +138,7 @@ struct PmeGpu std::shared_ptr common; // TODO: make the CPU structure use the same type //! A handle to the program created by buildPmeGpuProgram() - PmeGpuProgramHandle programHandle_; + const PmeGpuProgram* programHandle_; //! Handle that ensures the clFFT library has been initialized once per process. std::unique_ptr initializedClfftLibrary_; diff --git a/src/gromacs/ewald/pme_internal.h b/src/gromacs/ewald/pme_internal.h index a0dc1c7247..686d63e424 100644 --- a/src/gromacs/ewald/pme_internal.h +++ b/src/gromacs/ewald/pme_internal.h @@ -56,13 +56,16 @@ #include "config.h" +#include + #include "gromacs/math/gmxcomplex.h" +#include "gromacs/utility/alignedallocator.h" +#include "gromacs/utility/arrayref.h" #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/defaultinitializationallocator.h" #include "gromacs/utility/gmxmpi.h" -#include "gromacs/utility/smalloc.h" -#include "pme_gpu_types_host.h" +#include "spline_vectors.h" //! A repeat of typedef from parallel_3dfft.h typedef struct gmx_parallel_3dfft* gmx_parallel_3dfft_t; @@ -71,6 +74,8 @@ struct t_commrec; struct t_inputrec; struct PmeGpu; +enum class PmeRunMode; + //@{ //! Grid indices for A state for charge and Lennard-Jones C6 #define PME_GRID_QA 0 @@ -97,17 +102,6 @@ static const real lb_scale_factor_symm[] = { 2.0 / 64, 12.0 / 64, 30.0 / 64, 20. */ #define PME_ORDER_MAX 12 -/*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from - * pme_src. This is only called when the PME cut-off/grid size changes. - */ -void gmx_pme_reinit(struct gmx_pme_t** pmedata, - const t_commrec* cr, - struct gmx_pme_t* pme_src, - const t_inputrec* ir, - const ivec grid_size, - real ewaldcoeff_q, - real ewaldcoeff_lj); - /* Temporary suppression until these structs become opaque and don't live in * a header that is included by other headers. Also, until then I have no @@ -158,9 +152,6 @@ struct AtomToThreadMap FastVector i; }; -/*! \brief Helper typedef for spline vectors */ -typedef real* splinevec[DIM]; - /*! \internal * \brief Coefficients for theta or dtheta */ @@ -436,19 +427,4 @@ struct gmx_pme_t //! @endcond -/*! \brief - * Finds out if PME is currently running on GPU. - * TODO: should this be removed eventually? - * - * \param[in] pme The PME structure. - * \returns True if PME runs on GPU currently, false otherwise. - */ -inline bool pme_gpu_active(const gmx_pme_t* pme) -{ - return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU); -} - -/*! 
\brief Tell our PME-only node to switch to a new grid size */ -void gmx_pme_send_switchgrid(const t_commrec* cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj); - #endif diff --git a/src/gromacs/ewald/pme_load_balancing.cpp b/src/gromacs/ewald/pme_load_balancing.cpp index c551bd7d95..33924d5565 100644 --- a/src/gromacs/ewald/pme_load_balancing.cpp +++ b/src/gromacs/ewald/pme_load_balancing.cpp @@ -80,6 +80,7 @@ #include "gromacs/utility/strconvert.h" #include "pme_internal.h" +#include "pme_pp.h" /*! \brief Parameters and settings for one PP-PME setup */ struct pme_setup_t diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp index babba1b559..9ca5df248c 100644 --- a/src/gromacs/ewald/pme_only.cpp +++ b/src/gromacs/ewald/pme_only.cpp @@ -60,6 +60,8 @@ #include "gmxpre.h" +#include "pme_only.h" + #include "config.h" #include @@ -97,7 +99,7 @@ #include "gromacs/utility/smalloc.h" #include "pme_gpu_internal.h" -#include "pme_internal.h" +#include "pme_output.h" #include "pme_pp_communication.h" /*! \brief environment variable to enable GPU P2P communication */ @@ -193,7 +195,7 @@ static gmx_pme_t* gmx_pmeonly_switch(std::vector* pmedata, for (auto& pme : *pmedata) { GMX_ASSERT(pme, "Bad PME tuning list element pointer"); - if (pme->nkx == grid_size[XX] && pme->nky == grid_size[YY] && pme->nkz == grid_size[ZZ]) + if (gmx_pme_grid_matches(*pme, grid_size)) { /* Here we have found an existing PME data structure that suits us. * However, in the GPU case, we have to reinitialize it - there's only one GPU structure. diff --git a/src/gromacs/ewald/pme_only.h b/src/gromacs/ewald/pme_only.h new file mode 100644 index 0000000000..0ed37f1e2e --- /dev/null +++ b/src/gromacs/ewald/pme_only.h @@ -0,0 +1,69 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! 
\libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * running on an MPI rank doing only PME long-range work.
+ *
+ * \author Berk Hess
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_ONLY_H
+#define GMX_EWALD_PME_ONLY_H
+
+#include
+
+#include "gromacs/timing/walltime_accounting.h"
+
+struct t_commrec;
+struct t_inputrec;
+struct t_nrnb;
+struct gmx_pme_t;
+struct gmx_wallcycle;
+
+enum class PmeRunMode;
+
+/*! \brief Called on the nodes that do PME exclusively */
+int gmx_pmeonly(gmx_pme_t*                pme,
+                const t_commrec*          cr,
+                t_nrnb*                   mynrnb,
+                gmx_wallcycle*            wcycle,
+                gmx_walltime_accounting_t walltime_accounting,
+                t_inputrec*               ir,
+                PmeRunMode                runMode);
+
+#endif
diff --git a/src/gromacs/ewald/pme_output.h b/src/gromacs/ewald/pme_output.h
new file mode 100644
index 0000000000..6567341a1e
--- /dev/null
+++ b/src/gromacs/ewald/pme_output.h
@@ -0,0 +1,70 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines a struct useful for transferring the PME output
+ * values
+ *
+ * \author Mark Abraham
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_OUTPUT_H
+#define GMX_EWALD_PME_OUTPUT_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/arrayref.h"
+
+// TODO There's little value in computing the Coulomb and LJ virial
+// separately, so we should simplify that.
+// TODO The matrices might be best as a view, but not currently
+// possible. Use mdspan?
+struct PmeOutput
+{
+    //! Host staging area for PME forces
+    gmx::ArrayRef<gmx::RVec> forces_;
+    //! True if forces have been staged, otherwise false (when forces are reduced on the GPU).
+    bool haveForceOutput_ = false;
+    //! Host staging area for PME Coulomb energy
+    real coulombEnergy_ = 0;
+    //! Host staging area for PME Coulomb virial contributions
+    matrix coulombVirial_ = { { 0 } };
+    //! Host staging area for PME LJ energy
+    real lennardJonesEnergy_ = 0;
+    //! Host staging area for PME LJ virial contributions
+    matrix lennardJonesVirial_ = { { 0 } };
+};
+
+#endif
diff --git a/src/gromacs/ewald/pme_pp.cpp b/src/gromacs/ewald/pme_pp.cpp
index 691cee18dc..d22f91e1de 100644
--- a/src/gromacs/ewald/pme_pp.cpp
+++ b/src/gromacs/ewald/pme_pp.cpp
@@ -47,6 +47,8 @@
 #include "gmxpre.h"

+#include "pme_pp.h"
+
 #include "config.h"

 #include
@@ -71,7 +73,6 @@
 #include "gromacs/utility/gmxmpi.h"
 #include "gromacs/utility/smalloc.h"

-#include "pme_internal.h"
 #include "pme_pp_communication.h"

 /*! \brief Block to wait for communication to PME ranks to complete
diff --git a/src/gromacs/ewald/pme_pp.h b/src/gromacs/ewald/pme_pp.h
new file mode 100644
index 0000000000..d85880ac9a
--- /dev/null
+++ b/src/gromacs/ewald/pme_pp.h
@@ -0,0 +1,114 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * managing the PP side of PME-only ranks.
+ *
+ * \author Berk Hess
+ * \author Mark Abraham
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_PP_H
+#define GMX_EWALD_PME_PP_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/basedefinitions.h"
+
+struct gmx_wallcycle;
+struct interaction_const_t;
+struct t_commrec;
+struct t_forcerec;
+
+class GpuEventSynchronizer;
+
+namespace gmx
+{
+class ForceWithVirial;
+class PmePpCommGpu;
+} // namespace gmx
+
+/*! \brief Send the charges and maxshift to our PME-only node.
*/ +void gmx_pme_send_parameters(const t_commrec* cr, + const interaction_const_t* ic, + gmx_bool bFreeEnergy_q, + gmx_bool bFreeEnergy_lj, + real* chargeA, + real* chargeB, + real* sqrt_c6A, + real* sqrt_c6B, + real* sigmaA, + real* sigmaB, + int maxshift_x, + int maxshift_y); + +/*! \brief Send the coordinates to our PME-only node and request a PME calculation */ +void gmx_pme_send_coordinates(t_forcerec* fr, + const t_commrec* cr, + const matrix box, + const rvec* x, + real lambda_q, + real lambda_lj, + gmx_bool bEnerVir, + int64_t step, + bool useGpuPmePpComms, + bool reinitGpuPmePpComms, + bool sendCoordinatesFromGpu, + GpuEventSynchronizer* coordinatesReadyOnDeviceEvent, + gmx_wallcycle* wcycle); + +/*! \brief Tell our PME-only node to finish */ +void gmx_pme_send_finish(const t_commrec* cr); + +/*! \brief Tell our PME-only node to reset all cycle and flop counters */ +void gmx_pme_send_resetcounters(const t_commrec* cr, int64_t step); + +/*! \brief PP nodes receive the long range forces from the PME nodes */ +void gmx_pme_receive_f(gmx::PmePpCommGpu* pmePpCommGpu, + const t_commrec* cr, + gmx::ForceWithVirial* forceWithVirial, + real* energy_q, + real* energy_lj, + real* dvdlambda_q, + real* dvdlambda_lj, + bool useGpuPmePpComms, + bool receivePmeForceToGpu, + float* pme_cycles); + +/*! \brief Tell our PME-only node to switch to a new grid size */ +void gmx_pme_send_switchgrid(const t_commrec* cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj); + +#endif diff --git a/src/gromacs/ewald/pme_solve.cpp b/src/gromacs/ewald/pme_solve.cpp index aa6a69670e..755802abc1 100644 --- a/src/gromacs/ewald/pme_solve.cpp +++ b/src/gromacs/ewald/pme_solve.cpp @@ -53,6 +53,7 @@ #include "gromacs/utility/smalloc.h" #include "pme_internal.h" +#include "pme_output.h" #if GMX_SIMD_HAVE_REAL /* Turn on arbitrary width SIMD intrinsics for PME solve */ diff --git a/src/gromacs/ewald/pme_spread.cpp b/src/gromacs/ewald/pme_spread.cpp index c6a057a198..c5711797a4 100644 --- a/src/gromacs/ewald/pme_spread.cpp +++ b/src/gromacs/ewald/pme_spread.cpp @@ -58,6 +58,7 @@ #include "pme_internal.h" #include "pme_simd.h" #include "pme_spline_work.h" +#include "spline_vectors.h" /* TODO consider split of pme-spline from this file */ diff --git a/src/gromacs/ewald/spline_vectors.h b/src/gromacs/ewald/spline_vectors.h new file mode 100644 index 0000000000..24b0b55d08 --- /dev/null +++ b/src/gromacs/ewald/spline_vectors.h @@ -0,0 +1,53 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2020, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief This file declares a type useful for spline vectors + * + * \author Berk Hess + * \author Mark Abraham + * \ingroup module_ewald + */ + +#ifndef GMX_EWALD_SPLINE_VECTORS_H +#define GMX_EWALD_SPLINE_VECTORS_H + +#include "gromacs/math/vectypes.h" +#include "gromacs/utility/real.h" + +/*! \brief Helper typedef for spline vectors */ +typedef real* splinevec[DIM]; + +#endif diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp index 57bed345c4..543dff85ff 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.cpp +++ b/src/gromacs/ewald/tests/pmetestcommon.cpp @@ -50,6 +50,7 @@ #include "gromacs/domdec/domdec.h" #include "gromacs/ewald/pme_gather.h" #include "gromacs/ewald/pme_gpu_internal.h" +#include "gromacs/ewald/pme_gpu_staging.h" #include "gromacs/ewald/pme_grid.h" #include "gromacs/ewald/pme_internal.h" #include "gromacs/ewald/pme_redistribute.h" @@ -104,7 +105,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder) PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, const CodePath mode, const gmx_device_info_t* gpuInfo, - PmeGpuProgramHandle pmeGpuProgram, + const PmeGpuProgram* pmeGpuProgram, const Matrix3x3& box, const real ewaldCoeff_q, const real ewaldCoeff_lj) @@ -149,7 +150,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec, const CodePath mode, const gmx_device_info_t* gpuInfo, - PmeGpuProgramHandle pmeGpuProgram, + const PmeGpuProgram* pmeGpuProgram, const Matrix3x3& box, real ewaldCoeff_q, real ewaldCoeff_lj) @@ -505,7 +506,7 @@ void pmeSetGridLineIndices(gmx_pme_t* pme, CodePath mode, const GridLineIndicesV switch (mode) { case CodePath::GPU: - memcpy(pme->gpu->staging.h_gridlineIndices, gridLineIndices.data(), + memcpy(pme_gpu_staging(pme->gpu).h_gridlineIndices, gridLineIndices.data(), atomCount * sizeof(gridLineIndices[0])); break; @@ -622,7 +623,7 @@ GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t* pme, CodePath mode) { case CodePath::GPU: gridLineIndices = arrayRefFromArray( - reinterpret_cast(pme->gpu->staging.h_gridlineIndices), atomCount); + reinterpret_cast(pme_gpu_staging(pme->gpu).h_gridlineIndices), atomCount); break; case CodePath::CPU: gridLineIndices = atc->idx; break; diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h index 1679dfb6c1..870b9f7aa6 100644 --- a/src/gromacs/ewald/tests/pmetestcommon.h +++ b/src/gromacs/ewald/tests/pmetestcommon.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -122,7 +122,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder); PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, CodePath mode, const gmx_device_info_t* gpuInfo, - PmeGpuProgramHandle pmeGpuProgram, + const PmeGpuProgram* pmeGpuProgram, const Matrix3x3& box, real ewaldCoeff_q = 1.0F, real ewaldCoeff_lj = 1.0F); @@ -130,7 +130,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec* inputRec, PmeSafePointer pmeInitEmpty(const t_inputrec* inputRec, CodePath mode = CodePath::CPU, const gmx_device_info_t* gpuInfo = nullptr, - PmeGpuProgramHandle pmeGpuProgram = nullptr, + const PmeGpuProgram* pmeGpuProgram = nullptr, const Matrix3x3& box = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } }, real ewaldCoeff_q = 0.0F, real ewaldCoeff_lj = 0.0F); diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h index 20d208d870..e7d49c5a79 100644 --- a/src/gromacs/ewald/tests/testhardwarecontexts.h +++ b/src/gromacs/ewald/tests/testhardwarecontexts.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -90,7 +90,7 @@ public: //! Returns the device info pointer const gmx_device_info_t* getDeviceInfo() const { return deviceInfo_; } //! Returns the persistent PME GPU kernels - PmeGpuProgramHandle getPmeGpuProgram() const { return program_.get(); } + const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); } //! Constructs the context TestHardwareContext(CodePath codePath, const char* description, const gmx_device_info_t* deviceInfo) : codePath_(codePath), diff --git a/src/gromacs/mdlib/resethandler.cpp b/src/gromacs/mdlib/resethandler.cpp index 3625d4a0dd..7e807061ac 100644 --- a/src/gromacs/mdlib/resethandler.cpp +++ b/src/gromacs/mdlib/resethandler.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
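The resethandler.cpp change below pulls in pme_pp.h, presumably so that counter resets can also reach a PME-only rank. A hedged sketch of that call path on a PP rank follows; the wrapper function and the guard on cr->npmenodes are illustrative assumptions, and only the gmx_pme_send_resetcounters() signature comes from this patch.

    // Hypothetical helper, not GROMACS code; would live in a .cpp that
    // includes "gromacs/ewald/pme_pp.h".
    void forwardCounterResetToPmeRank(const t_commrec* cr, int64_t step)
    {
        if (cr->npmenodes > 0) // assumed guard: a separate PME-only rank exists
        {
            gmx_pme_send_resetcounters(cr, step); // tell it to zero cycle/flop counters
        }
    }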
@@ -47,6 +47,7 @@ #include "gromacs/domdec/domdec.h" #include "gromacs/ewald/pme.h" #include "gromacs/ewald/pme_load_balancing.h" +#include "gromacs/ewald/pme_pp.h" #include "gromacs/gmxlib/nrnb.h" #include "gromacs/gpu_utils/gpu_utils.h" #include "gromacs/mdrunutility/printtime.h" diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 2f0018f4d1..23f13f6611 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -53,6 +53,7 @@ #include "gromacs/domdec/partition.h" #include "gromacs/essentialdynamics/edsam.h" #include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme_pp.h" #include "gromacs/ewald/pme_pp_comm_gpu.h" #include "gromacs/gmxlib/network.h" #include "gromacs/gmxlib/nonbonded/nb_free_energy.h" diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index 99421cbd9a..15cf25ac1d 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -61,8 +61,8 @@ #include "gromacs/domdec/mdsetup.h" #include "gromacs/domdec/partition.h" #include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme.h" #include "gromacs/ewald/pme_load_balancing.h" +#include "gromacs/ewald/pme_pp.h" #include "gromacs/fileio/trxio.h" #include "gromacs/gmxlib/network.h" #include "gromacs/gmxlib/nrnb.h" diff --git a/src/gromacs/mdrun/mimic.cpp b/src/gromacs/mdrun/mimic.cpp index 6c2e4e7ba5..a934bdbffa 100644 --- a/src/gromacs/mdrun/mimic.cpp +++ b/src/gromacs/mdrun/mimic.cpp @@ -60,8 +60,8 @@ #include "gromacs/domdec/mdsetup.h" #include "gromacs/domdec/partition.h" #include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme.h" #include "gromacs/ewald/pme_load_balancing.h" +#include "gromacs/ewald/pme_pp.h" #include "gromacs/fileio/trxio.h" #include "gromacs/gmxlib/network.h" #include "gromacs/gmxlib/nrnb.h" diff --git a/src/gromacs/mdrun/minimize.cpp b/src/gromacs/mdrun/minimize.cpp index 99d8467579..93cf1c19e1 100644 --- a/src/gromacs/mdrun/minimize.cpp +++ b/src/gromacs/mdrun/minimize.cpp @@ -61,7 +61,7 @@ #include "gromacs/domdec/domdec_struct.h" #include "gromacs/domdec/mdsetup.h" #include "gromacs/domdec/partition.h" -#include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme_pp.h" #include "gromacs/fileio/confio.h" #include "gromacs/fileio/mtxio.h" #include "gromacs/gmxlib/network.h" diff --git a/src/gromacs/mdrun/rerun.cpp b/src/gromacs/mdrun/rerun.cpp index bbf7655135..6b57ab32cd 100644 --- a/src/gromacs/mdrun/rerun.cpp +++ b/src/gromacs/mdrun/rerun.cpp @@ -61,8 +61,8 @@ #include "gromacs/domdec/mdsetup.h" #include "gromacs/domdec/partition.h" #include "gromacs/essentialdynamics/edsam.h" -#include "gromacs/ewald/pme.h" #include "gromacs/ewald/pme_load_balancing.h" +#include "gromacs/ewald/pme_pp.h" #include "gromacs/fileio/trxio.h" #include "gromacs/gmxlib/network.h" #include "gromacs/gmxlib/nrnb.h" diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index fe26bd7d1d..deb9c503dd 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -64,8 +64,8 @@ #include "gromacs/domdec/localatomsetmanager.h" #include "gromacs/domdec/partition.h" #include "gromacs/ewald/ewald_utils.h" -#include "gromacs/ewald/pme.h" #include "gromacs/ewald/pme_gpu_program.h" +#include "gromacs/ewald/pme_only.h" #include "gromacs/ewald/pme_pp_comm_gpu.h" #include "gromacs/fileio/checkpoint.h" #include "gromacs/fileio/gmxfio.h" diff --git a/src/gromacs/modularsimulator/modularsimulator.cpp b/src/gromacs/modularsimulator/modularsimulator.cpp index b116f29b2c..353ea9c00e 100644 --- 
a/src/gromacs/modularsimulator/modularsimulator.cpp +++ b/src/gromacs/modularsimulator/modularsimulator.cpp @@ -47,6 +47,7 @@ #include "gromacs/domdec/domdec.h" #include "gromacs/ewald/pme.h" #include "gromacs/ewald/pme_load_balancing.h" +#include "gromacs/ewald/pme_pp.h" #include "gromacs/gmxlib/network.h" #include "gromacs/gmxlib/nrnb.h" #include "gromacs/math/vec.h" diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp index 285adf87e7..4b5aadda1b 100644 --- a/src/programs/mdrun/tests/pmetest.cpp +++ b/src/programs/mdrun/tests/pmetest.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by + * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -126,6 +126,7 @@ void PmeTest::runTest(const RunModesList& runModes) for (const auto& mode : runModes) { + SCOPED_TRACE("mdrun " + joinStrings(mode.second, " ")); auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos); if (modeTargetsGpus && !s_hasCompatibleGpus) { -- 2.22.0
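Taken together, the new pme_only.h and pme_pp.h headers make the two sides of the PP/PME rank protocol explicit. The following condensed sketch shows one MD step on a PP rank using only functions declared in this patch; all variables and the surrounding control flow are illustrative assumptions, not GROMACS code.

    // PME-only ranks spin inside gmx_pmeonly(pme, cr, nrnb, wcycle,
    // walltime_accounting, ir, runMode) until told to stop. A PP rank
    // pairs with that loop roughly like this each step:
    gmx_pme_send_coordinates(fr, cr, box, x, lambda_q, lambda_lj,
                             computeEnergyAndVirial, step, useGpuPmePpComms,
                             reinitGpuPmePpComms, sendCoordinatesFromGpu,
                             coordinatesReadyOnDeviceEvent, wcycle);
    // ... the PP rank does its short-ranged force work while PME runs remotely ...
    gmx_pme_receive_f(pmePpCommGpu, cr, &forceWithVirial, &energy_q, &energy_lj,
                      &dvdlambda_q, &dvdlambda_lj, useGpuPmePpComms,
                      receivePmeForceToGpu, &pmeCycles);
    // At the end of the run the PP rank releases the PME-only rank:
    gmx_pme_send_finish(cr);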