Clean up ewald module internals

author Mark Abraham <mark.j.abraham@gmail.com>

Sun, 5 Jan 2020 21:16:52 +0000 (22:16 +0100)

committer Mark Abraham <mark.j.abraham@gmail.com>

Thu, 9 Jan 2020 15:37:01 +0000 (16:37 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Sun, 5 Jan 2020 21:16:52 +0000 (22:16 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Thu, 9 Jan 2020 15:37:01 +0000 (16:37 +0100)
diff --git a/src/gromacs/domdec/partition.cpp b/src/gromacs/domdec/partition.cpp

index 487514edae70a00b6e050768b0c5324499f2b096..6cd6d56e25fd03d1069146278708de6b277c897c 100644 (file)
--- a/src/gromacs/domdec/partition.cpp
+++ b/src/gromacs/domdec/partition.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -60,7 +60,7 @@
  #include "gromacs/domdec/ga2la.h"
  #include "gromacs/domdec/localatomsetmanager.h"
  #include "gromacs/domdec/mdsetup.h"
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
  #include "gromacs/imd/imd.h"
diff --git a/src/gromacs/ewald/calculate_spline_moduli.cpp b/src/gromacs/ewald/calculate_spline_moduli.cpp

index 5f3dbf996047fda1f1e334dead77b633689b2909..09dc3998fb67f01018a761d46d0a24e4a835a2bd 100644 (file)
--- a/src/gromacs/ewald/calculate_spline_moduli.cpp
+++ b/src/gromacs/ewald/calculate_spline_moduli.cpp
@@ -49,8 +49,6 @@
  #include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/smalloc.h"
  
-#include "pme_internal.h"
-
  static void make_dft_mod(real* mod, const double* data, int splineOrder, int ndata)
  {
      for (int i = 0; i < ndata; i++)
diff --git a/src/gromacs/ewald/calculate_spline_moduli.h b/src/gromacs/ewald/calculate_spline_moduli.h

index 24c8fef6dfeaadc3d1fd9a371b16f95df4869d9d..abdf4223f3e648933da128f0d4506ebc18ef43fb 100644 (file)
--- a/src/gromacs/ewald/calculate_spline_moduli.h
+++ b/src/gromacs/ewald/calculate_spline_moduli.h
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -37,7 +37,7 @@
  #ifndef GMX_EWALD_CALCULATE_SPLINE_MODULI_H
  #define GMX_EWALD_CALCULATE_SPLINE_MODULI_H
  
-#include "pme_internal.h"
+#include "spline_vectors.h"
  
  /* Calculate plain SPME B-spline interpolation */
  void make_bspline_moduli(splinevec bsp_mod, int nx, int ny, int nz, int order);
diff --git a/src/gromacs/ewald/long_range_correction.cpp b/src/gromacs/ewald/long_range_correction.cpp

index 8e2a15f23a234b3f792ef34e29fa8f85c1c72bcb..532be50cd912a0a8bc9cc6d57eccb5d6d1de7681 100644 (file)
--- a/src/gromacs/ewald/long_range_correction.cpp
+++ b/src/gromacs/ewald/long_range_correction.cpp
@@ -53,8 +53,6 @@
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxassert.h"
  
-#include "pme_internal.h"
-
  /* There's nothing special to do here if just masses are perturbed,
   * but if either charge or type is perturbed then the implementation
   * requires that B states are defined for both charge and type, and
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp

index a4b3cae165d1e947d8e872be160231817a3b001a..fbb59bdf92f978a4f0c466a75e7e7ecbef1fc270 100644 (file)
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -571,7 +571,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec*         cr,
                          PmeRunMode               runMode,
                          PmeGpu*                  pmeGpu,
                          const gmx_device_info_t* gpuInfo,
-                        PmeGpuProgramHandle      pmeGpuProgram,
+                        const PmeGpuProgram*     pmeGpuProgram,
                          const gmx::MDLogger& /*mdlog*/)
  {
      int  use_threads, sum_use_threads, i;
@@ -873,21 +873,17 @@ gmx_pme_t* gmx_pme_init(const t_commrec*         cr,
          pme->atc.emplace_back(pme->mpi_comm_d[1], pme->nthread, pme->pme_order, secondDimIndex, doSpread);
      }
  
-    if (pme_gpu_active(pme.get()))
+    // Initial check of validity of the input for running on the GPU
+    if (pme->runMode != PmeRunMode::CPU)
      {
-        if (!pme->gpu)
+        std::string errorString;
+        bool        canRunOnGpu = pme_gpu_check_restrictions(pme.get(), &errorString);
+        if (!canRunOnGpu)
          {
-            // Initial check of validity of the data
-            std::string errorString;
-            bool        canRunOnGpu = pme_gpu_check_restrictions(pme.get(), &errorString);
-            if (!canRunOnGpu)
-            {
-                GMX_THROW(gmx::NotImplementedError(errorString));
-            }
+            GMX_THROW(gmx::NotImplementedError(errorString));
          }
-
-        pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
      }
+    pme_gpu_reinit(pme.get(), gpuInfo, pmeGpuProgram);
  
      pme_init_all_work(&pme->solve_work, pme->nthread, pme->nkx);
  
@@ -1715,7 +1711,7 @@ void gmx_pme_destroy(gmx_pme_t* pme)
  
      destroy_pme_spline_work(pme->spline_work);
  
-    if (pme_gpu_active(pme) && pme->gpu)
+    if (pme->gpu != nullptr)
      {
          pme_gpu_destroy(pme->gpu);
      }
@@ -1725,7 +1721,7 @@ void gmx_pme_destroy(gmx_pme_t* pme)
  
  void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* charges)
  {
-    if (pme_gpu_active(pme))
+    if (pme->gpu != nullptr)
      {
          pme_gpu_reinit_atoms(pme->gpu, numAtoms, charges);
      }
@@ -1735,3 +1731,8 @@ void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* charge
          // TODO: set the charges here as well
      }
  }
+
+bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size)
+{
+    return (pme.nkx == grid_size[XX] && pme.nky == grid_size[YY] && pme.nkz == grid_size[ZZ]);
+}
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h

index 171f50b71d73ae62109d34b7f7465b20441dd7a8..4a2f159a5670f0787c3bc4d058de51a973dea945 100644 (file)
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -60,9 +60,7 @@
  #include "gromacs/utility/real.h"
  
  struct gmx_hw_info_t;
-struct interaction_const_t;
  struct t_commrec;
-struct t_forcerec;
  struct t_inputrec;
  struct t_nrnb;
  struct PmeGpu;
@@ -77,12 +75,9 @@ struct NumPmeDomains;
  enum class GpuTaskCompletion;
  class PmeGpuProgram;
  class GpuEventSynchronizer;
-//! Convenience name.
-using PmeGpuProgramHandle = const PmeGpuProgram*;
  
  namespace gmx
  {
-class PmePpCommGpu;
  class ForceWithVirial;
  class MDLogger;
  enum class PinningPolicy : int;
@@ -116,6 +111,9 @@ enum class PmeForceOutputHandling
  /*! \brief Return the smallest allowed PME grid size for \p pmeOrder */
  int minimalPmeGridSize(int pmeOrder);
  
+//! Return whether the grid of \c pme is identical to \c grid_size.
+bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size);
+
  /*! \brief Check restrictions on pme_order and the PME grid nkx,nky,nkz.
   *
   * With errorsAreFatal=true, an exception or fatal error is generated
@@ -141,7 +139,7 @@ bool gmx_pme_check_restrictions(int  pme_order,
   * \returns  Pointer to newly allocated and initialized PME data.
   *
   * \todo We should evolve something like a \c GpuManager that holds \c
- * gmx_device_info_t * and \c PmeGpuProgramHandle and perhaps other
+ * gmx_device_info_t * and \c PmeGpuProgram* and perhaps other
   * related things whose lifetime can/should exceed that of a task (or
   * perhaps task manager). See Redmine #2522.
   */
@@ -157,9 +155,20 @@ gmx_pme_t* gmx_pme_init(const t_commrec*         cr,
                          PmeRunMode               runMode,
                          PmeGpu*                  pmeGpu,
                          const gmx_device_info_t* gpuInfo,
-                        PmeGpuProgramHandle      pmeGpuProgram,
+                        const PmeGpuProgram*     pmeGpuProgram,
                          const gmx::MDLogger&     mdlog);
  
+/*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from
+ * pme_src. This is only called when the PME cut-off/grid size changes.
+ */
+void gmx_pme_reinit(gmx_pme_t**       pmedata,
+                    const t_commrec*  cr,
+                    gmx_pme_t*        pme_src,
+                    const t_inputrec* ir,
+                    const ivec        grid_size,
+                    real              ewaldcoeff_q,
+                    real              ewaldcoeff_lj);
+
  /*! \brief Destroys the PME data structure.*/
  void gmx_pme_destroy(gmx_pme_t* pme);
  
@@ -214,15 +223,6 @@ int gmx_pme_do(struct gmx_pme_t*              pme,
                 real*                          dvdlambda_lj,
                 int                            flags);
  
-/*! \brief Called on the nodes that do PME exclusively */
-int gmx_pmeonly(struct gmx_pme_t*         pme,
-                const t_commrec*          cr,
-                t_nrnb*                   mynrnb,
-                gmx_wallcycle*            wcycle,
-                gmx_walltime_accounting_t walltime_accounting,
-                t_inputrec*               ir,
-                PmeRunMode                runMode);
-
  /*! \brief Calculate the PME grid energy V for n charges.
   *
   * The potential (found in \p pme) must have been found already with a
@@ -233,53 +233,6 @@ int gmx_pmeonly(struct gmx_pme_t*         pme,
   */
  void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q, real* V);
  
-/*! \brief Send the charges and maxshift to our PME-only node. */
-void gmx_pme_send_parameters(const t_commrec*           cr,
-                             const interaction_const_t* ic,
-                             gmx_bool                   bFreeEnergy_q,
-                             gmx_bool                   bFreeEnergy_lj,
-                             real*                      chargeA,
-                             real*                      chargeB,
-                             real*                      sqrt_c6A,
-                             real*                      sqrt_c6B,
-                             real*                      sigmaA,
-                             real*                      sigmaB,
-                             int                        maxshift_x,
-                             int                        maxshift_y);
-
-/*! \brief Send the coordinates to our PME-only node and request a PME calculation */
-void gmx_pme_send_coordinates(t_forcerec*           fr,
-                              const t_commrec*      cr,
-                              const matrix          box,
-                              const rvec*           x,
-                              real                  lambda_q,
-                              real                  lambda_lj,
-                              gmx_bool              bEnerVir,
-                              int64_t               step,
-                              bool                  useGpuPmePpComms,
-                              bool                  reinitGpuPmePpComms,
-                              bool                  sendCoordinatesFromGpu,
-                              GpuEventSynchronizer* coordinatesReadyOnDeviceEvent,
-                              gmx_wallcycle*        wcycle);
-
-/*! \brief Tell our PME-only node to finish */
-void gmx_pme_send_finish(const t_commrec* cr);
-
-/*! \brief Tell our PME-only node to reset all cycle and flop counters */
-void gmx_pme_send_resetcounters(const t_commrec* cr, int64_t step);
-
-/*! \brief PP nodes receive the long range forces from the PME nodes */
-void gmx_pme_receive_f(gmx::PmePpCommGpu*    pmePpCommGpu,
-                       const t_commrec*      cr,
-                       gmx::ForceWithVirial* forceWithVirial,
-                       real*                 energy_q,
-                       real*                 energy_lj,
-                       real*                 dvdlambda_q,
-                       real*                 dvdlambda_lj,
-                       bool                  useGpuPmePpComms,
-                       bool                  receivePmeForceToGpu,
-                       float*                pme_cycles);
-
  /*! \brief
   * This function updates the local atom data on GPU after DD (charges, coordinates, etc.).
   * TODO: it should update the PME CPU atom data as well.
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu.h b/src/gromacs/ewald/pme_coordinate_receiver_gpu.h

index a4a608b5a25b1dd265ca412eeb64dc44524389f7..ae49098251bdacbcb0f2483c2944c81dd55d7fcb 100644 (file)
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu.h
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -42,14 +42,18 @@
  #ifndef GMX_PMECOORDINATERECEIVERGPU_H
  #define GMX_PMECOORDINATERECEIVERGPU_H
  
-#include "gromacs/ewald/pme.h"
-#include "gromacs/ewald/pme_force_sender_gpu.h"
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
  #include "gromacs/utility/classhelpers.h"
  #include "gromacs/utility/gmxmpi.h"
  
+struct PpRanks;
+
  namespace gmx
  {
  
+template<typename>
+class ArrayRef;
+
  class PmeCoordinateReceiverGpu
  {
  
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp

index 213a88c59edaf5426aaaad7aa2998a4a685e8135..02ddd447a4aeca130c7ce00c4e424134e58143bf 100644 (file)
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -48,6 +48,8 @@
  #include "config.h"
  
  #include "gromacs/ewald/pme_coordinate_receiver_gpu.h"
+#include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/gmxassert.h"
  
  #if GMX_GPU != GMX_GPU_CUDA
  
@@ -73,7 +75,7 @@ PmeCoordinateReceiverGpu::PmeCoordinateReceiverGpu(void* /* pmeStream */,
  PmeCoordinateReceiverGpu::~PmeCoordinateReceiverGpu() = default;
  
  /*!\brief init PME-PP GPU communication stub */
-void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> /* d_x */)
+void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> /* d_x */)
  {
      GMX_ASSERT(false,
                 "A CPU stub for PME-PP GPU communication initialization was called instead of the "
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu

index 88d7af569b8026eb37e778bff0700a66be2ed149..e2000e6e5472ab96f6d97b0718d39b8bcd49483d 100644 (file)
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu
@@ -47,10 +47,7 @@
  
  #include "config.h"
  
-#include <assert.h>
-#include <stdio.h>
-
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_force_sender_gpu.h"
  #include "gromacs/gpu_utils/cudautils.cuh"
  #include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
  #include "gromacs/utility/gmxmpi.h"
@@ -72,7 +69,7 @@ PmeCoordinateReceiverGpu::Impl::Impl(void* pmeStream, MPI_Comm comm, gmx::ArrayR
  
  PmeCoordinateReceiverGpu::Impl::~Impl() = default;
  
-void PmeCoordinateReceiverGpu::Impl::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x)
+void PmeCoordinateReceiverGpu::Impl::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x)
  {
  
      int ind_start = 0;
@@ -134,7 +131,7 @@ PmeCoordinateReceiverGpu::PmeCoordinateReceiverGpu(void*                  pmeStr
  
  PmeCoordinateReceiverGpu::~PmeCoordinateReceiverGpu() = default;
  
-void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x)
+void PmeCoordinateReceiverGpu::sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x)
  {
      impl_->sendCoordinateBufferAddressToPpRanks(d_x);
  }
diff --git a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h

index 84a554725b8e439c9d0ab595d5afafa6b514d28b..281985b0b0c8139c6980aab1797af84f906794dd 100644 (file)
--- a/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
+++ b/src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -44,7 +44,9 @@
  #define GMX_PMECOORDINATERECEIVERGPU_IMPL_H
  
  #include "gromacs/ewald/pme_coordinate_receiver_gpu.h"
-#include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#include "gromacs/utility/arrayref.h"
+
+class GpuEventSynchronizer;
  
  namespace gmx
  {
@@ -67,7 +69,7 @@ public:
       * send coordinates buffer address to PP rank
       * \param[in] d_x   coordinates buffer in GPU memory
       */
-    void sendCoordinateBufferAddressToPpRanks(const DeviceBuffer<float> d_x);
+    void sendCoordinateBufferAddressToPpRanks(DeviceBuffer<float> d_x);
  
      /*! \brief
       * launch receive of coordinate data from PP rank
diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp

index 31467da6da1683e0e655157eed515efc5672f1fa..1bbdaa7de7f039e13dc4cef9ea0cd6f325df8e73 100644 (file)
--- a/src/gromacs/ewald/pme_gpu.cpp
+++ b/src/gromacs/ewald/pme_gpu.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -60,10 +60,28 @@
  #include "gromacs/utility/stringutil.h"
  
  #include "pme_gpu_internal.h"
+#include "pme_gpu_settings.h"
+#include "pme_gpu_timings.h"
+#include "pme_gpu_types_host.h"
  #include "pme_grid.h"
  #include "pme_internal.h"
  #include "pme_solve.h"
  
+/*! \brief
+ * Finds out if PME is currently running on GPU.
+ *
+ * \todo The GPU module should not be constructed (or at least called)
+ * when it is not active, so there should be no need to check whether
+ * it is active. An assertion that this is true makes sense.
+ *
+ * \param[in] pme  The PME structure.
+ * \returns        True if PME runs on GPU currently, false otherwise.
+ */
+static inline bool pme_gpu_active(const gmx_pme_t* pme)
+{
+    return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
+}
+
  void pme_gpu_reset_timings(const gmx_pme_t* pme)
  {
      if (pme_gpu_active(pme))
@@ -107,7 +125,7 @@ void inline parallel_3dfft_execute_gpu_wrapper(gmx_pme_t*             pme,
                                                 gmx_wallcycle_t        wcycle)
  {
      GMX_ASSERT(gridIndex == 0, "Only single grid supported");
-    if (pme_gpu_performs_FFT(pme->gpu))
+    if (pme_gpu_settings(pme->gpu).performGPUFFT)
      {
          wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
          wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
@@ -163,7 +181,7 @@ void pme_gpu_prepare_computation(gmx_pme_t*     pme,
          wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_PME);
          wallcycle_stop(wcycle, ewcLAUNCH_GPU);
  
-        if (!pme_gpu_performs_solve(pmeGpu))
+        if (!pme_gpu_settings(pmeGpu).performGPUSolve)
          {
              // TODO remove code duplication and add test coverage
              matrix scaledBox;
@@ -199,15 +217,16 @@ void pme_gpu_launch_spread(gmx_pme_t* pme, GpuEventSynchronizer* xReadyOnDevice,
  
  void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle)
  {
-    PmeGpu*    pmeGpu                 = pme->gpu;
-    const bool computeEnergyAndVirial = (pmeGpu->settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
-    const bool performBackFFT = (pmeGpu->settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
+    PmeGpu*     pmeGpu                 = pme->gpu;
+    const auto& settings               = pmeGpu->settings;
+    const bool  computeEnergyAndVirial = (settings.currentFlags & GMX_PME_CALC_ENER_VIR) != 0;
+    const bool  performBackFFT = (settings.currentFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT)) != 0;
      const unsigned int gridIndex = 0;
      t_complex*         cfftgrid  = pme->cfftgrid[gridIndex];
  
-    if (pmeGpu->settings.currentFlags & GMX_PME_SPREAD)
+    if (settings.currentFlags & GMX_PME_SPREAD)
      {
-        if (!pme_gpu_performs_FFT(pmeGpu))
+        if (!settings.performGPUFFT)
          {
              wallcycle_start(wcycle, ewcWAIT_GPU_PME_SPREAD);
              pme_gpu_sync_spread_grid(pme->gpu);
@@ -217,15 +236,17 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t* pme, gmx_wallcycle* wcycle)
  
      try
      {
-        if (pmeGpu->settings.currentFlags & GMX_PME_SOLVE)
+        if (settings.currentFlags & GMX_PME_SOLVE)
          {
              /* do R2C 3D-FFT */
              parallel_3dfft_execute_gpu_wrapper(pme, gridIndex, GMX_FFT_REAL_TO_COMPLEX, wcycle);
  
              /* solve in k-space for our local cells */
-            if (pme_gpu_performs_solve(pmeGpu))
+            if (settings.performGPUSolve)
              {
-                const auto gridOrdering = pme_gpu_uses_dd(pmeGpu) ? GridOrdering::YZX : GridOrdering::XYZ;
+                // TODO grid ordering should be set up at pme init time.
+                const auto gridOrdering =
+                        settings.useDecomposition ? GridOrdering::YZX : GridOrdering::XYZ;
                  wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU);
                  wallcycle_sub_start_nocount(wcycle, ewcsLAUNCH_GPU_PME);
                  pme_gpu_solve(pmeGpu, cfftgrid, gridOrdering, computeEnergyAndVirial);
@@ -257,7 +278,7 @@ void pme_gpu_launch_gather(const gmx_pme_t* pme, gmx_wallcycle gmx_unused* wcycl
  {
      GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled.");
  
-    if (!pme_gpu_performs_gather(pme->gpu))
+    if (!pme_gpu_settings(pme->gpu).performGPUGather)
      {
          return;
      }
diff --git a/src/gromacs/ewald/pme_gpu_3dfft.cu b/src/gromacs/ewald/pme_gpu_3dfft.cu

index 16bde23684d9a64d7dc52814bf1b9f1958a39053..2b30dcdef14899569b81aead9c14501665068cfc 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_3dfft.cu
+++ b/src/gromacs/ewald/pme_gpu_3dfft.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -71,7 +71,8 @@ GpuParallel3dFft::GpuParallel3dFft(const PmeGpu* pmeGpu)
          complexGridSizePadded[i] = kernelParamsPtr->grid.complexGridSizePadded[i];
      }
  
-    GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
+    GMX_RELEASE_ASSERT(!pme_gpu_settings(pmeGpu).useDecomposition,
+                       "FFT decomposition not implemented");
  
      const int complexGridSizePaddedTotal =
              complexGridSizePadded[XX] * complexGridSizePadded[YY] * complexGridSizePadded[ZZ];
diff --git a/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp b/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp

index cd0a18e0a5264a4ac38eb118caa509b86fdacffb..2b14dc7567b3849960f169dfcebef49b6475dce7 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp
+++ b/src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -68,7 +68,8 @@ GpuParallel3dFft::GpuParallel3dFft(const PmeGpu* pmeGpu)
      // Extracting all the data from PME GPU
      std::array<size_t, DIM> realGridSize, realGridSizePadded, complexGridSizePadded;
  
-    GMX_RELEASE_ASSERT(!pme_gpu_uses_dd(pmeGpu), "FFT decomposition not implemented");
+    GMX_RELEASE_ASSERT(!pme_gpu_settings(pmeGpu).useDecomposition,
+                       "FFT decomposition not implemented");
      PmeGpuKernelParamsBase* kernelParamsPtr = pmeGpu->kernelParams.get();
      for (int i = 0; i < DIM; i++)
      {
diff --git a/src/gromacs/ewald/pme_gpu_internal.cpp b/src/gromacs/ewald/pme_gpu_internal.cpp

index a7d6e1e96309ff60e7ab1ce1c4381fa78660214f..39973556be36e960afd6b7bced387cada1b24ca7 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.cpp
+++ b/src/gromacs/ewald/pme_gpu_internal.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -231,7 +231,7 @@ void pme_gpu_copy_output_forces(PmeGpu* pmeGpu)
                           pmeGpu->settings.transferKind, nullptr);
  }
  
-void pme_gpu_realloc_coordinates(const PmeGpu* pmeGpu)
+void pme_gpu_realloc_coordinates(PmeGpu* pmeGpu)
  {
      const size_t newCoordinatesSize = pmeGpu->nAtomsAlloc * DIM;
      GMX_ASSERT(newCoordinatesSize > 0, "Bad number of atoms in PME GPU");
@@ -255,7 +255,7 @@ void pme_gpu_free_coordinates(const PmeGpu* pmeGpu)
      freeDeviceBuffer(&pmeGpu->kernelParams->atoms.d_coordinates);
  }
  
-void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu* pmeGpu, const float* h_coefficients)
+void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients)
  {
      GMX_ASSERT(h_coefficients, "Bad host-side charge buffer in PME GPU");
      const size_t newCoefficientsSize = pmeGpu->nAtomsAlloc;
@@ -587,7 +587,7 @@ void pme_gpu_destroy_specific(const PmeGpu* pmeGpu)
  
  void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu)
  {
-    if (pme_gpu_performs_FFT(pmeGpu))
+    if (pme_gpu_settings(pmeGpu).performGPUFFT)
      {
          pmeGpu->archSpecific->fftSetup.resize(0);
          for (int i = 0; i < pmeGpu->common->ngrids; i++)
@@ -694,7 +694,7 @@ PmeOutput pme_gpu_getOutput(const gmx_pme_t& pme, const int flags)
      // on the else branch
      if (haveComputedEnergyAndVirial)
      {
-        if (pme_gpu_performs_solve(pmeGpu))
+        if (pme_gpu_settings(pmeGpu).performGPUSolve)
          {
              pme_gpu_getEnergyAndVirial(pme, &output);
          }
@@ -755,7 +755,7 @@ static void pme_gpu_reinit_grids(PmeGpu* pmeGpu)
          kernelParamsPtr->grid.complexGridSizePadded[i] = kernelParamsPtr->grid.realGridSize[i];
      }
      /* FFT: n real elements correspond to (n / 2 + 1) complex elements in minor dimension */
-    if (!pme_gpu_performs_FFT(pmeGpu))
+    if (!pme_gpu_settings(pmeGpu).performGPUFFT)
      {
          // This allows for GPU spreading grid and CPU fftgrid to have the same layout, so that we can copy the data directly
          kernelParamsPtr->grid.realGridSizePadded[ZZ] =
@@ -846,7 +846,7 @@ static void pme_gpu_select_best_performing_pme_spreadgather_kernels(PmeGpu* pmeG
   * \param[in,out] gpuInfo        The GPU information structure.
   * \param[in]     pmeGpuProgram  The handle to the program/kernel data created outside (e.g. in unit tests/runner)
   */
-static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProgramHandle pmeGpuProgram)
+static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
  {
      pme->gpu       = new PmeGpu();
      PmeGpu* pmeGpu = pme->gpu;
@@ -855,7 +855,7 @@ static void pme_gpu_init(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGp
  
      /* These settings are set here for the whole run; dynamic ones are set in pme_gpu_reinit() */
      /* A convenience variable. */
-    pmeGpu->settings.useDecomposition = (pme->nnodes == 1);
+    pmeGpu->settings.useDecomposition = (pme->nnodes != 1);
      /* TODO: CPU gather with GPU spread is broken due to different theta/dtheta layout. */
      pmeGpu->settings.performGPUGather = true;
      // By default GPU-side reduction is off (explicitly set here for tests, otherwise reset per-step)
@@ -954,10 +954,12 @@ void pme_gpu_get_real_grid_sizes(const PmeGpu* pmeGpu, gmx::IVec* gridSize, gmx:
      }
  }
  
-void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProgramHandle pmeGpuProgram)
+void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, const PmeGpuProgram* pmeGpuProgram)
  {
-    if (!pme_gpu_active(pme))
+    GMX_ASSERT(pme != nullptr, "Need valid PME object");
+    if (pme->runMode == PmeRunMode::CPU)
      {
+        GMX_ASSERT(pme->gpu == nullptr, "Should not have PME GPU object");
          return;
      }
  
@@ -973,7 +975,7 @@ void pme_gpu_reinit(gmx_pme_t* pme, const gmx_device_info_t* gpuInfo, PmeGpuProg
      }
      /* GPU FFT will only get used for a single rank.*/
      pme->gpu->settings.performGPUFFT =
-            (pme->gpu->common->runMode == PmeRunMode::GPU) && !pme_gpu_uses_dd(pme->gpu);
+            (pme->gpu->common->runMode == PmeRunMode::GPU) && !pme->gpu->settings.useDecomposition;
      pme->gpu->settings.performGPUSolve = (pme->gpu->common->runMode == PmeRunMode::GPU);
  
      /* Reinit active timers */
@@ -1037,6 +1039,25 @@ void pme_gpu_reinit_atoms(PmeGpu* pmeGpu, const int nAtoms, const real* charges)
      }
  }
  
+/*! \internal \brief
+ * Returns the raw timing event from the corresponding GpuRegionTimer, or nullptr if timings are disabled.
+ * With CUDA the result can also be a nullptr stub, per the GpuRegionTimer implementation.
+ *
+ * \param[in] pmeGpu         The PME GPU data structure.
+ * \param[in] PMEStageId     The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
+ */
+static CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId)
+{
+    CommandEvent* timingEvent = nullptr; // Stays nullptr when timings are disabled
+    if (pme_gpu_timings_enabled(pmeGpu))
+    {
+        GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
+                   "Wrong PME GPU timing event index");
+        timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent();
+    }
+    return timingEvent;
+}
+
  void pme_gpu_3dfft(const PmeGpu* pmeGpu, gmx_fft_direction dir, int grid_index)
  {
      int timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? gtPME_FFT_R2C : gtPME_FFT_C2R;
@@ -1206,7 +1227,7 @@ void pme_gpu_spread(const PmeGpu*         pmeGpu,
      // only needed with CUDA on PP+PME ranks, not on separate PME ranks, in unit tests
      // nor in OpenCL as these cases use a single stream (hence xReadyOnDevice == nullptr).
      GMX_ASSERT(xReadyOnDevice != nullptr || (GMX_GPU != GMX_GPU_CUDA)
-                       || pmeGpu->common->isRankPmeOnly || pme_gpu_is_testing(pmeGpu),
+                       || pmeGpu->common->isRankPmeOnly || pme_gpu_settings(pmeGpu).copyAllOutputs,
                 "Need a valid coordinate synchronizer on PP+PME ranks with CUDA.");
      if (xReadyOnDevice)
      {
@@ -1265,14 +1286,14 @@ void pme_gpu_spread(const PmeGpu*         pmeGpu,
      launchGpuKernel(kernelPtr, config, timingEvent, "PME spline/spread", kernelArgs);
      pme_gpu_stop_timing(pmeGpu, timingId);
  
-    const bool copyBackGrid =
-            spreadCharges && (pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_FFT(pmeGpu));
+    const auto& settings    = pmeGpu->settings;
+    const bool copyBackGrid = spreadCharges && (!settings.performGPUFFT || settings.copyAllOutputs);
      if (copyBackGrid)
      {
          pme_gpu_copy_output_spread_grid(pmeGpu, h_grid);
      }
      const bool copyBackAtomData =
-            computeSplines && (pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_gather(pmeGpu));
+            computeSplines && (!settings.performGPUGather || settings.copyAllOutputs);
      if (copyBackAtomData)
      {
          pme_gpu_copy_output_spread_atom_data(pmeGpu);
@@ -1281,7 +1302,8 @@ void pme_gpu_spread(const PmeGpu*         pmeGpu,
  
  void pme_gpu_solve(const PmeGpu* pmeGpu, t_complex* h_grid, GridOrdering gridOrdering, bool computeEnergyAndVirial)
  {
-    const bool copyInputAndOutputGrid = pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_FFT(pmeGpu);
+    const auto& settings               = pmeGpu->settings;
+    const bool  copyInputAndOutputGrid = !settings.performGPUFFT || settings.copyAllOutputs;
  
      auto* kernelParamsPtr = pmeGpu->kernelParams.get();
  
@@ -1441,12 +1463,13 @@ void pme_gpu_gather(PmeGpu* pmeGpu, PmeForceOutputHandling forceTreatment, const
          pme_gpu_copy_input_forces(pmeGpu);
      }
  
-    if (!pme_gpu_performs_FFT(pmeGpu) || pme_gpu_is_testing(pmeGpu))
+    const auto& settings = pmeGpu->settings;
+    if (!settings.performGPUFFT || settings.copyAllOutputs)
      {
          pme_gpu_copy_input_gather_grid(pmeGpu, const_cast<float*>(h_grid));
      }
  
-    if (pme_gpu_is_testing(pmeGpu))
+    if (settings.copyAllOutputs)
      {
          pme_gpu_copy_input_gather_atom_data(pmeGpu);
      }
diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h

index 443b97a60e327d042321ab8059b8f7067c3a2d92..f010d15456bd2217d30180e75a327146b8bb9ec5 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.h
+++ b/src/gromacs/ewald/pme_gpu_internal.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -46,17 +46,26 @@
  #ifndef GMX_EWALD_PME_GPU_INTERNAL_H
  #define GMX_EWALD_PME_GPU_INTERNAL_H
  
-#include "gromacs/fft/fft.h"              // for the gmx_fft_direction enum
+#include "gromacs/fft/fft.h" // for the gmx_fft_direction enum
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
  #include "gromacs/gpu_utils/gpu_macros.h" // for the GPU_FUNC_ macros
  #include "gromacs/utility/arrayref.h"
  
-#include "pme_gpu_types_host.h" // for the inline functions accessing PmeGpu members
+#include "pme_gpu_types_host.h"
+#include "pme_output.h"
  
+class GpuEventSynchronizer;
+struct gmx_device_info_t;
  struct gmx_hw_info_t;
  struct gmx_gpu_opt_t;
  struct gmx_pme_t; // only used in pme_gpu_reinit
-struct gmx_wallclock_gpu_pme_t;
+struct gmx_wallcycle;
  class PmeAtomComm;
+enum class PmeForceOutputHandling;
+struct PmeGpu;
+class PmeGpuProgram;
+struct PmeGpuStaging;
+struct PmeGpuSettings;
  struct t_complex;
  
  namespace gmx
@@ -183,7 +192,7 @@ bool pme_gpu_stream_query(const PmeGpu* pmeGpu);
   *
   * Needs to be called on every DD step/in the beginning.
   */
-void pme_gpu_realloc_coordinates(const PmeGpu* pmeGpu);
+void pme_gpu_realloc_coordinates(PmeGpu* pmeGpu);
  
  /*! \libinternal \brief
   * Frees the coordinates on the GPU.
@@ -202,7 +211,7 @@ void pme_gpu_free_coordinates(const PmeGpu* pmeGpu);
   * Does not need to be done for every PME computation, only whenever the local charges change.
   * (So, in the beginning of the run, or on DD step).
   */
-void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu* pmeGpu, const float* h_coefficients);
+void pme_gpu_realloc_and_copy_input_coefficients(PmeGpu* pmeGpu, const float* h_coefficients);
  
  /*! \libinternal \brief
   * Frees the charges/coefficients on the GPU.
@@ -342,37 +351,6 @@ void pme_gpu_reinit_3dfft(const PmeGpu* pmeGpu);
   */
  void pme_gpu_destroy_3dfft(const PmeGpu* pmeGpu);
  
-/* Several GPU event-based timing functions that live in pme_gpu_timings.cpp */
-
-/*! \libinternal \brief
- * Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- */
-void pme_gpu_update_timings(const PmeGpu* pmeGpu);
-
-/*! \libinternal \brief
- * Updates the internal list of active PME GPU stages (if timings are enabled).
- *
- * \param[in] pmeGpu         The PME GPU data structure.
- */
-void pme_gpu_reinit_timings(const PmeGpu* pmeGpu);
-
-/*! \brief
- * Resets the PME GPU timings. To be called at the reset MD step.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- */
-void pme_gpu_reset_timings(const PmeGpu* pmeGpu);
-
-/*! \libinternal \brief
- * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \param[in] timings        The gmx_wallclock_gpu_pme_t structure.
- */
-void pme_gpu_get_timings(const PmeGpu* pmeGpu, gmx_wallclock_gpu_pme_t* timings);
-
  /* The PME stages themselves */
  
  /*! \libinternal \brief
@@ -470,70 +448,33 @@ GPU_FUNC_QUALIFIER void* pme_gpu_get_context(const PmeGpu* GPU_FUNC_ARGUMENT(pme
  GPU_FUNC_QUALIFIER GpuEventSynchronizer* pme_gpu_get_forces_ready_synchronizer(
          const PmeGpu* GPU_FUNC_ARGUMENT(pmeGpu)) GPU_FUNC_TERM_WITH_RETURN(nullptr);
  
-/* The inlined convenience PME GPU status getters */
-
  /*! \libinternal \brief
- * Tells if PME runs on multiple GPUs with the decomposition.
+ * Returns the PME GPU settings
   *
   * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if PME runs on multiple GPUs, false otherwise.
+ * \returns                  The settings for PME on GPU
   */
-inline bool pme_gpu_uses_dd(const PmeGpu* pmeGpu)
+inline const PmeGpuSettings& pme_gpu_settings(const PmeGpu* pmeGpu)
  {
-    return !pmeGpu->settings.useDecomposition;
+    return pmeGpu->settings;
  }
  
  /*! \libinternal \brief
- * Tells if PME performs the gathering stage on GPU.
+ * Returns the PME GPU staging object
   *
   * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if the gathering is performed on GPU, false otherwise.
+ * \returns                  The staging object for PME on GPU
   */
-inline bool pme_gpu_performs_gather(const PmeGpu* pmeGpu)
+inline const PmeGpuStaging& pme_gpu_staging(const PmeGpu* pmeGpu)
  {
-    return pmeGpu->settings.performGPUGather;
+    return pmeGpu->staging;
  }
  
  /*! \libinternal \brief
- * Tells if PME performs the FFT stages on GPU.
+ * Sets whether the PME module is running in testing mode
   *
   * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if FFT is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_FFT(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.performGPUFFT;
-}
-
-/*! \libinternal \brief
- * Tells if PME performs the grid (un-)wrapping on GPU.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if (un-)wrapping is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_wrapping(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.useDecomposition;
-}
-
-/*! \libinternal \brief
- * Tells if PME performs the grid solving on GPU.
- *
- * \param[in] pmeGpu         The PME GPU structure.
- * \returns                  True if solving is performed on GPU, false otherwise.
- */
-inline bool pme_gpu_performs_solve(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.performGPUSolve;
-}
-
-/*! \libinternal \brief
- * Enables or disables the testing mode.
- * Testing mode only implies copying all the outputs, even the intermediate ones, to the host,
- * and also makes the copies synchronous.
- *
- * \param[in] pmeGpu             The PME GPU structure.
- * \param[in] testing            Should the testing mode be enabled, or disabled.
+ * \param[in] testing        Whether testing mode is on.
   */
  inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
  {
@@ -544,17 +485,6 @@ inline void pme_gpu_set_testing(PmeGpu* pmeGpu, bool testing)
      }
  }
  
-/*! \libinternal \brief
- * Tells if PME is in the testing mode.
- *
- * \param[in] pmeGpu             The PME GPU structure.
- * \returns                      true if testing mode is enabled, false otherwise.
- */
-inline bool pme_gpu_is_testing(const PmeGpu* pmeGpu)
-{
-    return pmeGpu->settings.copyAllOutputs;
-}
-
  /* A block of C++ functions that live in pme_gpu_internal.cpp */
  
  /*! \libinternal \brief
@@ -661,7 +591,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu* GPU_FUNC_ARGUM
   */
  GPU_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t*               GPU_FUNC_ARGUMENT(pme),
                                         const gmx_device_info_t* GPU_FUNC_ARGUMENT(gpuInfo),
-                                       PmeGpuProgramHandle GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
+                                       const PmeGpuProgram* GPU_FUNC_ARGUMENT(pmeGpuProgram)) GPU_FUNC_TERM;
  
  /*! \libinternal \brief
   * Destroys the PME GPU data at the end of the run.
diff --git a/src/gromacs/ewald/pme_gpu_program.h b/src/gromacs/ewald/pme_gpu_program.h

index 3045feb97329857a479b127782fde09e6ed06fe3..e9e084bf1e58fadb94ded5c5ac599ae3ac1780fd 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_program.h
+++ b/src/gromacs/ewald/pme_gpu_program.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -66,11 +66,6 @@ public:
   */
  using PmeGpuProgramStorage = std::unique_ptr<PmeGpuProgram>;
  
-/*! \brief This is a handle for passing references to PME GPU program data.
- * TODO: it should be a const reference, but for that the PmeGpu types need to be C++
- */
-using PmeGpuProgramHandle = const PmeGpuProgram*;
-
  /*! \brief
   * Factory function used to build persistent PME GPU program for the device at once.
   */
diff --git a/src/gromacs/ewald/pme_gpu_settings.h b/src/gromacs/ewald/pme_gpu_settings.h

new file mode 100644 (file)

index 0000000..a848e7e
--- /dev/null
+++ b/src/gromacs/ewald/pme_gpu_settings.h
@@ -0,0 +1,94 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines the PME GPU settings data structures.
+ * \todo Some renaming/refactoring, which does not impair the performance:
+ * -- PmeGpuSettings -> PmeGpuTasks
+ *
+ * \author Aleksei Iupinov <a.yupinov@gmail.com>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_GPU_SETTINGS_H
+#define GMX_EWALD_PME_GPU_SETTINGS_H
+
+#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
+
+/*! \internal \brief
+ * The PME GPU settings structure, included in the main PME GPU structure by value.
+ */
+struct PmeGpuSettings
+{
+    /* Settings set on initialization (pme_gpu_init()) or re-initialization (pme_gpu_reinit()) */
+    /*! \brief A boolean which tells if the solving is performed on GPU. True only for PmeRunMode::GPU. */
+    bool performGPUSolve;
+    /*! \brief A boolean which tells if the gathering is performed on GPU. Currently always true */
+    bool performGPUGather;
+    /*! \brief A boolean which tells if the FFT is performed on GPU. True only for PmeRunMode::GPU without PME decomposition. */
+    bool performGPUFFT;
+    /*! \brief A convenience boolean which tells if PME decomposition is used (i.e. pme->nnodes != 1). */
+    bool useDecomposition;
+    /*! \brief True if PME forces are reduced on-GPU, false if reduction is done on the CPU;
+     *  in the former case transfer does not need to happen.
+     *
+     *  Note that this flag may change per-step.
+     */
+    bool useGpuForceReduction;
+
+    /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the
+     * host. Only intended to be used by the test framework.
+     */
+    bool copyAllOutputs;
+    /*! \brief An enum which tells whether most PME GPU D2H/H2D data transfers should be synchronous. */
+    GpuApiCallBehavior transferKind;
+    /*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
+    int currentFlags;
+    /*! \brief
+     *  Currently only supported by CUDA.
+     *  Controls if we should use order (i.e. 4) threads per atom for the GPU
+     *  or order*order (i.e. 16) threads per atom.
+     */
+    bool useOrderThreadsPerAtom;
+    /*! \brief
+     * Currently only supported by CUDA.
+     * Controls if we should recalculate the splines in the gather or
+     * save the values in the spread and reload in the gather.
+     */
+    bool recalculateSplines;
+};
+
+#endif
diff --git a/src/gromacs/ewald/pme_gpu_staging.h b/src/gromacs/ewald/pme_gpu_staging.h

new file mode 100644 (file)

index 0000000..4a95c0c
--- /dev/null
+++ b/src/gromacs/ewald/pme_gpu_staging.h
@@ -0,0 +1,79 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines the PME GPU staging (intermediate host-side buffers) data structure.
+ * \todo Some renaming/refactoring, which does not impair the performance:
+ * -- bringing the function names up to guidelines
+ * -- PmeGpuSettings -> PmeGpuTasks
+ * -- refining GPU notation application (#2053)
+ * -- renaming coefficients to charges (?)
+ *
+ * \author Aleksei Iupinov <a.yupinov@gmail.com>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_GPU_STAGING_H
+#define GMX_EWALD_PME_GPU_STAGING_H
+
+#include <vector>
+
+#include "gromacs/gpu_utils/hostallocator.h"
+#include "gromacs/math/vectypes.h"
+
+/*! \internal \brief
+ * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value.
+ * Buffers are managed by the PME GPU module.
+ */
+struct PmeGpuStaging
+{
+    //! Host-side force buffer
+    gmx::PaddedHostVector<gmx::RVec> h_forces;
+
+    /*! \brief Virial and energy intermediate host-side buffer. Size is c_virialAndEnergyCount. */
+    float* h_virialAndEnergy;
+    /*! \brief B-spline values intermediate host-side buffer. */
+    float* h_splineModuli;
+
+    /*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */
+    float* h_theta;
+    /*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */
+    float* h_dtheta;
+    /*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */
+    int* h_gridlineIndices;
+};
+
+#endif
diff --git a/src/gromacs/ewald/pme_gpu_timings.cpp b/src/gromacs/ewald/pme_gpu_timings.cpp

index d725cd968a3307f7387c29419b1a7adb5a22292e..3680631c5731b905809b5b24ad689e6db5ba994b 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_timings.cpp
+++ b/src/gromacs/ewald/pme_gpu_timings.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -50,13 +50,7 @@
  #include "pme_gpu_types_host.h"
  #include "pme_gpu_types_host_impl.h"
  
-/*! \brief
- * Tells if CUDA-based performance tracking is enabled for PME.
- *
- * \param[in] pmeGpu         The PME GPU data structure.
- * \returns                  True if timings are enabled, false otherwise.
- */
-inline bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu)
+bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu)
  {
      return pmeGpu->archSpecific->useTiming;
  }
@@ -71,18 +65,6 @@ void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId)
      }
  }
  
-CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId)
-{
-    CommandEvent* timingEvent = nullptr;
-    if (pme_gpu_timings_enabled(pmeGpu))
-    {
-        GMX_ASSERT(PMEStageId < pmeGpu->archSpecific->timingEvents.size(),
-                   "Wrong PME GPU timing event index");
-        timingEvent = pmeGpu->archSpecific->timingEvents[PMEStageId].fetchNextEvent();
-    }
-    return timingEvent;
-}
-
  void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId)
  {
      if (pme_gpu_timings_enabled(pmeGpu))
@@ -123,17 +105,18 @@ void pme_gpu_reinit_timings(const PmeGpu* pmeGpu)
      {
          pmeGpu->archSpecific->activeTimers.clear();
          pmeGpu->archSpecific->activeTimers.insert(gtPME_SPLINEANDSPREAD);
+        const auto& settings = pme_gpu_settings(pmeGpu);
          // TODO: no separate gtPME_SPLINE and gtPME_SPREAD as they are not used currently
-        if (pme_gpu_performs_FFT(pmeGpu))
+        if (settings.performGPUFFT)
          {
              pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_C2R);
              pmeGpu->archSpecific->activeTimers.insert(gtPME_FFT_R2C);
          }
-        if (pme_gpu_performs_solve(pmeGpu))
+        if (settings.performGPUSolve)
          {
              pmeGpu->archSpecific->activeTimers.insert(gtPME_SOLVE);
          }
-        if (pme_gpu_performs_gather(pmeGpu))
+        if (settings.performGPUGather)
          {
              pmeGpu->archSpecific->activeTimers.insert(gtPME_GATHER);
          }
diff --git a/src/gromacs/ewald/pme_gpu_timings.h b/src/gromacs/ewald/pme_gpu_timings.h

index b2d09e21f7a56b9b79c84d333442c74a04cd4d52..f7c222b6b2ec7a32e57c13f4fe05522d28cc85e3 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_timings.h
+++ b/src/gromacs/ewald/pme_gpu_timings.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -43,14 +43,9 @@
  #ifndef GMX_EWALD_PME_GPU_TIMINGS_H
  #define GMX_EWALD_PME_GPU_TIMINGS_H
  
-#include "config.h"
-
-#if GMX_GPU == GMX_GPU_CUDA
-#    include "gromacs/gpu_utils/gputraits.cuh"
-#elif GMX_GPU == GMX_GPU_OPENCL
-#    include "gromacs/gpu_utils/gputraits_ocl.h"
-#endif
+#include <cstddef>
  
+struct gmx_wallclock_gpu_pme_t;
  struct PmeGpu;
  
  /*! \libinternal \brief
@@ -62,20 +57,48 @@ struct PmeGpu;
  void pme_gpu_start_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
  
  /*! \libinternal \brief
- * Returns raw timing event from the corresponding GpuRegionTimer (if timings are enabled).
- * In CUDA result can be nullptr stub, per GpuRegionTimer implementation.
+ * Stops timing the certain PME GPU stage during a single computation (if timings are enabled).
   *
   * \param[in] pmeGpu         The PME GPU data structure.
   * \param[in] PMEStageId     The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
   */
-CommandEvent* pme_gpu_fetch_timing_event(const PmeGpu* pmeGpu, size_t PMEStageId);
+void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
+
+/*! \brief
+ * Tells if CUDA-based performance tracking is enabled for PME.
+ *
+ * \param[in] pmeGpu         The PME GPU data structure.
+ * \returns                  True if timings are enabled, false otherwise.
+ */
+bool pme_gpu_timings_enabled(const PmeGpu* pmeGpu);
  
  /*! \libinternal \brief
- * Stops timing the certain PME GPU stage during a single computation (if timings are enabled).
+ * Finalizes all the active PME GPU stage timings for the current computation. Should be called at the end of every computation.
+ *
+ * \param[in] pmeGpu         The PME GPU structure.
+ */
+void pme_gpu_update_timings(const PmeGpu* pmeGpu);
+
+/*! \libinternal \brief
+ * Updates the internal list of active PME GPU stages (if timings are enabled).
   *
   * \param[in] pmeGpu         The PME GPU data structure.
- * \param[in] PMEStageId     The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h
   */
-void pme_gpu_stop_timing(const PmeGpu* pmeGpu, size_t PMEStageId);
+void pme_gpu_reinit_timings(const PmeGpu* pmeGpu);
+
+/*! \brief
+ * Resets the PME GPU timings. To be called at the reset MD step.
+ *
+ * \param[in] pmeGpu         The PME GPU structure.
+ */
+void pme_gpu_reset_timings(const PmeGpu* pmeGpu);
+
+/*! \libinternal \brief
+ * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end.
+ *
+ * \param[in] pmeGpu         The PME GPU structure.
+ * \param[in] timings        The gmx_wallclock_gpu_pme_t structure.
+ */
+void pme_gpu_get_timings(const PmeGpu* pmeGpu, gmx_wallclock_gpu_pme_t* timings);
  
  #endif
diff --git a/src/gromacs/ewald/pme_gpu_types.h b/src/gromacs/ewald/pme_gpu_types.h

index 3749d5748b91af38514be5d22fd3f7b14592a564..be501f2cfa83f953e69d63047e63762c5dcbffaa 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_types.h
+++ b/src/gromacs/ewald/pme_gpu_types.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -97,7 +97,7 @@ struct PmeGpuConstParams
  {
      /*! \brief Electrostatics coefficient = ONE_4PI_EPS0 / pme->epsilon_r */
      float elFactor;
-    /*! \brief Virial and energy GPU array. Size is PME_GPU_ENERGY_AND_VIRIAL_COUNT (7) floats.
+    /*! \brief Virial and energy GPU array. Size is c_virialAndEnergyCount (7) floats.
       * The element order is virxx, viryy, virzz, virxy, virxz, viryz, energy. */
      HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_virialAndEnergy;
  };
diff --git a/src/gromacs/ewald/pme_gpu_types_host.h b/src/gromacs/ewald/pme_gpu_types_host.h

index 21c77f94b3f1bd637372bbd9352b1177cd1ca692..45745c9a19dee3e6f5366a05897f299f3dd1dced 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_types_host.h
+++ b/src/gromacs/ewald/pme_gpu_types_host.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -56,10 +56,17 @@
  #include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_gpu_program.h"
  #include "gromacs/gpu_utils/clfftinitializer.h"
-#include "gromacs/gpu_utils/gpu_utils.h" // for GpuApiCallBehavior
  #include "gromacs/gpu_utils/hostallocator.h"
  #include "gromacs/math/vectypes.h"
  
+#include "pme_gpu_settings.h"
+#include "pme_gpu_staging.h"
+
+namespace gmx
+{
+class PmeDeviceBuffers;
+} // namespace gmx
+
  #if GMX_GPU != GMX_GPU_NONE
  struct PmeGpuSpecific;
  #else
@@ -82,86 +89,6 @@ typedef int PmeGpuKernelParams;
  
  struct gmx_device_info_t;
  
-/*! \internal \brief
- * The PME GPU settings structure, included in the main PME GPU structure by value.
- */
-struct PmeGpuSettings
-{
-    /* Permanent settings set on initialization */
-    /*! \brief A boolean which tells if the solving is performed on GPU. Currently always true */
-    bool performGPUSolve;
-    /*! \brief A boolean which tells if the gathering is performed on GPU. Currently always true */
-    bool performGPUGather;
-    /*! \brief A boolean which tells if the FFT is performed on GPU. Currently true for a single MPI rank. */
-    bool performGPUFFT;
-    /*! \brief A convenience boolean which tells if PME decomposition is used. */
-    bool useDecomposition;
-    /*! \brief True if PME forces are reduced on-GPU, false if reduction is done on the CPU;
-     *  in the former case transfer does not need to happen.
-     *
-     *  Note that this flag may change per-step.
-     */
-    bool useGpuForceReduction;
-
-    /*! \brief A boolean which tells if any PME GPU stage should copy all of its outputs to the
-     * host. Only intended to be used by the test framework.
-     */
-    bool copyAllOutputs;
-    /*! \brief An enum which tells whether most PME GPU D2H/H2D data transfers should be synchronous. */
-    GpuApiCallBehavior transferKind;
-    /*! \brief Various flags for the current PME computation, corresponding to the GMX_PME_ flags in pme.h. */
-    int currentFlags;
-    /*! \brief
-     *  Currently only supported by CUDA.
-     *  Controls if we should use order (i.e. 4) threads per atom for the GPU
-     *  or order*order (i.e. 16) threads per atom.
-     */
-    bool useOrderThreadsPerAtom;
-    /*! \brief
-     * Currently only supported by CUDA.
-     * Controls if we should recalculate the splines in the gather or
-     * save the values in the spread and reload in the gather.
-     */
-    bool recalculateSplines;
-};
-
-// TODO There's little value in computing the Coulomb and LJ virial
-// separately, so we should simplify that.
-// TODO The matrices might be best as a view, but not currently
-// possible. Use mdspan?
-struct PmeOutput
-{
-    gmx::ArrayRef<gmx::RVec> forces_; //!< Host staging area for PME forces
-    bool                     haveForceOutput_ =
-            false; //!< True if forces have been staged other false (when forces are reduced on the GPU).
-    real   coulombEnergy_ = 0;         //!< Host staging area for PME coulomb energy
-    matrix coulombVirial_ = { { 0 } }; //!< Host staging area for PME coulomb virial contributions
-    real   lennardJonesEnergy_ = 0;    //!< Host staging area for PME LJ energy
-    matrix lennardJonesVirial_ = { { 0 } }; //!< Host staging area for PME LJ virial contributions
-};
-
-/*! \internal \brief
- * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value.
- * Buffers are managed by the PME GPU module.
- */
-struct PmeGpuStaging
-{
-    //! Host-side force buffer
-    gmx::PaddedHostVector<gmx::RVec> h_forces;
-
-    /*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */
-    float* h_virialAndEnergy;
-    /*! \brief B-spline values intermediate host-side buffer. */
-    float* h_splineModuli;
-
-    /*! \brief Pointer to the host memory with B-spline values. Only used for host-side gather, or unit tests */
-    float* h_theta;
-    /*! \brief Pointer to the host memory with B-spline derivative values. Only used for host-side gather, or unit tests */
-    float* h_dtheta;
-    /*! \brief Pointer to the host memory with ivec atom gridline indices. Only used for host-side gather, or unit tests */
-    int* h_gridlineIndices;
-};
-
  /*! \internal \brief
   * The PME GPU structure for all the data copied directly from the CPU PME structure.
   * The copying is done when the CPU PME structure is already (re-)initialized
@@ -211,7 +138,7 @@ struct PmeGpu
      std::shared_ptr<PmeShared> common; // TODO: make the CPU structure use the same type
  
      //! A handle to the program created by buildPmeGpuProgram()
-    PmeGpuProgramHandle programHandle_;
+    const PmeGpuProgram* programHandle_;
  
      //! Handle that ensures the clFFT library has been initialized once per process.
      std::unique_ptr<gmx::ClfftInitializer> initializedClfftLibrary_;
diff --git a/src/gromacs/ewald/pme_internal.h b/src/gromacs/ewald/pme_internal.h

index a0dc1c7247f84c33e75bcd95e28513223a0929f5..686d63e42456f163c7bac2bf5f126cb60d4d5639 100644 (file)
--- a/src/gromacs/ewald/pme_internal.h
+++ b/src/gromacs/ewald/pme_internal.h
@@ -56,13 +56,16 @@
  
  #include "config.h"
  
+#include <vector>
+
  #include "gromacs/math/gmxcomplex.h"
+#include "gromacs/utility/alignedallocator.h"
+#include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/basedefinitions.h"
  #include "gromacs/utility/defaultinitializationallocator.h"
  #include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
  
-#include "pme_gpu_types_host.h"
+#include "spline_vectors.h"
  
  //! A repeat of typedef from parallel_3dfft.h
  typedef struct gmx_parallel_3dfft* gmx_parallel_3dfft_t;
@@ -71,6 +74,8 @@ struct t_commrec;
  struct t_inputrec;
  struct PmeGpu;
  
+enum class PmeRunMode;
+
  //@{
  //! Grid indices for A state for charge and Lennard-Jones C6
  #define PME_GRID_QA 0
@@ -97,17 +102,6 @@ static const real lb_scale_factor_symm[] = { 2.0 / 64, 12.0 / 64, 30.0 / 64, 20.
   */
  #define PME_ORDER_MAX 12
  
-/*! \brief As gmx_pme_init, but takes most settings, except the grid/Ewald coefficients, from
- * pme_src. This is only called when the PME cut-off/grid size changes.
- */
-void gmx_pme_reinit(struct gmx_pme_t** pmedata,
-                    const t_commrec*   cr,
-                    struct gmx_pme_t*  pme_src,
-                    const t_inputrec*  ir,
-                    const ivec         grid_size,
-                    real               ewaldcoeff_q,
-                    real               ewaldcoeff_lj);
-
  
  /* Temporary suppression until these structs become opaque and don't live in
   * a header that is included by other headers. Also, until then I have no
@@ -158,9 +152,6 @@ struct AtomToThreadMap
      FastVector<int> i;
  };
  
-/*! \brief Helper typedef for spline vectors */
-typedef real* splinevec[DIM];
-
  /*! \internal
   * \brief Coefficients for theta or dtheta
   */
@@ -436,19 +427,4 @@ struct gmx_pme_t
  
  //! @endcond
  
-/*! \brief
- * Finds out if PME is currently running on GPU.
- * TODO: should this be removed eventually?
- *
- * \param[in] pme  The PME structure.
- * \returns        True if PME runs on GPU currently, false otherwise.
- */
-inline bool pme_gpu_active(const gmx_pme_t* pme)
-{
-    return (pme != nullptr) && (pme->runMode != PmeRunMode::CPU);
-}
-
-/*! \brief Tell our PME-only node to switch to a new grid size */
-void gmx_pme_send_switchgrid(const t_commrec* cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj);
-
  #endif
diff --git a/src/gromacs/ewald/pme_load_balancing.cpp b/src/gromacs/ewald/pme_load_balancing.cpp

index c551bd7d95f2801139cf99605531872e7e8bf117..33924d5565db1f0b38886e5a56fc5334315770d2 100644 (file)
--- a/src/gromacs/ewald/pme_load_balancing.cpp
+++ b/src/gromacs/ewald/pme_load_balancing.cpp
@@ -80,6 +80,7 @@
  #include "gromacs/utility/strconvert.h"
  
  #include "pme_internal.h"
+#include "pme_pp.h"
  
  /*! \brief Parameters and settings for one PP-PME setup */
  struct pme_setup_t
diff --git a/src/gromacs/ewald/pme_only.cpp b/src/gromacs/ewald/pme_only.cpp

index babba1b559339ef5d95eeeec4e6cc7b4fb914923..9ca5df248cbd6991a3c2f83d2966fc341b7ecb76 100644 (file)
--- a/src/gromacs/ewald/pme_only.cpp
+++ b/src/gromacs/ewald/pme_only.cpp
@@ -60,6 +60,8 @@
  
  #include "gmxpre.h"
  
+#include "pme_only.h"
+
  #include "config.h"
  
  #include <cassert>
@@ -97,7 +99,7 @@
  #include "gromacs/utility/smalloc.h"
  
  #include "pme_gpu_internal.h"
-#include "pme_internal.h"
+#include "pme_output.h"
  #include "pme_pp_communication.h"
  
  /*! \brief environment variable to enable GPU P2P communication */
@@ -193,7 +195,7 @@ static gmx_pme_t* gmx_pmeonly_switch(std::vector<gmx_pme_t*>* pmedata,
      for (auto& pme : *pmedata)
      {
          GMX_ASSERT(pme, "Bad PME tuning list element pointer");
-        if (pme->nkx == grid_size[XX] && pme->nky == grid_size[YY] && pme->nkz == grid_size[ZZ])
+        if (gmx_pme_grid_matches(*pme, grid_size))
          {
              /* Here we have found an existing PME data structure that suits us.
               * However, in the GPU case, we have to reinitialize it - there's only one GPU structure.
diff --git a/src/gromacs/ewald/pme_only.h b/src/gromacs/ewald/pme_only.h

new file mode 100644 (file)

index 0000000..0ed37f1
--- /dev/null
+++ b/src/gromacs/ewald/pme_only.h
@@ -0,0 +1,69 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * running on an MPI rank doing only PME long-ranged work.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \inlibraryapi
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_ONLY_H
+#define GMX_EWALD_PME_ONLY_H
+
+#include <string>
+
+#include "gromacs/timing/walltime_accounting.h"
+
+struct t_commrec;
+struct t_inputrec;
+struct t_nrnb;
+struct gmx_pme_t;
+struct gmx_wallcycle;
+
+enum class PmeRunMode;
+
+/*! \brief Called on the nodes that do PME exclusively */
+int gmx_pmeonly(gmx_pme_t*                pme,
+                const t_commrec*          cr,
+                t_nrnb*                   mynrnb,
+                gmx_wallcycle*            wcycle,
+                gmx_walltime_accounting_t walltime_accounting,
+                t_inputrec*               ir,
+                PmeRunMode                runMode);
+
+#endif
diff --git a/src/gromacs/ewald/pme_output.h b/src/gromacs/ewald/pme_output.h

new file mode 100644 (file)

index 0000000..6567341
--- /dev/null
+++ b/src/gromacs/ewald/pme_output.h
@@ -0,0 +1,70 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Defines a struct useful for transferring the PME output
+ * values
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_OUTPUT_H
+#define GMX_EWALD_PME_OUTPUT_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/arrayref.h"
+
+// TODO There's little value in computing the Coulomb and LJ virial
+// separately, so we should simplify that.
+// TODO The matrices might be best as a view, but not currently
+// possible. Use mdspan?
+struct PmeOutput
+{
+    //! Host staging area for PME forces
+    gmx::ArrayRef<gmx::RVec> forces_;
+    //! True if forces have been staged, otherwise false (when forces are reduced on the GPU).
+    bool haveForceOutput_ = false;
+    //! Host staging area for PME coulomb energy
+    real coulombEnergy_ = 0;
+    //! Host staging area for PME coulomb virial contributions
+    matrix coulombVirial_ = { { 0 } };
+    //! Host staging area for PME LJ energy
+    real lennardJonesEnergy_ = 0;
+    //! Host staging area for PME LJ virial contributions
+    matrix lennardJonesVirial_ = { { 0 } };
+};
+
+#endif
diff --git a/src/gromacs/ewald/pme_pp.cpp b/src/gromacs/ewald/pme_pp.cpp

index 691cee18dc509d4996d05006037097794104d6b2..d22f91e1de4468895f18dc7a1fd38a37151a2a87 100644 (file)
--- a/src/gromacs/ewald/pme_pp.cpp
+++ b/src/gromacs/ewald/pme_pp.cpp
@@ -47,6 +47,8 @@
  
  #include "gmxpre.h"
  
+#include "pme_pp.h"
+
  #include "config.h"
  
  #include <cstdio>
@@ -71,7 +73,6 @@
  #include "gromacs/utility/gmxmpi.h"
  #include "gromacs/utility/smalloc.h"
  
-#include "pme_internal.h"
  #include "pme_pp_communication.h"
  
  /*! \brief Block to wait for communication to PME ranks to complete
diff --git a/src/gromacs/ewald/pme_pp.h b/src/gromacs/ewald/pme_pp.h

new file mode 100644 (file)

index 0000000..d85880a
--- /dev/null
+++ b/src/gromacs/ewald/pme_pp.h
@@ -0,0 +1,114 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief This file contains function declarations necessary for
+ * managing the PP side of PME-only ranks.
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_PME_PP_H
+#define GMX_EWALD_PME_PP_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/basedefinitions.h"
+
+struct gmx_wallcycle;
+struct interaction_const_t;
+struct t_commrec;
+struct t_forcerec;
+
+class GpuEventSynchronizer;
+
+namespace gmx
+{
+class ForceWithVirial;
+class PmePpCommGpu;
+} // namespace gmx
+
+/*! \brief Send the charges and maxshift to our PME-only node. */
+void gmx_pme_send_parameters(const t_commrec*           cr,
+                             const interaction_const_t* ic,
+                             gmx_bool                   bFreeEnergy_q,
+                             gmx_bool                   bFreeEnergy_lj,
+                             real*                      chargeA,
+                             real*                      chargeB,
+                             real*                      sqrt_c6A,
+                             real*                      sqrt_c6B,
+                             real*                      sigmaA,
+                             real*                      sigmaB,
+                             int                        maxshift_x,
+                             int                        maxshift_y);
+
+/*! \brief Send the coordinates to our PME-only node and request a PME calculation */
+void gmx_pme_send_coordinates(t_forcerec*           fr,
+                              const t_commrec*      cr,
+                              const matrix          box,
+                              const rvec*           x,
+                              real                  lambda_q,
+                              real                  lambda_lj,
+                              gmx_bool              bEnerVir,
+                              int64_t               step,
+                              bool                  useGpuPmePpComms,
+                              bool                  reinitGpuPmePpComms,
+                              bool                  sendCoordinatesFromGpu,
+                              GpuEventSynchronizer* coordinatesReadyOnDeviceEvent,
+                              gmx_wallcycle*        wcycle);
+
+/*! \brief Tell our PME-only node to finish */
+void gmx_pme_send_finish(const t_commrec* cr);
+
+/*! \brief Tell our PME-only node to reset all cycle and flop counters */
+void gmx_pme_send_resetcounters(const t_commrec* cr, int64_t step);
+
+/*! \brief PP nodes receive the long range forces from the PME nodes */
+void gmx_pme_receive_f(gmx::PmePpCommGpu*    pmePpCommGpu,
+                       const t_commrec*      cr,
+                       gmx::ForceWithVirial* forceWithVirial,
+                       real*                 energy_q,
+                       real*                 energy_lj,
+                       real*                 dvdlambda_q,
+                       real*                 dvdlambda_lj,
+                       bool                  useGpuPmePpComms,
+                       bool                  receivePmeForceToGpu,
+                       float*                pme_cycles);
+
+/*! \brief Tell our PME-only node to switch to a new grid size */
+void gmx_pme_send_switchgrid(const t_commrec* cr, ivec grid_size, real ewaldcoeff_q, real ewaldcoeff_lj);
+
+#endif
diff --git a/src/gromacs/ewald/pme_solve.cpp b/src/gromacs/ewald/pme_solve.cpp

index aa6a69670eebb42984002987565758204e5dde5f..755802abc11fe3391c48fb70a43bd188b2f0ee80 100644 (file)
--- a/src/gromacs/ewald/pme_solve.cpp
+++ b/src/gromacs/ewald/pme_solve.cpp
@@ -53,6 +53,7 @@
  #include "gromacs/utility/smalloc.h"
  
  #include "pme_internal.h"
+#include "pme_output.h"
  
  #if GMX_SIMD_HAVE_REAL
  /* Turn on arbitrary width SIMD intrinsics for PME solve */
diff --git a/src/gromacs/ewald/pme_spread.cpp b/src/gromacs/ewald/pme_spread.cpp

index c6a057a1986e3303aa1cba59381a0453cf8f6443..c5711797a46c428be93b15126c7c886b4a036b5a 100644 (file)
--- a/src/gromacs/ewald/pme_spread.cpp
+++ b/src/gromacs/ewald/pme_spread.cpp
@@ -58,6 +58,7 @@
  #include "pme_internal.h"
  #include "pme_simd.h"
  #include "pme_spline_work.h"
+#include "spline_vectors.h"
  
  /* TODO consider split of pme-spline from this file */
  
diff --git a/src/gromacs/ewald/spline_vectors.h b/src/gromacs/ewald/spline_vectors.h

new file mode 100644 (file)

index 0000000..24b0b55
--- /dev/null
+++ b/src/gromacs/ewald/spline_vectors.h
@@ -0,0 +1,53 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief This file declares a type useful for spline vectors
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_ewald
+ */
+
+#ifndef GMX_EWALD_SPLINE_VECTORS_H
+#define GMX_EWALD_SPLINE_VECTORS_H
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/real.h"
+
+/*! \brief Helper typedef for spline vectors */
+typedef real* splinevec[DIM];
+
+#endif
diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp

index 57bed345c4573ce8cd395cad7b6407b6afeae56d..543dff85ff7c38e5e0a9b5c62e89b70e611056ce 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.cpp
+++ b/src/gromacs/ewald/tests/pmetestcommon.cpp
@@ -50,6 +50,7 @@
  #include "gromacs/domdec/domdec.h"
  #include "gromacs/ewald/pme_gather.h"
  #include "gromacs/ewald/pme_gpu_internal.h"
+#include "gromacs/ewald/pme_gpu_staging.h"
  #include "gromacs/ewald/pme_grid.h"
  #include "gromacs/ewald/pme_internal.h"
  #include "gromacs/ewald/pme_redistribute.h"
@@ -104,7 +105,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder)
  PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
                                const CodePath           mode,
                                const gmx_device_info_t* gpuInfo,
-                              PmeGpuProgramHandle      pmeGpuProgram,
+                              const PmeGpuProgram*     pmeGpuProgram,
                                const Matrix3x3&         box,
                                const real               ewaldCoeff_q,
                                const real               ewaldCoeff_lj)
@@ -149,7 +150,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
  PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
                              const CodePath           mode,
                              const gmx_device_info_t* gpuInfo,
-                            PmeGpuProgramHandle      pmeGpuProgram,
+                            const PmeGpuProgram*     pmeGpuProgram,
                              const Matrix3x3&         box,
                              real                     ewaldCoeff_q,
                              real                     ewaldCoeff_lj)
@@ -505,7 +506,7 @@ void pmeSetGridLineIndices(gmx_pme_t* pme, CodePath mode, const GridLineIndicesV
      switch (mode)
      {
          case CodePath::GPU:
-            memcpy(pme->gpu->staging.h_gridlineIndices, gridLineIndices.data(),
+            memcpy(pme_gpu_staging(pme->gpu).h_gridlineIndices, gridLineIndices.data(),
                     atomCount * sizeof(gridLineIndices[0]));
              break;
  
@@ -622,7 +623,7 @@ GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t* pme, CodePath mode)
      {
          case CodePath::GPU:
              gridLineIndices = arrayRefFromArray(
-                    reinterpret_cast<IVec*>(pme->gpu->staging.h_gridlineIndices), atomCount);
+                    reinterpret_cast<IVec*>(pme_gpu_staging(pme->gpu).h_gridlineIndices), atomCount);
              break;
  
          case CodePath::CPU: gridLineIndices = atc->idx; break;
diff --git a/src/gromacs/ewald/tests/pmetestcommon.h b/src/gromacs/ewald/tests/pmetestcommon.h

index 1679dfb6c157da961fa62fff821c856f389a1186..870b9f7aa67fe3d9ded76cca30203a1b4d628c62 100644 (file)
--- a/src/gromacs/ewald/tests/pmetestcommon.h
+++ b/src/gromacs/ewald/tests/pmetestcommon.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -122,7 +122,7 @@ uint64_t getSplineModuliDoublePrecisionUlps(int splineOrder);
  PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
                                CodePath                 mode,
                                const gmx_device_info_t* gpuInfo,
-                              PmeGpuProgramHandle      pmeGpuProgram,
+                              const PmeGpuProgram*     pmeGpuProgram,
                                const Matrix3x3&         box,
                                real                     ewaldCoeff_q  = 1.0F,
                                real                     ewaldCoeff_lj = 1.0F);
@@ -130,7 +130,7 @@ PmeSafePointer pmeInitWrapper(const t_inputrec*        inputRec,
  PmeSafePointer pmeInitEmpty(const t_inputrec*        inputRec,
                              CodePath                 mode          = CodePath::CPU,
                              const gmx_device_info_t* gpuInfo       = nullptr,
-                            PmeGpuProgramHandle      pmeGpuProgram = nullptr,
+                            const PmeGpuProgram*     pmeGpuProgram = nullptr,
                              const Matrix3x3& box = { { 1.0F, 0.0F, 0.0F, 0.0F, 1.0F, 0.0F, 0.0F, 0.0F, 1.0F } },
                              real             ewaldCoeff_q  = 0.0F,
                              real             ewaldCoeff_lj = 0.0F);
diff --git a/src/gromacs/ewald/tests/testhardwarecontexts.h b/src/gromacs/ewald/tests/testhardwarecontexts.h

index 20d208d870bb0d76263d184bb09af189ec67161d..e7d49c5a79553fe235b0758df94f72c8f6aff9ce 100644 (file)
--- a/src/gromacs/ewald/tests/testhardwarecontexts.h
+++ b/src/gromacs/ewald/tests/testhardwarecontexts.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -90,7 +90,7 @@ public:
      //! Returns the device info pointer
      const gmx_device_info_t* getDeviceInfo() const { return deviceInfo_; }
      //! Returns the persistent PME GPU kernels
-    PmeGpuProgramHandle getPmeGpuProgram() const { return program_.get(); }
+    const PmeGpuProgram* getPmeGpuProgram() const { return program_.get(); }
      //! Constructs the context
      TestHardwareContext(CodePath codePath, const char* description, const gmx_device_info_t* deviceInfo) :
          codePath_(codePath),
diff --git a/src/gromacs/mdlib/resethandler.cpp b/src/gromacs/mdlib/resethandler.cpp

index 3625d4a0dd371cea24de19c54a96ee596bed001d..7e807061ac970663c1c93cc43519a220dbb447db 100644 (file)
--- a/src/gromacs/mdlib/resethandler.cpp
+++ b/src/gromacs/mdlib/resethandler.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -47,6 +47,7 @@
  #include "gromacs/domdec/domdec.h"
  #include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/gmxlib/nrnb.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/mdrunutility/printtime.h"
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index 2f0018f4d12e188329570591505d4b2af24df3e1..23f13f66110b5272514dd0770da0bf94a0cacf5e 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -53,6 +53,7 @@
  #include "gromacs/domdec/partition.h"
  #include "gromacs/essentialdynamics/edsam.h"
  #include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/ewald/pme_pp_comm_gpu.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp

index 99421cbd9a3249b76d47c17c3a57a390633f83d0..15cf25ac1d98cc77284db1ab5d500c655f91e82c 100644 (file)
--- a/src/gromacs/mdrun/md.cpp
+++ b/src/gromacs/mdrun/md.cpp
@@ -61,8 +61,8 @@
  #include "gromacs/domdec/mdsetup.h"
  #include "gromacs/domdec/partition.h"
  #include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/fileio/trxio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
diff --git a/src/gromacs/mdrun/mimic.cpp b/src/gromacs/mdrun/mimic.cpp

index 6c2e4e7ba57a1fcdb9f9fc381b449122cbd2ea24..a934bdbffa7801971ed8c208d4ff203efcc34512 100644 (file)
--- a/src/gromacs/mdrun/mimic.cpp
+++ b/src/gromacs/mdrun/mimic.cpp
@@ -60,8 +60,8 @@
  #include "gromacs/domdec/mdsetup.h"
  #include "gromacs/domdec/partition.h"
  #include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/fileio/trxio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
diff --git a/src/gromacs/mdrun/minimize.cpp b/src/gromacs/mdrun/minimize.cpp

index 99d846757909c89c471abd2ad32c864201c33997..93cf1c19e1fdc3593c3f9772af5165c25c9b8082 100644 (file)
--- a/src/gromacs/mdrun/minimize.cpp
+++ b/src/gromacs/mdrun/minimize.cpp
@@ -61,7 +61,7 @@
  #include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/domdec/mdsetup.h"
  #include "gromacs/domdec/partition.h"
-#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/fileio/confio.h"
  #include "gromacs/fileio/mtxio.h"
  #include "gromacs/gmxlib/network.h"
diff --git a/src/gromacs/mdrun/rerun.cpp b/src/gromacs/mdrun/rerun.cpp

index bbf7655135be424db65a9dbc3c233ab01dc5e7f2..6b57ab32cda213b42f989f68523c3b85f6f9db28 100644 (file)
--- a/src/gromacs/mdrun/rerun.cpp
+++ b/src/gromacs/mdrun/rerun.cpp
@@ -61,8 +61,8 @@
  #include "gromacs/domdec/mdsetup.h"
  #include "gromacs/domdec/partition.h"
  #include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/fileio/trxio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index fe26bd7d1d03bf0a1b3e9149085f81effcc8b954..deb9c503dd636310f84a9a204c1fce96927ef08a 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -64,8 +64,8 @@
  #include "gromacs/domdec/localatomsetmanager.h"
  #include "gromacs/domdec/partition.h"
  #include "gromacs/ewald/ewald_utils.h"
-#include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_gpu_program.h"
+#include "gromacs/ewald/pme_only.h"
  #include "gromacs/ewald/pme_pp_comm_gpu.h"
  #include "gromacs/fileio/checkpoint.h"
  #include "gromacs/fileio/gmxfio.h"
diff --git a/src/gromacs/modularsimulator/modularsimulator.cpp b/src/gromacs/modularsimulator/modularsimulator.cpp

index b116f29b2cfc8029a035dd396ea493017ec4fa03..353ea9c00e7d4090339ba5ac5d5c09df249b9853 100644 (file)
--- a/src/gromacs/modularsimulator/modularsimulator.cpp
+++ b/src/gromacs/modularsimulator/modularsimulator.cpp
@@ -47,6 +47,7 @@
  #include "gromacs/domdec/domdec.h"
  #include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
  #include "gromacs/math/vec.h"
diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp

index 285adf87e76f72a4ebf18bdab31c12cb35e11f74..4b5aadda1b60231523267cc1c7f2e66f478329bb 100644 (file)
--- a/src/programs/mdrun/tests/pmetest.cpp
+++ b/src/programs/mdrun/tests/pmetest.cpp
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -126,6 +126,7 @@ void PmeTest::runTest(const RunModesList& runModes)
  
      for (const auto& mode : runModes)
      {
+        SCOPED_TRACE("mdrun " + joinStrings(mode.second, " "));
          auto modeTargetsGpus = (mode.first.find("Gpu") != std::string::npos);
          if (modeTargetsGpus && !s_hasCompatibleGpus)
          {
author	Mark Abraham <mark.j.abraham@gmail.com>
	Sun, 5 Jan 2020 21:16:52 +0000 (22:16 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Thu, 9 Jan 2020 15:37:01 +0000 (16:37 +0100)
src/gromacs/domdec/partition.cpp		patch \| blob \| history
src/gromacs/ewald/calculate_spline_moduli.cpp		patch \| blob \| history
src/gromacs/ewald/calculate_spline_moduli.h		patch \| blob \| history
src/gromacs/ewald/long_range_correction.cpp		patch \| blob \| history
src/gromacs/ewald/pme.cpp		patch \| blob \| history
src/gromacs/ewald/pme.h		patch \| blob \| history
src/gromacs/ewald/pme_coordinate_receiver_gpu.h		patch \| blob \| history
src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cpp		patch \| blob \| history
src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.cu		patch \| blob \| history
src/gromacs/ewald/pme_coordinate_receiver_gpu_impl.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_3dfft.cu		patch \| blob \| history
src/gromacs/ewald/pme_gpu_3dfft_ocl.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_internal.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_internal.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_program.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_settings.h	[new file with mode: 0644]	patch \| blob
src/gromacs/ewald/pme_gpu_staging.h	[new file with mode: 0644]	patch \| blob
src/gromacs/ewald/pme_gpu_timings.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_timings.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_types.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_types_host.h		patch \| blob \| history
src/gromacs/ewald/pme_internal.h		patch \| blob \| history
src/gromacs/ewald/pme_load_balancing.cpp		patch \| blob \| history
src/gromacs/ewald/pme_only.cpp		patch \| blob \| history
src/gromacs/ewald/pme_only.h	[new file with mode: 0644]	patch \| blob
src/gromacs/ewald/pme_output.h	[new file with mode: 0644]	patch \| blob
src/gromacs/ewald/pme_pp.cpp		patch \| blob \| history
src/gromacs/ewald/pme_pp.h	[new file with mode: 0644]	patch \| blob
src/gromacs/ewald/pme_solve.cpp		patch \| blob \| history
src/gromacs/ewald/pme_spread.cpp		patch \| blob \| history
src/gromacs/ewald/spline_vectors.h	[new file with mode: 0644]	patch \| blob
src/gromacs/ewald/tests/pmetestcommon.cpp		patch \| blob \| history
src/gromacs/ewald/tests/pmetestcommon.h		patch \| blob \| history
src/gromacs/ewald/tests/testhardwarecontexts.h		patch \| blob \| history
src/gromacs/mdlib/resethandler.cpp		patch \| blob \| history
src/gromacs/mdlib/sim_util.cpp		patch \| blob \| history
src/gromacs/mdrun/md.cpp		patch \| blob \| history
src/gromacs/mdrun/mimic.cpp		patch \| blob \| history
src/gromacs/mdrun/minimize.cpp		patch \| blob \| history
src/gromacs/mdrun/rerun.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/modularsimulator/modularsimulator.cpp		patch \| blob \| history
src/programs/mdrun/tests/pmetest.cpp		patch \| blob \| history