Stop PME GPU code from compiling on non-CUDA builds
author     Aleksei Iupinov <a.yupinov@gmail.com>
           Tue, 27 Mar 2018 11:18:51 +0000 (13:18 +0200)
committer  Aleksei Iupinov <a.yupinov@gmail.com>
           Thu, 12 Apr 2018 14:50:38 +0000 (16:50 +0200)
On non-CUDA builds, all entry points into the PME GPU code are
turned into stubs, and pme-gpu.cpp and pme-gpu-internal.cpp are
excluded from compilation. Several common functions (such as the
check for GPU support at the end of PME initialization) are moved
to pme.cpp so that they still work in non-GPU builds.

Change-Id: Iaa9ea7b3e42665c60b788e76510a810457a69f20
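The stubbing relies on the CUDA_FUNC_QUALIFIER / CUDA_FUNC_ARGUMENT /
CUDA_FUNC_TERM / CUDA_FUNC_TERM_WITH_RETURN macros from
gromacs/gpu_utils/gpu_macros.h, newly applied to the declarations below.
A minimal sketch of the pattern (simplified for illustration; the exact
macro definitions in the tree may differ in detail, e.g. in how they mark
the stub arguments as unused):

    // Sketch of the stub pattern behind gpu_macros.h (assumed, simplified).
    #if GMX_GPU == GMX_GPU_CUDA
        /* CUDA build: the declaration becomes a normal prototype,
         * implemented in a .cu translation unit. */
        #define CUDA_FUNC_QUALIFIER
        #define CUDA_FUNC_ARGUMENT(arg) arg
        #define CUDA_FUNC_TERM ;
        #define CUDA_FUNC_TERM_WITH_RETURN(arg) ;
    #else
        /* Non-CUDA build: the declaration becomes an empty static stub
         * defined directly in the header, so no GPU sources need to be
         * compiled or linked. */
        #define CUDA_FUNC_QUALIFIER static
        #define CUDA_FUNC_ARGUMENT(arg) arg
        #define CUDA_FUNC_TERM {}
        #define CUDA_FUNC_TERM_WITH_RETURN(arg) { return (arg); }
    #endif

    // With these definitions, a header line such as
    //   CUDA_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu)) CUDA_FUNC_TERM
    // expands to the prototype "void pme_gpu_destroy(PmeGpu *pmeGpu);" on CUDA
    // builds, and to the empty stub "static void pme_gpu_destroy(PmeGpu *pmeGpu) {}"
    // otherwise.

This is also why pme_gpu_set_testing() and gmx_pme_init() gain null/active
guards below: on non-CUDA builds the stubs do nothing, so the common code
paths must tolerate an absent PmeGpu.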

src/gromacs/ewald/CMakeLists.txt
src/gromacs/ewald/pme-gpu-internal.cpp
src/gromacs/ewald/pme-gpu-internal.h
src/gromacs/ewald/pme-gpu.cpp
src/gromacs/ewald/pme.cpp
src/gromacs/ewald/pme.h
src/gromacs/ewald/tests/pmetestcommon.cpp
src/gromacs/mdlib/sim_util.cpp

diff --git a/src/gromacs/ewald/CMakeLists.txt b/src/gromacs/ewald/CMakeLists.txt
index 05210bda317462254b81c587ebd8aa6c461cd198..2c53c77a20d0e9b05d46bc0318ace5eb4f280c51 100644
@@ -1,7 +1,7 @@
 #
 # This file is part of the GROMACS molecular simulation package.
 #
-# Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
+# Copyright (c) 2014,2015,2016,2017,2018, by the GROMACS development team, led by
 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 # and including many others, as listed in the AUTHORS file in the
 # top-level source directory and at http://www.gromacs.org.
 # the research papers on the package. Check out http://www.gromacs.org.
 
 file(GLOB EWALD_SOURCES *.cpp)
-set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${EWALD_SOURCES} PARENT_SCOPE)
+file(GLOB EWALD_GPU_SHARED_SOURCES pme-gpu*.cpp)
+file(GLOB EWALD_CUDA_SOURCES *.cu)
+
 if (GMX_USE_CUDA)
-    file(GLOB EWALD_CUDA_SOURCES *.cu)
     gmx_add_libgromacs_sources(${EWALD_CUDA_SOURCES})
+else ()
+    # Removing the GPU PME plain C++ files from the build target.
+    # TODO: do not remove them for GMX_USE_OPENCL builds.
+    foreach (GPU_ONLY_SOURCE ${EWALD_GPU_SHARED_SOURCES})
+        list(REMOVE_ITEM EWALD_SOURCES ${GPU_ONLY_SOURCE})
+    endforeach()
 endif()
 
+set(LIBGROMACS_SOURCES ${LIBGROMACS_SOURCES} ${EWALD_SOURCES} PARENT_SCOPE)
+
 if (BUILD_TESTING)
     add_subdirectory(tests)
 endif()
diff --git a/src/gromacs/ewald/pme-gpu-internal.cpp b/src/gromacs/ewald/pme-gpu-internal.cpp
index f215ab44fc57de6252c48ac0f311d4118b48c8c2..2a6fa22f0d12e72b8a0fbfb0f21339a7c97d786d 100644
@@ -46,8 +46,6 @@
 
 #include "pme-gpu-internal.h"
 
-#include "config.h"
-
 #include <list>
 #include <string>
 
@@ -219,53 +217,6 @@ static void pme_gpu_copy_common_data_from(const gmx_pme_t *pme)
     pmeGpu->common->boxScaler = pme->boxScaler;
 }
 
-/*! \brief \libinternal
- * Finds out if PME with given inputs is possible to run on GPU.
- *
- * \param[in]  pme          The PME structure.
- * \param[out] error        The error message if the input is not supported on GPU.
- * \returns                 True if this PME input is possible to run on GPU, false otherwise.
- */
-static bool pme_gpu_check_restrictions(const gmx_pme_t *pme, std::string *error)
-{
-    std::list<std::string> errorReasons;
-    if (pme->nnodes != 1)
-    {
-        errorReasons.push_back("PME decomposition");
-    }
-    if (pme->pme_order != 4)
-    {
-        errorReasons.push_back("interpolation orders other than 4");
-    }
-    if (pme->bFEP)
-    {
-        errorReasons.push_back("free energy calculations (multiple grids)");
-    }
-    if (pme->doLJ)
-    {
-        errorReasons.push_back("Lennard-Jones PME");
-    }
-#if GMX_DOUBLE
-    {
-        errorReasons.push_back("double precision");
-    }
-#endif
-#if GMX_GPU != GMX_GPU_CUDA
-    {
-        errorReasons.push_back("non-CUDA build of GROMACS");
-    }
-#endif
-
-    bool inputSupported = errorReasons.empty();
-    if (!inputSupported && error)
-    {
-        std::string regressionTestMarker = "PME GPU does not support";
-        // this prefix is tested for in the regression tests script gmxtest.pl
-        *error = regressionTestMarker + ": " + gmx::joinStrings(errorReasons, "; ") + ".";
-    }
-    return inputSupported;
-}
-
 /*! \libinternal \brief
  * Initializes the PME GPU data at the beginning of the run.
  *
@@ -274,13 +225,6 @@ static bool pme_gpu_check_restrictions(const gmx_pme_t *pme, std::string *error)
  */
 static void pme_gpu_init(gmx_pme_t *pme, gmx_device_info_t *gpuInfo)
 {
-    std::string errorString;
-    bool        canRunOnGpu = pme_gpu_check_restrictions(pme, &errorString);
-    if (!canRunOnGpu)
-    {
-        GMX_THROW(gmx::NotImplementedError(errorString));
-    }
-
     pme->gpu          = new PmeGpu();
     PmeGpu *pmeGpu = pme->gpu;
     changePinningPolicy(&pmeGpu->staging.h_forces, gmx::PinningPolicy::CanBePinned);
diff --git a/src/gromacs/ewald/pme-gpu-internal.h b/src/gromacs/ewald/pme-gpu-internal.h
index 1622c76b2e041ab5484cc9d7b4ed80f90e449f03..38de16523b94be48e7950acc34413e972210fd1a 100644
@@ -555,8 +555,11 @@ inline bool pme_gpu_performs_solve(const PmeGpu *pmeGpu)
  */
 inline void pme_gpu_set_testing(PmeGpu *pmeGpu, bool testing)
 {
-    pmeGpu->settings.copyAllOutputs = testing;
-    pmeGpu->settings.transferKind   = testing ? GpuApiCallBehavior::Sync : GpuApiCallBehavior::Async;
+    if (pmeGpu)
+    {
+        pmeGpu->settings.copyAllOutputs = testing;
+        pmeGpu->settings.transferKind   = testing ? GpuApiCallBehavior::Sync : GpuApiCallBehavior::Async;
+    }
 }
 
 /*! \libinternal \brief
@@ -578,7 +581,7 @@ inline bool pme_gpu_is_testing(const PmeGpu *pmeGpu)
  * \param[in] pmeGpu             The PME GPU structure.
  * \returns                      The input/output forces.
  */
-gmx::ArrayRef<gmx::RVec> pme_gpu_get_forces(PmeGpu *pmeGpu);
+CUDA_FUNC_QUALIFIER gmx::ArrayRef<gmx::RVec> pme_gpu_get_forces(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu)) CUDA_FUNC_TERM_WITH_RETURN(gmx::EmptyArrayRef())
 
 /*! \libinternal \brief
  * Returns the output virial and energy of the PME solving.
@@ -587,7 +590,9 @@ gmx::ArrayRef<gmx::RVec> pme_gpu_get_forces(PmeGpu *pmeGpu);
  * \param[out] energy            The output energy.
  * \param[out] virial            The output virial matrix.
  */
-void pme_gpu_get_energy_virial(const PmeGpu *pmeGpu, real *energy, matrix virial);
+CUDA_FUNC_QUALIFIER void pme_gpu_get_energy_virial(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu),
+                                                   real *CUDA_FUNC_ARGUMENT(energy),
+                                                   matrix CUDA_FUNC_ARGUMENT(virial)) CUDA_FUNC_TERM
 
 /*! \libinternal \brief
  * Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_computation().
@@ -595,7 +600,8 @@ void pme_gpu_get_energy_virial(const PmeGpu *pmeGpu, real *energy, matrix virial
  * \param[in] pmeGpu         The PME GPU structure.
  * \param[in] box            The unit cell box.
  */
-void pme_gpu_update_input_box(PmeGpu *pmeGpu, const matrix box);
+CUDA_FUNC_QUALIFIER void pme_gpu_update_input_box(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu),
+                                                  const matrix CUDA_FUNC_ARGUMENT(box)) CUDA_FUNC_TERM
 
 /*! \libinternal \brief
  * Finishes the PME GPU computation, waiting for the output forces and/or energy/virial to be copied to the host.
@@ -626,8 +632,11 @@ enum class PmeLayoutTransform
  * \param[in]  dimIndex   Dimension index.
  * \param[in]  transform  Layout transform type
  */
-void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGpu, const pme_atomcomm_t *atc,
-                                        PmeSplineDataType type, int dimIndex, PmeLayoutTransform transform);
+CUDA_FUNC_QUALIFIER void pme_gpu_transform_spline_atom_data(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu),
+                                                            const pme_atomcomm_t *CUDA_FUNC_ARGUMENT(atc),
+                                                            PmeSplineDataType CUDA_FUNC_ARGUMENT(type),
+                                                            int CUDA_FUNC_ARGUMENT(dimIndex),
+                                                            PmeLayoutTransform CUDA_FUNC_ARGUMENT(transform)) CUDA_FUNC_TERM
 
 /*! \libinternal \brief
  * Gets a unique index to an element in a spline parameter buffer (theta/dtheta),
@@ -655,7 +664,9 @@ CUDA_FUNC_QUALIFIER int getSplineParamFullIndex(int CUDA_FUNC_ARGUMENT(order),
  * \param[out] gridSize          Pointer to the grid dimensions to fill in.
  * \param[out] paddedGridSize    Pointer to the padded grid dimensions to fill in.
  */
-void pme_gpu_get_real_grid_sizes(const PmeGpu *pmeGpu, gmx::IVec *gridSize, gmx::IVec *paddedGridSize);
+CUDA_FUNC_QUALIFIER void pme_gpu_get_real_grid_sizes(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu),
+                                                     gmx::IVec *CUDA_FUNC_ARGUMENT(gridSize),
+                                                     gmx::IVec *CUDA_FUNC_ARGUMENT(paddedGridSize)) CUDA_FUNC_TERM
 
 /*! \libinternal \brief
  * (Re-)initializes the PME GPU data at the beginning of the run or on DLB.
@@ -664,14 +675,15 @@ void pme_gpu_get_real_grid_sizes(const PmeGpu *pmeGpu, gmx::IVec *gridSize, gmx:
  * \param[in,out] gpuInfo   The GPU information structure.
  * \throws gmx::NotImplementedError if this generally valid PME structure is not valid for GPU runs.
  */
-void pme_gpu_reinit(gmx_pme_t *pme, gmx_device_info_t *gpuInfo);
+CUDA_FUNC_QUALIFIER void pme_gpu_reinit(gmx_pme_t *CUDA_FUNC_ARGUMENT(pme),
+                                        gmx_device_info_t *CUDA_FUNC_ARGUMENT(gpuInfo)) CUDA_FUNC_TERM
 
 /*! \libinternal \brief
  * Destroys the PME GPU data at the end of the run.
  *
  * \param[in] pmeGpu     The PME GPU structure.
  */
-void pme_gpu_destroy(PmeGpu *pmeGpu);
+CUDA_FUNC_QUALIFIER void pme_gpu_destroy(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu)) CUDA_FUNC_TERM
 
 /*! \libinternal \brief
  * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU.
@@ -683,9 +695,9 @@ void pme_gpu_destroy(PmeGpu *pmeGpu);
  * This is a function that should only be called in the beginning of the run and on domain decomposition.
  * Should be called before the pme_gpu_set_io_ranges.
  */
-void pme_gpu_reinit_atoms(PmeGpu           *pmeGpu,
-                          const int         nAtoms,
-                          const real       *charges);
+CUDA_FUNC_QUALIFIER void pme_gpu_reinit_atoms(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu),
+                                              const int         CUDA_FUNC_ARGUMENT(nAtoms),
+                                              const real       *CUDA_FUNC_ARGUMENT(charges)) CUDA_FUNC_TERM
 
 /*! \brief \libinternal
  * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
diff --git a/src/gromacs/ewald/pme-gpu.cpp b/src/gromacs/ewald/pme-gpu.cpp
index ba53868eb3cdfbe71888d0d4b6de7d2c8e1ef8ac..4f6a4dee8f7f3734d5e91591c72eedf1e3fafd0e 100644
@@ -42,8 +42,6 @@
 
 #include "gmxpre.h"
 
-#include "config.h"
-
 #include <list>
 
 #include "gromacs/ewald/ewald-utils.h"
 #include "pme-internal.h"
 #include "pme-solve.h"
 
-PmeRunMode pme_run_mode(const gmx_pme_t *pme)
-{
-    GMX_ASSERT(pme != nullptr, "Expecting valid PME data pointer");
-    return pme->runMode;
-}
-
-bool pme_gpu_supports_input(const t_inputrec *ir, std::string *error)
-{
-    std::list<std::string> errorReasons;
-    if (!EEL_PME(ir->coulombtype))
-    {
-        errorReasons.push_back("systems that do not use PME for electrostatics");
-    }
-    if (ir->pme_order != 4)
-    {
-        errorReasons.push_back("interpolation orders other than 4");
-    }
-    if (ir->efep != efepNO)
-    {
-        errorReasons.push_back("free energy calculations (multiple grids)");
-    }
-    if (EVDW_PME(ir->vdwtype))
-    {
-        errorReasons.push_back("Lennard-Jones PME");
-    }
-#if GMX_DOUBLE
-    {
-        errorReasons.push_back("double precision");
-    }
-#endif
-#if GMX_GPU != GMX_GPU_CUDA
-    {
-        errorReasons.push_back("non-CUDA build of GROMACS");
-    }
-#endif
-    if (ir->cutoff_scheme == ecutsGROUP)
-    {
-        errorReasons.push_back("group cutoff scheme");
-    }
-    if (EI_TPI(ir->eI))
-    {
-        errorReasons.push_back("test particle insertion");
-    }
-
-    bool inputSupported = errorReasons.empty();
-    if (!inputSupported && error)
-    {
-        std::string regressionTestMarker = "PME GPU does not support";
-        // this prefix is tested for in the regression tests script gmxtest.pl
-        *error = regressionTestMarker + ": " + gmx::joinStrings(errorReasons, "; ") + ".";
-    }
-    return inputSupported;
-}
-
 void pme_gpu_reset_timings(const gmx_pme_t *pme)
 {
     if (pme_gpu_active(pme))
diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp
index 0171fe8f4ebf52d15a72c9d8e30c27114757695c..c28d2d51fb6e7b8c16fd11414aa94b407e432d8f 100644
@@ -81,6 +81,7 @@
 #include <cmath>
 
 #include <algorithm>
+#include <list>
 
 #include "gromacs/ewald/ewald-utils.h"
 #include "gromacs/fft/parallel_3dfft.h"
 #include "pme-spline-work.h"
 #include "pme-spread.h"
 
+bool pme_gpu_supports_input(const t_inputrec *ir, std::string *error)
+{
+    std::list<std::string> errorReasons;
+    if (!EEL_PME(ir->coulombtype))
+    {
+        errorReasons.push_back("systems that do not use PME for electrostatics");
+    }
+    if (ir->pme_order != 4)
+    {
+        errorReasons.push_back("interpolation orders other than 4");
+    }
+    if (ir->efep != efepNO)
+    {
+        errorReasons.push_back("free energy calculations (multiple grids)");
+    }
+    if (EVDW_PME(ir->vdwtype))
+    {
+        errorReasons.push_back("Lennard-Jones PME");
+    }
+#if GMX_DOUBLE
+    {
+        errorReasons.push_back("double precision");
+    }
+#endif
+#if GMX_GPU != GMX_GPU_CUDA
+    {
+        errorReasons.push_back("non-CUDA build of GROMACS");
+    }
+#endif
+    if (ir->cutoff_scheme == ecutsGROUP)
+    {
+        errorReasons.push_back("group cutoff scheme");
+    }
+    if (EI_TPI(ir->eI))
+    {
+        errorReasons.push_back("test particle insertion");
+    }
+
+    bool inputSupported = errorReasons.empty();
+    if (!inputSupported && error)
+    {
+        std::string regressionTestMarker = "PME GPU does not support";
+        // this prefix is tested for in the regression tests script gmxtest.pl
+        *error = regressionTestMarker + ": " + gmx::joinStrings(errorReasons, "; ") + ".";
+    }
+    return inputSupported;
+}
+
+/*! \brief \libinternal
+ * Finds out whether PME with the given inputs can run on a GPU.
+ * This function is an internal final check, validating the whole PME structure on creation,
+ * but it still duplicates the preliminary checks of the externally exposed pme_gpu_supports_input() above - just in case.
+ *
+ * \param[in]  pme          The PME structure.
+ * \param[out] error        The error message if the input is not supported on GPU.
+ * \returns                 True if this PME input is possible to run on GPU, false otherwise.
+ */
+static bool pme_gpu_check_restrictions(const gmx_pme_t *pme, std::string *error)
+{
+    std::list<std::string> errorReasons;
+    if (pme->nnodes != 1)
+    {
+        errorReasons.push_back("PME decomposition");
+    }
+    if (pme->pme_order != 4)
+    {
+        errorReasons.push_back("interpolation orders other than 4");
+    }
+    if (pme->bFEP)
+    {
+        errorReasons.push_back("free energy calculations (multiple grids)");
+    }
+    if (pme->doLJ)
+    {
+        errorReasons.push_back("Lennard-Jones PME");
+    }
+#if GMX_DOUBLE
+    {
+        errorReasons.push_back("double precision");
+    }
+#endif
+#if GMX_GPU != GMX_GPU_CUDA
+    {
+        errorReasons.push_back("non-CUDA build of GROMACS");
+    }
+#endif
+
+    bool inputSupported = errorReasons.empty();
+    if (!inputSupported && error)
+    {
+        std::string regressionTestMarker = "PME GPU does not support";
+        // this prefix is tested for in the regression tests script gmxtest.pl
+        *error = regressionTestMarker + ": " + gmx::joinStrings(errorReasons, "; ") + ".";
+    }
+    return inputSupported;
+}
+
+PmeRunMode pme_run_mode(const gmx_pme_t *pme)
+{
+    GMX_ASSERT(pme != nullptr, "Expecting valid PME data pointer");
+    return pme->runMode;
+}
+
 /*! \brief Number of bytes in a cache line.
  *
  * Must also be a multiple of the SIMD and SIMD4 register size, to
@@ -832,7 +936,21 @@ gmx_pme_t *gmx_pme_init(const t_commrec     *cr,
     pme->lb_buf2       = nullptr;
     pme->lb_buf_nalloc = 0;
 
-    pme_gpu_reinit(pme.get(), gpuInfo);
+    if (pme_gpu_active(pme.get()))
+    {
+        if (!pme->gpu)
+        {
+            // Initial check of validity of the data
+            std::string errorString;
+            bool        canRunOnGpu = pme_gpu_check_restrictions(pme.get(), &errorString);
+            if (!canRunOnGpu)
+            {
+                GMX_THROW(gmx::NotImplementedError(errorString));
+            }
+        }
+
+        pme_gpu_reinit(pme.get(), gpuInfo);
+    }
 
     pme_init_all_work(&pme->solve_work, pme->nthread, pme->nkx);
 
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h
index 6533eebb970e149b713c174c3940b70ffaab468c..073f2881232053e475166e91a2ad83794eb63315 100644
@@ -50,6 +50,7 @@
 
 #include <string>
 
+#include "gromacs/gpu_utils/gpu_macros.h"
 #include "gromacs/math/vectypes.h"
 #include "gromacs/timing/walltime_accounting.h"
 #include "gromacs/utility/arrayref.h"
@@ -266,12 +267,15 @@ inline bool pme_gpu_task_enabled(const gmx_pme_t *pme)
     return (pme != nullptr) && (pme_run_mode(pme) != PmeRunMode::CPU);
 }
 
+// The following functions are all the PME GPU entry points,
+// currently inlining to nothing on non-CUDA builds.
+
 /*! \brief
  * Resets the PME GPU timings. To be called at the reset step.
  *
  * \param[in] pme            The PME structure.
  */
-void pme_gpu_reset_timings(const gmx_pme_t *pme);
+CUDA_FUNC_QUALIFIER void pme_gpu_reset_timings(const gmx_pme_t *CUDA_FUNC_ARGUMENT(pme)) CUDA_FUNC_TERM
 
 /*! \brief
  * Copies the PME GPU timings to the gmx_wallclock_gpu_pme_t structure (for log output). To be called at the run end.
@@ -279,8 +283,8 @@ void pme_gpu_reset_timings(const gmx_pme_t *pme);
  * \param[in] pme               The PME structure.
  * \param[in] timings           The gmx_wallclock_gpu_pme_t structure.
  */
-void pme_gpu_get_timings(const gmx_pme_t         *pme,
-                         gmx_wallclock_gpu_pme_t *timings);
+CUDA_FUNC_QUALIFIER void pme_gpu_get_timings(const gmx_pme_t         *CUDA_FUNC_ARGUMENT(pme),
+                                             gmx_wallclock_gpu_pme_t *CUDA_FUNC_ARGUMENT(timings)) CUDA_FUNC_TERM
 
 /* The main PME GPU functions */
 
@@ -293,11 +297,11 @@ void pme_gpu_get_timings(const gmx_pme_t         *pme,
  * \param[in] flags             The combination of flags to affect this PME computation.
  *                              The flags are the GMX_PME_ flags from pme.h.
  */
-void pme_gpu_prepare_computation(gmx_pme_t      *pme,
-                                 bool            needToUpdateBox,
-                                 const matrix    box,
-                                 gmx_wallcycle  *wcycle,
-                                 int             flags);
+CUDA_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t      *CUDA_FUNC_ARGUMENT(pme),
+                                                     bool            CUDA_FUNC_ARGUMENT(needToUpdateBox),
+                                                     const matrix    CUDA_FUNC_ARGUMENT(box),
+                                                     gmx_wallcycle  *CUDA_FUNC_ARGUMENT(wcycle),
+                                                     int             CUDA_FUNC_ARGUMENT(flags)) CUDA_FUNC_TERM
 
 /*! \brief
  * Launches first stage of PME on GPU - H2D input transfers, spreading kernel, and D2H grid transfer if needed.
@@ -306,9 +310,9 @@ void pme_gpu_prepare_computation(gmx_pme_t      *pme,
  * \param[in] x                 The array of local atoms' coordinates.
  * \param[in] wcycle            The wallclock counter.
  */
-void pme_gpu_launch_spread(gmx_pme_t      *pme,
-                           const rvec     *x,
-                           gmx_wallcycle  *wcycle);
+CUDA_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t      *CUDA_FUNC_ARGUMENT(pme),
+                                               const rvec     *CUDA_FUNC_ARGUMENT(x),
+                                               gmx_wallcycle  *CUDA_FUNC_ARGUMENT(wcycle)) CUDA_FUNC_TERM
 
 /*! \brief
  * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
@@ -316,8 +320,8 @@ void pme_gpu_launch_spread(gmx_pme_t      *pme,
  * \param[in] pme               The PME data structure.
  * \param[in] wcycle            The wallclock counter.
  */
-void pme_gpu_launch_complex_transforms(gmx_pme_t       *pme,
-                                       gmx_wallcycle   *wcycle);
+CUDA_FUNC_QUALIFIER void pme_gpu_launch_complex_transforms(gmx_pme_t       *CUDA_FUNC_ARGUMENT(pme),
+                                                           gmx_wallcycle   *CUDA_FUNC_ARGUMENT(wcycle)) CUDA_FUNC_TERM
 
 /*! \brief
  * Launches last stage of PME on GPU - force gathering and D2H force transfer.
@@ -328,9 +332,9 @@ void pme_gpu_launch_complex_transforms(gmx_pme_t       *pme,
  *                               the output reciprocal forces into the host array, or copies its contents to the GPU first
  *                               and accumulates. The reduction is non-atomic.
  */
-void pme_gpu_launch_gather(const gmx_pme_t        *pme,
-                           gmx_wallcycle          *wcycle,
-                           PmeForceOutputHandling  forceTreatment);
+CUDA_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t        *CUDA_FUNC_ARGUMENT(pme),
+                                               gmx_wallcycle          *CUDA_FUNC_ARGUMENT(wcycle),
+                                               PmeForceOutputHandling  CUDA_FUNC_ARGUMENT(forceTreatment)) CUDA_FUNC_TERM
 
 /*! \brief
  * Blocks until PME GPU tasks are completed, and gets the output forces and virial/energy
@@ -342,11 +346,11 @@ void pme_gpu_launch_gather(const gmx_pme_t        *pme,
  * \param[out] virial         The output virial matrix.
  * \param[out] energy         The output energy.
  */
-void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
-                              gmx_wallcycle                  *wcycle,
-                              gmx::ArrayRef<const gmx::RVec> *forces,
-                              matrix                          virial,
-                              real                           *energy);
+CUDA_FUNC_QUALIFIER void pme_gpu_wait_finish_task(const gmx_pme_t                *CUDA_FUNC_ARGUMENT(pme),
+                                                  gmx_wallcycle                  *CUDA_FUNC_ARGUMENT(wcycle),
+                                                  gmx::ArrayRef<const gmx::RVec> *CUDA_FUNC_ARGUMENT(forces),
+                                                  matrix                          CUDA_FUNC_ARGUMENT(virial),
+                                                  real                           *CUDA_FUNC_ARGUMENT(energy)) CUDA_FUNC_TERM
 /*! \brief
  * Attempts to complete PME GPU tasks.
  *
@@ -368,12 +372,12 @@ void pme_gpu_wait_finish_task(const gmx_pme_t                *pme,
  * \param[in]  completionKind  Indicates whether PME task completion should only be checked rather than waited for
  * \returns                   True if the PME GPU tasks have completed
  */
-bool pme_gpu_try_finish_task(const gmx_pme_t                *pme,
-                             gmx_wallcycle                  *wcycle,
-                             gmx::ArrayRef<const gmx::RVec> *forces,
-                             matrix                          virial,
-                             real                           *energy,
-                             GpuTaskCompletion               completionKind);
+CUDA_FUNC_QUALIFIER bool pme_gpu_try_finish_task(const gmx_pme_t                *CUDA_FUNC_ARGUMENT(pme),
+                                                 gmx_wallcycle                  *CUDA_FUNC_ARGUMENT(wcycle),
+                                                 gmx::ArrayRef<const gmx::RVec> *CUDA_FUNC_ARGUMENT(forces),
+                                                 matrix                          CUDA_FUNC_ARGUMENT(virial),
+                                                 real                           *CUDA_FUNC_ARGUMENT(energy),
+                                                 GpuTaskCompletion               CUDA_FUNC_ARGUMENT(completionKind)) CUDA_FUNC_TERM_WITH_RETURN(false)
 
 
 #endif
diff --git a/src/gromacs/ewald/tests/pmetestcommon.cpp b/src/gromacs/ewald/tests/pmetestcommon.cpp
index a9c831882a35f1b02b4b83482842490c98338193..ca65e70984b6e677180723deb495d31b4dceee62 100644
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2016,2017, by the GROMACS development team, led by
+ * Copyright (c) 2016,2017,2018, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -472,7 +472,7 @@ void pmeSetGridLineIndices(const gmx_pme_t *pme, CodePath mode,
     const size_t                atomCount   = atc->n;
     GMX_RELEASE_ASSERT(atomCount == gridLineIndices.size(), "Mismatch in gridline indices size");
 
-    IVec paddedGridSizeUnused, gridSize;
+    IVec paddedGridSizeUnused, gridSize(0, 0, 0);
     pmeGetRealGridSizesInternal(pme, mode, gridSize, paddedGridSizeUnused);
 
     for (const auto &index : gridLineIndices)
@@ -526,7 +526,7 @@ static void pmeSetGridInternal(const gmx_pme_t *pme, CodePath mode,
                                GridOrdering gridOrdering,
                                const SparseGridValuesInput<ValueType> &gridValues)
 {
-    IVec       gridSize, paddedGridSize;
+    IVec       gridSize(0, 0, 0), paddedGridSize(0, 0, 0);
     ValueType *grid;
     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
 
@@ -622,7 +622,7 @@ GridLineIndicesVector pmeGetGridlineIndices(const gmx_pme_t *pme, CodePath mode)
 template<typename ValueType>
 static SparseGridValuesOutput<ValueType> pmeGetGridInternal(const gmx_pme_t *pme, CodePath mode, GridOrdering gridOrdering)
 {
-    IVec       gridSize, paddedGridSize;
+    IVec       gridSize(0, 0, 0), paddedGridSize(0, 0, 0);
     ValueType *grid;
     pmeGetGridAndSizesInternal<ValueType>(pme, mode, grid, gridSize, paddedGridSize);
     SparseGridValuesOutput<ValueType> gridValues;
@@ -675,7 +675,7 @@ PmeSolveOutput pmeGetReciprocalEnergyAndVirial(const gmx_pme_t *pme, CodePath mo
 {
     real      energy = 0.0f;
     Matrix3x3 virial;
-    matrix    virialTemp; //TODO get rid of
+    matrix    virialTemp = {{0}}; //TODO get rid of
     switch (mode)
     {
         case CodePath::CPU:
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp
index 184db980eb1ab85406c36098c139d6b932d9e2f1..5a82af6181bc807bc4c0ae0f5b56e793c77ea708 100644
@@ -1591,7 +1591,7 @@ static void do_force_cutsVERLET(FILE *fplog, const t_commrec *cr,
     {
         gmx::ArrayRef<const gmx::RVec> pmeGpuForces;
         matrix vir_Q;
-        real   Vlr_q;
+        real   Vlr_q = 0.0;
         pme_gpu_wait_finish_task(fr->pmedata, wcycle, &pmeGpuForces, vir_Q, &Vlr_q);
         pme_gpu_reduce_outputs(wcycle, &forceWithVirial, pmeGpuForces, enerd, vir_Q, Vlr_q);
     }