Template x buffer ops call on whether GPU is used
authorSzilárd Páll <pall.szilard@gmail.com>
Mon, 29 Apr 2019 10:02:32 +0000 (12:02 +0200)
committerMark Abraham <mark.j.abraham@gmail.com>
Wed, 29 May 2019 09:32:43 +0000 (11:32 +0200)
This allows the code to get optimized to avoid the overhead of merged
CPU and GPU codepaths.

Change-Id: If98c5eff327f5f9ecb81f278ca27b6d139bc19ab

src/gromacs/nbnxm/atomdata.cpp
src/gromacs/nbnxm/atomdata.h
src/gromacs/nbnxm/nbnxm.cpp

index 1cb4550003f598d074452a5db8509cd31a6e8b54..fc507739b8d232d7c3de559c3d84d6e38e69d284 100644 (file)
@@ -999,12 +999,12 @@ void nbnxn_atomdata_copy_shiftvec(gmx_bool          bDynamicBox,
 }
 
 /* Copies (and reorders) the coordinates to nbnxn_atomdata_t */
+template <bool useGpu>
 void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet     &gridSet,
                                      const Nbnxm::AtomLocality locality,
                                      gmx_bool                  FillLocal,
                                      const rvec               *x,
                                      nbnxn_atomdata_t         *nbat,
-                                     bool                      useGpu,
                                      gmx_nbnxn_gpu_t          *gpu_nbv,
                                      void                     *xPmeDevicePtr)
 {
@@ -1100,6 +1100,23 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet     &gridSet,
     }
 }
 
+template
+void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
+                                           const Nbnxm::AtomLocality,
+                                           gmx_bool,
+                                           const rvec*,
+                                           nbnxn_atomdata_t *,
+                                           gmx_nbnxn_gpu_t*,
+                                           void *);
+template
+void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
+                                            const Nbnxm::AtomLocality,
+                                            gmx_bool,
+                                            const rvec*,
+                                            nbnxn_atomdata_t *,
+                                            gmx_nbnxn_gpu_t*,
+                                            void *);
+
 static void
 nbnxn_atomdata_clear_reals(gmx::ArrayRef<real> dest,
                            int i0, int i1)
index 7538566d27c133d1d2a85019401108212793dc53..4e4e0c0a55ec831b5dd83731b7844248ea5bf408 100644 (file)
@@ -308,14 +308,31 @@ void nbnxn_atomdata_copy_shiftvec(gmx_bool          dynamic_box,
 /* Copy x to nbat->x.
  * FillLocal tells if the local filler particle coordinates should be zeroed.
  */
-void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet &gridSet,
-                                     Nbnxm::AtomLocality   locality,
-                                     gmx_bool              FillLocal,
-                                     const rvec           *x,
-                                     nbnxn_atomdata_t     *nbat,
-                                     bool                  useGpu,
-                                     gmx_nbnxn_gpu_t      *gpu_nbv,
-                                     void                 *xPmeDevicePtr);
+template <bool useGpu>
+void nbnxn_atomdata_copy_x_to_nbat_x(const Nbnxm::GridSet       &gridSet,
+                                     Nbnxm::AtomLocality         locality,
+                                     gmx_bool                    FillLocal,
+                                     const rvec                 *x,
+                                     nbnxn_atomdata_t           *nbat,
+                                     gmx_nbnxn_gpu_t            *gpu_nbv,
+                                     void                       *xPmeDevicePtr);
+
+extern template
+void nbnxn_atomdata_copy_x_to_nbat_x<true>(const Nbnxm::GridSet &,
+                                           const Nbnxm::AtomLocality,
+                                           gmx_bool,
+                                           const rvec*,
+                                           nbnxn_atomdata_t *,
+                                           gmx_nbnxn_gpu_t*,
+                                           void *);
+extern template
+void nbnxn_atomdata_copy_x_to_nbat_x<false>(const Nbnxm::GridSet &,
+                                            const Nbnxm::AtomLocality,
+                                            gmx_bool,
+                                            const rvec*,
+                                            nbnxn_atomdata_t *,
+                                            gmx_nbnxn_gpu_t*,
+                                            void *);
 
 //! Add the computed forces to \p f, an internal reduction might be performed as well
 void reduceForces(nbnxn_atomdata_t     *nbat,
index 355517864e3a758b7e500d15585ad7e38bfe3b6f..31037b24769ba4cae12d7986497fd36de4706605 100644 (file)
@@ -141,9 +141,15 @@ void nonbonded_verlet_t::setCoordinates(const Nbnxm::AtomLocality       locality
 {
     wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
     wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
-    nbnxn_atomdata_copy_x_to_nbat_x(pairSearch_->gridSet(), locality, fillLocal,
-                                    as_rvec_array(x.data()),
-                                    nbat.get(), useGpu, gpu_nbv, xPmeDevicePtr);
+
+    auto fnPtr = useGpu ?
+        nbnxn_atomdata_copy_x_to_nbat_x<true> :
+        nbnxn_atomdata_copy_x_to_nbat_x<false>;
+
+    fnPtr(pairSearch_->gridSet(), locality, fillLocal,
+          as_rvec_array(x.data()),
+          nbat.get(), gpu_nbv, xPmeDevicePtr);
+
     wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
     wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
 }