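Fix CUDA code issues

Summary of the changes in this commit:
- gpuhaloexchange_impl.cu: move the dimIndex_ initializer in the GpuHaloExchange::Impl constructor so that it comes directly before pulse_.
- gpuhaloexchange_impl.cuh: remove the nzone_ member.
- device_stream.cu: add the missing newline at the end of the file.
- nbnxm_buffer_ops_kernels.cuh: remove the nbnxn_gpu_add_nbat_f_to_f_kernel kernel template.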

author Paul Bauer <paul.bauer.q@gmail.com>

Thu, 29 Oct 2020 16:47:33 +0000 (17:47 +0100)

committer Paul Bauer <paul.bauer.q@gmail.com>

Thu, 29 Oct 2020 16:54:25 +0000 (17:54 +0100)
author Paul Bauer <paul.bauer.q@gmail.com>
Thu, 29 Oct 2020 16:47:33 +0000 (17:47 +0100)
committer Paul Bauer <paul.bauer.q@gmail.com>
Thu, 29 Oct 2020 16:54:25 +0000 (17:54 +0100)
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cu b/src/gromacs/domdec/gpuhaloexchange_impl.cu

index 9efebf69046f39b924781df34245549857c83cd1..e7045d8b2a6fccf05247fc8b46c16d53f4051e72 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cu
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cu
@@ -464,7 +464,6 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
                              int                  pulse,
                              gmx_wallcycle*       wcycle) :
      dd_(dd),
-    dimIndex_(dimIndex),
      sendRankX_(dd->neighbor[dimIndex][1]),
      recvRankX_(dd->neighbor[dimIndex][0]),
      sendRankF_(dd->neighbor[dimIndex][0]),
@@ -475,6 +474,7 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
      deviceContext_(deviceContext),
      localStream_(localStream),
      nonLocalStream_(nonLocalStream),
+    dimIndex_(dimIndex),
      pulse_(pulse),
      wcycle_(wcycle)
  {
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cuh b/src/gromacs/domdec/gpuhaloexchange_impl.cuh

index 761938a0133c3c5f5645d17e2121d818356ca9a7..5dd619a343fe5827e93c0a9103dd2d47588734f1 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cuh
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cuh
@@ -204,8 +204,6 @@ private:
      int dimIndex_ = 0;
      //! The pulse corresponding to this halo exchange instance
      int pulse_ = 0;
-    //! Number of zones. Always 1 for 1-D case.
-    const int nzone_ = 1;
      //! The wallclock counter
      gmx_wallcycle* wcycle_ = nullptr;
      //! The atom offset for receive (x) or send (f) for dimension index and pulse corresponding to this halo exchange instance
diff --git a/src/gromacs/gpu_utils/device_stream.cu b/src/gromacs/gpu_utils/device_stream.cu

index 0e07b00b2743661558aeb806d24e48ca2d58d17e..cc1f8798622bc30a284cdef90f3967cef8eae88e 100644 (file)
--- a/src/gromacs/gpu_utils/device_stream.cu
+++ b/src/gromacs/gpu_utils/device_stream.cu
@@ -117,4 +117,4 @@ void DeviceStream::synchronize() const
                         gmx::formatString("cudaStreamSynchronize failed  (CUDA error %d: %s).", stat,
                                           cudaGetErrorString(stat))
                                 .c_str());
-}
-\ No newline at end of file
+}
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh

index db3fb4a939ddcc5ae1704241fe9028bc61ef06d6..9a9ffc6c1ce8ca46bd4f726eb4fab006e1c2d2cd 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh
+++ b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh
@@ -117,53 +117,3 @@ static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
          }
      }
  }
-
-/*! \brief CUDA kernel to sum up the force components
- *
- * \tparam        accumulateForce  If the initial forces in \p gm_fTotal should be saved.
- * \tparam        addPmeForce      Whether the PME force should be added to the total.
- *
- * \param[in]     gm_fNB     Non-bonded forces in nbnxm format.
- * \param[in]     gm_fPme    PME forces.
- * \param[in,out] gm_fTotal  Force buffer to be reduced into.
- * \param[in]     cell       Cell index mapping.
- * \param[in]     atomStart  Start atom index.
- * \param[in]     numAtoms   Number of atoms.
- */
-template<bool accumulateForce, bool addPmeForce>
-static __global__ void nbnxn_gpu_add_nbat_f_to_f_kernel(const float3* __restrict__ gm_fNB,
-                                                        const float3* __restrict__ gm_fPme,
-                                                        float3* gm_fTotal,
-                                                        const int* __restrict__ gm_cell,
-                                                        const int atomStart,
-                                                        const int numAtoms)
-{
-
-    /* map particle-level parallelism to 1D CUDA thread and block index */
-    const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
-
-    /* perform addition for each particle*/
-    if (threadIndex < numAtoms)
-    {
-
-        const int i        = gm_cell[atomStart + threadIndex];
-        float3*   gm_fDest = &gm_fTotal[atomStart + threadIndex];
-        float3    temp;
-
-        if (accumulateForce)
-        {
-            temp = *gm_fDest;
-            temp += gm_fNB[i];
-        }
-        else
-        {
-            temp = gm_fNB[i];
-        }
-        if (addPmeForce)
-        {
-            temp += gm_fPme[atomStart + threadIndex];
-        }
-        *gm_fDest = temp;
-    }
-    return;
-}
author	Paul Bauer <paul.bauer.q@gmail.com>
	Thu, 29 Oct 2020 16:47:33 +0000 (17:47 +0100)
committer	Paul Bauer <paul.bauer.q@gmail.com>
	Thu, 29 Oct 2020 16:54:25 +0000 (17:54 +0100)
src/gromacs/domdec/gpuhaloexchange_impl.cu		patch \| blob \| history
src/gromacs/domdec/gpuhaloexchange_impl.cuh		patch \| blob \| history
src/gromacs/gpu_utils/device_stream.cu		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh		patch \| blob \| history