Fix CUDA code issues
author Paul Bauer <paul.bauer.q@gmail.com>
Thu, 29 Oct 2020 16:47:33 +0000 (17:47 +0100)
committer Paul Bauer <paul.bauer.q@gmail.com>
Thu, 29 Oct 2020 16:54:25 +0000 (17:54 +0100)
Remove the unused nbnxn_gpu_add_nbat_f_to_f_kernel template.
Fix the member initialization order in the GpuHaloExchange::Impl
constructor and remove the obsolete nzone_ member.
Add the missing newline at the end of device_stream.cu.
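
For background on the initialization-order fix: C++ initializes
non-static data members in declaration order, not in the order they
appear in the member-initializer list, and compilers warn (-Wreorder)
when the two disagree. A minimal standalone sketch of the pattern being
fixed, using a hypothetical Halo type rather than the real
GpuHaloExchange::Impl:

    #include <cstdio>

    struct Halo
    {
        Halo(int dimIndex, int pulse) :
            pulse_(pulse),      // listed first, but runs second
            dimIndex_(dimIndex) // runs first: declared first below
        {
        }
        int dimIndex_ = 0;
        int pulse_    = 0;
    };

    int main()
    {
        Halo halo(1, 0);
        std::printf("%d %d\n", halo.dimIndex_, halo.pulse_);
        return 0;
    }

The first two hunks below apply exactly this fix: dimIndex_(dimIndex)
moves after nonLocalStream_ so the initializer list matches the
declaration order in gpuhaloexchange_impl.cuh.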

src/gromacs/domdec/gpuhaloexchange_impl.cu
src/gromacs/domdec/gpuhaloexchange_impl.cuh
src/gromacs/gpu_utils/device_stream.cu
src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh

src/gromacs/domdec/gpuhaloexchange_impl.cu
index 9efebf69046f39b924781df34245549857c83cd1..e7045d8b2a6fccf05247fc8b46c16d53f4051e72 100644
@@ -464,7 +464,6 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
                             int                  pulse,
                             gmx_wallcycle*       wcycle) :
     dd_(dd),
-    dimIndex_(dimIndex),
     sendRankX_(dd->neighbor[dimIndex][1]),
     recvRankX_(dd->neighbor[dimIndex][0]),
     sendRankF_(dd->neighbor[dimIndex][0]),
@@ -475,6 +474,7 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
     deviceContext_(deviceContext),
     localStream_(localStream),
     nonLocalStream_(nonLocalStream),
+    dimIndex_(dimIndex),
     pulse_(pulse),
     wcycle_(wcycle)
 {
src/gromacs/domdec/gpuhaloexchange_impl.cuh
index 761938a0133c3c5f5645d17e2121d818356ca9a7..5dd619a343fe5827e93c0a9103dd2d47588734f1 100644
@@ -204,8 +204,6 @@ private:
     int dimIndex_ = 0;
     //! The pulse corresponding to this halo exchange instance
     int pulse_ = 0;
-    //! Number of zones. Always 1 for 1-D case.
-    const int nzone_ = 1;
     //! The wallclock counter
     gmx_wallcycle* wcycle_ = nullptr;
     //! The atom offset for receive (x) or send (f) for dimension index and pulse corresponding to this halo exchange instance
src/gromacs/gpu_utils/device_stream.cu
index 0e07b00b2743661558aeb806d24e48ca2d58d17e..cc1f8798622bc30a284cdef90f3967cef8eae88e 100644
@@ -117,4 +117,4 @@ void DeviceStream::synchronize() const
                        gmx::formatString("cudaStreamSynchronize failed  (CUDA error %d: %s).", stat,
                                          cudaGetErrorString(stat))
                                .c_str());
-}
\ No newline at end of file
+}
src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh
index db3fb4a939ddcc5ae1704241fe9028bc61ef06d6..9a9ffc6c1ce8ca46bd4f726eb4fab006e1c2d2cd 100644
@@ -117,53 +117,3 @@ static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
         }
     }
 }
-
-/*! \brief CUDA kernel to sum up the force components
- *
- * \tparam        accumulateForce  If the initial forces in \p gm_fTotal should be saved.
- * \tparam        addPmeForce      Whether the PME force should be added to the total.
- *
- * \param[in]     gm_fNB     Non-bonded forces in nbnxm format.
- * \param[in]     gm_fPme    PME forces.
- * \param[in,out] gm_fTotal  Force buffer to be reduced into.
- * \param[in]     cell       Cell index mapping.
- * \param[in]     atomStart  Start atom index.
- * \param[in]     numAtoms   Number of atoms.
- */
-template<bool accumulateForce, bool addPmeForce>
-static __global__ void nbnxn_gpu_add_nbat_f_to_f_kernel(const float3* __restrict__ gm_fNB,
-                                                        const float3* __restrict__ gm_fPme,
-                                                        float3* gm_fTotal,
-                                                        const int* __restrict__ gm_cell,
-                                                        const int atomStart,
-                                                        const int numAtoms)
-{
-
-    /* map particle-level parallelism to 1D CUDA thread and block index */
-    const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
-
-    /* perform addition for each particle*/
-    if (threadIndex < numAtoms)
-    {
-
-        const int i        = gm_cell[atomStart + threadIndex];
-        float3*   gm_fDest = &gm_fTotal[atomStart + threadIndex];
-        float3    temp;
-
-        if (accumulateForce)
-        {
-            temp = *gm_fDest;
-            temp += gm_fNB[i];
-        }
-        else
-        {
-            temp = gm_fNB[i];
-        }
-        if (addPmeForce)
-        {
-            temp += gm_fPme[atomStart + threadIndex];
-        }
-        *gm_fDest = temp;
-    }
-    return;
-}