Apply clang-format to source tree
[alexxy/gromacs.git] / src / gromacs / mdlib / leapfrog_cuda.cu
index d99ed0cec4bfbfa6a1b853dea01eb84bde7fd5b7..6c4b5b5bf1b49d79aefef0b7f61cdbb8c02c4457 100644 (file)
@@ -83,9 +83,9 @@ constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock;
  */
 enum class NumTempScaleValues
 {
-    None,      //!< No temperature coupling
-    Single,    //!< Single T-scaling value (one group)
-    Multiple   //!< Multiple T-scaling values, need to use T-group indices
+    None,    //!< No temperature coupling
+    Single,  //!< Single T-scaling value (one group)
+    Multiple //!< Multiple T-scaling values, need to use T-group indices
 };
 
 /*! \brief Different variants of the Parrinello-Rahman velocity scaling
@@ -95,9 +95,9 @@ enum class NumTempScaleValues
  */
 enum class VelocityScalingType
 {
-    None,      //!< Do not apply velocity scaling (not a PR-coupling run or step)
-    Diagonal,  //!< Apply velocity scaling using a diagonal matrix
-    Full       //!< Apply velocity scaling using a full matrix
+    None,     //!< Do not apply velocity scaling (not a PR-coupling run or step)
+    Diagonal, //!< Apply velocity scaling using a diagonal matrix
+    Full      //!< Apply velocity scaling using a full matrix
 };
 
 /*! \brief Main kernel for Leap-Frog integrator.
@@ -126,44 +126,44 @@ enum class VelocityScalingType
  * \param[in]     velocityScalingMatrixDiagonal  Diagonal elements of Parrinello-Rahman velocity scaling matrix
  */
 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
-__launch_bounds__(c_maxThreadsPerBlock)
-__global__ void leapfrog_kernel(const int                             numAtoms,
-                                float3* __restrict__                  gm_x,
-                                float3* __restrict__                  gm_xp,
-                                float3* __restrict__                  gm_v,
-                                const float3* __restrict__            gm_f,
-                                const float*  __restrict__            gm_inverseMasses,
-                                const float                           dt,
-                                const float*  __restrict__            gm_lambdas,
-                                const unsigned short*    __restrict__ gm_tempScaleGroups,
-                                const float3                          velocityScalingMatrixDiagonal);
+__launch_bounds__(c_maxThreadsPerBlock) __global__
+        void leapfrog_kernel(const int numAtoms,
+                             float3* __restrict__ gm_x,
+                             float3* __restrict__ gm_xp,
+                             float3* __restrict__ gm_v,
+                             const float3* __restrict__ gm_f,
+                             const float* __restrict__ gm_inverseMasses,
+                             const float dt,
+                             const float* __restrict__ gm_lambdas,
+                             const unsigned short* __restrict__ gm_tempScaleGroups,
+                             const float3 velocityScalingMatrixDiagonal);
 
 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
-__launch_bounds__(c_maxThreadsPerBlock)
-__global__ void leapfrog_kernel(const int                             numAtoms,
-                                float3* __restrict__                  gm_x,
-                                float3* __restrict__                  gm_xp,
-                                float3* __restrict__                  gm_v,
-                                const float3* __restrict__            gm_f,
-                                const float*  __restrict__            gm_inverseMasses,
-                                const float                           dt,
-                                const float*  __restrict__            gm_lambdas,
-                                const unsigned short*    __restrict__ gm_tempScaleGroups,
-                                const float3                          velocityScalingMatrixDiagonal)
+__launch_bounds__(c_maxThreadsPerBlock) __global__
+        void leapfrog_kernel(const int numAtoms,
+                             float3* __restrict__ gm_x,
+                             float3* __restrict__ gm_xp,
+                             float3* __restrict__ gm_v,
+                             const float3* __restrict__ gm_f,
+                             const float* __restrict__ gm_inverseMasses,
+                             const float dt,
+                             const float* __restrict__ gm_lambdas,
+                             const unsigned short* __restrict__ gm_tempScaleGroups,
+                             const float3 velocityScalingMatrixDiagonal)
 {
-    int threadIndex = blockIdx.x*blockDim.x + threadIdx.x;
+    int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
     if (threadIndex < numAtoms)
     {
-        float3 x           = gm_x[threadIndex];
-        float3 v           = gm_v[threadIndex];
-        float3 f           = gm_f[threadIndex];
-        float  im          = gm_inverseMasses[threadIndex];
-        float  imdt        = im*dt;
+        float3 x    = gm_x[threadIndex];
+        float3 v    = gm_v[threadIndex];
+        float3 f    = gm_f[threadIndex];
+        float  im   = gm_inverseMasses[threadIndex];
+        float  imdt = im * dt;
 
         // Swapping places for xp and x so that the x will contain the updated coordinates and xp - the
         // coordinates before update. This should be taken into account when (if) constraints are applied
         // after the update: x and xp have to be passed to constraints in the 'wrong' order.
-        gm_xp[threadIndex]  = x;
+        gm_xp[threadIndex] = x;
 
         if (numTempScaleValues != NumTempScaleValues::None || velocityScaling != VelocityScalingType::None)
         {
@@ -174,7 +174,7 @@ __global__ void leapfrog_kernel(const int                             numAtoms,
                 float lambda = 1.0F;
                 if (numTempScaleValues == NumTempScaleValues::Single)
                 {
-                    lambda    = gm_lambdas[0];
+                    lambda = gm_lambdas[0];
                 }
                 else if (numTempScaleValues == NumTempScaleValues::Multiple)
                 {
@@ -186,20 +186,19 @@ __global__ void leapfrog_kernel(const int                             numAtoms,
 
             if (velocityScaling == VelocityScalingType::Diagonal)
             {
-                vp.x -= velocityScalingMatrixDiagonal.x*v.x;
-                vp.y -= velocityScalingMatrixDiagonal.y*v.y;
-                vp.z -= velocityScalingMatrixDiagonal.z*v.z;
+                vp.x -= velocityScalingMatrixDiagonal.x * v.x;
+                vp.y -= velocityScalingMatrixDiagonal.y * v.y;
+                vp.z -= velocityScalingMatrixDiagonal.z * v.z;
             }
 
             v = vp;
         }
 
-        v += f*imdt;
-
-        x                  += v*dt;
-        gm_v[threadIndex]   = v;
-        gm_x[threadIndex]   = x;
+        v += f * imdt;
 
+        x += v * dt;
+        gm_v[threadIndex] = v;
+        gm_x[threadIndex] = x;
     }
     return;
 }
@@ -234,11 +233,12 @@ inline auto selectLeapFrogKernelPtr(int numTempScaleValues, VelocityScalingType
         }
         else
         {
-            GMX_RELEASE_ASSERT(false, "Number of temperature coupling groups should be greater than zero (zero for no coupling).");
+            GMX_RELEASE_ASSERT(false,
+                               "Number of temperature coupling groups should be greater than zero "
+                               "(zero for no coupling).");
         }
     }
-    else
-    if (velocityScaling == VelocityScalingType::Diagonal)
+    else if (velocityScaling == VelocityScalingType::Diagonal)
     {
         if (numTempScaleValues == 0)
         {
@@ -254,72 +254,74 @@ inline auto selectLeapFrogKernelPtr(int numTempScaleValues, VelocityScalingType
         }
         else
         {
-            GMX_RELEASE_ASSERT(false, "Number of temperature coupling groups should be greater than zero (zero for no coupling).");
+            GMX_RELEASE_ASSERT(false,
+                               "Number of temperature coupling groups should be greater than zero "
+                               "(zero for no coupling).");
         }
     }
     else
     {
-        GMX_RELEASE_ASSERT(false, "Only isotropic Parrinello-Rahman pressure coupling is supported.");
+        GMX_RELEASE_ASSERT(false,
+                           "Only isotropic Parrinello-Rahman pressure coupling is supported.");
     }
     return kernelPtr;
 }
 
-void LeapFrogCuda::integrate(const float3                      *d_x,
-                             float3                            *d_xp,
-                             float3                            *d_v,
-                             const float3                      *d_f,
-                             const real                         dt,
-                             const bool                         doTempCouple,
-                             gmx::ArrayRef<const t_grp_tcstat>  tcstat,
-                             const bool                         doPressureCouple,
-                             const float                        dtPressureCouple,
-                             const matrix                       velocityScalingMatrix)
+void LeapFrogCuda::integrate(const float3*                     d_x,
+                             float3*                           d_xp,
+                             float3*                           d_v,
+                             const float3*                     d_f,
+                             const real                        dt,
+                             const bool                        doTempCouple,
+                             gmx::ArrayRef<const t_grp_tcstat> tcstat,
+                             const bool                        doPressureCouple,
+                             const float                       dtPressureCouple,
+                             const matrix                      velocityScalingMatrix)
 {
 
     ensureNoPendingCudaError("In CUDA version of Leap-Frog integrator");
 
-    auto kernelPtr          = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
+    auto kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
     if (doTempCouple || doPressureCouple)
     {
         if (doTempCouple)
         {
-            GMX_ASSERT(numTempScaleValues_ == ssize(h_lambdas_), "Number of temperature scaling factors changed since it was set for the last time.");
+            GMX_ASSERT(numTempScaleValues_ == ssize(h_lambdas_),
+                       "Number of temperature scaling factors changed since it was set for the "
+                       "last time.");
             for (int i = 0; i < numTempScaleValues_; i++)
             {
                 h_lambdas_[i] = tcstat[i].lambda;
             }
-            copyToDeviceBuffer(&d_lambdas_, h_lambdas_.data(),
-                               0, numTempScaleValues_, commandStream_, GpuApiCallBehavior::Async, nullptr);
+            copyToDeviceBuffer(&d_lambdas_, h_lambdas_.data(), 0, numTempScaleValues_,
+                               commandStream_, GpuApiCallBehavior::Async, nullptr);
         }
         VelocityScalingType velocityScaling = VelocityScalingType::None;
         if (doPressureCouple)
         {
             velocityScaling = VelocityScalingType::Diagonal;
-            GMX_ASSERT(velocityScalingMatrix[YY][XX] == 0 && velocityScalingMatrix[ZZ][XX] == 0 && velocityScalingMatrix[ZZ][YY] == 0 &&
-                       velocityScalingMatrix[XX][YY] == 0 && velocityScalingMatrix[XX][ZZ] == 0 && velocityScalingMatrix[YY][ZZ] == 0,
-                       "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported in GPU version of Leap-Frog integrator.");
-            velocityScalingMatrixDiagonal_ = make_float3(dtPressureCouple*velocityScalingMatrix[XX][XX],
-                                                         dtPressureCouple*velocityScalingMatrix[YY][YY],
-                                                         dtPressureCouple*velocityScalingMatrix[ZZ][ZZ]);
+            GMX_ASSERT(velocityScalingMatrix[YY][XX] == 0 && velocityScalingMatrix[ZZ][XX] == 0
+                               && velocityScalingMatrix[ZZ][YY] == 0 && velocityScalingMatrix[XX][YY] == 0
+                               && velocityScalingMatrix[XX][ZZ] == 0 && velocityScalingMatrix[YY][ZZ] == 0,
+                       "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
+                       "in GPU version of Leap-Frog integrator.");
+            velocityScalingMatrixDiagonal_ =
+                    make_float3(dtPressureCouple * velocityScalingMatrix[XX][XX],
+                                dtPressureCouple * velocityScalingMatrix[YY][YY],
+                                dtPressureCouple * velocityScalingMatrix[ZZ][ZZ]);
         }
         kernelPtr = selectLeapFrogKernelPtr(numTempScaleValues_, velocityScaling);
     }
 
-    const auto            kernelArgs = prepareGpuKernelArguments(kernelPtr, kernelLaunchConfig_,
-                                                                 &numAtoms_,
-                                                                 &d_x, &d_xp,
-                                                                 &d_v,
-                                                                 &d_f,
-                                                                 &d_inverseMasses_, &dt,
-                                                                 &d_lambdas_, &d_tempScaleGroups_,
-                                                                 &velocityScalingMatrixDiagonal_);
+    const auto kernelArgs = prepareGpuKernelArguments(
+            kernelPtr, kernelLaunchConfig_, &numAtoms_, &d_x, &d_xp, &d_v, &d_f, &d_inverseMasses_,
+            &dt, &d_lambdas_, &d_tempScaleGroups_, &velocityScalingMatrixDiagonal_);
     launchGpuKernel(kernelPtr, kernelLaunchConfig_, nullptr, "leapfrog_kernel", kernelArgs);
 
     return;
 }
 
-LeapFrogCuda::LeapFrogCuda(CommandStream commandStream) :
-    commandStream_(commandStream)
+LeapFrogCuda::LeapFrogCuda(CommandStream commandStream) : commandStream_(commandStream)
 {
     numAtoms_ = 0;
 
@@ -335,33 +337,32 @@ LeapFrogCuda::LeapFrogCuda(CommandStream commandStream) :
 LeapFrogCuda::~LeapFrogCuda()
 {
     freeDeviceBuffer(&d_inverseMasses_);
-
 }
 
-void LeapFrogCuda::setPbc(const t_pbc *pbc)
+void LeapFrogCuda::setPbc(const t_pbcpbc)
 {
     setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);
 }
 
-void LeapFrogCuda::set(const t_mdatoms      &md,
-                       const int             numTempScaleValues,
-                       const unsigned short *tempScaleGroups)
+void LeapFrogCuda::set(const t_mdatoms& md, const int numTempScaleValues, const unsigned short* tempScaleGroups)
 {
     numAtoms_                       = md.nr;
-    kernelLaunchConfig_.gridSize[0] = (numAtoms_ + c_threadsPerBlock - 1)/c_threadsPerBlock;
+    kernelLaunchConfig_.gridSize[0] = (numAtoms_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
 
     numTempScaleValues_ = numTempScaleValues;
 
-    reallocateDeviceBuffer(&d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, nullptr);
-    copyToDeviceBuffer(&d_inverseMasses_, (float*)md.invmass,
-                       0, numAtoms_, commandStream_, GpuApiCallBehavior::Sync, nullptr);
+    reallocateDeviceBuffer(&d_inverseMasses_, numAtoms_, &numInverseMasses_,
+                           &numInverseMassesAlloc_, nullptr);
+    copyToDeviceBuffer(&d_inverseMasses_, (float*)md.invmass, 0, numAtoms_, commandStream_,
+                       GpuApiCallBehavior::Sync, nullptr);
 
     // Temperature scale group map only used if there are more then one group
     if (numTempScaleValues > 1)
     {
-        reallocateDeviceBuffer(&d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, &numTempScaleGroupsAlloc_, nullptr);
-        copyToDeviceBuffer(&d_tempScaleGroups_, tempScaleGroups,
-                           0, numAtoms_, commandStream_, GpuApiCallBehavior::Sync, nullptr);
+        reallocateDeviceBuffer(&d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_,
+                               &numTempScaleGroupsAlloc_, nullptr);
+        copyToDeviceBuffer(&d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, commandStream_,
+                           GpuApiCallBehavior::Sync, nullptr);
     }
 
     // If the temperature coupling is enabled, we need to make space for scaling factors
@@ -370,7 +371,6 @@ void LeapFrogCuda::set(const t_mdatoms      &md,
         h_lambdas_.resize(numTempScaleValues);
         reallocateDeviceBuffer(&d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, nullptr);
     }
-
 }
 
-} //namespace gmx
+} // namespace gmx