*/
enum class NumTempScaleValues
{
- None, //!< No temperature coupling
- Single, //!< Single T-scaling value (one group)
- Multiple //!< Multiple T-scaling values, need to use T-group indices
+ None, //!< No temperature coupling
+ Single, //!< Single T-scaling value (one group)
+ Multiple //!< Multiple T-scaling values, need to use T-group indices
};
/*! \brief Different variants of the Parrinello-Rahman velocity scaling
*/
enum class VelocityScalingType
{
- None, //!< Do not apply velocity scaling (not a PR-coupling run or step)
- Diagonal, //!< Apply velocity scaling using a diagonal matrix
- Full //!< Apply velocity scaling using a full matrix
+ None, //!< Do not apply velocity scaling (not a PR-coupling run or step)
+ Diagonal, //!< Apply velocity scaling using a diagonal matrix
+ Full //!< Apply velocity scaling using a full matrix
};
/*! \brief Main kernel for Leap-Frog integrator.
* \param[in] velocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix
*/
template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
-__launch_bounds__(c_maxThreadsPerBlock)
-__global__ void leapfrog_kernel(const int numAtoms,
- float3* __restrict__ gm_x,
- float3* __restrict__ gm_xp,
- float3* __restrict__ gm_v,
- const float3* __restrict__ gm_f,
- const float* __restrict__ gm_inverseMasses,
- const float dt,
- const float* __restrict__ gm_lambdas,
- const unsigned short* __restrict__ gm_tempScaleGroups,
- const float3 velocityScalingMatrixDiagonal);
+__launch_bounds__(c_maxThreadsPerBlock) __global__
+ void leapfrog_kernel(const int numAtoms,
+ float3* __restrict__ gm_x,
+ float3* __restrict__ gm_xp,
+ float3* __restrict__ gm_v,
+ const float3* __restrict__ gm_f,
+ const float* __restrict__ gm_inverseMasses,
+ const float dt,
+ const float* __restrict__ gm_lambdas,
+ const unsigned short* __restrict__ gm_tempScaleGroups,
+ const float3 velocityScalingMatrixDiagonal);
template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
-__launch_bounds__(c_maxThreadsPerBlock)
-__global__ void leapfrog_kernel(const int numAtoms,
- float3* __restrict__ gm_x,
- float3* __restrict__ gm_xp,
- float3* __restrict__ gm_v,
- const float3* __restrict__ gm_f,
- const float* __restrict__ gm_inverseMasses,
- const float dt,
- const float* __restrict__ gm_lambdas,
- const unsigned short* __restrict__ gm_tempScaleGroups,
- const float3 velocityScalingMatrixDiagonal)
+__launch_bounds__(c_maxThreadsPerBlock) __global__
+ void leapfrog_kernel(const int numAtoms,
+ float3* __restrict__ gm_x,
+ float3* __restrict__ gm_xp,
+ float3* __restrict__ gm_v,
+ const float3* __restrict__ gm_f,
+ const float* __restrict__ gm_inverseMasses,
+ const float dt,
+ const float* __restrict__ gm_lambdas,
+ const unsigned short* __restrict__ gm_tempScaleGroups,
+ const float3 velocityScalingMatrixDiagonal)
{
- int threadIndex = blockIdx.x*blockDim.x + threadIdx.x;
+ int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
if (threadIndex < numAtoms)
{
- float3 x = gm_x[threadIndex];
- float3 v = gm_v[threadIndex];
- float3 f = gm_f[threadIndex];
- float im = gm_inverseMasses[threadIndex];
- float imdt = im*dt;
+ float3 x = gm_x[threadIndex];
+ float3 v = gm_v[threadIndex];
+ float3 f = gm_f[threadIndex];
+ float im = gm_inverseMasses[threadIndex];
+ float imdt = im * dt;
// Swapping places for xp and x so that the x will contain the updated coordinates and xp - the
// coordinates before update. This should be taken into account when (if) constraints are applied
// after the update: x and xp have to be passed to constraints in the 'wrong' order.
- gm_xp[threadIndex] = x;
+ gm_xp[threadIndex] = x;
if (numTempScaleValues != NumTempScaleValues::None || velocityScaling != VelocityScalingType::None)
{
float lambda = 1.0F;
if (numTempScaleValues == NumTempScaleValues::Single)
{
- lambda = gm_lambdas[0];
+ lambda = gm_lambdas[0];
}
else if (numTempScaleValues == NumTempScaleValues::Multiple)
{
if (velocityScaling == VelocityScalingType::Diagonal)
{
- vp.x -= velocityScalingMatrixDiagonal.x*v.x;
- vp.y -= velocityScalingMatrixDiagonal.y*v.y;
- vp.z -= velocityScalingMatrixDiagonal.z*v.z;
+ vp.x -= velocityScalingMatrixDiagonal.x * v.x;
+ vp.y -= velocityScalingMatrixDiagonal.y * v.y;
+ vp.z -= velocityScalingMatrixDiagonal.z * v.z;
}
v = vp;
}
- v += f*imdt;
-
- x += v*dt;
- gm_v[threadIndex] = v;
- gm_x[threadIndex] = x;
+ v += f * imdt;
+ x += v * dt;
+ gm_v[threadIndex] = v;
+ gm_x[threadIndex] = x;
}
return;
}
}
else
{
- GMX_RELEASE_ASSERT(false, "Number of temperature coupling groups should be greater than zero (zero for no coupling).");
+ GMX_RELEASE_ASSERT(false,
+ "Number of temperature coupling groups should be greater than zero "
+ "(zero for no coupling).");
}
}
- else
- if (velocityScaling == VelocityScalingType::Diagonal)
+ else if (velocityScaling == VelocityScalingType::Diagonal)
{
if (numTempScaleValues == 0)
{
}
else
{
- GMX_RELEASE_ASSERT(false, "Number of temperature coupling groups should be greater than zero (zero for no coupling).");
+ GMX_RELEASE_ASSERT(false,
+ "Number of temperature coupling groups should be greater than zero "
+ "(zero for no coupling).");
}
}
else
{
- GMX_RELEASE_ASSERT(false, "Only isotropic Parrinello-Rahman pressure coupling is supported.");
+ GMX_RELEASE_ASSERT(false,
+ "Only isotropic Parrinello-Rahman pressure coupling is supported.");
}
return kernelPtr;
}
-void LeapFrogCuda::integrate(const float3 *d_x,
- float3 *d_xp,
- float3 *d_v,
- const float3 *d_f,
- const real dt,
- const bool doTempCouple,
- gmx::ArrayRef<const t_grp_tcstat> tcstat,
- const bool doPressureCouple,
- const float dtPressureCouple,
- const matrix velocityScalingMatrix)
+void LeapFrogCuda::integrate(const float3* d_x,
+ float3* d_xp,
+ float3* d_v,
+ const float3* d_f,
+ const real dt,
+ const bool doTempCouple,
+ gmx::ArrayRef<const t_grp_tcstat> tcstat,
+ const bool doPressureCouple,
+ const float dtPressureCouple,
+ const matrix velocityScalingMatrix)
{
ensureNoPendingCudaError("In CUDA version of Leap-Frog integrator");
- auto kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
+ auto kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
if (doTempCouple || doPressureCouple)
{
if (doTempCouple)
{
- GMX_ASSERT(numTempScaleValues_ == ssize(h_lambdas_), "Number of temperature scaling factors changed since it was set for the last time.");
+ GMX_ASSERT(numTempScaleValues_ == ssize(h_lambdas_),
+ "Number of temperature scaling factors changed since it was set for the "
+ "last time.");
for (int i = 0; i < numTempScaleValues_; i++)
{
h_lambdas_[i] = tcstat[i].lambda;
}
- copyToDeviceBuffer(&d_lambdas_, h_lambdas_.data(),
- 0, numTempScaleValues_, commandStream_, GpuApiCallBehavior::Async, nullptr);
+ copyToDeviceBuffer(&d_lambdas_, h_lambdas_.data(), 0, numTempScaleValues_,
+ commandStream_, GpuApiCallBehavior::Async, nullptr);
}
VelocityScalingType velocityScaling = VelocityScalingType::None;
if (doPressureCouple)
{
velocityScaling = VelocityScalingType::Diagonal;
- GMX_ASSERT(velocityScalingMatrix[YY][XX] == 0 && velocityScalingMatrix[ZZ][XX] == 0 && velocityScalingMatrix[ZZ][YY] == 0 &&
- velocityScalingMatrix[XX][YY] == 0 && velocityScalingMatrix[XX][ZZ] == 0 && velocityScalingMatrix[YY][ZZ] == 0,
- "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported in GPU version of Leap-Frog integrator.");
- velocityScalingMatrixDiagonal_ = make_float3(dtPressureCouple*velocityScalingMatrix[XX][XX],
- dtPressureCouple*velocityScalingMatrix[YY][YY],
- dtPressureCouple*velocityScalingMatrix[ZZ][ZZ]);
+ GMX_ASSERT(velocityScalingMatrix[YY][XX] == 0 && velocityScalingMatrix[ZZ][XX] == 0
+ && velocityScalingMatrix[ZZ][YY] == 0 && velocityScalingMatrix[XX][YY] == 0
+ && velocityScalingMatrix[XX][ZZ] == 0 && velocityScalingMatrix[YY][ZZ] == 0,
+ "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
+ "in GPU version of Leap-Frog integrator.");
+ velocityScalingMatrixDiagonal_ =
+ make_float3(dtPressureCouple * velocityScalingMatrix[XX][XX],
+ dtPressureCouple * velocityScalingMatrix[YY][YY],
+ dtPressureCouple * velocityScalingMatrix[ZZ][ZZ]);
}
kernelPtr = selectLeapFrogKernelPtr(numTempScaleValues_, velocityScaling);
}
- const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, kernelLaunchConfig_,
- &numAtoms_,
- &d_x, &d_xp,
- &d_v,
- &d_f,
- &d_inverseMasses_, &dt,
- &d_lambdas_, &d_tempScaleGroups_,
- &velocityScalingMatrixDiagonal_);
+ const auto kernelArgs = prepareGpuKernelArguments(
+ kernelPtr, kernelLaunchConfig_, &numAtoms_, &d_x, &d_xp, &d_v, &d_f, &d_inverseMasses_,
+ &dt, &d_lambdas_, &d_tempScaleGroups_, &velocityScalingMatrixDiagonal_);
launchGpuKernel(kernelPtr, kernelLaunchConfig_, nullptr, "leapfrog_kernel", kernelArgs);
return;
}
-LeapFrogCuda::LeapFrogCuda(CommandStream commandStream) :
- commandStream_(commandStream)
+LeapFrogCuda::LeapFrogCuda(CommandStream commandStream) : commandStream_(commandStream)
{
numAtoms_ = 0;
LeapFrogCuda::~LeapFrogCuda()
{
freeDeviceBuffer(&d_inverseMasses_);
-
}
-void LeapFrogCuda::setPbc(const t_pbc *pbc)
+void LeapFrogCuda::setPbc(const t_pbc* pbc)
{
setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);
}
-void LeapFrogCuda::set(const t_mdatoms &md,
- const int numTempScaleValues,
- const unsigned short *tempScaleGroups)
+void LeapFrogCuda::set(const t_mdatoms& md, const int numTempScaleValues, const unsigned short* tempScaleGroups)
{
numAtoms_ = md.nr;
- kernelLaunchConfig_.gridSize[0] = (numAtoms_ + c_threadsPerBlock - 1)/c_threadsPerBlock;
+ kernelLaunchConfig_.gridSize[0] = (numAtoms_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
numTempScaleValues_ = numTempScaleValues;
- reallocateDeviceBuffer(&d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, nullptr);
- copyToDeviceBuffer(&d_inverseMasses_, (float*)md.invmass,
- 0, numAtoms_, commandStream_, GpuApiCallBehavior::Sync, nullptr);
+ reallocateDeviceBuffer(&d_inverseMasses_, numAtoms_, &numInverseMasses_,
+ &numInverseMassesAlloc_, nullptr);
+ copyToDeviceBuffer(&d_inverseMasses_, (float*)md.invmass, 0, numAtoms_, commandStream_,
+ GpuApiCallBehavior::Sync, nullptr);
// Temperature scale group map only used if there are more then one group
if (numTempScaleValues > 1)
{
- reallocateDeviceBuffer(&d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, &numTempScaleGroupsAlloc_, nullptr);
- copyToDeviceBuffer(&d_tempScaleGroups_, tempScaleGroups,
- 0, numAtoms_, commandStream_, GpuApiCallBehavior::Sync, nullptr);
+ reallocateDeviceBuffer(&d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_,
+ &numTempScaleGroupsAlloc_, nullptr);
+ copyToDeviceBuffer(&d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, commandStream_,
+ GpuApiCallBehavior::Sync, nullptr);
}
// If the temperature coupling is enabled, we need to make space for scaling factors
h_lambdas_.resize(numTempScaleValues);
reallocateDeviceBuffer(&d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, nullptr);
}
-
}
-} //namespace gmx
+} // namespace gmx