/*-------------------------------- CUDA kernels-------------------------------- */
/*------------------------------------------------------------------------------*/
-#define CUDA_DEG2RAD_F (CUDART_PI_F / 180.0f)
+#define CUDA_DEG2RAD_F (CUDART_PI_F / 180.0F)
/*---------------- BONDED CUDA kernels--------------*/
__device__ __forceinline__ static void
harmonic_gpu(const float kA, const float xA, const float x, float* V, float* F)
{
- constexpr float half = 0.5f;
+ constexpr float half = 0.5F;
float dx, dx2;
dx = x - xA;
{
if (i < numBonds)
{
- int3 bondData = *(int3*)(d_forceatoms + 3 * i);
- int type = bondData.x;
- int ai = bondData.y;
- int aj = bondData.z;
+ const int3 bondData = *(reinterpret_cast<const int3*>(d_forceatoms + 3 * i));
+ int type = bondData.x;
+ int ai = bondData.y;
+ int aj = bondData.z;
/* dx = xi - xj, corrected for periodic boundary conditions. */
float3 dx;
*vtot_loc += vbond;
}
- if (dr2 != 0.0f)
+ if (dr2 != 0.0F)
{
fbond *= rsqrtf(dr2);
{
if (i < numBonds)
{
- int4 angleData = *(int4*)(d_forceatoms + 4 * i);
- int type = angleData.x;
- int ai = angleData.y;
- int aj = angleData.z;
- int ak = angleData.w;
+ const int4 angleData = *(reinterpret_cast<const int4*>(d_forceatoms + 4 * i));
+ int type = angleData.x;
+ int ai = angleData.y;
+ int aj = angleData.z;
+ int ak = angleData.w;
float3 r_ij;
float3 r_kj;
}
float cos_theta2 = cos_theta * cos_theta;
- if (cos_theta2 < 1.0f)
+ if (cos_theta2 < 1.0F)
{
- float st = dVdt * rsqrtf(1.0f - cos_theta2);
+ float st = dVdt * rsqrtf(1.0F - cos_theta2);
float sth = st * cos_theta;
float nrij2 = norm2(r_ij);
float nrkj2 = norm2(r_kj);
{
if (i < numBonds)
{
- int4 ubData = *(int4*)(d_forceatoms + 4 * i);
- int type = ubData.x;
- int ai = ubData.y;
- int aj = ubData.z;
- int ak = ubData.w;
+ const int4 ubData = *(reinterpret_cast<const int4*>(d_forceatoms + 4 * i));
+ int type = ubData.x;
+ int ai = ubData.y;
+ int aj = ubData.z;
+ int ak = ubData.w;
float th0A = d_forceparams[type].u_b.thetaA * CUDA_DEG2RAD_F;
float kthA = d_forceparams[type].u_b.kthetaA;
harmonic_gpu(kUBA, r13A, dr, &vbond, &fbond);
float cos_theta2 = cos_theta * cos_theta;
- if (cos_theta2 < 1.0f)
+ if (cos_theta2 < 1.0F)
{
- float st = dVdt * rsqrtf(1.0f - cos_theta2);
+ float st = dVdt * rsqrtf(1.0F - cos_theta2);
float sth = st * cos_theta;
float nrkj2 = norm2(r_kj);
}
/* Time for the bond calculations */
- if (dr2 != 0.0f)
+ if (dr2 != 0.0F)
{
if (calcEner)
{
*n = cprod(*r_kj, *r_kl);
float phi = gmx_angle(*m, *n);
float ipr = iprod(*r_ij, *n);
- float sign = (ipr < 0.0f) ? -1.0f : 1.0f;
+ float sign = (ipr < 0.0F) ? -1.0F : 1.0F;
phi = sign * phi;
return phi;
mdphi = mult * phi - phiA * CUDA_DEG2RAD_F;
sdphi = sinf(mdphi);
- *v = cpA * (1.0f + cosf(mdphi));
+ *v = cpA * (1.0F + cosf(mdphi));
*f = -cpA * mult * sdphi;
}
float3 sm_fShiftLoc[],
const PbcAiuc pbcAiuc)
{
- constexpr float c0 = 0.0f, c1 = 1.0f, c2 = 2.0f, c3 = 3.0f, c4 = 4.0f, c5 = 5.0f;
+ constexpr float c0 = 0.0F, c1 = 1.0F, c2 = 2.0F, c3 = 3.0F, c4 = 4.0F, c5 = 5.0F;
if (i < numBonds)
{
/* dp cannot be outside (-pi,pi) */
if (*dp >= CUDART_PI_F)
{
- *dp -= 2.0f * CUDART_PI_F;
+ *dp -= 2.0F * CUDART_PI_F;
}
else if (*dp < -CUDART_PI_F)
{
- *dp += 2.0f * CUDART_PI_F;
+ *dp += 2.0F * CUDART_PI_F;
}
}
if (calcEner)
{
- *vtot_loc += -0.5f * ddphi * dp;
+ *vtot_loc += -0.5F * ddphi * dp;
}
}
}
if (i < numBonds)
{
// TODO this should be made into a separate type, the GPU and CPU sizes should be compared
- int3 pairData = *(int3*)(d_forceatoms + 3 * i);
- int type = pairData.x;
- int ai = pairData.y;
- int aj = pairData.z;
+ const int3 pairData = *(reinterpret_cast<const int3*>(d_forceatoms + 3 * i));
+ int type = pairData.x;
+ int ai = pairData.y;
+ int aj = pairData.z;
float qq = gm_xq[ai].w * gm_xq[aj].w;
float c6 = iparams[type].lj14.c6A;
float velec = scale_factor * qq * rinv;
/* Calculate the LJ force * r and add it to the Coulomb part */
- float fr = (12.0f * c12 * rinv6 - 6.0f * c6) * rinv6 + velec;
+ float fr = (12.0F * c12 * rinv6 - 6.0F * c6) * rinv6 + velec;
float finvr = fr * rinv2;
float3 f = finvr * dr;
extern __shared__ char sm_dynamicShmem[];
char* sm_nextSlotPtr = sm_dynamicShmem;
- float3* sm_fShiftLoc = (float3*)sm_nextSlotPtr;
+ float3* sm_fShiftLoc = reinterpret_cast<float3*>(sm_nextSlotPtr);
sm_nextSlotPtr += c_numShiftVectors * sizeof(float3);
if (calcVir)
{
if (threadIdx.x < c_numShiftVectors)
{
- sm_fShiftLoc[threadIdx.x] = make_float3(0.0f, 0.0f, 0.0f);
+ sm_fShiftLoc[threadIdx.x] = make_float3(0.0F, 0.0F, 0.0F);
}
__syncthreads();
}
int warpId = threadIdx.x / warpSize;
// Shared memory variables to hold block-local partial sum
- float* sm_vTot = (float*)sm_nextSlotPtr;
+ float* sm_vTot = reinterpret_cast<float*>(sm_nextSlotPtr);
sm_nextSlotPtr += numWarps * sizeof(float);
- float* sm_vTotVdw = (float*)sm_nextSlotPtr;
+ float* sm_vTotVdw = reinterpret_cast<float*>(sm_nextSlotPtr);
sm_nextSlotPtr += numWarps * sizeof(float);
- float* sm_vTotElec = (float*)sm_nextSlotPtr;
+ float* sm_vTotElec = reinterpret_cast<float*>(sm_nextSlotPtr);
if (threadIdx.x % warpSize == 0)
{