list(REMOVE_ITEM MDLIB_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/gpuforcereduction_impl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/gpuforcereduction_impl_internal_sycl.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/leapfrog_gpu_sycl.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/leapfrog_gpu.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/leapfrog_gpu_internal_sycl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu.cpp
${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu_internal_sycl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/settle_gpu.cpp
set(MDLIB_SOURCES ${MDLIB_SOURCES} PARENT_SCOPE)
if(GMX_GPU_CUDA)
gmx_add_libgromacs_sources(
- leapfrog_gpu.cu
gpuforcereduction_impl.cpp
gpuforcereduction_impl_internal.cu
+ leapfrog_gpu.cpp
+ leapfrog_gpu_internal.cu
lincs_gpu.cpp
lincs_gpu_internal.cu
settle_gpu.cpp
)
_gmx_add_files_to_property(CUDA_SOURCES
gpuforcereduction_impl.cpp
+ leapfrog_gpu.cpp
lincs_gpu.cpp
settle_gpu.cpp
update_constrain_gpu_impl.cpp
gmx_add_libgromacs_sources(
gpuforcereduction_impl.cpp
gpuforcereduction_impl_internal_sycl.cpp
- leapfrog_gpu_sycl.cpp
+ leapfrog_gpu.cpp
+ leapfrog_gpu_internal_sycl.cpp
lincs_gpu.cpp
lincs_gpu_internal_sycl.cpp
settle_gpu.cpp
_gmx_add_files_to_property(SYCL_SOURCES
gpuforcereduction_impl.cpp
gpuforcereduction_impl_internal_sycl.cpp
- leapfrog_gpu_sycl.cpp
+ leapfrog_gpu.cpp
+ leapfrog_gpu_internal_sycl.cpp
lincs_gpu.cpp
lincs_gpu_internal_sycl.cpp
settle_gpu.cpp
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Implements Leap-Frog using CUDA
+ *
+ * This file contains backend-agnostic code for Leap-Frog integrator class on GPU,
+ * including class initialization, and data-structures management.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#include "gmxpre.h"
+
+#include "leapfrog_gpu.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <cmath>
+
+#include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/leapfrog_gpu_internal.h"
+#include "gromacs/mdtypes/group.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/utility/arrayref.h"
+
+namespace gmx
+{
+
+void LeapFrogGpu::integrate(DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
+ const float dt,
+ const bool doTemperatureScaling,
+ gmx::ArrayRef<const t_grp_tcstat> tcstat,
+ const bool doParrinelloRahman,
+ const float dtPressureCouple,
+ const matrix prVelocityScalingMatrix)
+{
+
+ if (doTemperatureScaling)
+ {
+ GMX_ASSERT(checkDeviceBuffer(d_lambdas_, numTempScaleValues_),
+ "Number of temperature scaling factors changed since it was set for the "
+ "last time.");
+ GMX_ASSERT(numTempScaleValues_ == ssize(h_lambdas_),
+ "Number of temperature scaling factors changed since it was set for the "
+ "last time.");
+
+ /* In SYCL, we could use host accessors here, without h_lambdas_.
+ * According to a quick test, host accessor is slightly faster when using DPC++ and
+ * LevelZero compared to using h_lambdas_ + cgh.copy. But with DPC++ and OpenCL, the host
+ * accessor waits for fReadyOnDevice in UpdateConstrainGpu::Impl::integrate. See #4023. */
+ for (int i = 0; i < numTempScaleValues_; i++)
+ {
+ h_lambdas_[i] = tcstat[i].lambda;
+ }
+ copyToDeviceBuffer(&d_lambdas_,
+ h_lambdas_.data(),
+ 0,
+ numTempScaleValues_,
+ deviceStream_,
+ GpuApiCallBehavior::Async,
+ nullptr);
+ }
+ VelocityScalingType prVelocityScalingType = VelocityScalingType::None;
+ if (doParrinelloRahman)
+ {
+ prVelocityScalingType = VelocityScalingType::Diagonal;
+ GMX_ASSERT(prVelocityScalingMatrix[YY][XX] == 0 && prVelocityScalingMatrix[ZZ][XX] == 0
+ && prVelocityScalingMatrix[ZZ][YY] == 0 && prVelocityScalingMatrix[XX][YY] == 0
+ && prVelocityScalingMatrix[XX][ZZ] == 0 && prVelocityScalingMatrix[YY][ZZ] == 0,
+ "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
+ "in GPU version of Leap-Frog integrator.");
+ prVelocityScalingMatrixDiagonal_ = Float3{ dtPressureCouple * prVelocityScalingMatrix[XX][XX],
+ dtPressureCouple * prVelocityScalingMatrix[YY][YY],
+ dtPressureCouple * prVelocityScalingMatrix[ZZ][ZZ] };
+ }
+
+ launchLeapFrogKernel(numAtoms_,
+ d_x,
+ d_xp,
+ d_v,
+ d_f,
+ d_inverseMasses_,
+ dt,
+ doTemperatureScaling,
+ numTempScaleValues_,
+ d_tempScaleGroups_,
+ d_lambdas_,
+ prVelocityScalingType,
+ prVelocityScalingMatrixDiagonal_,
+ deviceStream_);
+}
+
+LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext,
+ const DeviceStream& deviceStream,
+ const int numTempScaleValues) :
+ deviceContext_(deviceContext), deviceStream_(deviceStream), numTempScaleValues_(numTempScaleValues)
+{
+ numAtoms_ = 0;
+
+ changePinningPolicy(&h_lambdas_, gmx::PinningPolicy::PinnedIfSupported);
+
+ // If the temperature coupling is enabled, we need to make space for scaling factors
+ if (numTempScaleValues_ > 0)
+ {
+ h_lambdas_.resize(numTempScaleValues_);
+ reallocateDeviceBuffer(
+ &d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, deviceContext_);
+ }
+}
+
+LeapFrogGpu::~LeapFrogGpu()
+{
+ freeDeviceBuffer(&d_inverseMasses_);
+}
+
+void LeapFrogGpu::set(const int numAtoms, const real* inverseMasses, const unsigned short* tempScaleGroups)
+{
+ numAtoms_ = numAtoms;
+
+ reallocateDeviceBuffer(
+ &d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
+ copyToDeviceBuffer(
+ &d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
+
+ // Temperature scale group map only used if there are more then one group
+ if (numTempScaleValues_ > 1)
+ {
+ reallocateDeviceBuffer(
+ &d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, &numTempScaleGroupsAlloc_, deviceContext_);
+ copyToDeviceBuffer(
+ &d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
+ }
+}
+
+} // namespace gmx
DeviceBuffer<Float3> d_xp,
DeviceBuffer<Float3> d_v,
const DeviceBuffer<Float3> d_f,
- const real dt,
+ const float dt,
const bool doTemperatureScaling,
gmx::ArrayRef<const t_grp_tcstat> tcstat,
const bool doParrinelloRahman,
const DeviceContext& deviceContext_;
//! GPU stream
const DeviceStream& deviceStream_;
- //! GPU kernel launch config
- KernelLaunchConfig kernelLaunchConfig_;
+
//! Number of atoms
int numAtoms_;
*
* \brief Implements Leap-Frog using CUDA
*
- * This file contains implementation of basic Leap-Frog integrator
- * using CUDA, including class initialization, data-structures management
- * and GPU kernel.
+ * This file contains CUDA implementation of back-end specific code for Leap-Frog.
*
* \author Artem Zhmurov <zhmurov@gmail.com>
*
*/
#include "gmxpre.h"
-#include "leapfrog_gpu.h"
-
-#include <assert.h>
-#include <stdio.h>
-
-#include <cmath>
-
-#include <algorithm>
+#include "leapfrog_gpu_internal.h"
#include "gromacs/gpu_utils/cudautils.cuh"
#include "gromacs/gpu_utils/devicebuffer.h"
* \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix
*/
template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
-__launch_bounds__(c_maxThreadsPerBlock) __global__
- void leapfrog_kernel(const int numAtoms,
- float3* __restrict__ gm_x,
- float3* __restrict__ gm_xp,
- float3* __restrict__ gm_v,
- const float3* __restrict__ gm_f,
- const float* __restrict__ gm_inverseMasses,
- const float dt,
- const float* __restrict__ gm_lambdas,
- const unsigned short* __restrict__ gm_tempScaleGroups,
- const float3 prVelocityScalingMatrixDiagonal);
-
-template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
__launch_bounds__(c_maxThreadsPerBlock) __global__
void leapfrog_kernel(const int numAtoms,
float3* __restrict__ gm_x,
gm_v[threadIndex] = v;
gm_x[threadIndex] = x;
}
- return;
}
/*! \brief Select templated kernel.
return kernelPtr;
}
-void LeapFrogGpu::integrate(DeviceBuffer<Float3> d_x,
- DeviceBuffer<Float3> d_xp,
- DeviceBuffer<Float3> d_v,
- const DeviceBuffer<Float3> d_f,
- const real dt,
- const bool doTemperatureScaling,
- gmx::ArrayRef<const t_grp_tcstat> tcstat,
- const bool doParrinelloRahman,
- const float dtPressureCouple,
- const matrix prVelocityScalingMatrix)
+
+void launchLeapFrogKernel(const int numAtoms,
+ DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
+ const DeviceBuffer<float> d_inverseMasses,
+ const float dt,
+ const bool doTemperatureScaling,
+ const int numTempScaleValues,
+ const DeviceBuffer<unsigned short> d_tempScaleGroups,
+ const DeviceBuffer<float> d_lambdas,
+ const VelocityScalingType prVelocityScalingType,
+ const Float3 prVelocityScalingMatrixDiagonal,
+ const DeviceStream& deviceStream)
{
+ // Checking the buffer types against the kernel argument types
+ static_assert(sizeof(*d_inverseMasses) == sizeof(float), "Incompatible types");
- ensureNoPendingDeviceError("In CUDA version of Leap-Frog integrator");
+ KernelLaunchConfig kernelLaunchConfig;
- auto kernelPtr = leapfrog_kernel<NumTempScaleValues::None, VelocityScalingType::None>;
- if (doTemperatureScaling || doParrinelloRahman)
- {
- if (doTemperatureScaling)
- {
- GMX_ASSERT(numTempScaleValues_ == ssize(h_lambdas_),
- "Number of temperature scaling factors changed since it was set for the "
- "last time.");
- for (int i = 0; i < numTempScaleValues_; i++)
- {
- h_lambdas_[i] = tcstat[i].lambda;
- }
- copyToDeviceBuffer(&d_lambdas_,
- h_lambdas_.data(),
- 0,
- numTempScaleValues_,
- deviceStream_,
- GpuApiCallBehavior::Async,
- nullptr);
- }
- VelocityScalingType prVelocityScalingType = VelocityScalingType::None;
- if (doParrinelloRahman)
- {
- prVelocityScalingType = VelocityScalingType::Diagonal;
- GMX_ASSERT(prVelocityScalingMatrix[YY][XX] == 0 && prVelocityScalingMatrix[ZZ][XX] == 0
- && prVelocityScalingMatrix[ZZ][YY] == 0
- && prVelocityScalingMatrix[XX][YY] == 0
- && prVelocityScalingMatrix[XX][ZZ] == 0
- && prVelocityScalingMatrix[YY][ZZ] == 0,
- "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
- "in GPU version of Leap-Frog integrator.");
- prVelocityScalingMatrixDiagonal_ =
- Float3{ dtPressureCouple * prVelocityScalingMatrix[XX][XX],
- dtPressureCouple * prVelocityScalingMatrix[YY][YY],
- dtPressureCouple * prVelocityScalingMatrix[ZZ][ZZ] };
- }
- kernelPtr = selectLeapFrogKernelPtr(doTemperatureScaling, numTempScaleValues_, prVelocityScalingType);
- }
+ kernelLaunchConfig.gridSize[0] = (numAtoms + c_threadsPerBlock - 1) / c_threadsPerBlock;
+ kernelLaunchConfig.blockSize[0] = c_threadsPerBlock;
+ kernelLaunchConfig.blockSize[1] = 1;
+ kernelLaunchConfig.blockSize[2] = 1;
+ kernelLaunchConfig.sharedMemorySize = 0;
+
+ auto kernelPtr =
+ selectLeapFrogKernelPtr(doTemperatureScaling, numTempScaleValues, prVelocityScalingType);
- // Checking the buffer types against the kernel argument types
- static_assert(sizeof(*d_inverseMasses_) == sizeof(float), "Incompatible types");
const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
- kernelLaunchConfig_,
- &numAtoms_,
+ kernelLaunchConfig,
+ &numAtoms,
asFloat3Pointer(&d_x),
asFloat3Pointer(&d_xp),
asFloat3Pointer(&d_v),
asFloat3Pointer(&d_f),
- &d_inverseMasses_,
+ &d_inverseMasses,
&dt,
- &d_lambdas_,
- &d_tempScaleGroups_,
- &prVelocityScalingMatrixDiagonal_);
- launchGpuKernel(kernelPtr, kernelLaunchConfig_, deviceStream_, nullptr, "leapfrog_kernel", kernelArgs);
-
- return;
-}
-
-LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext,
- const DeviceStream& deviceStream,
- const int numTempScaleValues) :
- deviceContext_(deviceContext), deviceStream_(deviceStream), numTempScaleValues_(numTempScaleValues)
-{
- numAtoms_ = 0;
-
- changePinningPolicy(&h_lambdas_, gmx::PinningPolicy::PinnedIfSupported);
-
- kernelLaunchConfig_.blockSize[0] = c_threadsPerBlock;
- kernelLaunchConfig_.blockSize[1] = 1;
- kernelLaunchConfig_.blockSize[2] = 1;
- kernelLaunchConfig_.sharedMemorySize = 0;
-
- // If the temperature coupling is enabled, we need to make space for scaling factors
- if (numTempScaleValues_ > 0)
- {
- h_lambdas_.resize(numTempScaleValues_);
- reallocateDeviceBuffer(
- &d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, deviceContext_);
- }
-}
-
-LeapFrogGpu::~LeapFrogGpu()
-{
- freeDeviceBuffer(&d_inverseMasses_);
-}
-
-void LeapFrogGpu::set(const int numAtoms, const real* inverseMasses, const unsigned short* tempScaleGroups)
-{
- numAtoms_ = numAtoms;
- kernelLaunchConfig_.gridSize[0] = (numAtoms_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
-
- reallocateDeviceBuffer(
- &d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
- copyToDeviceBuffer(
- &d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
-
- // Temperature scale group map only used if there are more then one group
- if (numTempScaleValues_ > 1)
- {
- reallocateDeviceBuffer(
- &d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, &numTempScaleGroupsAlloc_, deviceContext_);
- copyToDeviceBuffer(
- &d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
- }
+ &d_lambdas,
+ &d_tempScaleGroups,
+ &prVelocityScalingMatrixDiagonal);
+ launchGpuKernel(kernelPtr, kernelLaunchConfig, deviceStream, nullptr, "leapfrog_kernel", kernelArgs);
}
} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ *
+ * \brief Declarations for backend specific GPU functions for Leap-Frog.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ * \inlibraryapi
+ */
+#ifndef GMX_MDLIB_LEAPFROG_GPU_INTERNAL_H
+#define GMX_MDLIB_LEAPFROG_GPU_INTERNAL_H
+
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/mdlib/leapfrog_gpu.h"
+
+namespace gmx
+{
+
+void launchLeapFrogKernel(int numAtoms,
+ DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ DeviceBuffer<Float3> d_v,
+ DeviceBuffer<Float3> d_f,
+ DeviceBuffer<float> d_inverseMasses,
+ float dt,
+ bool doTemperatureScaling,
+ int numTempScaleValues,
+ DeviceBuffer<unsigned short> d_tempScaleGroups,
+ DeviceBuffer<float> d_lambdas,
+ VelocityScalingType prVelocityScalingType,
+ Float3 prVelocityScalingMatrixDiagonal,
+ const DeviceStream& deviceStream);
+
+
+} // namespace gmx
+
+#endif
*
* \brief Implements Leap-Frog using SYCL
*
- * This file contains implementation of basic Leap-Frog integrator
- * using SYCL, including class initialization, data-structures management
- * and GPU kernel.
+ * This file contains SYCL implementation of back-end specific code for Leap-Frog.
*
* \author Artem Zhmurov <zhmurov@gmail.com>
* \author Andrey Alekseenko <al42and@gmail.com>
*/
#include "gmxpre.h"
+#include "leapfrog_gpu_internal.h"
+
#include "gromacs/gpu_utils/devicebuffer.h"
#include "gromacs/gpu_utils/gmxsycl.h"
#include "gromacs/math/vec.h"
prVelocityScalingType);
}
-void LeapFrogGpu::integrate(DeviceBuffer<Float3> d_x,
- DeviceBuffer<Float3> d_xp,
- DeviceBuffer<Float3> d_v,
- DeviceBuffer<Float3> d_f,
- const real dt,
- const bool doTemperatureScaling,
- gmx::ArrayRef<const t_grp_tcstat> tcstat,
- const bool doParrinelloRahman,
- const float dtPressureCouple,
- const matrix prVelocityScalingMatrix)
+void launchLeapFrogKernel(int numAtoms,
+ DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ DeviceBuffer<Float3> d_v,
+ const DeviceBuffer<Float3> d_f,
+ const DeviceBuffer<float> d_inverseMasses,
+ const float dt,
+ const bool doTemperatureScaling,
+ const int numTempScaleValues,
+ const DeviceBuffer<unsigned short> d_tempScaleGroups,
+ const DeviceBuffer<float> d_lambdas,
+ const VelocityScalingType prVelocityScalingType,
+ const Float3 prVelocityScalingMatrixDiagonal,
+ const DeviceStream& deviceStream)
{
- if (doTemperatureScaling)
- {
- GMX_ASSERT(checkDeviceBuffer(d_lambdas_, numTempScaleValues_),
- "Number of temperature scaling factors changed since it was set for the "
- "last time.");
- GMX_RELEASE_ASSERT(gmx::ssize(h_lambdas_) == numTempScaleValues_,
- "Number of temperature scaling factors changed since it was set for the "
- "last time.");
- /* We could use host accessors here, without h_lambdas_.
- * According to a quick test, host accessor is slightly faster when using DPC++ and
- * LevelZero compared to using h_lambdas_ + cgh.copy. But with DPC++ and OpenCL, the host
- * accessor waits for fReadyOnDevice in UpdateConstrainGpu::Impl::integrate. See #4023. */
-
- for (int i = 0; i < numTempScaleValues_; i++)
- {
- h_lambdas_[i] = tcstat[i].lambda;
- }
- copyToDeviceBuffer(&d_lambdas_,
- h_lambdas_.data(),
- 0,
- numTempScaleValues_,
- deviceStream_,
- GpuApiCallBehavior::Async,
- nullptr);
- }
NumTempScaleValues tempVelocityScalingType =
- getTempScalingType(doTemperatureScaling, numTempScaleValues_);
+ getTempScalingType(doTemperatureScaling, numTempScaleValues);
- VelocityScalingType prVelocityScalingType = VelocityScalingType::None;
- if (doParrinelloRahman)
- {
- prVelocityScalingType = VelocityScalingType::Diagonal;
- GMX_ASSERT(prVelocityScalingMatrix[YY][XX] == 0 && prVelocityScalingMatrix[ZZ][XX] == 0
- && prVelocityScalingMatrix[ZZ][YY] == 0 && prVelocityScalingMatrix[XX][YY] == 0
- && prVelocityScalingMatrix[XX][ZZ] == 0 && prVelocityScalingMatrix[YY][ZZ] == 0,
- "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
- "in GPU version of Leap-Frog integrator.");
- prVelocityScalingMatrixDiagonal_ = dtPressureCouple
- * Float3{ prVelocityScalingMatrix[XX][XX],
- prVelocityScalingMatrix[YY][YY],
- prVelocityScalingMatrix[ZZ][ZZ] };
- }
launchLeapFrogKernel(tempVelocityScalingType,
prVelocityScalingType,
- deviceStream_,
- numAtoms_,
+ deviceStream,
+ numAtoms,
d_x,
d_xp,
d_v,
d_f,
- d_inverseMasses_,
+ d_inverseMasses,
dt,
- d_lambdas_,
- d_tempScaleGroups_,
- prVelocityScalingMatrixDiagonal_);
-}
-
-LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext,
- const DeviceStream& deviceStream,
- const int numTempScaleValues) :
- deviceContext_(deviceContext),
- deviceStream_(deviceStream),
- numAtoms_(0),
- numTempScaleValues_(numTempScaleValues)
-{
- // If the temperature coupling is enabled, we need to make space for scaling factors
- if (numTempScaleValues_ > 0)
- {
- h_lambdas_.resize(numTempScaleValues_);
- reallocateDeviceBuffer(
- &d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, deviceContext_);
- }
-}
-
-LeapFrogGpu::~LeapFrogGpu()
-{
- freeDeviceBuffer(&d_inverseMasses_);
-}
-
-void LeapFrogGpu::set(const int numAtoms, const real* inverseMasses, const unsigned short* tempScaleGroups)
-{
- numAtoms_ = numAtoms;
-
- reallocateDeviceBuffer(
- &d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
- copyToDeviceBuffer(
- &d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
-
- // Temperature scale group map only used if there are more then one group
- if (numTempScaleValues_ > 1)
- {
- reallocateDeviceBuffer(
- &d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_, &numTempScaleGroupsAlloc_, deviceContext_);
- copyToDeviceBuffer(
- &d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
- }
+ d_lambdas,
+ d_tempScaleGroups,
+ prVelocityScalingMatrixDiagonal);
}
} // namespace gmx