list(REMOVE_ITEM MDLIB_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/leapfrog_gpu_sycl.cpp
${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu.cpp
- ${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu_internal_sycl.cpp)
+ ${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu_internal_sycl.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/settle_gpu.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/settle_gpu_internal_sycl.cpp)
set(MDLIB_SOURCES ${MDLIB_SOURCES} PARENT_SCOPE)
if(GMX_GPU_CUDA)
leapfrog_gpu.cu
lincs_gpu.cpp
lincs_gpu_internal.cu
- settle_gpu.cu
+ settle_gpu.cpp
+ settle_gpu_internal.cu
update_constrain_gpu_impl.cu
gpuforcereduction_impl.cu
)
_gmx_add_files_to_property(CUDA_SOURCES
lincs_gpu.cpp
+ settle_gpu.cpp
)
endif()
leapfrog_gpu_sycl.cpp
lincs_gpu.cpp
lincs_gpu_internal_sycl.cpp
+ settle_gpu.cpp
+ settle_gpu_internal_sycl.cpp
)
+
_gmx_add_files_to_property(SYCL_SOURCES
leapfrog_gpu_sycl.cpp
lincs_gpu.cpp
lincs_gpu_internal_sycl.cpp
+ settle_gpu.cpp
+ settle_gpu_internal_sycl.cpp
)
endif()
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Implements SETTLE using GPU
+ *
+ * This file contains implementation for the data management of GPU version of SETTLE constraints algorithm.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#include "gmxpre.h"
+
+#include "settle_gpu.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include <cmath>
+
+#include <algorithm>
+
+#include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/math/functions.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/settle_gpu_internal.h"
+#include "gromacs/pbcutil/pbc.h"
+
+namespace gmx
+{
+
+void SettleGpu::apply(const DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ const bool updateVelocities,
+ DeviceBuffer<Float3> d_v,
+ const real invdt,
+ const bool computeVirial,
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc)
+{
+
+ // Early exit if no settles
+ if (numSettles_ == 0)
+ {
+ return;
+ }
+
+ if (computeVirial)
+ {
+ // Fill with zeros so the values can be reduced to it
+ // Only 6 values are needed because virial is symmetrical
+ clearDeviceBufferAsync(&d_virialScaled_, 0, 6, deviceStream_);
+ }
+
+ launchSettleGpuKernel(numSettles_,
+ d_atomIds_,
+ settleParameters_,
+ d_x,
+ d_xp,
+ updateVelocities,
+ d_v,
+ invdt,
+ computeVirial,
+ d_virialScaled_,
+ pbcAiuc,
+ deviceStream_);
+
+
+ if (computeVirial)
+ {
+ copyFromDeviceBuffer(
+ h_virialScaled_.data(), &d_virialScaled_, 0, 6, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
+
+ // Mapping [XX, XY, XZ, YY, YZ, ZZ] internal format to a tensor object
+ virialScaled[XX][XX] += h_virialScaled_[0];
+ virialScaled[XX][YY] += h_virialScaled_[1];
+ virialScaled[XX][ZZ] += h_virialScaled_[2];
+
+ virialScaled[YY][XX] += h_virialScaled_[1];
+ virialScaled[YY][YY] += h_virialScaled_[3];
+ virialScaled[YY][ZZ] += h_virialScaled_[4];
+
+ virialScaled[ZZ][XX] += h_virialScaled_[2];
+ virialScaled[ZZ][YY] += h_virialScaled_[4];
+ virialScaled[ZZ][ZZ] += h_virialScaled_[5];
+ }
+
+ return;
+}
+
+SettleGpu::SettleGpu(const gmx_mtop_t& mtop, const DeviceContext& deviceContext, const DeviceStream& deviceStream) :
+ deviceContext_(deviceContext),
+ deviceStream_(deviceStream)
+{
+ static_assert(sizeof(real) == sizeof(float),
+ "Real numbers should be in single precision in GPU code.");
+
+ // This is to prevent the assertion failure for the systems without water
+ int totalSettles = 0;
+ for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
+ {
+ const int nral1 = 1 + NRAL(F_SETTLE);
+ InteractionList interactionList = mtop.moltype[mt].ilist[F_SETTLE];
+ std::vector<int> iatoms = interactionList.iatoms;
+ totalSettles += iatoms.size() / nral1;
+ }
+ if (totalSettles == 0)
+ {
+ return;
+ }
+
+ // TODO This should be lifted to a separate subroutine that gets the values of Oxygen and
+ // Hydrogen masses, checks if they are consistent across the topology and if there is no more
+ // than two values for each mass if the free energy perturbation is enabled. In later case,
+ // masses may need to be updated on a regular basis (i.e. in set(...) method).
+ // TODO Do the checks for FEP
+ real mO = -1.0;
+ real mH = -1.0;
+
+ for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
+ {
+ const int nral1 = 1 + NRAL(F_SETTLE);
+ InteractionList interactionList = mtop.moltype[mt].ilist[F_SETTLE];
+ std::vector<int> iatoms = interactionList.iatoms;
+ for (unsigned i = 0; i < iatoms.size() / nral1; i++)
+ {
+ WaterMolecule settler;
+ settler.ow1 = iatoms[i * nral1 + 1]; // Oxygen index
+ settler.hw2 = iatoms[i * nral1 + 2]; // First hydrogen index
+ settler.hw3 = iatoms[i * nral1 + 3]; // Second hydrogen index
+ t_atom ow1 = mtop.moltype[mt].atoms.atom[settler.ow1];
+ t_atom hw2 = mtop.moltype[mt].atoms.atom[settler.hw2];
+ t_atom hw3 = mtop.moltype[mt].atoms.atom[settler.hw3];
+
+ if (mO < 0)
+ {
+ mO = ow1.m;
+ }
+ if (mH < 0)
+ {
+ mH = hw2.m;
+ }
+ GMX_RELEASE_ASSERT(mO == ow1.m,
+ "Topology has different values for oxygen mass. Should be identical "
+ "in order to use SETTLE.");
+ GMX_RELEASE_ASSERT(hw2.m == hw3.m && hw2.m == mH,
+ "Topology has different values for hydrogen mass. Should be "
+ "identical in order to use SETTLE.");
+ }
+ }
+
+ GMX_RELEASE_ASSERT(mO > 0, "Could not find oxygen mass in the topology. Needed in SETTLE.");
+ GMX_RELEASE_ASSERT(mH > 0, "Could not find hydrogen mass in the topology. Needed in SETTLE.");
+
+ // TODO Very similar to SETTLE initialization on CPU. Should be lifted to a separate method
+ // (one that gets dOH and dHH values and checks them for consistency)
+ int settle_type = -1;
+ for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
+ {
+ const int nral1 = 1 + NRAL(F_SETTLE);
+ InteractionList interactionList = mtop.moltype[mt].ilist[F_SETTLE];
+ for (int i = 0; i < interactionList.size(); i += nral1)
+ {
+ if (settle_type == -1)
+ {
+ settle_type = interactionList.iatoms[i];
+ }
+ else if (interactionList.iatoms[i] != settle_type)
+ {
+ gmx_fatal(FARGS,
+ "The [molecules] section of your topology specifies more than one block "
+ "of\n"
+ "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
+ "If you are trying to partition your solvent into different *groups*\n"
+ "(e.g. for freezing, T-coupling, etc.), you are using the wrong "
+ "approach. Index\n"
+ "files specify groups. Otherwise, you may wish to change the least-used\n"
+ "block of molecules with SETTLE constraints into 3 normal constraints.");
+ }
+ }
+ }
+
+ GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
+
+ real dOH = mtop.ffparams.iparams[settle_type].settle.doh;
+ real dHH = mtop.ffparams.iparams[settle_type].settle.dhh;
+
+ settleParameters_ = settleParameters(mO, mH, 1.0 / mO, 1.0 / mH, dOH, dHH);
+
+ allocateDeviceBuffer(&d_virialScaled_, 6, deviceContext_);
+ h_virialScaled_.resize(6);
+}
+
+SettleGpu::~SettleGpu()
+{
+ // Early exit if there is no settles
+ if (numSettles_ == 0)
+ {
+ return;
+ }
+ freeDeviceBuffer(&d_virialScaled_);
+ if (numAtomIdsAlloc_ > 0)
+ {
+ freeDeviceBuffer(&d_atomIds_);
+ }
+}
+
+void SettleGpu::set(const InteractionDefinitions& idef)
+{
+ const int nral1 = 1 + NRAL(F_SETTLE);
+ const InteractionList& il_settle = idef.il[F_SETTLE];
+ ArrayRef<const int> iatoms = il_settle.iatoms;
+ numSettles_ = il_settle.size() / nral1;
+
+ reallocateDeviceBuffer(&d_atomIds_, numSettles_, &numAtomIds_, &numAtomIdsAlloc_, deviceContext_);
+ h_atomIds_.resize(numSettles_);
+ for (int i = 0; i < numSettles_; i++)
+ {
+ WaterMolecule settler;
+ settler.ow1 = iatoms[i * nral1 + 1]; // Oxygen index
+ settler.hw2 = iatoms[i * nral1 + 2]; // First hydrogen index
+ settler.hw3 = iatoms[i * nral1 + 3]; // Second hydrogen index
+ h_atomIds_[i] = settler;
+ }
+ copyToDeviceBuffer(
+ &d_atomIds_, h_atomIds_.data(), 0, numSettles_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+} // namespace gmx
*
* \ingroup module_mdlib
*/
-#ifndef GMX_MDLIB_SETTLE_GPU_CUH
-#define GMX_MDLIB_SETTLE_GPU_CUH
+#ifndef GMX_MDLIB_SETTLE_GPU_H
+#define GMX_MDLIB_SETTLE_GPU_H
#include "gmxpre.h"
namespace gmx
{
+//! Indices of atoms in a water molecule
+struct WaterMolecule
+{
+ //! Oxygen atom
+ int ow1;
+ //! First hydrogen atom
+ int hw2;
+ //! Second hydrogen atom
+ int hw3;
+};
+
+
/*! \internal \brief Class with interfaces and data for GPU version of SETTLE. */
class SettleGpu
{
//! Scaled virial tensor (9 reals, GPU)
std::vector<float> h_virialScaled_;
//! Scaled virial tensor (9 reals, GPU)
- float* d_virialScaled_;
+ DeviceBuffer<float> d_virialScaled_;
//! Number of settles
int numSettles_ = 0;
- //! Indexes of atoms (.x for oxygen, .y and.z for hydrogens, CPU)
- std::vector<int3> h_atomIds_;
- //! Indexes of atoms (.x for oxygen, .y and.z for hydrogens, GPU)
- int3* d_atomIds_;
+ //! Indexes of atoms (.i for oxygen, .j and.k for hydrogens, CPU)
+ std::vector<WaterMolecule> h_atomIds_;
+ //! Indexes of atoms (.i for oxygen, .j and.k for hydrogens, GPU)
+ DeviceBuffer<WaterMolecule> d_atomIds_;
//! Current size of the array of atom IDs
int numAtomIds_ = -1;
//! Allocated size for the array of atom IDs
} // namespace gmx
-#endif // GMX_MDLIB_SETTLE_GPU_CUH
+#endif // GMX_MDLIB_SETTLE_GPU_H
*/
/*! \internal \file
*
- * \brief Implements SETTLE using CUDA
- *
- * This file contains implementation of SETTLE constraints algorithm
- * using CUDA, including class initialization, data-structures management
- * and GPU kernel.
+ * \brief CUDA-specific routines for the GPU implementation of SETTLE constraints algorithm.
*
*
* \author Artem Zhmurov <zhmurov@gmail.com>
*/
#include "gmxpre.h"
-#include "settle_gpu.cuh"
+#include "settle_gpu_internal.h"
#include <assert.h>
#include <stdio.h>
{
//! Number of CUDA threads in a block
-constexpr static int c_threadsPerBlock = 256;
+constexpr static int sc_threadsPerBlock = 256;
+
//! Maximum number of threads in a block (for __launch_bounds__)
-constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock;
+constexpr static int sc_maxThreadsPerBlock = sc_threadsPerBlock;
/*! \brief SETTLE constraints kernel
*
* \param [in] pbcAiuc Periodic boundary conditions data.
*/
template<bool updateVelocities, bool computeVirial>
-__launch_bounds__(c_maxThreadsPerBlock) __global__
+__launch_bounds__(sc_maxThreadsPerBlock) __global__
void settle_kernel(const int numSettles,
- const int3* __restrict__ gm_settles,
+ const WaterMolecule* __restrict__ gm_settles,
const SettleParameters pars,
const float3* __restrict__ gm_x,
float3* __restrict__ gm_xprime,
{
// These are the indexes of three atoms in a single 'water' molecule.
// TODO Can be reduced to one integer if atoms are consecutive in memory.
- int3 indices = gm_settles[tid];
+ WaterMolecule indices = gm_settles[tid];
- float3 x_ow1 = gm_x[indices.x];
- float3 x_hw2 = gm_x[indices.y];
- float3 x_hw3 = gm_x[indices.z];
+ float3 x_ow1 = gm_x[indices.ow1];
+ float3 x_hw2 = gm_x[indices.hw2];
+ float3 x_hw3 = gm_x[indices.hw3];
- float3 xprime_ow1 = gm_xprime[indices.x];
- float3 xprime_hw2 = gm_xprime[indices.y];
- float3 xprime_hw3 = gm_xprime[indices.z];
+ float3 xprime_ow1 = gm_xprime[indices.ow1];
+ float3 xprime_hw2 = gm_xprime[indices.hw2];
+ float3 xprime_hw3 = gm_xprime[indices.hw3];
float3 dist21 = pbcDxAiuc(pbcAiuc, x_hw2, x_ow1);
float3 dist31 = pbcDxAiuc(pbcAiuc, x_hw3, x_ow1);
const float3 dxHw2 = b3 - b1;
const float3 dxHw3 = c3 - c1;
- gm_xprime[indices.x] = xprime_ow1 + dxOw1;
- gm_xprime[indices.y] = xprime_hw2 + dxHw2;
- gm_xprime[indices.z] = xprime_hw3 + dxHw3;
+ gm_xprime[indices.ow1] = xprime_ow1 + dxOw1;
+ gm_xprime[indices.hw2] = xprime_hw2 + dxHw2;
+ gm_xprime[indices.hw3] = xprime_hw3 + dxHw3;
if (updateVelocities)
{
- float3 v_ow1 = gm_v[indices.x];
- float3 v_hw2 = gm_v[indices.y];
- float3 v_hw3 = gm_v[indices.z];
+ float3 v_ow1 = gm_v[indices.ow1];
+ float3 v_hw2 = gm_v[indices.hw2];
+ float3 v_hw3 = gm_v[indices.hw3];
/* Add the position correction divided by dt to the velocity */
v_ow1 = dxOw1 * invdt + v_ow1;
v_hw2 = dxHw2 * invdt + v_hw2;
v_hw3 = dxHw3 * invdt + v_hw3;
- gm_v[indices.x] = v_ow1;
- gm_v[indices.y] = v_hw2;
- gm_v[indices.z] = v_hw3;
+ gm_v[indices.ow1] = v_ow1;
+ gm_v[indices.hw2] = v_hw2;
+ gm_v[indices.hw3] = v_hw3;
}
if (computeVirial)
return kernelPtr;
}
-void SettleGpu::apply(const DeviceBuffer<Float3> d_x,
- DeviceBuffer<Float3> d_xp,
- const bool updateVelocities,
- DeviceBuffer<Float3> d_v,
- const real invdt,
- const bool computeVirial,
- tensor virialScaled,
- const PbcAiuc pbcAiuc)
+void launchSettleGpuKernel(const int numSettles,
+ const DeviceBuffer<WaterMolecule> d_atomIds,
+ const SettleParameters settleParameters,
+ const DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ const bool updateVelocities,
+ DeviceBuffer<Float3> d_v,
+ const real invdt,
+ const bool computeVirial,
+ DeviceBuffer<float> virialScaled,
+ const PbcAiuc pbcAiuc,
+ const DeviceStream& deviceStream)
{
-
- ensureNoPendingDeviceError("In CUDA version SETTLE");
-
- // Early exit if no settles
- if (numSettles_ == 0)
- {
- return;
- }
-
- if (computeVirial)
- {
- // Fill with zeros so the values can be reduced to it
- // Only 6 values are needed because virial is symmetrical
- clearDeviceBufferAsync(&d_virialScaled_, 0, 6, deviceStream_);
- }
+ static_assert(
+ gmx::isPowerOfTwo(sc_threadsPerBlock),
+ "Number of threads per block should be a power of two in order for reduction to work.");
auto kernelPtr = getSettleKernelPtr(updateVelocities, computeVirial);
KernelLaunchConfig config;
- config.blockSize[0] = c_threadsPerBlock;
+ config.blockSize[0] = sc_threadsPerBlock;
config.blockSize[1] = 1;
config.blockSize[2] = 1;
- config.gridSize[0] = (numSettles_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
+ config.gridSize[0] = (numSettles + sc_threadsPerBlock - 1) / sc_threadsPerBlock;
config.gridSize[1] = 1;
config.gridSize[2] = 1;
+
// Shared memory is only used for virial reduction
if (computeVirial)
{
- config.sharedMemorySize = c_threadsPerBlock * 6 * sizeof(float);
+ config.sharedMemorySize = sc_threadsPerBlock * 6 * sizeof(float);
}
else
{
const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
config,
- &numSettles_,
- &d_atomIds_,
- &settleParameters_,
+ &numSettles,
+ &d_atomIds,
+ &settleParameters,
asFloat3Pointer(&d_x),
asFloat3Pointer(&d_xp),
&invdt,
asFloat3Pointer(&d_v),
- &d_virialScaled_,
+ &virialScaled,
&pbcAiuc);
launchGpuKernel(kernelPtr,
config,
- deviceStream_,
+ deviceStream,
nullptr,
"settle_kernel<updateVelocities, computeVirial>",
kernelArgs);
- if (computeVirial)
- {
- copyFromDeviceBuffer(
- h_virialScaled_.data(), &d_virialScaled_, 0, 6, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
-
- // Mapping [XX, XY, XZ, YY, YZ, ZZ] internal format to a tensor object
- virialScaled[XX][XX] += h_virialScaled_[0];
- virialScaled[XX][YY] += h_virialScaled_[1];
- virialScaled[XX][ZZ] += h_virialScaled_[2];
-
- virialScaled[YY][XX] += h_virialScaled_[1];
- virialScaled[YY][YY] += h_virialScaled_[3];
- virialScaled[YY][ZZ] += h_virialScaled_[4];
-
- virialScaled[ZZ][XX] += h_virialScaled_[2];
- virialScaled[ZZ][YY] += h_virialScaled_[4];
- virialScaled[ZZ][ZZ] += h_virialScaled_[5];
- }
-
return;
}
-SettleGpu::SettleGpu(const gmx_mtop_t& mtop, const DeviceContext& deviceContext, const DeviceStream& deviceStream) :
- deviceContext_(deviceContext),
- deviceStream_(deviceStream)
-{
- static_assert(sizeof(real) == sizeof(float),
- "Real numbers should be in single precision in GPU code.");
- static_assert(
- gmx::isPowerOfTwo(c_threadsPerBlock),
- "Number of threads per block should be a power of two in order for reduction to work.");
-
- // This is to prevent the assertion failure for the systems without water
- int totalSettles = 0;
- for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
- {
- const int nral1 = 1 + NRAL(F_SETTLE);
- InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE];
- std::vector<int> iatoms = interactionList.iatoms;
- totalSettles += iatoms.size() / nral1;
- }
- if (totalSettles == 0)
- {
- return;
- }
-
- // TODO This should be lifted to a separate subroutine that gets the values of Oxygen and
- // Hydrogen masses, checks if they are consistent across the topology and if there is no more
- // than two values for each mass if the free energy perturbation is enabled. In later case,
- // masses may need to be updated on a regular basis (i.e. in set(...) method).
- // TODO Do the checks for FEP
- real mO = -1.0;
- real mH = -1.0;
-
- for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
- {
- const int nral1 = 1 + NRAL(F_SETTLE);
- InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE];
- std::vector<int> iatoms = interactionList.iatoms;
- for (unsigned i = 0; i < iatoms.size() / nral1; i++)
- {
- int3 settler;
- settler.x = iatoms[i * nral1 + 1]; // Oxygen index
- settler.y = iatoms[i * nral1 + 2]; // First hydrogen index
- settler.z = iatoms[i * nral1 + 3]; // Second hydrogen index
- t_atom ow1 = mtop.moltype.at(mt).atoms.atom[settler.x];
- t_atom hw2 = mtop.moltype.at(mt).atoms.atom[settler.y];
- t_atom hw3 = mtop.moltype.at(mt).atoms.atom[settler.z];
-
- if (mO < 0)
- {
- mO = ow1.m;
- }
- if (mH < 0)
- {
- mH = hw2.m;
- }
- GMX_RELEASE_ASSERT(mO == ow1.m,
- "Topology has different values for oxygen mass. Should be identical "
- "in order to use SETTLE.");
- GMX_RELEASE_ASSERT(hw2.m == hw3.m && hw2.m == mH,
- "Topology has different values for hydrogen mass. Should be "
- "identical in order to use SETTLE.");
- }
- }
-
- GMX_RELEASE_ASSERT(mO > 0, "Could not find oxygen mass in the topology. Needed in SETTLE.");
- GMX_RELEASE_ASSERT(mH > 0, "Could not find hydrogen mass in the topology. Needed in SETTLE.");
-
- // TODO Very similar to SETTLE initialization on CPU. Should be lifted to a separate method
- // (one that gets dOH and dHH values and checks them for consistency)
- int settle_type = -1;
- for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
- {
- const int nral1 = 1 + NRAL(F_SETTLE);
- InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE];
- for (int i = 0; i < interactionList.size(); i += nral1)
- {
- if (settle_type == -1)
- {
- settle_type = interactionList.iatoms[i];
- }
- else if (interactionList.iatoms[i] != settle_type)
- {
- gmx_fatal(FARGS,
- "The [molecules] section of your topology specifies more than one block "
- "of\n"
- "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
- "If you are trying to partition your solvent into different *groups*\n"
- "(e.g. for freezing, T-coupling, etc.), you are using the wrong "
- "approach. Index\n"
- "files specify groups. Otherwise, you may wish to change the least-used\n"
- "block of molecules with SETTLE constraints into 3 normal constraints.");
- }
- }
- }
-
- GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
-
- real dOH = mtop.ffparams.iparams[settle_type].settle.doh;
- real dHH = mtop.ffparams.iparams[settle_type].settle.dhh;
-
- settleParameters_ = settleParameters(mO, mH, 1.0 / mO, 1.0 / mH, dOH, dHH);
-
- allocateDeviceBuffer(&d_virialScaled_, 6, deviceContext_);
- h_virialScaled_.resize(6);
-}
-
-SettleGpu::~SettleGpu()
-{
- // Early exit if there is no settles
- if (numSettles_ == 0)
- {
- return;
- }
- freeDeviceBuffer(&d_virialScaled_);
- if (numAtomIdsAlloc_ > 0)
- {
- freeDeviceBuffer(&d_atomIds_);
- }
-}
-
-void SettleGpu::set(const InteractionDefinitions& idef)
-{
- const int nral1 = 1 + NRAL(F_SETTLE);
- const InteractionList& il_settle = idef.il[F_SETTLE];
- ArrayRef<const int> iatoms = il_settle.iatoms;
- numSettles_ = il_settle.size() / nral1;
-
- reallocateDeviceBuffer(&d_atomIds_, numSettles_, &numAtomIds_, &numAtomIdsAlloc_, deviceContext_);
- h_atomIds_.resize(numSettles_);
- for (int i = 0; i < numSettles_; i++)
- {
- int3 settler;
- settler.x = iatoms[i * nral1 + 1]; // Oxygen index
- settler.y = iatoms[i * nral1 + 2]; // First hydrogen index
- settler.z = iatoms[i * nral1 + 3]; // Second hydrogen index
- h_atomIds_.at(i) = settler;
- }
- copyToDeviceBuffer(
- &d_atomIds_, h_atomIds_.data(), 0, numSettles_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
-}
-
} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Declares backend-specific functions for GPU implementation of SETTLE.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#ifndef GMX_MDLIB_SETTLE_GPU_INTERNAL_H
+#define GMX_MDLIB_SETTLE_GPU_INTERNAL_H
+
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/mdlib/settle_gpu.h"
+
+namespace gmx
+{
+
+/*! \brief Apply SETTLE.
+ *
+ * Applies SETTLE to coordinates and velocities, stored on GPU. Data at pointers d_xp and
+ * d_v change in the GPU memory. The results are not automatically copied back to the CPU
+ * memory. Method uses this class data structures which should be updated when needed using
+ * update method.
+ *
+ * \param[in] numSettles Number of SETTLE constraints.
+ * \param[in] d_atomIds Device buffer with indices of atoms to be SETTLEd.
+ * \param[in] settleParameters Parameters for SETTLE constraints.
+ * \param[in] d_x Coordinates before timestep (in GPU memory)
+ * \param[in,out] d_xp Coordinates after timestep (in GPU memory). The
+ * resulting constrained coordinates will be saved here.
+ * \param[in] updateVelocities If the velocities should be updated.
+ * \param[in,out] d_v Velocities to update (in GPU memory, can be nullptr
+ * if not updated)
+ * \param[in] invdt Reciprocal timestep (to scale Lagrange
+ * multipliers when velocities are updated)
+ * \param[in] computeVirial If virial should be updated.
+ * \param[in,out] virialScaled Scaled virial tensor to be updated.
+ * \param[in] pbcAiuc PBC data.
+ * \param[in] deviceStream Device stream to launch kernel in.
+ */
+void launchSettleGpuKernel(int numSettles,
+ const DeviceBuffer<WaterMolecule> d_atomIds,
+ const SettleParameters settleParameters,
+ const DeviceBuffer<Float3> d_x,
+ DeviceBuffer<Float3> d_xp,
+ const bool updateVelocities,
+ DeviceBuffer<Float3> d_v,
+ const real invdt,
+ const bool computeVirial,
+ DeviceBuffer<float> virialScaled,
+ const PbcAiuc pbcAiuc,
+ const DeviceStream& deviceStream);
+
+} // namespace gmx
+
+#endif // GMX_MDLIB_SETTLE_GPU_INTERNAL_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief SYCL-specific routines for the GPU implementation of SETTLE constraints algorithm.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+
+#include "settle_gpu_internal.h"
+
+#include "gromacs/utility/gmxassert.h"
+
+namespace gmx
+{
+
+void launchSettleGpuKernel(const int /* numSettles */,
+ const DeviceBuffer<WaterMolecule> /* d_atomIds */,
+ const SettleParameters /* settleParameters */,
+ const DeviceBuffer<Float3> /* d_x */,
+ DeviceBuffer<Float3> /* d_xp */,
+ const bool /* updateVelocities */,
+ DeviceBuffer<Float3> /* d_v */,
+ const real /* invdt */,
+ const bool /* computeVirial */,
+ DeviceBuffer<float> /* virialScaled */,
+ const PbcAiuc /* pbcAiuc */,
+ const DeviceStream& /* deviceStream */)
+{
+ // SYCL_TODO
+ GMX_RELEASE_ASSERT(false, "SETTLE is not yet implemented in SYCL.");
+
+ return;
+}
+
+} // namespace gmx
#include "gromacs/gpu_utils/devicebuffer.cuh"
#include "gromacs/gpu_utils/gputraits.h"
#include "gromacs/hardware/device_information.h"
-#include "gromacs/mdlib/settle_gpu.cuh"
+#include "gromacs/mdlib/settle_gpu.h"
#include "gromacs/utility/unique_cptr.h"
#include "testutils/test_device.h"
#include "gromacs/gpu_utils/gputraits.cuh"
#include "gromacs/gpu_utils/vectype_ops.cuh"
#include "gromacs/mdlib/leapfrog_gpu.h"
-#include "gromacs/mdlib/lincs_gpu.h"
-#include "gromacs/mdlib/settle_gpu.cuh"
#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/mdlib/leapfrog_gpu.h"
#include "gromacs/mdlib/lincs_gpu.h"
-#include "gromacs/mdlib/settle_gpu.cuh"
+#include "gromacs/mdlib/settle_gpu.h"
#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdtypes/inputrec.h"