From 4e90f9ebb3bf6ad109715859cbdfda1d70976bd3 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Fri, 23 Apr 2021 11:36:31 +0000 Subject: [PATCH] Make SETTLE setup code platform-agnostic --- src/gromacs/mdlib/CMakeLists.txt | 13 +- src/gromacs/mdlib/settle_gpu.cpp | 263 +++++++++++++++++ .../mdlib/{settle_gpu.cuh => settle_gpu.h} | 28 +- .../{settle_gpu.cu => settle_gpu_internal.cu} | 264 ++++-------------- src/gromacs/mdlib/settle_gpu_internal.h | 91 ++++++ .../mdlib/settle_gpu_internal_sycl.cpp | 70 +++++ src/gromacs/mdlib/tests/settletestrunners.cu | 2 +- .../mdlib/update_constrain_gpu_impl.cu | 2 - src/gromacs/mdlib/update_constrain_gpu_impl.h | 2 +- 9 files changed, 504 insertions(+), 231 deletions(-) create mode 100644 src/gromacs/mdlib/settle_gpu.cpp rename src/gromacs/mdlib/{settle_gpu.cuh => settle_gpu.h} (91%) rename src/gromacs/mdlib/{settle_gpu.cu => settle_gpu_internal.cu} (64%) create mode 100644 src/gromacs/mdlib/settle_gpu_internal.h create mode 100644 src/gromacs/mdlib/settle_gpu_internal_sycl.cpp diff --git a/src/gromacs/mdlib/CMakeLists.txt b/src/gromacs/mdlib/CMakeLists.txt index 623fcb31df..3f905dedcf 100644 --- a/src/gromacs/mdlib/CMakeLists.txt +++ b/src/gromacs/mdlib/CMakeLists.txt @@ -40,7 +40,9 @@ file(GLOB MDLIB_SOURCES *.cpp) list(REMOVE_ITEM MDLIB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/leapfrog_gpu_sycl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu_internal_sycl.cpp) + ${CMAKE_CURRENT_SOURCE_DIR}/lincs_gpu_internal_sycl.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/settle_gpu.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/settle_gpu_internal_sycl.cpp) set(MDLIB_SOURCES ${MDLIB_SOURCES} PARENT_SCOPE) if(GMX_GPU_CUDA) @@ -48,12 +50,14 @@ if(GMX_GPU_CUDA) leapfrog_gpu.cu lincs_gpu.cpp lincs_gpu_internal.cu - settle_gpu.cu + settle_gpu.cpp + settle_gpu_internal.cu update_constrain_gpu_impl.cu gpuforcereduction_impl.cu ) _gmx_add_files_to_property(CUDA_SOURCES lincs_gpu.cpp + settle_gpu.cpp ) endif() @@ -62,11 +66,16 @@ if(GMX_GPU_SYCL) leapfrog_gpu_sycl.cpp lincs_gpu.cpp lincs_gpu_internal_sycl.cpp + settle_gpu.cpp + settle_gpu_internal_sycl.cpp ) + _gmx_add_files_to_property(SYCL_SOURCES leapfrog_gpu_sycl.cpp lincs_gpu.cpp lincs_gpu_internal_sycl.cpp + settle_gpu.cpp + settle_gpu_internal_sycl.cpp ) endif() diff --git a/src/gromacs/mdlib/settle_gpu.cpp b/src/gromacs/mdlib/settle_gpu.cpp new file mode 100644 index 0000000000..0c19d7ad99 --- /dev/null +++ b/src/gromacs/mdlib/settle_gpu.cpp @@ -0,0 +1,263 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements SETTLE using GPU + * + * This file contains implementation for the data management of GPU version of SETTLE constraints algorithm. + * + * \author Artem Zhmurov + * + * \ingroup module_mdlib + */ +#include "gmxpre.h" + +#include "settle_gpu.h" + +#include +#include + +#include + +#include + +#include "gromacs/gpu_utils/devicebuffer.h" +#include "gromacs/gpu_utils/gputraits.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/settle_gpu_internal.h" +#include "gromacs/pbcutil/pbc.h" + +namespace gmx +{ + +void SettleGpu::apply(const DeviceBuffer d_x, + DeviceBuffer d_xp, + const bool updateVelocities, + DeviceBuffer d_v, + const real invdt, + const bool computeVirial, + tensor virialScaled, + const PbcAiuc pbcAiuc) +{ + + // Early exit if no settles + if (numSettles_ == 0) + { + return; + } + + if (computeVirial) + { + // Fill with zeros so the values can be reduced to it + // Only 6 values are needed because virial is symmetrical + clearDeviceBufferAsync(&d_virialScaled_, 0, 6, deviceStream_); + } + + launchSettleGpuKernel(numSettles_, + d_atomIds_, + settleParameters_, + d_x, + d_xp, + updateVelocities, + d_v, + invdt, + computeVirial, + d_virialScaled_, + pbcAiuc, + deviceStream_); + + + if (computeVirial) + { + copyFromDeviceBuffer( + h_virialScaled_.data(), &d_virialScaled_, 0, 6, deviceStream_, GpuApiCallBehavior::Sync, nullptr); + + // Mapping [XX, XY, XZ, YY, YZ, ZZ] internal format to a tensor object + virialScaled[XX][XX] += h_virialScaled_[0]; + virialScaled[XX][YY] += h_virialScaled_[1]; + virialScaled[XX][ZZ] += h_virialScaled_[2]; + + virialScaled[YY][XX] += h_virialScaled_[1]; + virialScaled[YY][YY] += h_virialScaled_[3]; + virialScaled[YY][ZZ] += h_virialScaled_[4]; + + virialScaled[ZZ][XX] += h_virialScaled_[2]; + virialScaled[ZZ][YY] += h_virialScaled_[4]; + virialScaled[ZZ][ZZ] += h_virialScaled_[5]; + } + + return; +} + +SettleGpu::SettleGpu(const gmx_mtop_t& mtop, const DeviceContext& deviceContext, const DeviceStream& deviceStream) : + deviceContext_(deviceContext), + deviceStream_(deviceStream) +{ + static_assert(sizeof(real) == sizeof(float), + "Real numbers should be in single precision in GPU code."); + + // This is to prevent the assertion failure for the systems without water + int totalSettles = 0; + for (unsigned mt = 0; mt < mtop.moltype.size(); mt++) + { + const int nral1 = 1 + NRAL(F_SETTLE); + InteractionList interactionList = mtop.moltype[mt].ilist[F_SETTLE]; + std::vector iatoms = interactionList.iatoms; + totalSettles += iatoms.size() / nral1; + } + if (totalSettles == 0) + { + return; + } + + // TODO This should be lifted to a separate subroutine that gets the values of Oxygen and + // Hydrogen masses, checks if they are consistent across the topology and if there is no more + // than two values for each mass if the free energy perturbation is enabled. In later case, + // masses may need to be updated on a regular basis (i.e. in set(...) method). + // TODO Do the checks for FEP + real mO = -1.0; + real mH = -1.0; + + for (unsigned mt = 0; mt < mtop.moltype.size(); mt++) + { + const int nral1 = 1 + NRAL(F_SETTLE); + InteractionList interactionList = mtop.moltype[mt].ilist[F_SETTLE]; + std::vector iatoms = interactionList.iatoms; + for (unsigned i = 0; i < iatoms.size() / nral1; i++) + { + WaterMolecule settler; + settler.ow1 = iatoms[i * nral1 + 1]; // Oxygen index + settler.hw2 = iatoms[i * nral1 + 2]; // First hydrogen index + settler.hw3 = iatoms[i * nral1 + 3]; // Second hydrogen index + t_atom ow1 = mtop.moltype[mt].atoms.atom[settler.ow1]; + t_atom hw2 = mtop.moltype[mt].atoms.atom[settler.hw2]; + t_atom hw3 = mtop.moltype[mt].atoms.atom[settler.hw3]; + + if (mO < 0) + { + mO = ow1.m; + } + if (mH < 0) + { + mH = hw2.m; + } + GMX_RELEASE_ASSERT(mO == ow1.m, + "Topology has different values for oxygen mass. Should be identical " + "in order to use SETTLE."); + GMX_RELEASE_ASSERT(hw2.m == hw3.m && hw2.m == mH, + "Topology has different values for hydrogen mass. Should be " + "identical in order to use SETTLE."); + } + } + + GMX_RELEASE_ASSERT(mO > 0, "Could not find oxygen mass in the topology. Needed in SETTLE."); + GMX_RELEASE_ASSERT(mH > 0, "Could not find hydrogen mass in the topology. Needed in SETTLE."); + + // TODO Very similar to SETTLE initialization on CPU. Should be lifted to a separate method + // (one that gets dOH and dHH values and checks them for consistency) + int settle_type = -1; + for (unsigned mt = 0; mt < mtop.moltype.size(); mt++) + { + const int nral1 = 1 + NRAL(F_SETTLE); + InteractionList interactionList = mtop.moltype[mt].ilist[F_SETTLE]; + for (int i = 0; i < interactionList.size(); i += nral1) + { + if (settle_type == -1) + { + settle_type = interactionList.iatoms[i]; + } + else if (interactionList.iatoms[i] != settle_type) + { + gmx_fatal(FARGS, + "The [molecules] section of your topology specifies more than one block " + "of\n" + "a [moleculetype] with a [settles] block. Only one such is allowed.\n" + "If you are trying to partition your solvent into different *groups*\n" + "(e.g. for freezing, T-coupling, etc.), you are using the wrong " + "approach. Index\n" + "files specify groups. Otherwise, you may wish to change the least-used\n" + "block of molecules with SETTLE constraints into 3 normal constraints."); + } + } + } + + GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles"); + + real dOH = mtop.ffparams.iparams[settle_type].settle.doh; + real dHH = mtop.ffparams.iparams[settle_type].settle.dhh; + + settleParameters_ = settleParameters(mO, mH, 1.0 / mO, 1.0 / mH, dOH, dHH); + + allocateDeviceBuffer(&d_virialScaled_, 6, deviceContext_); + h_virialScaled_.resize(6); +} + +SettleGpu::~SettleGpu() +{ + // Early exit if there is no settles + if (numSettles_ == 0) + { + return; + } + freeDeviceBuffer(&d_virialScaled_); + if (numAtomIdsAlloc_ > 0) + { + freeDeviceBuffer(&d_atomIds_); + } +} + +void SettleGpu::set(const InteractionDefinitions& idef) +{ + const int nral1 = 1 + NRAL(F_SETTLE); + const InteractionList& il_settle = idef.il[F_SETTLE]; + ArrayRef iatoms = il_settle.iatoms; + numSettles_ = il_settle.size() / nral1; + + reallocateDeviceBuffer(&d_atomIds_, numSettles_, &numAtomIds_, &numAtomIdsAlloc_, deviceContext_); + h_atomIds_.resize(numSettles_); + for (int i = 0; i < numSettles_; i++) + { + WaterMolecule settler; + settler.ow1 = iatoms[i * nral1 + 1]; // Oxygen index + settler.hw2 = iatoms[i * nral1 + 2]; // First hydrogen index + settler.hw3 = iatoms[i * nral1 + 3]; // Second hydrogen index + h_atomIds_[i] = settler; + } + copyToDeviceBuffer( + &d_atomIds_, h_atomIds_.data(), 0, numSettles_, deviceStream_, GpuApiCallBehavior::Sync, nullptr); +} + +} // namespace gmx diff --git a/src/gromacs/mdlib/settle_gpu.cuh b/src/gromacs/mdlib/settle_gpu.h similarity index 91% rename from src/gromacs/mdlib/settle_gpu.cuh rename to src/gromacs/mdlib/settle_gpu.h index f09fbd344f..641c53d95a 100644 --- a/src/gromacs/mdlib/settle_gpu.cuh +++ b/src/gromacs/mdlib/settle_gpu.h @@ -40,8 +40,8 @@ * * \ingroup module_mdlib */ -#ifndef GMX_MDLIB_SETTLE_GPU_CUH -#define GMX_MDLIB_SETTLE_GPU_CUH +#ifndef GMX_MDLIB_SETTLE_GPU_H +#define GMX_MDLIB_SETTLE_GPU_H #include "gmxpre.h" @@ -62,6 +62,18 @@ class InteractionDefinitions; namespace gmx { +//! Indices of atoms in a water molecule +struct WaterMolecule +{ + //! Oxygen atom + int ow1; + //! First hydrogen atom + int hw2; + //! Second hydrogen atom + int hw3; +}; + + /*! \internal \brief Class with interfaces and data for GPU version of SETTLE. */ class SettleGpu { @@ -134,15 +146,15 @@ private: //! Scaled virial tensor (9 reals, GPU) std::vector h_virialScaled_; //! Scaled virial tensor (9 reals, GPU) - float* d_virialScaled_; + DeviceBuffer d_virialScaled_; //! Number of settles int numSettles_ = 0; - //! Indexes of atoms (.x for oxygen, .y and.z for hydrogens, CPU) - std::vector h_atomIds_; - //! Indexes of atoms (.x for oxygen, .y and.z for hydrogens, GPU) - int3* d_atomIds_; + //! Indexes of atoms (.i for oxygen, .j and.k for hydrogens, CPU) + std::vector h_atomIds_; + //! Indexes of atoms (.i for oxygen, .j and.k for hydrogens, GPU) + DeviceBuffer d_atomIds_; //! Current size of the array of atom IDs int numAtomIds_ = -1; //! Allocated size for the array of atom IDs @@ -154,4 +166,4 @@ private: } // namespace gmx -#endif // GMX_MDLIB_SETTLE_GPU_CUH +#endif // GMX_MDLIB_SETTLE_GPU_H diff --git a/src/gromacs/mdlib/settle_gpu.cu b/src/gromacs/mdlib/settle_gpu_internal.cu similarity index 64% rename from src/gromacs/mdlib/settle_gpu.cu rename to src/gromacs/mdlib/settle_gpu_internal.cu index 0cc25c50ce..11dd63b035 100644 --- a/src/gromacs/mdlib/settle_gpu.cu +++ b/src/gromacs/mdlib/settle_gpu_internal.cu @@ -34,11 +34,7 @@ */ /*! \internal \file * - * \brief Implements SETTLE using CUDA - * - * This file contains implementation of SETTLE constraints algorithm - * using CUDA, including class initialization, data-structures management - * and GPU kernel. + * \brief CUDA-specific routines for the GPU implementation of SETTLE constraints algorithm. * * * \author Artem Zhmurov @@ -47,7 +43,7 @@ */ #include "gmxpre.h" -#include "settle_gpu.cuh" +#include "settle_gpu_internal.h" #include #include @@ -71,9 +67,10 @@ namespace gmx { //! Number of CUDA threads in a block -constexpr static int c_threadsPerBlock = 256; +constexpr static int sc_threadsPerBlock = 256; + //! Maximum number of threads in a block (for __launch_bounds__) -constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock; +constexpr static int sc_maxThreadsPerBlock = sc_threadsPerBlock; /*! \brief SETTLE constraints kernel * @@ -94,9 +91,9 @@ constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock; * \param [in] pbcAiuc Periodic boundary conditions data. */ template -__launch_bounds__(c_maxThreadsPerBlock) __global__ +__launch_bounds__(sc_maxThreadsPerBlock) __global__ void settle_kernel(const int numSettles, - const int3* __restrict__ gm_settles, + const WaterMolecule* __restrict__ gm_settles, const SettleParameters pars, const float3* __restrict__ gm_x, float3* __restrict__ gm_xprime, @@ -131,15 +128,15 @@ __launch_bounds__(c_maxThreadsPerBlock) __global__ { // These are the indexes of three atoms in a single 'water' molecule. // TODO Can be reduced to one integer if atoms are consecutive in memory. - int3 indices = gm_settles[tid]; + WaterMolecule indices = gm_settles[tid]; - float3 x_ow1 = gm_x[indices.x]; - float3 x_hw2 = gm_x[indices.y]; - float3 x_hw3 = gm_x[indices.z]; + float3 x_ow1 = gm_x[indices.ow1]; + float3 x_hw2 = gm_x[indices.hw2]; + float3 x_hw3 = gm_x[indices.hw3]; - float3 xprime_ow1 = gm_xprime[indices.x]; - float3 xprime_hw2 = gm_xprime[indices.y]; - float3 xprime_hw3 = gm_xprime[indices.z]; + float3 xprime_ow1 = gm_xprime[indices.ow1]; + float3 xprime_hw2 = gm_xprime[indices.hw2]; + float3 xprime_hw3 = gm_xprime[indices.hw3]; float3 dist21 = pbcDxAiuc(pbcAiuc, x_hw2, x_ow1); float3 dist31 = pbcDxAiuc(pbcAiuc, x_hw3, x_ow1); @@ -274,24 +271,24 @@ __launch_bounds__(c_maxThreadsPerBlock) __global__ const float3 dxHw2 = b3 - b1; const float3 dxHw3 = c3 - c1; - gm_xprime[indices.x] = xprime_ow1 + dxOw1; - gm_xprime[indices.y] = xprime_hw2 + dxHw2; - gm_xprime[indices.z] = xprime_hw3 + dxHw3; + gm_xprime[indices.ow1] = xprime_ow1 + dxOw1; + gm_xprime[indices.hw2] = xprime_hw2 + dxHw2; + gm_xprime[indices.hw3] = xprime_hw3 + dxHw3; if (updateVelocities) { - float3 v_ow1 = gm_v[indices.x]; - float3 v_hw2 = gm_v[indices.y]; - float3 v_hw3 = gm_v[indices.z]; + float3 v_ow1 = gm_v[indices.ow1]; + float3 v_hw2 = gm_v[indices.hw2]; + float3 v_hw3 = gm_v[indices.hw3]; /* Add the position correction divided by dt to the velocity */ v_ow1 = dxOw1 * invdt + v_ow1; v_hw2 = dxHw2 * invdt + v_hw2; v_hw3 = dxHw3 * invdt + v_hw3; - gm_v[indices.x] = v_ow1; - gm_v[indices.y] = v_hw2; - gm_v[indices.z] = v_hw3; + gm_v[indices.ow1] = v_ow1; + gm_v[indices.hw2] = v_hw2; + gm_v[indices.hw3] = v_hw3; } if (computeVirial) @@ -397,44 +394,37 @@ inline auto getSettleKernelPtr(const bool updateVelocities, const bool computeVi return kernelPtr; } -void SettleGpu::apply(const DeviceBuffer d_x, - DeviceBuffer d_xp, - const bool updateVelocities, - DeviceBuffer d_v, - const real invdt, - const bool computeVirial, - tensor virialScaled, - const PbcAiuc pbcAiuc) +void launchSettleGpuKernel(const int numSettles, + const DeviceBuffer d_atomIds, + const SettleParameters settleParameters, + const DeviceBuffer d_x, + DeviceBuffer d_xp, + const bool updateVelocities, + DeviceBuffer d_v, + const real invdt, + const bool computeVirial, + DeviceBuffer virialScaled, + const PbcAiuc pbcAiuc, + const DeviceStream& deviceStream) { - - ensureNoPendingDeviceError("In CUDA version SETTLE"); - - // Early exit if no settles - if (numSettles_ == 0) - { - return; - } - - if (computeVirial) - { - // Fill with zeros so the values can be reduced to it - // Only 6 values are needed because virial is symmetrical - clearDeviceBufferAsync(&d_virialScaled_, 0, 6, deviceStream_); - } + static_assert( + gmx::isPowerOfTwo(sc_threadsPerBlock), + "Number of threads per block should be a power of two in order for reduction to work."); auto kernelPtr = getSettleKernelPtr(updateVelocities, computeVirial); KernelLaunchConfig config; - config.blockSize[0] = c_threadsPerBlock; + config.blockSize[0] = sc_threadsPerBlock; config.blockSize[1] = 1; config.blockSize[2] = 1; - config.gridSize[0] = (numSettles_ + c_threadsPerBlock - 1) / c_threadsPerBlock; + config.gridSize[0] = (numSettles + sc_threadsPerBlock - 1) / sc_threadsPerBlock; config.gridSize[1] = 1; config.gridSize[2] = 1; + // Shared memory is only used for virial reduction if (computeVirial) { - config.sharedMemorySize = c_threadsPerBlock * 6 * sizeof(float); + config.sharedMemorySize = sc_threadsPerBlock * 6 * sizeof(float); } else { @@ -443,184 +433,24 @@ void SettleGpu::apply(const DeviceBuffer d_x, const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, config, - &numSettles_, - &d_atomIds_, - &settleParameters_, + &numSettles, + &d_atomIds, + &settleParameters, asFloat3Pointer(&d_x), asFloat3Pointer(&d_xp), &invdt, asFloat3Pointer(&d_v), - &d_virialScaled_, + &virialScaled, &pbcAiuc); launchGpuKernel(kernelPtr, config, - deviceStream_, + deviceStream, nullptr, "settle_kernel", kernelArgs); - if (computeVirial) - { - copyFromDeviceBuffer( - h_virialScaled_.data(), &d_virialScaled_, 0, 6, deviceStream_, GpuApiCallBehavior::Sync, nullptr); - - // Mapping [XX, XY, XZ, YY, YZ, ZZ] internal format to a tensor object - virialScaled[XX][XX] += h_virialScaled_[0]; - virialScaled[XX][YY] += h_virialScaled_[1]; - virialScaled[XX][ZZ] += h_virialScaled_[2]; - - virialScaled[YY][XX] += h_virialScaled_[1]; - virialScaled[YY][YY] += h_virialScaled_[3]; - virialScaled[YY][ZZ] += h_virialScaled_[4]; - - virialScaled[ZZ][XX] += h_virialScaled_[2]; - virialScaled[ZZ][YY] += h_virialScaled_[4]; - virialScaled[ZZ][ZZ] += h_virialScaled_[5]; - } - return; } -SettleGpu::SettleGpu(const gmx_mtop_t& mtop, const DeviceContext& deviceContext, const DeviceStream& deviceStream) : - deviceContext_(deviceContext), - deviceStream_(deviceStream) -{ - static_assert(sizeof(real) == sizeof(float), - "Real numbers should be in single precision in GPU code."); - static_assert( - gmx::isPowerOfTwo(c_threadsPerBlock), - "Number of threads per block should be a power of two in order for reduction to work."); - - // This is to prevent the assertion failure for the systems without water - int totalSettles = 0; - for (unsigned mt = 0; mt < mtop.moltype.size(); mt++) - { - const int nral1 = 1 + NRAL(F_SETTLE); - InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE]; - std::vector iatoms = interactionList.iatoms; - totalSettles += iatoms.size() / nral1; - } - if (totalSettles == 0) - { - return; - } - - // TODO This should be lifted to a separate subroutine that gets the values of Oxygen and - // Hydrogen masses, checks if they are consistent across the topology and if there is no more - // than two values for each mass if the free energy perturbation is enabled. In later case, - // masses may need to be updated on a regular basis (i.e. in set(...) method). - // TODO Do the checks for FEP - real mO = -1.0; - real mH = -1.0; - - for (unsigned mt = 0; mt < mtop.moltype.size(); mt++) - { - const int nral1 = 1 + NRAL(F_SETTLE); - InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE]; - std::vector iatoms = interactionList.iatoms; - for (unsigned i = 0; i < iatoms.size() / nral1; i++) - { - int3 settler; - settler.x = iatoms[i * nral1 + 1]; // Oxygen index - settler.y = iatoms[i * nral1 + 2]; // First hydrogen index - settler.z = iatoms[i * nral1 + 3]; // Second hydrogen index - t_atom ow1 = mtop.moltype.at(mt).atoms.atom[settler.x]; - t_atom hw2 = mtop.moltype.at(mt).atoms.atom[settler.y]; - t_atom hw3 = mtop.moltype.at(mt).atoms.atom[settler.z]; - - if (mO < 0) - { - mO = ow1.m; - } - if (mH < 0) - { - mH = hw2.m; - } - GMX_RELEASE_ASSERT(mO == ow1.m, - "Topology has different values for oxygen mass. Should be identical " - "in order to use SETTLE."); - GMX_RELEASE_ASSERT(hw2.m == hw3.m && hw2.m == mH, - "Topology has different values for hydrogen mass. Should be " - "identical in order to use SETTLE."); - } - } - - GMX_RELEASE_ASSERT(mO > 0, "Could not find oxygen mass in the topology. Needed in SETTLE."); - GMX_RELEASE_ASSERT(mH > 0, "Could not find hydrogen mass in the topology. Needed in SETTLE."); - - // TODO Very similar to SETTLE initialization on CPU. Should be lifted to a separate method - // (one that gets dOH and dHH values and checks them for consistency) - int settle_type = -1; - for (unsigned mt = 0; mt < mtop.moltype.size(); mt++) - { - const int nral1 = 1 + NRAL(F_SETTLE); - InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE]; - for (int i = 0; i < interactionList.size(); i += nral1) - { - if (settle_type == -1) - { - settle_type = interactionList.iatoms[i]; - } - else if (interactionList.iatoms[i] != settle_type) - { - gmx_fatal(FARGS, - "The [molecules] section of your topology specifies more than one block " - "of\n" - "a [moleculetype] with a [settles] block. Only one such is allowed.\n" - "If you are trying to partition your solvent into different *groups*\n" - "(e.g. for freezing, T-coupling, etc.), you are using the wrong " - "approach. Index\n" - "files specify groups. Otherwise, you may wish to change the least-used\n" - "block of molecules with SETTLE constraints into 3 normal constraints."); - } - } - } - - GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles"); - - real dOH = mtop.ffparams.iparams[settle_type].settle.doh; - real dHH = mtop.ffparams.iparams[settle_type].settle.dhh; - - settleParameters_ = settleParameters(mO, mH, 1.0 / mO, 1.0 / mH, dOH, dHH); - - allocateDeviceBuffer(&d_virialScaled_, 6, deviceContext_); - h_virialScaled_.resize(6); -} - -SettleGpu::~SettleGpu() -{ - // Early exit if there is no settles - if (numSettles_ == 0) - { - return; - } - freeDeviceBuffer(&d_virialScaled_); - if (numAtomIdsAlloc_ > 0) - { - freeDeviceBuffer(&d_atomIds_); - } -} - -void SettleGpu::set(const InteractionDefinitions& idef) -{ - const int nral1 = 1 + NRAL(F_SETTLE); - const InteractionList& il_settle = idef.il[F_SETTLE]; - ArrayRef iatoms = il_settle.iatoms; - numSettles_ = il_settle.size() / nral1; - - reallocateDeviceBuffer(&d_atomIds_, numSettles_, &numAtomIds_, &numAtomIdsAlloc_, deviceContext_); - h_atomIds_.resize(numSettles_); - for (int i = 0; i < numSettles_; i++) - { - int3 settler; - settler.x = iatoms[i * nral1 + 1]; // Oxygen index - settler.y = iatoms[i * nral1 + 2]; // First hydrogen index - settler.z = iatoms[i * nral1 + 3]; // Second hydrogen index - h_atomIds_.at(i) = settler; - } - copyToDeviceBuffer( - &d_atomIds_, h_atomIds_.data(), 0, numSettles_, deviceStream_, GpuApiCallBehavior::Sync, nullptr); -} - } // namespace gmx diff --git a/src/gromacs/mdlib/settle_gpu_internal.h b/src/gromacs/mdlib/settle_gpu_internal.h new file mode 100644 index 0000000000..9963944e74 --- /dev/null +++ b/src/gromacs/mdlib/settle_gpu_internal.h @@ -0,0 +1,91 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Declares backend-specific functions for GPU implementation of SETTLE. + * + * \author Artem Zhmurov + * + * \ingroup module_mdlib + */ +#ifndef GMX_MDLIB_SETTLE_GPU_INTERNAL_H +#define GMX_MDLIB_SETTLE_GPU_INTERNAL_H + +#include "gromacs/gpu_utils/devicebuffer_datatype.h" +#include "gromacs/gpu_utils/gputraits.h" +#include "gromacs/mdlib/settle_gpu.h" + +namespace gmx +{ + +/*! \brief Apply SETTLE. + * + * Applies SETTLE to coordinates and velocities, stored on GPU. Data at pointers d_xp and + * d_v change in the GPU memory. The results are not automatically copied back to the CPU + * memory. Method uses this class data structures which should be updated when needed using + * update method. + * + * \param[in] numSettles Number of SETTLE constraints. + * \param[in] d_atomIds Device buffer with indices of atoms to be SETTLEd. + * \param[in] settleParameters Parameters for SETTLE constraints. + * \param[in] d_x Coordinates before timestep (in GPU memory) + * \param[in,out] d_xp Coordinates after timestep (in GPU memory). The + * resulting constrained coordinates will be saved here. + * \param[in] updateVelocities If the velocities should be updated. + * \param[in,out] d_v Velocities to update (in GPU memory, can be nullptr + * if not updated) + * \param[in] invdt Reciprocal timestep (to scale Lagrange + * multipliers when velocities are updated) + * \param[in] computeVirial If virial should be updated. + * \param[in,out] virialScaled Scaled virial tensor to be updated. + * \param[in] pbcAiuc PBC data. + * \param[in] deviceStream Device stream to launch kernel in. + */ +void launchSettleGpuKernel(int numSettles, + const DeviceBuffer d_atomIds, + const SettleParameters settleParameters, + const DeviceBuffer d_x, + DeviceBuffer d_xp, + const bool updateVelocities, + DeviceBuffer d_v, + const real invdt, + const bool computeVirial, + DeviceBuffer virialScaled, + const PbcAiuc pbcAiuc, + const DeviceStream& deviceStream); + +} // namespace gmx + +#endif // GMX_MDLIB_SETTLE_GPU_INTERNAL_H diff --git a/src/gromacs/mdlib/settle_gpu_internal_sycl.cpp b/src/gromacs/mdlib/settle_gpu_internal_sycl.cpp new file mode 100644 index 0000000000..ef063c36d5 --- /dev/null +++ b/src/gromacs/mdlib/settle_gpu_internal_sycl.cpp @@ -0,0 +1,70 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief SYCL-specific routines for the GPU implementation of SETTLE constraints algorithm. + * + * \author Artem Zhmurov + * + * \ingroup module_mdlib + */ + +#include "settle_gpu_internal.h" + +#include "gromacs/utility/gmxassert.h" + +namespace gmx +{ + +void launchSettleGpuKernel(const int /* numSettles */, + const DeviceBuffer /* d_atomIds */, + const SettleParameters /* settleParameters */, + const DeviceBuffer /* d_x */, + DeviceBuffer /* d_xp */, + const bool /* updateVelocities */, + DeviceBuffer /* d_v */, + const real /* invdt */, + const bool /* computeVirial */, + DeviceBuffer /* virialScaled */, + const PbcAiuc /* pbcAiuc */, + const DeviceStream& /* deviceStream */) +{ + // SYCL_TODO + GMX_RELEASE_ASSERT(false, "SETTLE is not yet implemented in SYCL."); + + return; +} + +} // namespace gmx diff --git a/src/gromacs/mdlib/tests/settletestrunners.cu b/src/gromacs/mdlib/tests/settletestrunners.cu index 7aab750547..b35cf0be9b 100644 --- a/src/gromacs/mdlib/tests/settletestrunners.cu +++ b/src/gromacs/mdlib/tests/settletestrunners.cu @@ -54,7 +54,7 @@ #include "gromacs/gpu_utils/devicebuffer.cuh" #include "gromacs/gpu_utils/gputraits.h" #include "gromacs/hardware/device_information.h" -#include "gromacs/mdlib/settle_gpu.cuh" +#include "gromacs/mdlib/settle_gpu.h" #include "gromacs/utility/unique_cptr.h" #include "testutils/test_device.h" diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cu b/src/gromacs/mdlib/update_constrain_gpu_impl.cu index ba3a1d12af..7ef37c0940 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl.cu +++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cu @@ -64,8 +64,6 @@ #include "gromacs/gpu_utils/gputraits.cuh" #include "gromacs/gpu_utils/vectype_ops.cuh" #include "gromacs/mdlib/leapfrog_gpu.h" -#include "gromacs/mdlib/lincs_gpu.h" -#include "gromacs/mdlib/settle_gpu.cuh" #include "gromacs/mdlib/update_constrain_gpu.h" #include "gromacs/mdtypes/mdatom.h" #include "gromacs/timing/wallcycle.h" diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.h b/src/gromacs/mdlib/update_constrain_gpu_impl.h index dcca5dc2a9..bad3898602 100644 --- a/src/gromacs/mdlib/update_constrain_gpu_impl.h +++ b/src/gromacs/mdlib/update_constrain_gpu_impl.h @@ -50,7 +50,7 @@ #include "gromacs/mdlib/leapfrog_gpu.h" #include "gromacs/mdlib/lincs_gpu.h" -#include "gromacs/mdlib/settle_gpu.cuh" +#include "gromacs/mdlib/settle_gpu.h" #include "gromacs/mdlib/update_constrain_gpu.h" #include "gromacs/mdtypes/inputrec.h" -- 2.22.0