)
endif()
+if(GMX_USE_CUDA)
+ gmx_add_libgromacs_sources(
+ leapfrog_cuda_impl.cu
+ )
+endif()
+
gmx_install_headers(simulationsignal.h)
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ *
+ * \brief Declaration of high-level functions of CUDA implementation of Leap-Frog.
+ *
+ * \todo This should only list interfaces needed for libgromacs clients (e.g.
+ * management of coordinates, velocities and forces should not be here).
+ * \todo Reconsider naming towards using "gpu" suffix instead of "cuda".
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ * \inlibraryapi
+ */
+#ifndef GMX_MDLIB_LEAPFROG_CUDA_H
+#define GMX_MDLIB_LEAPFROG_CUDA_H
+
+#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/utility/classhelpers.h"
+
+namespace gmx
+{
+
+class LeapFrogCuda
+{
+
+ public:
+ /*! \brief Creates Leap-Frog object.
+ *
+ * \param[in] numAtoms Number of atoms
+ */
+ LeapFrogCuda(int numAtoms);
+ ~LeapFrogCuda();
+
+ /*! \brief Integrate
+ *
+ * Integrates the equation of motion using Leap-Frog algorithm.
+ * Updates xpDevice_ and vDevice_ fields of this object.
+ *
+ * \param[in] dt Timestep
+ */
+ void integrate(real dt);
+
+ /*! \brief
+ * Update PBC data.
+ *
+ * Converts PBC data from t_pbc into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbc The PBC data in t_pbc format.
+ */
+ void setPbc(const t_pbc *pbc);
+
+ /*! \brief Set the integrator
+ *
+ * Copies inverse masses from CPU to GPU.
+ *
+ * \param[in] md MD atoms, from which inverse masses are taken.
+ */
+ void set(const t_mdatoms &md);
+
+ /*! \brief
+ * Copy coordinates from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_x CPU pointer where coordinates should be copied from.
+ */
+ void copyCoordinatesToGpu(const rvec *h_x);
+
+ /*! \brief
+ * Copy velocities from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v CPU pointer where velocities should be copied from.
+ */
+ void copyVelocitiesToGpu(const rvec *h_v);
+
+ /*! \brief
+ * Copy forces from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f CPU pointer where forces should be copied from.
+ */
+ void copyForcesToGpu(const rvec *h_f);
+
+ /*! \brief
+ * Copy coordinates from GPU to CPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[out] h_xp CPU pointer where coordinates should be copied to.
+ */
+ void copyCoordinatesFromGpu(rvec *h_xp);
+
+ /*! \brief
+ * Copy velocities from GPU to CPU.
+ *
+ * The velocities are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v Pointer to velocities data.
+ */
+ void copyVelocitiesFromGpu(rvec *h_v);
+
+ /*! \brief
+ * Copy forces from GPU to CPU.
+ *
+ * The forces are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f Pointer to forces data.
+ */
+ void copyForcesFromGpu(rvec *h_f);
+
+ /*! \brief
+ * Set the internal GPU-memory x, xprime and v pointers.
+ *
+ * Data is not copied. The data are assumed to be in float3/fvec format
+ * (float3 is used internally, but the data layout should be identical).
+ *
+ * \param[in] d_x Pointer to the coordinates for the input (on GPU)
+ * \param[in] d_xp Pointer to the coordinates for the output (on GPU)
+ * \param[in] d_v Pointer to the velocities (on GPU)
+ * \param[in] d_f Pointer to the forces (on GPU)
+ */
+ void setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f);
+
+ private:
+ class Impl;
+ gmx::PrivateImplPointer<Impl> impl_;
+
+};
+
+} //namespace gmx
+
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief May be used to implement Leap-Frog CUDA interfaces for non-GPU builds.
+ *
+ * Currently, reports and exits if any of the interfaces are called.
+ * Needed to satisfy compiler on systems, where CUDA is not available.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include "gromacs/mdlib/leapfrog_cuda.h"
+
+#if GMX_GPU != GMX_GPU_CUDA
+
+namespace gmx
+{
+
+/*!\brief Impl class stub. */
+class LeapFrogCuda::Impl
+{
+};
+
+/*!\brief Constructor stub. */
+LeapFrogCuda::LeapFrogCuda(gmx_unused int numAtoms)
+ : impl_(nullptr)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+LeapFrogCuda::~LeapFrogCuda() = default;
+
+/*!\brief Apply LINCS stub. */
+void LeapFrogCuda::integrate(gmx_unused const real dt)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*!\brief Set PBC stub. */
+void LeapFrogCuda::setPbc(gmx_unused const t_pbc *pbc)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Copy inverse masses from CPU to GPU stub function. */
+void LeapFrogCuda::set(gmx_unused const t_mdatoms &md)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Copy coordinates from provided CPU location to GPU stub. */
+void LeapFrogCuda::copyCoordinatesToGpu(gmx_unused const rvec *h_x)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Copy velocities from provided CPU location to GPU stub. */
+void LeapFrogCuda::copyVelocitiesToGpu(gmx_unused const rvec *h_v)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Copy forces from CPU to GPU stub. */
+void LeapFrogCuda::copyForcesToGpu(gmx_unused const rvec *h_f)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Copy coordinates from GPU to provided CPU location stub. */
+void LeapFrogCuda::copyCoordinatesFromGpu(gmx_unused rvec *h_xp)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Copy velocities from GPU to provided CPU location stub. */
+void LeapFrogCuda::copyVelocitiesFromGpu(gmx_unused rvec *h_v)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Copy forces from GPU to CPU stub. */
+void LeapFrogCuda::copyForcesFromGpu(gmx_unused rvec *h_f)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+/*! \brief Set the internal GPU-memory x, xprime and v pointers stub. */
+void LeapFrogCuda::setXVFPointers(gmx_unused rvec *d_x,
+ gmx_unused rvec *d_xp,
+ gmx_unused rvec *d_v,
+ gmx_unused rvec *d_f)
+{
+ GMX_ASSERT(false, "A CPU stub for LeapFrog was called insted of the correct implementation.");
+}
+
+} // namespace gmx
+
+#endif /* GMX_GPU != GMX_GPU_CUDA */
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Implements Leap-Frog using CUDA
+ *
+ * This file contains implementation of basic Leap-Frog integrator
+ * using CUDA, including class initialization, data-structures management
+ * and GPU kernel.
+ *
+ * \todo Reconsider naming towards using "gpu" suffix instead of "cuda".
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#include "gmxpre.h"
+
+#include "leapfrog_cuda_impl.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include <cmath>
+
+#include <algorithm>
+
+#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/devicebuffer.cuh"
+#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/vectype_ops.cuh"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/leapfrog_cuda.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
+
+namespace gmx
+{
+
+//! Number of CUDA threads in a block
+constexpr static int c_threadsPerBlock = 256;
+//! Maximum number of threads in a block (for __launch_bounds__)
+constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock;
+
+/*! \brief Main kernel for Leap-Frog integrator.
+ *
+ * Each GPU thread works with a single particle. Empty declaration is needed to
+ * avoid "no previous prototype for function" clang warning.
+ *
+ * \todo Check if the force should be set to zero here.
+ * \todo This kernel can also accumulate incidental temperatures for each atom.
+ *
+ * \param[in] numAtoms Total number of atoms.
+ * \param[in] gm_x Coordinates before the timestep
+ * \param[out] gm_xp Coordinates after the timestep.
+ * \param[in,out] gm_v Velocities to update.
+ * \param[in] gm_f Atomic forces.
+ * \param[in] gm_inverseMasses Reciprocal masses.
+ * \param[in] dt Timestep.
+ */
+__launch_bounds__(c_maxThreadsPerBlock)
+__global__ void leapfrog_kernel(const int numAtoms,
+ const float3* __restrict__ gm_x,
+ float3* __restrict__ gm_xp,
+ float3* __restrict__ gm_v,
+ const float3* __restrict__ gm_f,
+ const float* __restrict__ gm_inverseMasses,
+ const float dt);
+
+__launch_bounds__(c_maxThreadsPerBlock)
+__global__ void leapfrog_kernel(const int numAtoms,
+ const float3* __restrict__ gm_x,
+ float3* __restrict__ gm_xp,
+ float3* __restrict__ gm_v,
+ const float3* __restrict__ gm_f,
+ const float* __restrict__ gm_inverseMasses,
+ const float dt)
+{
+ int threadIndex = blockIdx.x*blockDim.x + threadIdx.x;
+ if (threadIndex < numAtoms)
+ {
+ float3 xi = gm_x[threadIndex];
+ float3 vi = gm_v[threadIndex];
+ float3 fi = gm_f[threadIndex];
+ float imi = gm_inverseMasses[threadIndex];
+ float imidt = imi*dt;
+ vi += fi*imidt;
+ xi += vi*dt;
+ gm_v[threadIndex] = vi;
+ gm_xp[threadIndex] = xi;
+ }
+ return;
+}
+
+/*! \brief Integrate
+ *
+ * Integrates the equation of motion using Leap-Frog algorithm.
+ * Updates d_xp_ and d_v_ fields of this object.
+ *
+ * \param[in] dt Timestep
+ */
+void LeapFrogCuda::Impl::integrate(const real dt)
+{
+
+ ensureNoPendingCudaError("In CUDA version of Leap-Frog integrator");
+
+ KernelLaunchConfig config;
+ config.blockSize[0] = c_threadsPerBlock;
+ config.blockSize[1] = 1;
+ config.blockSize[2] = 1;
+ config.gridSize[0] = (numAtoms_ + c_threadsPerBlock - 1)/c_threadsPerBlock;
+ config.sharedMemorySize = 0;
+ config.stream = stream_;
+
+ auto kernelPtr = leapfrog_kernel;
+ const float3 *d_x = d_x_;
+ float3 *d_xp = d_xp_;
+ float3 *d_v = d_v_;
+ const float3 *d_f = d_f_;
+ const float *d_inverseMasses = d_inverseMasses_;
+
+ const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, config,
+ &numAtoms_,
+ &d_x, &d_xp,
+ &d_v,
+ &d_f,
+ &d_inverseMasses, &dt);
+ launchGpuKernel(kernelPtr, config, nullptr, "leapfrog_kernel", kernelArgs);
+
+ return;
+}
+
+/*! \brief Create Leap-Frog object
+ *
+ * \param[in] numAtoms Number of atoms.
+ */
+LeapFrogCuda::Impl::Impl(int numAtoms)
+ : numAtoms_(numAtoms)
+{
+ allocateDeviceBuffer(&d_x_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_xp_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_v_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_f_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_inverseMasses_, numAtoms, nullptr);
+
+ // TODO When the code will be integrated into the schedule, it will be assigned non-default stream.
+ stream_ = nullptr;
+}
+
+LeapFrogCuda::Impl::~Impl()
+{
+ freeDeviceBuffer(&d_x_);
+ freeDeviceBuffer(&d_xp_);
+ freeDeviceBuffer(&d_v_);
+ freeDeviceBuffer(&d_f_);
+ freeDeviceBuffer(&d_inverseMasses_);
+
+}
+
+/*! \brief
+ * Update PBC data.
+ *
+ * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbc The PBC data in t_pbc format.
+ */
+void LeapFrogCuda::Impl::setPbc(const t_pbc *pbc)
+{
+ setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);
+}
+
+/*! \brief Set the integrator
+ *
+ * Copies inverse masses from CPU to GPU.
+ *
+ * \param[in] md MD atoms, from which inverse masses are taken.
+ */
+void LeapFrogCuda::Impl::set(const t_mdatoms &md)
+{
+ copyToDeviceBuffer(&d_inverseMasses_, (float*)md.invmass,
+ 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy coordinates from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_x CPU pointer where coordinates should be copied from.
+ */
+void LeapFrogCuda::Impl::copyCoordinatesToGpu(const rvec *h_x)
+{
+ copyToDeviceBuffer(&d_x_, (float3*)h_x, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy velocities from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v CPU pointer where velocities should be copied from.
+ */
+void LeapFrogCuda::Impl::copyVelocitiesToGpu(const rvec *h_v)
+{
+ copyToDeviceBuffer(&d_v_, (float3*)h_v, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy forces from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f CPU pointer where forces should be copied from.
+ */
+void LeapFrogCuda::Impl::copyForcesToGpu(const rvec *h_f)
+{
+ copyToDeviceBuffer(&d_f_, (float3*)h_f, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy coordinates from GPU to CPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[out] h_xp CPU pointer where coordinates should be copied to.
+ */
+void LeapFrogCuda::Impl::copyCoordinatesFromGpu(rvec *h_xp)
+{
+ copyFromDeviceBuffer((float3*)h_xp, &d_xp_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy velocities from GPU to CPU.
+ *
+ * The velocities are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v Pointer to velocities data.
+ */
+void LeapFrogCuda::Impl::copyVelocitiesFromGpu(rvec *h_v)
+{
+ copyFromDeviceBuffer((float3*)h_v, &d_v_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy forces from GPU to CPU.
+ *
+ * The forces are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f Pointer to forces data.
+ */
+void LeapFrogCuda::Impl::copyForcesFromGpu(rvec *h_f)
+{
+ copyFromDeviceBuffer((float3*)h_f, &d_f_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Set the internal GPU-memory x, xprime and v pointers.
+ *
+ * Data is not copied. The data are assumed to be in float3/fvec format
+ * (float3 is used internally, but the data layout should be identical).
+ *
+ * \param[in] d_x Pointer to the coordinates for the input (on GPU)
+ * \param[in] d_xp Pointer to the coordinates for the output (on GPU)
+ * \param[in] d_v Pointer to the velocities (on GPU)
+ * \param[in] d_f Pointer to the forces (on GPU)
+ */
+void LeapFrogCuda::Impl::setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f)
+{
+ d_x_ = (float3*)d_x;
+ d_xp_ = (float3*)d_xp;
+ d_v_ = (float3*)d_v;
+ d_f_ = (float3*)d_f;
+}
+
+
+LeapFrogCuda::LeapFrogCuda(const int numAtoms)
+ : impl_(new Impl(numAtoms))
+{
+}
+
+LeapFrogCuda::~LeapFrogCuda() = default;
+
+void LeapFrogCuda::integrate(const real dt)
+{
+ impl_->integrate(dt);
+}
+
+void LeapFrogCuda::setPbc(const t_pbc *pbc)
+{
+ impl_->setPbc(pbc);
+}
+
+void LeapFrogCuda::set(const t_mdatoms &md)
+{
+ impl_->set(md);
+}
+
+void LeapFrogCuda::copyCoordinatesToGpu(const rvec *h_x)
+{
+ impl_->copyCoordinatesToGpu(h_x);
+}
+
+void LeapFrogCuda::copyVelocitiesToGpu(const rvec *h_v)
+{
+ impl_->copyVelocitiesToGpu(h_v);
+}
+
+void LeapFrogCuda::copyForcesToGpu(const rvec *h_f)
+{
+ impl_->copyForcesToGpu(h_f);
+}
+
+void LeapFrogCuda::copyCoordinatesFromGpu(rvec *h_xp)
+{
+ impl_->copyCoordinatesFromGpu(h_xp);
+}
+
+void LeapFrogCuda::copyVelocitiesFromGpu(rvec *h_v)
+{
+ impl_->copyVelocitiesFromGpu(h_v);
+}
+
+void LeapFrogCuda::copyForcesFromGpu(rvec *h_f)
+{
+ impl_->copyForcesFromGpu(h_f);
+}
+
+void LeapFrogCuda::setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f)
+{
+ impl_->setXVFPointers(d_x, d_xp, d_v, d_f);
+}
+
+} //namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Declares CUDA implementation class for Leap-Frog
+ *
+ * This header file is needed to include from both the device-side
+ * kernels file, and the host-side management code.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#ifndef GMX_MDLIB_LEAPFROG_CUDA_IMPL_H
+#define GMX_MDLIB_LEAPFROG_CUDA_IMPL_H
+
+#include "gromacs/mdlib/leapfrog_cuda.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
+
+namespace gmx
+{
+
+/*! \internal \brief Class with interfaces and data for CUDA version of LINCS. */
+class LeapFrogCuda::Impl
+{
+
+ public:
+ /*! \brief Creates Leap-Frog object.
+ *
+ * \param[in] numAtoms Number of atoms
+ */
+ Impl(int numAtoms);
+ ~Impl();
+
+ /*! \brief Integrate
+ *
+ * Integrates the equation of motion using Leap-Frog algorithm.
+ * Updates xpDevice_ and vDevice_ fields of this object.
+ *
+ * \param[in] dt Timestep
+ */
+ void integrate(real dt);
+
+ /*! \brief
+ * Update PBC data.
+ *
+ * Converts PBC data from t_pbc into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbc The PBC data in t_pbc format.
+ */
+ void setPbc(const t_pbc *pbc);
+
+ /*! \brief Set the integrator
+ *
+ * Copies inverse masses from CPU to GPU.
+ *
+ * \param[in] md MD atoms, from which inverse masses are taken.
+ */
+ void set(const t_mdatoms &md);
+
+ /*! \brief
+ * Copy coordinates from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_x CPU pointer where coordinates should be copied from.
+ */
+ void copyCoordinatesToGpu(const rvec *h_x);
+
+ /*! \brief
+ * Copy velocities from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v CPU pointer where velocities should be copied from.
+ */
+ void copyVelocitiesToGpu(const rvec *h_v);
+
+ /*! \brief
+ * Copy forces from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f CPU pointer where forces should be copied from.
+ */
+ void copyForcesToGpu(const rvec *h_f);
+
+ /*! \brief
+ * Copy coordinates from GPU to CPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[out] h_xp CPU pointer where coordinates should be copied to.
+ */
+ void copyCoordinatesFromGpu(rvec *h_xp);
+
+ /*! \brief
+ * Copy velocities from GPU to CPU.
+ *
+ * The velocities are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v Pointer to velocities data.
+ */
+ void copyVelocitiesFromGpu(rvec *h_v);
+
+ /*! \brief
+ * Copy forces from GPU to CPU.
+ *
+ * The forces are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f Pointer to forces data.
+ */
+ void copyForcesFromGpu(rvec *h_f);
+
+ /*! \brief
+ * Set the internal GPU-memory x, xprime and v pointers.
+ *
+ * Data is not copied. The data are assumed to be in float3/fvec format
+ * (float3 is used internally, but the data layout should be identical).
+ *
+ * \param[in] d_x Pointer to the coordinates for the input (on GPU)
+ * \param[in] d_xp Pointer to the coordinates for the output (on GPU)
+ * \param[in] d_v Pointer to the velocities (on GPU)
+ * \param[in] d_f Pointer to the forces (on GPU)
+ */
+ void setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f);
+
+ private:
+
+ //! CUDA stream
+ cudaStream_t stream_;
+ //! Periodic boundary data
+ PbcAiuc pbcAiuc_;
+ //! Number of atoms
+ int numAtoms_;
+ //! Coordinates before the timestep (on GPU)
+ float3 *d_x_;
+ //! Coordinates after the timestep (on GPU).
+ float3 *d_xp_;
+ //! Velocities of atoms (on GPU)
+ float3 *d_v_;
+ //! Forces, exerted by atoms (on GPU)
+ float3 *d_f_;
+ //! 1/mass for all atoms (GPU)
+ real *d_inverseMasses_;
+
+};
+
+} // namespace gmx
+
+#endif
constr.cpp
ebin.cpp
energyoutput.cpp
+ leapfrog.cpp
settle.cpp
shake.cpp
simulationsignal.cpp
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Integrator tests
+ *
+ * Test for CUDA implementation of the Leap-Frog integrator.
+ *
+ * \todo Connect to the CPU-based version.
+ * \todo Prepare for temperature and pressure controlled integrators.
+ * \todo Add PBC handling test.
+ * \todo Take over coordinates/velocities/forces handling - this infrastructure
+ * will be removed from integrator.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ * \ingroup module_mdlib
+ */
+
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include <assert.h>
+
+#include <cmath>
+
+#include <algorithm>
+#include <unordered_map>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "gromacs/gpu_utils/gpu_utils.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/leapfrog_cuda.h"
+#include "gromacs/utility/stringutil.h"
+
+#include "testutils/refdata.h"
+#include "testutils/testasserts.h"
+
+namespace gmx
+{
+namespace test
+{
+
+#if GMX_GPU == GMX_GPU_CUDA
+
+/*! \brief The parameter space for the test.
+ *
+ * The test will run for all possible combinations of accessible
+ * values of the:
+ * 1. Number of atoms
+ * 2. Timestep
+ * 3-5. Velocity components
+ * 6-8. Force components
+ * 9. Number of steps
+ */
+typedef std::tuple<int, real, real, real, real, real, real, real, int> IntegratorTestParameters;
+
+/*! \brief Test fixture for integrator.
+ *
+ * Creates a system of independent particles exerting constant external forces,
+ * makes several numerical integration timesteps and compares the result
+ * with analytical solution.
+ *
+ */
+class IntegratorTest : public ::testing::TestWithParam<IntegratorTestParameters>
+{
+ public:
+ //! Number of atoms in the system
+ int numAtoms_;
+ //! Integration timestep
+ real timestep_;
+
+ //! Initial coordinates
+ std::vector<RVec> x0_;
+ //! Current coordinates
+ std::vector<RVec> x_;
+ //! Coordinates after integrator update
+ std::vector<RVec> xPrime_;
+ //! Initial velocities
+ std::vector<RVec> v0_;
+ //! Current velocities
+ std::vector<RVec> v_;
+ //! External forces
+ std::vector<RVec> f_;
+ //! Inverse masses of the particles
+ std::vector<real> inverseMasses_;
+ //! MD atoms structure in which inverse masses will be passed to the integrator
+ t_mdatoms mdAtoms_;
+
+ /*! \brief Test initialization function.
+ *
+ * \param[in] numAtoms Number of atoms in the system
+ * \param[in] timestep Integration timestep
+ * \param[in] v0 Initial velocity (same for all particles)
+ * \param[in] f0 External constant force, acting on all particles
+ */
+ void init(int numAtoms, real timestep, rvec v0, rvec f0)
+ {
+ numAtoms_ = numAtoms;
+ timestep_ = timestep;
+
+ x0_.resize(numAtoms_);
+ x_.resize(numAtoms_);
+ xPrime_.resize(numAtoms_);
+ v0_.resize(numAtoms_);
+ v_.resize(numAtoms_);
+ f_.resize(numAtoms_);
+ inverseMasses_.resize(numAtoms_);
+ for (unsigned i = 0; i < x_.size(); i++)
+ {
+ // Typical PBC box size is tens of nanometers
+ x_[i][XX] = (i%21)*1.0;
+ x_[i][YY] = 6.5 + (i%13)*(-1.0);
+ x_[i][ZZ] = (i%32)*(0.0);
+
+ for (int d = 0; d < DIM; d++)
+ {
+ xPrime_[i][d] = 0.0;
+ // Thermal velocity is ~1 nm/ps (|v0| = 1-2 nm/ps)
+ v_[i][d] = v0[d];
+ // TODO Check what value typical MD forces have (now ~ 1 kJ/mol/nm)
+ f_[i][d] = f0[d];
+
+ x0_[i][d] = x_[i][d];
+ v0_[i][d] = v_[i][d];
+ }
+ // Atom masses are ~1-100 g/mol
+ inverseMasses_[i] = 1.0/(1.0 + i%100);
+ }
+
+ mdAtoms_.invmass = inverseMasses_.data();
+ }
+
+};
+
+TEST_P(IntegratorTest, SimpleIntegration){
+ // Do nothing if it is a CUDA build but there are no CUDA-capable GPUs
+ std::string errorMessage;
+ if (!canDetectGpus(&errorMessage))
+ {
+ return;
+ }
+ int numAtoms; // 1. Number of atoms
+ real timestep; // 2. Timestep
+ rvec v0; // 3. Velocity
+ rvec f0; // 4. Force
+ int nStep; // 5. Number of steps
+ std::tie(numAtoms, timestep, v0[XX], v0[YY], v0[ZZ], f0[XX], f0[YY], f0[ZZ], nStep) = GetParam();
+
+ std::string testDescription = formatString("while testing %d atoms for %d timestep (dt = %f, v0=(%f, %f, %f), f0=(%f, %f, %f))",
+ numAtoms, nStep, timestep,
+ v0[XX], v0[YY], v0[ZZ],
+ f0[XX], f0[YY], f0[ZZ]);
+
+
+ init(numAtoms, timestep, v0, f0);
+
+ std::unique_ptr<LeapFrogCuda> integrator = std::make_unique<LeapFrogCuda>(numAtoms);
+
+ integrator->set(mdAtoms_);
+ integrator->copyCoordinatesToGpu((rvec*)(x_.data()));
+ integrator->copyVelocitiesToGpu((rvec*)(v_.data()));
+ integrator->copyForcesToGpu((rvec*)(f_.data()));
+
+ for (int step = 0; step < nStep; step++)
+ {
+ integrator->integrate(timestep_);
+ integrator->copyCoordinatesFromGpu((rvec*)(xPrime_.data()));
+ integrator->copyCoordinatesToGpu((rvec*)(xPrime_.data()));
+ }
+
+ integrator->copyCoordinatesFromGpu((rvec*)(xPrime_.data()));
+ integrator->copyVelocitiesFromGpu((rvec*)(v_.data()));
+
+ real totalTime = nStep*timestep;
+ // TODO For the case of constant force, the numerical scheme is exact and
+ // the only source of errors is floating point arithmetic. Hence,
+ // the tolerance can be calculated.
+ FloatingPointTolerance tolerance = absoluteTolerance(nStep*0.000005);
+
+ for (unsigned i = 0; i < x_.size(); i++)
+ {
+ rvec xAnalytical;
+ rvec vAnalytical;
+ for (int d = 0; d < DIM; d++)
+ {
+ // Analytical solution for constant-force particle movement
+ xAnalytical[d] = x0_[i][d] + v0_[i][d]*totalTime + 0.5*f_[i][d]*totalTime*totalTime*inverseMasses_[i];
+ vAnalytical[d] = v0_[i][d] + f_[i][d]*totalTime*inverseMasses_[i];
+
+ EXPECT_REAL_EQ_TOL(xPrime_[i][d], xAnalytical[d], tolerance)
+ << gmx::formatString("Coordinate %d of atom %d is different from analytical solution", d, i)
+ << testDescription;
+
+ EXPECT_REAL_EQ_TOL(v_[i][d], vAnalytical[d], tolerance)
+ << gmx::formatString("Velocity component %d of atom %d is different from analytical solution", d, i)
+ << testDescription;
+ }
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(WithParameters, IntegratorTest,
+ ::testing::Combine(
+ ::testing::Values(1, 10, 300), // Number of atoms
+ ::testing::Values(0.001, 0.0005), // Timestep
+ ::testing::Values(-2.0, 0.0), // vx
+ ::testing::Values( 0.0, 2.0), // vy
+ ::testing::Values( 0.0), // vz
+ ::testing::Values(-1.0, 0.0), // fx
+ ::testing::Values( 0.0, 1.0), // fy
+ ::testing::Values( 2.0), // fz
+ ::testing::Values(1, 10) // Number of steps
+ ));
+#endif // GMX_GPU == GMX_GPU_CUDA
+
+} // namespace test
+} // namespace gmx
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdlib/forcerec.h"
+#include "gromacs/mdlib/leapfrog_cuda.h"
#include "gromacs/mdlib/md_support.h"
#include "gromacs/mdlib/mdatoms.h"
#include "gromacs/mdlib/mdoutf.h"
using gmx::SimulationSignaller;
+//! Whether the GPU version of Leap-Frog integrator should be used.
+static const bool c_useGpuLeapFrog = (getenv("GMX_INTEGRATE_GPU") != nullptr);
+
void gmx::Integrator::do_md()
{
// TODO Historically, the EM and MD "integrators" used different
* Loop over MD steps
*
************************************************************/
+ std::unique_ptr<LeapFrogCuda> integrator;
+ if (c_useGpuLeapFrog)
+ {
+ GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
+ GMX_RELEASE_ASSERT(ir->etc == etcNO, "Temperature coupling is not supported on the GPU.");
+ GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU.");
+ GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
+ integrator = std::make_unique<LeapFrogCuda>(state->natoms);
+ integrator->set(*mdatoms);
+ }
bFirstStep = TRUE;
/* Skip the first Nose-Hoover integration when we get the state from tpx */
copy_rvecn(as_rvec_array(state->x.data()), cbuf, 0, state->natoms);
}
- update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
- ekind, M, &upd, etrtPOSITION, cr, constr);
+
+ if (c_useGpuLeapFrog)
+ {
+ integrator->copyCoordinatesToGpu(state->x.rvec_array());
+ integrator->copyVelocitiesToGpu(state->v.rvec_array());
+ integrator->copyForcesToGpu(as_rvec_array(f.data()));
+ integrator->integrate(ir->delta_t);
+ integrator->copyCoordinatesFromGpu(upd.xp()->rvec_array());
+ integrator->copyVelocitiesFromGpu(state->v.rvec_array());
+ }
+ else
+ {
+ update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
+ ekind, M, &upd, etrtPOSITION, cr, constr);
+ }
wallcycle_stop(wcycle, ewcUPDATE);
constrain_coordinates(step, &dvdl_constr, state,