endif()
if(GMX_USE_CUDA)
gmx_add_libgromacs_sources(
+ leapfrog_cuda_impl.cu
lincs_cuda_impl.cu
settle_cuda_impl.cu
- )
-endif()
-
-if(GMX_USE_CUDA)
- gmx_add_libgromacs_sources(
- leapfrog_cuda_impl.cu
+ update_constrain_cuda_impl.cu
)
endif()
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/lincs.h"
-#include "gromacs/mdlib/lincs_cuda.h"
#include "gromacs/mdlib/settle.h"
-#include "gromacs/mdlib/settle_cuda.h"
#include "gromacs/mdlib/shake.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
namespace gmx
{
-//! Whether the GPU version of LINCS should be used.
-static const bool c_useGpuLincs = (getenv("GMX_LINCS_GPU") != nullptr);
-//! Whether the GPU version of SETTLE should be used.
-static const bool c_useGpuSettle = (getenv("GMX_SETTLE_GPU") != nullptr);
-
/* \brief Impl class for Constraints
*
* \todo Members like md, idef are valid only for the lifetime of a
/*!\brief Input options.
*
* \todo Replace with IMdpOptions */
- const t_inputrec &ir;
+ const t_inputrec &ir;
//! Flop counting support.
- t_nrnb *nrnb = nullptr;
+ t_nrnb *nrnb = nullptr;
//! Tracks wallcycle usage.
- gmx_wallcycle *wcycle;
- //! Valid LINCS CUDA object when that implementation is being used, nullptr otherwise.
- std::unique_ptr<LincsCuda> lincsCuda;
- //! SETTLE CUDA object or a dummy if CUDA is not enabled for SETTLE.
- std::unique_ptr<SettleCuda> settleCuda;
+ gmx_wallcycle *wcycle;
};
Constraints::~Constraints() = default;
bDump = TRUE;
}
}
- else
- {
- if (lincsCuda != nullptr && c_useGpuLincs)
- {
- GMX_RELEASE_ASSERT(ir.efep == efepNO || dvdlambda == nullptr,
- "Free energy perturbation is not supported by the GPU version of LINCS.\n");
- lincsCuda->setPbc(pbc_null);
- lincsCuda->copyCoordinatesToGpu(x, xprime);
- lincsCuda->copyVelocitiesToGpu(v);
- lincsCuda->apply(v != nullptr,
- invdt,
- vir != nullptr, vir_r_m_dr);
- lincsCuda->copyCoordinatesFromGpu(xprime);
- if (v != nullptr)
- {
- lincsCuda->copyVelocitiesFromGpu(v);
- }
- }
- }
if (shaked != nullptr)
{
switch (econq)
{
case ConstraintVariable::Positions:
- // If settled was initialized, use CPU version of settle
- if (settled != nullptr)
- {
#pragma omp parallel for num_threads(nth) schedule(static)
- for (th = 0; th < nth; th++)
+ for (th = 0; th < nth; th++)
+ {
+ try
{
- try
+ if (th > 0)
{
- if (th > 0)
- {
- clear_mat(vir_r_m_dr_th[th]);
- }
-
- csettle(settled,
- nth, th,
- pbc_null,
- x[0], xprime[0],
- invdt, v ? v[0] : nullptr,
- vir != nullptr,
- th == 0 ? vir_r_m_dr : vir_r_m_dr_th[th],
- th == 0 ? &bSettleErrorHasOccurred0 : &bSettleErrorHasOccurred[th]);
+ clear_mat(vir_r_m_dr_th[th]);
}
- GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
- }
- inc_nrnb(nrnb, eNR_SETTLE, nsettle);
- if (v != nullptr)
- {
- inc_nrnb(nrnb, eNR_CONSTR_V, nsettle*3);
- }
- if (vir != nullptr)
- {
- inc_nrnb(nrnb, eNR_CONSTR_VIR, nsettle*3);
+
+ csettle(settled,
+ nth, th,
+ pbc_null,
+ x[0], xprime[0],
+ invdt, v ? v[0] : nullptr,
+ vir != nullptr,
+ th == 0 ? vir_r_m_dr : vir_r_m_dr_th[th],
+ th == 0 ? &bSettleErrorHasOccurred0 : &bSettleErrorHasOccurred[th]);
}
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
}
- else
+ inc_nrnb(nrnb, eNR_SETTLE, nsettle);
+ if (v != nullptr)
{
- // If CPU version of SETTLE was not initialized, GPU version should have being.
- GMX_ASSERT(settleCuda != nullptr, "There are settles, but nither CPU nor CUDA version of SETTLE was initialized.");
- settleCuda->setPbc(pbc_null);
- settleCuda->copyCoordinatesToGpu(x, xprime);
- settleCuda->copyVelocitiesToGpu(v);
- settleCuda->apply(v != nullptr, invdt,
- vir != nullptr, vir_r_m_dr);
- settleCuda->copyCoordinatesFromGpu(xprime);
- if (v != nullptr)
- {
- settleCuda->copyVelocitiesFromGpu(v);
- }
+ inc_nrnb(nrnb, eNR_CONSTR_V, nsettle*3);
+ }
+ if (vir != nullptr)
+ {
+ inc_nrnb(nrnb, eNR_CONSTR_VIR, nsettle*3);
}
break;
case ConstraintVariable::Velocities:
case ConstraintVariable::Derivative:
case ConstraintVariable::Force:
case ConstraintVariable::ForceDispl:
- GMX_RELEASE_ASSERT(settled != nullptr, "SETTLE projection correction is not implemented on GPU.");
#pragma omp parallel for num_threads(nth) schedule(static)
for (th = 0; th < nth; th++)
{
*/
if (ir.eConstrAlg == econtLINCS)
{
- if (c_useGpuLincs)
- {
- lincsCuda->set(top.idef, md);
- }
- else
- {
- set_lincs(top.idef, md, EI_DYNAMICS(ir.eI), cr, lincsd);
- }
+ set_lincs(top.idef, md, EI_DYNAMICS(ir.eI), cr, lincsd);
}
if (ir.eConstrAlg == econtSHAKE)
{
settle_set_constraints(settled,
&idef->il[F_SETTLE], md);
}
- else if (settleCuda != nullptr && c_useGpuSettle)
- {
- settleCuda->set(top.idef, md);
- }
/* Make a selection of the local atoms for essential dynamics */
if (ed && cr->dd)
if (ir.eConstrAlg == econtLINCS)
{
- if (c_useGpuLincs)
- {
- fprintf(log, "Initializing LINCS on a GPU for %d atoms\n", mtop.natoms);
- GMX_RELEASE_ASSERT(nflexcon == 0, "Flexible constraints are not supported by the GPU-based implementation of LINCS.\n");
- GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr), "CUDA version of LINCS is not supported with domain decomposition");
- lincsCuda = std::make_unique<LincsCuda>(mtop.natoms, ir.nLincsIter, ir.nProjOrder);
- }
- else
- {
- lincsd = init_lincs(log, mtop,
- nflexcon, at2con_mt,
- DOMAINDECOMP(cr) && cr->dd->splitConstraints,
- ir.nLincsIter, ir.nProjOrder);
- }
+ lincsd = init_lincs(log, mtop,
+ nflexcon, at2con_mt,
+ DOMAINDECOMP(cr) && cr->dd->splitConstraints,
+ ir.nLincsIter, ir.nProjOrder);
}
if (ir.eConstrAlg == econtSHAKE)
bInterCGsettles = inter_charge_group_settles(mtop);
- if (c_useGpuSettle)
- {
- fprintf(log, "Initializing SETTLE on a GPU for %d atoms\n", mtop.natoms);
- GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr), "CUDA version of SETTLE is not supported with domain decomposition");
- gmx_omp_nthreads_set(emntSETTLE, 1);
- settleCuda = std::make_unique<SettleCuda>(mtop.natoms, mtop);
- }
- else
- {
- settled = settle_init(mtop);
- }
+ settled = settle_init(mtop);
+
/* Make an atom to settle index for use in domain decomposition */
for (size_t mt = 0; mt < mtop.moltype.size(); mt++)
{
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ *
+ * \brief Declaration of high-level functions of CUDA implementation of update and constrain class.
+ *
+ * \todo This should only list interfaces needed for libgromacs clients (e.g.
+ * management of coordinates, velocities and forces should not be here).
+ * \todo Change "cuda" suffix to "gpu"
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ * \inlibraryapi
+ */
+#ifndef GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_H
+#define GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_H
+
+#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/topology/idef.h"
+#include "gromacs/utility/classhelpers.h"
+
+namespace gmx
+{
+
+class UpdateConstrainCuda
+{
+
+ public:
+ /*! \brief Create Update-Constrain object.
+ *
+ * \param[in] numAtoms Number of atoms.
+ *
+ * \param[in] ir Input record data: LINCS takes number of iterations and order of
+ * projection from it.
+ * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
+ * and target O-H and H-H distances from this object.
+ */
+ UpdateConstrainCuda(int numAtoms,
+ const t_inputrec &ir,
+ const gmx_mtop_t &mtop);
+
+ ~UpdateConstrainCuda();
+
+ /*! \brief Integrate
+ *
+ * Integrates the equation of motion using Leap-Frog algorithm and applies
+ * LINCS and SETTLE constraints.
+ * Updates d_xp_ and d_v_ fields of this object.
+ *
+ * \param[in] dt Timestep
+ * \param[in] updateVelocities If the velocities should be constrained.
+ * \param[in] computeVirial If virial should be updated.
+ * \param[out] virial Place to save virial tensor.
+ */
+ void integrate(real dt,
+ bool updateVelocities,
+ bool computeVirial,
+ tensor virial);
+
+ /*! \brief
+ * Update data-structures (e.g. after NB search step).
+ *
+ * \param[in] idef System topology
+ * \param[in] md Atoms data. Can be used to update masses if needed (not used now).
+ */
+ void set(const t_idef &idef,
+ const t_mdatoms &md);
+
+ /*! \brief
+ * Update PBC data.
+ *
+ * Converts PBC data from t_pbc into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbc The PBC data in t_pbc format.
+ */
+ void setPbc(const t_pbc *pbc);
+
+ /*! \brief
+ * Copy coordinates from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_x CPU pointer where coordinates should be copied from.
+ */
+ void copyCoordinatesToGpu(const rvec *h_x);
+
+ /*! \brief
+ * Copy velocities from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v CPU pointer where velocities should be copied from.
+ */
+ void copyVelocitiesToGpu(const rvec *h_v);
+
+ /*! \brief
+ * Copy forces from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f CPU pointer where forces should be copied from.
+ */
+ void copyForcesToGpu(const rvec *h_f);
+
+ /*! \brief
+ * Copy coordinates from GPU to CPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[out] h_xp CPU pointer where coordinates should be copied to.
+ */
+ void copyCoordinatesFromGpu(rvec *h_xp);
+
+ /*! \brief
+ * Copy velocities from GPU to CPU.
+ *
+ * The velocities are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v Pointer to velocities data.
+ */
+ void copyVelocitiesFromGpu(rvec *h_v);
+
+ /*! \brief
+ * Copy forces from GPU to CPU.
+ *
+ * The forces are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f Pointer to forces data.
+ */
+ void copyForcesFromGpu(rvec *h_f);
+
+ /*! \brief
+ * Set the internal GPU-memory x, xprime and v pointers.
+ *
+ * Data is not copied. The data are assumed to be in float3/fvec format
+ * (float3 is used internally, but the data layout should be identical).
+ *
+ * \param[in] d_x Pointer to the coordinates for the input (on GPU)
+ * \param[in] d_xp Pointer to the coordinates for the output (on GPU)
+ * \param[in] d_v Pointer to the velocities (on GPU)
+ * \param[in] d_f Pointer to the forces (on GPU)
+ */
+ void setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f);
+
+ private:
+ class Impl;
+ gmx::PrivateImplPointer<Impl> impl_;
+
+};
+
+} //namespace gmx
+
+#endif
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Stub for update and constraints class CPU implementation.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include "gromacs/mdlib/update_constrain_cuda.h"
+
+#if GMX_GPU != GMX_GPU_CUDA
+
+namespace gmx
+{
+
+/*!\brief Impl class stub. */
+class UpdateConstrainCuda::Impl
+{
+};
+
+/*!\brief Constructor stub. */
+UpdateConstrainCuda::UpdateConstrainCuda(gmx_unused int numAtoms,
+ gmx_unused const t_inputrec &ir,
+ gmx_unused const gmx_mtop_t &mtop)
+ : impl_(nullptr)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+UpdateConstrainCuda::~UpdateConstrainCuda() = default;
+
+/*!\brief integrate stub. */
+void UpdateConstrainCuda::integrate(gmx_unused const real dt,
+ gmx_unused const bool updateVelocities,
+ gmx_unused const bool computeVirial,
+ gmx_unused tensor virialScaled)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*!\brief Set method stub. */
+void UpdateConstrainCuda::set(gmx_unused const t_idef &idef,
+ gmx_unused const t_mdatoms &md)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*!\brief Set PBC stub. */
+void UpdateConstrainCuda::setPbc(gmx_unused const t_pbc *pbc)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*! \brief Copy coordinates from provided CPU location to GPU stub. */
+void UpdateConstrainCuda::copyCoordinatesToGpu(gmx_unused const rvec *h_x)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*! \brief Copy velocities from provided CPU location to GPU stub. */
+void UpdateConstrainCuda::copyVelocitiesToGpu(gmx_unused const rvec *h_v)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*! \brief Copy forces from CPU to GPU stub. */
+void UpdateConstrainCuda::copyForcesToGpu(gmx_unused const rvec *h_f)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*! \brief Copy coordinates from GPU to provided CPU location stub. */
+void UpdateConstrainCuda::copyCoordinatesFromGpu(gmx_unused rvec *h_xp)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*! \brief Copy velocities from GPU to provided CPU location stub. */
+void UpdateConstrainCuda::copyVelocitiesFromGpu(gmx_unused rvec *h_v)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*! \brief Copy forces from GPU to CPU stub. */
+void UpdateConstrainCuda::copyForcesFromGpu(gmx_unused rvec *h_f)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+/*! \brief Set the internal GPU-memory x, xprime and v pointers stub. */
+void UpdateConstrainCuda::setXVFPointers(gmx_unused rvec *d_x,
+ gmx_unused rvec *d_xp,
+ gmx_unused rvec *d_v,
+ gmx_unused rvec *d_f)
+{
+ GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation.");
+}
+
+} // namespace gmx
+
+#endif /* GMX_GPU != GMX_GPU_CUDA */
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Implements update and constraints class using CUDA.
+ *
+ * The class combines Leap-Frog integrator with LINCS and SETTLE constraints.
+ *
+ * \todo This class should take over the management of coordinates, velocities
+ * forces, virial, and PBC from its members (i.e. from Leap-Frog, LINCS
+ * and SETTLE).
+ * \todo The computational procedures in members should be integrated to improve
+ * computational performance.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#include "gmxpre.h"
+
+#include "update_constrain_cuda_impl.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include <cmath>
+
+#include <algorithm>
+
+#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/gpu_utils/devicebuffer.cuh"
+#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/vectype_ops.cuh"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/leapfrog_cuda.h"
+#include "gromacs/mdlib/lincs_cuda.h"
+#include "gromacs/mdlib/settle_cuda.h"
+#include "gromacs/mdlib/update_constrain_cuda.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
+
+namespace gmx
+{
+
+/*! \brief Integrate
+ *
+ * Integrates the equation of motion using Leap-Frog algorithm and applies
+ * LINCS and SETTLE constraints.
+ * Updates d_xp_ and d_v_ fields of this object.
+ *
+ * \param[in] dt Timestep
+ * \param[in] updateVelocities If the velocities should be constrained.
+ * \param[in] computeVirial If virial should be updated.
+ * \param[out] virial Place to save virial tensor.
+ */
+void UpdateConstrainCuda::Impl::integrate(const real dt,
+ const bool updateVelocities,
+ const bool computeVirial,
+ tensor virial)
+{
+ // Clearing virial matrix
+ // TODO There is no point in having saparate virial matrix for constraints
+ clear_mat(virial);
+
+ integrator_->integrate(dt);
+ lincsCuda_->apply(updateVelocities, 1.0/dt, computeVirial, virial);
+ settleCuda_->apply(updateVelocities, 1.0/dt, computeVirial, virial);
+
+ // scaledVirial -> virial (methods above returns scaled values)
+ float scaleFactor = 0.5f/(dt*dt);
+ for (int i = 0; i < DIM; i++)
+ {
+ for (int j = 0; j < DIM; j++)
+ {
+ virial[i][j] = scaleFactor*virial[i][j];
+ }
+ }
+
+ return;
+}
+
+/*! \brief Create Update-Constrain object
+ *
+ * \param[in] numAtoms Number of atoms.
+ * \param[in] ir Input record data: LINCS takes number of iterations and order of
+ * projection from it.
+ * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
+ * and target O-H and H-H distances from this object.
+ */
+UpdateConstrainCuda::Impl::Impl(int numAtoms,
+ const t_inputrec &ir,
+ const gmx_mtop_t &mtop)
+ : numAtoms_(numAtoms)
+{
+ allocateDeviceBuffer(&d_x_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_xp_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_v_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_f_, numAtoms, nullptr);
+ allocateDeviceBuffer(&d_inverseMasses_, numAtoms, nullptr);
+
+ // TODO When the code will be integrated into the schedule, it will be assigned non-default stream.
+ stream_ = nullptr;
+
+ GMX_RELEASE_ASSERT(numAtoms == mtop.natoms, "State and topology number of atoms should be the same.");
+ integrator_ = std::make_unique<LeapFrogCuda>(numAtoms);
+ lincsCuda_ = std::make_unique<LincsCuda>(mtop.natoms, ir.nLincsIter, ir.nProjOrder);
+ settleCuda_ = std::make_unique<SettleCuda>(mtop.natoms, mtop);
+
+ integrator_->setXVFPointers((rvec*)d_x_, (rvec*)d_xp_, (rvec*)d_v_, (rvec*)d_f_);
+ lincsCuda_->setXVPointers((rvec*)d_x_, (rvec*)d_xp_, (rvec*)d_v_);
+ settleCuda_->setXVPointers((rvec*)d_x_, (rvec*)d_xp_, (rvec*)d_v_);
+}
+
+UpdateConstrainCuda::Impl::~Impl()
+{
+}
+
+/*! \brief
+ * Update data-structures (e.g. after NB search step).
+ *
+ * \param[in] idef System topology
+ * \param[in] md Atoms data.
+ */
+void UpdateConstrainCuda::Impl::set(const t_idef &idef,
+ const t_mdatoms &md)
+{
+ // Integrator should also update something, but it does not even have a method yet
+ integrator_->set(md);
+ lincsCuda_->set(idef, md);
+ settleCuda_->set(idef, md);
+}
+
+/*! \brief
+ * Update PBC data.
+ *
+ * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbc The PBC data in t_pbc format.
+ */
+void UpdateConstrainCuda::Impl::setPbc(const t_pbc *pbc)
+{
+ setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);
+ integrator_->setPbc(pbc);
+ lincsCuda_->setPbc(pbc);
+ settleCuda_->setPbc(pbc);
+}
+
+/*! \brief
+ * Copy coordinates from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_x CPU pointer where coordinates should be copied from.
+ */
+void UpdateConstrainCuda::Impl::copyCoordinatesToGpu(const rvec *h_x)
+{
+ copyToDeviceBuffer(&d_x_, (float3*)h_x, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy velocities from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v CPU pointer where velocities should be copied from.
+ */
+void UpdateConstrainCuda::Impl::copyVelocitiesToGpu(const rvec *h_v)
+{
+ copyToDeviceBuffer(&d_v_, (float3*)h_v, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy forces from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f CPU pointer where forces should be copied from.
+ */
+void UpdateConstrainCuda::Impl::copyForcesToGpu(const rvec *h_f)
+{
+ copyToDeviceBuffer(&d_f_, (float3*)h_f, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy coordinates from GPU to CPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[out] h_xp CPU pointer where coordinates should be copied to.
+ */
+void UpdateConstrainCuda::Impl::copyCoordinatesFromGpu(rvec *h_xp)
+{
+ copyFromDeviceBuffer((float3*)h_xp, &d_xp_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy velocities from GPU to CPU.
+ *
+ * The velocities are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v Pointer to velocities data.
+ */
+void UpdateConstrainCuda::Impl::copyVelocitiesFromGpu(rvec *h_v)
+{
+ copyFromDeviceBuffer((float3*)h_v, &d_v_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Copy forces from GPU to CPU.
+ *
+ * The forces are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f Pointer to forces data.
+ */
+void UpdateConstrainCuda::Impl::copyForcesFromGpu(rvec *h_f)
+{
+ copyFromDeviceBuffer((float3*)h_f, &d_f_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr);
+}
+
+/*! \brief
+ * Set the internal GPU-memory x, xprime and v pointers.
+ *
+ * Data is not copied. The data are assumed to be in float3/fvec format
+ * (float3 is used internally, but the data layout should be identical).
+ *
+ * \param[in] d_x Pointer to the coordinates for the input (on GPU)
+ * \param[in] d_xp Pointer to the coordinates for the output (on GPU)
+ * \param[in] d_v Pointer to the velocities (on GPU)
+ * \param[in] d_f Pointer to the forces (on GPU)
+ */
+void UpdateConstrainCuda::Impl::setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f)
+{
+ d_x_ = (float3*)d_x;
+ d_xp_ = (float3*)d_xp;
+ d_v_ = (float3*)d_v;
+ d_f_ = (float3*)d_f;
+}
+
+
+UpdateConstrainCuda::UpdateConstrainCuda(int numAtoms,
+ const t_inputrec &ir,
+ const gmx_mtop_t &mtop)
+ : impl_(new Impl(numAtoms, ir, mtop))
+{
+}
+
+UpdateConstrainCuda::~UpdateConstrainCuda() = default;
+
+void UpdateConstrainCuda::integrate(const real dt,
+ const bool updateVelocities,
+ const bool computeVirial,
+ tensor virialScaled)
+{
+ impl_->integrate(dt, updateVelocities, computeVirial, virialScaled);
+}
+
+void UpdateConstrainCuda::set(const t_idef &idef,
+ const t_mdatoms gmx_unused &md)
+{
+ impl_->set(idef, md);
+}
+
+void UpdateConstrainCuda::setPbc(const t_pbc *pbc)
+{
+ impl_->setPbc(pbc);
+}
+
+void UpdateConstrainCuda::copyCoordinatesToGpu(const rvec *h_x)
+{
+ impl_->copyCoordinatesToGpu(h_x);
+}
+
+void UpdateConstrainCuda::copyVelocitiesToGpu(const rvec *h_v)
+{
+ impl_->copyVelocitiesToGpu(h_v);
+}
+
+void UpdateConstrainCuda::copyForcesToGpu(const rvec *h_f)
+{
+ impl_->copyForcesToGpu(h_f);
+}
+
+void UpdateConstrainCuda::copyCoordinatesFromGpu(rvec *h_xp)
+{
+ impl_->copyCoordinatesFromGpu(h_xp);
+}
+
+void UpdateConstrainCuda::copyVelocitiesFromGpu(rvec *h_v)
+{
+ impl_->copyVelocitiesFromGpu(h_v);
+}
+
+void UpdateConstrainCuda::copyForcesFromGpu(rvec *h_f)
+{
+ impl_->copyForcesFromGpu(h_f);
+}
+
+void UpdateConstrainCuda::setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f)
+{
+ impl_->setXVFPointers(d_x, d_xp, d_v, d_f);
+}
+
+} //namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2019, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ *
+ * \brief Declares CUDA implementation class for update and constraints.
+ *
+ * This header file is needed to include from both the device-side
+ * kernels file, and the host-side management code.
+ *
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * \ingroup module_mdlib
+ */
+#ifndef GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_IMPL_H
+#define GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_IMPL_H
+
+#include "gromacs/mdlib/leapfrog_cuda.h"
+#include "gromacs/mdlib/lincs_cuda.h"
+#include "gromacs/mdlib/settle_cuda.h"
+#include "gromacs/mdlib/update_constrain_cuda.h"
+#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
+#include "gromacs/topology/idef.h"
+
+namespace gmx
+{
+
+/*! \internal \brief Class with interfaces and data for CUDA version of Update-Constraint. */
+class UpdateConstrainCuda::Impl
+{
+
+ public:
+ /*! \brief Create Update-Constrain object
+ *
+ * \param[in] numAtoms Number of atoms.
+ * \param[in] ir Input record data: LINCS takes number of iterations and order of
+ * projection from it.
+ * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms
+ * and target O-H and H-H distances from this object.
+ */
+ Impl(int numAtoms,
+ const t_inputrec &ir,
+ const gmx_mtop_t &mtop);
+
+ ~Impl();
+
+ /*! \brief Integrate
+ *
+ * Integrates the equation of motion using Leap-Frog algorithm and applies
+ * LINCS and SETTLE constraints.
+ * Updates d_xp_ and d_v_ fields of this object.
+ *
+ * \param[in] dt Timestep
+ * \param[in] updateVelocities If the velocities should be constrained.
+ * \param[in] computeVirial If virial should be updated.
+ * \param[out] virial Place to save virial tensor.
+ */
+ void integrate(const real dt,
+ const bool updateVelocities,
+ const bool computeVirial,
+ tensor virial);
+
+ /*! \brief
+ * Update data-structures (e.g. after NB search step).
+ *
+ * \param[in] idef System topology
+ * \param[in] md Atoms data. Can be used to update masses if needed (not used now).
+ */
+ void set(const t_idef &idef,
+ const t_mdatoms &md);
+
+ /*! \brief
+ * Update PBC data.
+ *
+ * Converts PBC data from t_pbc into the PbcAiuc format and stores the latter.
+ *
+ * \param[in] pbc The PBC data in t_pbc format.
+ */
+ void setPbc(const t_pbc *pbc);
+
+ /*! \brief
+ * Copy coordinates from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_x CPU pointer where coordinates should be copied from.
+ */
+ void copyCoordinatesToGpu(const rvec *h_x);
+
+ /*! \brief
+ * Copy velocities from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v CPU pointer where velocities should be copied from.
+ */
+ void copyVelocitiesToGpu(const rvec *h_v);
+
+ /*! \brief
+ * Copy forces from CPU to GPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f CPU pointer where forces should be copied from.
+ */
+ void copyForcesToGpu(const rvec *h_f);
+
+ /*! \brief
+ * Copy coordinates from GPU to CPU.
+ *
+ * The data are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[out] h_xp CPU pointer where coordinates should be copied to.
+ */
+ void copyCoordinatesFromGpu(rvec *h_xp);
+
+ /*! \brief
+ * Copy velocities from GPU to CPU.
+ *
+ * The velocities are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_v Pointer to velocities data.
+ */
+ void copyVelocitiesFromGpu(rvec *h_v);
+
+ /*! \brief
+ * Copy forces from GPU to CPU.
+ *
+ * The forces are assumed to be in float3/fvec format (single precision).
+ *
+ * \param[in] h_f Pointer to forces data.
+ */
+ void copyForcesFromGpu(rvec *h_f);
+
+ /*! \brief
+ * Set the internal GPU-memory x, xprime and v pointers.
+ *
+ * Data is not copied. The data are assumed to be in float3/fvec format
+ * (float3 is used internally, but the data layout should be identical).
+ *
+ * \param[in] d_x Pointer to the coordinates for the input (on GPU)
+ * \param[in] d_xp Pointer to the coordinates for the output (on GPU)
+ * \param[in] d_v Pointer to the velocities (on GPU)
+ * \param[in] d_f Pointer to the forces (on GPU)
+ */
+ void setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f);
+
+ private:
+
+ //! CUDA stream
+ cudaStream_t stream_;
+
+ //! Periodic boundary data
+ PbcAiuc pbcAiuc_;
+
+ //! Number of atoms
+ int numAtoms_;
+
+ //! Coordinates before the timestep (on GPU)
+ float3 *d_x_;
+ //! Coordinates after the timestep (on GPU).
+ float3 *d_xp_;
+ //! Velocities of atoms (on GPU)
+ float3 *d_v_;
+ //! Forces, exerted by atoms (on GPU)
+ float3 *d_f_;
+
+ //! 1/mass for all atoms (GPU)
+ real *d_inverseMasses_;
+
+ //! Leap-Frog integrator
+ std::unique_ptr<LeapFrogCuda> integrator_;
+ //! LINCS CUDA object to use for non-water constraints
+ std::unique_ptr<LincsCuda> lincsCuda_;
+ //! SETTLE CUDA object for water constrains
+ std::unique_ptr<SettleCuda> settleCuda_;
+
+};
+
+} // namespace gmx
+
+#endif
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdlib/forcerec.h"
-#include "gromacs/mdlib/leapfrog_cuda.h"
#include "gromacs/mdlib/md_support.h"
#include "gromacs/mdlib/mdatoms.h"
#include "gromacs/mdlib/mdoutf.h"
#include "gromacs/mdlib/tgroup.h"
#include "gromacs/mdlib/trajectory_writing.h"
#include "gromacs/mdlib/update.h"
+#include "gromacs/mdlib/update_constrain_cuda.h"
#include "gromacs/mdlib/vcm.h"
#include "gromacs/mdlib/vsite.h"
#include "gromacs/mdrunutility/handlerestart.h"
using gmx::SimulationSignaller;
-//! Whether the GPU version of Leap-Frog integrator should be used.
-static const bool c_useGpuLeapFrog = (getenv("GMX_INTEGRATE_GPU") != nullptr);
+//! Whether the GPU versions of Leap-Frog integrator and LINCS and SHAKE constraints
+static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr);
void gmx::Simulator::do_md()
{
std::unique_ptr<t_state> stateInstance;
t_state * state;
+
+ auto mdatoms = mdAtoms->mdatoms();
+
+ std::unique_ptr<UpdateConstrainCuda> integrator;
+
if (DOMAINDECOMP(cr))
{
dd_init_local_top(*top_global, &top);
&graph, mdAtoms, constr, vsite, shellfc);
upd.setNumAtoms(state->natoms);
- }
- auto mdatoms = mdAtoms->mdatoms();
+ if (c_useGpuUpdateConstrain)
+ {
+ GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
+ GMX_RELEASE_ASSERT(ir->etc == etcNO, "Temperature coupling is not supported on the GPU.");
+ GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU.");
+ GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
+ GMX_LOG(mdlog.info).asParagraph().
+ appendText("Using CUDA GPU-based update and constraints module.");
+ integrator = std::make_unique<UpdateConstrainCuda>(state->natoms, *ir, *top_global);
+ integrator->set(top.idef, *mdatoms);
+ t_pbc pbc;
+ set_pbc(&pbc, epbcXYZ, state->box);
+ integrator->setPbc(&pbc);
+ }
+
+ }
// NOTE: The global state is no longer used at this point.
// But state_global is still used as temporary storage space for writing
* Loop over MD steps
*
************************************************************/
- std::unique_ptr<LeapFrogCuda> integrator;
- if (c_useGpuLeapFrog)
- {
- GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU.");
- GMX_RELEASE_ASSERT(ir->etc == etcNO, "Temperature coupling is not supported on the GPU.");
- GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU.");
- GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU");
- integrator = std::make_unique<LeapFrogCuda>(state->natoms);
- integrator->set(*mdatoms);
- }
bFirstStep = TRUE;
/* Skip the first Nose-Hoover integration when we get the state from tpx */
copy_rvecn(as_rvec_array(state->x.data()), cbuf, 0, state->natoms);
}
-
- if (c_useGpuLeapFrog)
+ if (c_useGpuUpdateConstrain)
{
integrator->copyCoordinatesToGpu(state->x.rvec_array());
integrator->copyVelocitiesToGpu(state->v.rvec_array());
integrator->copyForcesToGpu(as_rvec_array(f.data()));
- integrator->integrate(ir->delta_t);
- integrator->copyCoordinatesFromGpu(upd.xp()->rvec_array());
+
+ // This applies Leap-Frog, LINCS and SETTLE in a succession
+ integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir);
+
+ integrator->copyCoordinatesFromGpu(state->x.rvec_array());
integrator->copyVelocitiesFromGpu(state->v.rvec_array());
}
else
{
update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd,
ekind, M, &upd, etrtPOSITION, cr, constr);
+
+ wallcycle_stop(wcycle, ewcUPDATE);
+
+ constrain_coordinates(step, &dvdl_constr, state,
+ shake_vir,
+ &upd, constr,
+ bCalcVir, do_log, do_ene);
+
+ update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
+ cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
+ finish_update(ir, mdatoms,
+ state, graph,
+ nrnb, wcycle, &upd, constr);
}
- wallcycle_stop(wcycle, ewcUPDATE);
-
- constrain_coordinates(step, &dvdl_constr, state,
- shake_vir,
- &upd, constr,
- bCalcVir, do_log, do_ene);
- update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state,
- cr, nrnb, wcycle, &upd, constr, do_log, do_ene);
- finish_update(ir, mdatoms,
- state, graph,
- nrnb, wcycle, &upd, constr);
if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
{