# GROMACS 2019 4
# GROMACS 2020 5
# GROMACS 2021 6
+# GROMACS 2022 7
# LIBRARY_SOVERSION_MINOR so minor version for the built libraries.
# Should be increased for each release that changes only the implementation.
# In GROMACS, the typical policy is to increase it for each patch version
# The GROMACS convention is that these are the version number of the next
# release that is going to be made from this branch.
-set(GMX_VERSION_MAJOR 2021)
-set(GMX_VERSION_PATCH 4)
+set(GMX_VERSION_MAJOR 2022)
+set(GMX_VERSION_PATCH 0)
# The suffix, on the other hand, is used mainly for betas and release
# candidates, where it signifies the most recent such release from
# this branch; it will be empty before the first such release, as well
# here. The important thing is to minimize the chance of third-party
# code being able to dynamically link with a version of libgromacs
# that might not work.
-set(LIBRARY_SOVERSION_MAJOR 6)
+set(LIBRARY_SOVERSION_MAJOR 7)
set(LIBRARY_SOVERSION_MINOR 0)
set(LIBRARY_VERSION ${LIBRARY_SOVERSION_MAJOR}.${LIBRARY_SOVERSION_MINOR}.0)
endif()
set(REGRESSIONTEST_VERSION "${GMX_VERSION_STRING}")
-set(REGRESSIONTEST_BRANCH "release-2021")
+set(REGRESSIONTEST_BRANCH "master")
# Run the regressiontests packaging job with the correct package
# version string, and the release box checked, in order to have it
# build the regressiontests tarball with all the right naming. The
# naming affects the md5sum that has to go here, and if it isn't right
# the release workflow will report a failure.
- set(REGRESSIONTEST_MD5SUM "62606f5f6ea37f6114b6e9cf58218f13" CACHE INTERNAL "MD5 sum of the regressiontests tarball for this GROMACS version")
+ set(REGRESSIONTEST_MD5SUM "93956ea42c4d16fdd541518c05972989" CACHE INTERNAL "MD5 sum of the regressiontests tarball for this GROMACS version")
math(EXPR GMX_VERSION_NUMERIC
"${GMX_VERSION_MAJOR}*10000 + ${GMX_VERSION_PATCH}")
dev-manual/doxygen.rst
dev-manual/error-handling.rst
dev-manual/formatting.rst
- dev-manual/gitlab.rst
+ dev-manual/gitlab-ci.rst
dev-manual/gmxtree.rst
dev-manual/includestyle.rst
dev-manual/index.rst
how-to/visualize.rst
install-guide/index.rst
release-notes/index.rst
+ release-notes/2022/major/highlights.rst
+ release-notes/2022/major/features.rst
+ release-notes/2022/major/performance.rst
+ release-notes/2022/major/tools.rst
+ release-notes/2022/major/bugs-fixed.rst
+ release-notes/2022/major/removed-functionality.rst
+ release-notes/2022/major/deprecated-functionality.rst
+ release-notes/2022/major/portability.rst
+ release-notes/2022/major/miscellaneous.rst
+ release-notes/2022/major/api.rst
+ release-notes/2021/2021.4.rst
release-notes/2021/2021.3.rst
release-notes/2021/2021.2.rst
release-notes/2021/2021.1.rst
functionality supported, whereas patch releases contain only fixes for
issues identified in the corresponding major releases.
-Two versions of |Gromacs| are under active maintenance, the 2021
-series and the 2020 series. In the latter, only highly conservative
+Two versions of |Gromacs| are under active maintenance, the 2022
+series and the 2021 series. In the latter, only highly conservative
fixes will be made, and only to address issues that affect scientific
correctness. Naturally, some of those releases will be made after the
-year 2020 ends, but we keep 2019 in the name so users understand how
+year 2021 ends, but we keep 2021 in the name so users understand how
up to date their version is. Such fixes will also be incorporated into
-the 2021 release series, as appropriate. Around the time the 2022
-release is made, the 2020 series will no longer be maintained.
+the 2022 release series, as appropriate. Around the time the 2023
+release is made, the 2021 series will no longer be maintained.
Where issue numbers are reported in these release notes, more details
can be found on the `issue tracker`_ at that issue number.
-|Gromacs| 2021 series
+|Gromacs| 2022 series
---------------------
.. todolist::
.. toctree::
:maxdepth: 1
+ 2021/2021.4
2021/2021.3
2021/2021.2
2021/2021.1
+Major release
+^^^^^^^^^^^^^
+
+.. toctree::
+ :maxdepth: 1
+
+ 2022/major/highlights
+ 2022/major/features
+ 2022/major/performance
+ 2022/major/api
+ 2022/major/tools
+ 2022/major/bugs-fixed
+ 2022/major/deprecated-functionality
+ 2022/major/removed-functionality
+ 2022/major/portability
+ 2022/major/miscellaneous
+
+
+|Gromacs| 2021 series
+---------------------
+
Major release
^^^^^^^^^^^^^
2021/major/portability
2021/major/miscellaneous
+
+Older (unmaintained) |Gromacs| series
+-------------------------------------------------------
+
|Gromacs| 2020 series
---------------------
2019/major/portability
2019/major/miscellaneous
-Older (unmaintained) |Gromacs| series
--------------------------------------------------------
-
|Gromacs| 2018 series
---------------------
# configure and run CMake. CMake could be invoked directly by the user or a
# parent package, but the Python distribution would not be packaged automatically.
# Reference https://gitlab.com/gromacs/gromacs/-/issues/2896 for additional discussion.
-cmake_minimum_required(VERSION 3.13.0)
+cmake_minimum_required(VERSION 3.16.3)
# This needs to be set before project() in order to pick up toolchain files
#list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../cmake)
# Note that this is the gmxapi._gmxapi Python bindings package version,
# not the C++ API version. It is not essential that it match the pure Python
# package version, but is likely to do so.
-project(gmxapi VERSION 0.2.1)
+project(gmxapi VERSION 0.3.0)
# Check if Python package is being built directly or via add_subdirectory
set(GMXAPI_MASTER_PROJECT OFF)
endif()
if(GMXAPI_MASTER_PROJECT)
- find_package(gmxapi 0.2.1 REQUIRED
+ find_package(gmxapi 0.2 REQUIRED
HINTS "$ENV{GROMACS_DIR}"
)
+ if (gmxapi_VERSION VERSION_LESS 0.2.1)
+ message(WARNING "Your GROMACS installation does not support custom MD plugins. "
+ "If you need this feature, please install GROMACS 2021.3 or higher.")
+ endif ()
endif()
if(gmxapi_FOUND)
set(_suffix "")
${GMXAPI_PYTHON_EXTENSION_SOURCES}
)
+ if (gmxapi_VERSION VERSION_GREATER_EQUAL 0.2.1)
+ target_sources(_gmxapi PRIVATE gmxapi/launch_021.cpp)
+ else()
+ target_sources(_gmxapi PRIVATE gmxapi/launch_020.cpp)
+ endif()
+
target_include_directories(_gmxapi PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/gmxapi
${CMAKE_CURRENT_BINARY_DIR}/gmxapi
#include "gmxapi/session.h"
#include "gmxapi/status.h"
#include "gmxapi/system.h"
+ #include "gmxapi/version.h"
#include "pycontext.h"
#include "pysystem.h"
// required to maintain and to pass to the API.
py::class_<::gmxapi::Session, std::shared_ptr<::gmxapi::Session>> session(m, "MDSession");
session.def("run", &::gmxapi::Session::run, "Run the simulation workflow");
- session.def("close", &::gmxapi::Session::close,
+ session.def("close",
+ &::gmxapi::Session::close,
"Shut down the execution environment and close the session.");
// Export system container class
py::class_<System, std::shared_ptr<System>> system(m, "MDSystem");
- system.def(
- "launch",
- [](System* system, std::shared_ptr<PyContext> context) {
- auto work = gmxapi::getWork(*system->get());
- auto newSession = context->launch(*work);
- return newSession;
- },
- "Launch the configured workflow in the provided context.");
+ system.def("launch", &launch, "Launch the configured workflow in the provided context.");
// Module-level function
- m.def("from_tpr", &gmxpy::from_tpr,
+ m.def("from_tpr",
+ &gmxpy::from_tpr,
"Return a system container initialized from the given input record.");
}
used when guessing a toolchain, because setup.py does not know which corresponds
to the gmxapi support library.
--If specifying GMXTOOLCHAINDIR and gmxapi_DIR, the tool chain directory must be
++If specifying GMXTOOLCHAINDIR and gmxapi_DIR, the tool chain directory must be
located within a subdirectory of gmxapi_DIR.
Refer to project web site for complete documentation.
cmake_gmxapi_hint = '-Dgmxapi_ROOT={}'.format(gmxapi_DIR)
cmake_args = [cmake_platform_hints, cmake_gmxapi_hint]
+ long_description = """gmxapi provides Python access to GROMACS molecular simulation tools.
+ Operations can be connected flexibly to allow high performance simulation and
+ analysis with complex control and data flows. Users can define new operations
+ in C++ or Python with the same tool kit used to implement this package.
+
+ This Python package requires a compatible GROMACS installation with the API
+ libraries and headers.
+
+ See http://gmxapi.org/ for details on installation and usage.
+ """
+
setup(
name='gmxapi',
# TODO: single-source version information (currently repeated in gmxapi/version.py and CMakeLists.txt)
- version='0.2.2',
- python_requires='>=3.6',
+ version='0.3.0a3',
+ python_requires='>=3.7',
install_requires=['networkx>=2.0',
'numpy>=1'],
author='M. Eric Irrgang',
author_email='info@gmxapi.org',
description='gmxapi Python interface for GROMACS',
+ long_description=long_description,
license='LGPL',
url='http://gmxapi.org/',
#include "gromacs/domdec/builder.h"
#include "gromacs/domdec/collect.h"
+#include "gromacs/domdec/computemultibodycutoffs.h"
#include "gromacs/domdec/dlb.h"
#include "gromacs/domdec/dlbtiming.h"
#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/ga2la.h"
#include "gromacs/domdec/gpuhaloexchange.h"
+#include "gromacs/domdec/localtopologychecker.h"
#include "gromacs/domdec/options.h"
#include "gromacs/domdec/partition.h"
+#include "gromacs/ewald/pme.h"
+#include "gromacs/domdec/reversetopology.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gpu_utils/device_stream_manager.h"
#include "gromacs/mdlib/constr.h"
#include "gromacs/mdlib/constraintrange.h"
#include "gromacs/mdlib/updategroups.h"
+#include "gromacs/mdlib/vcm.h"
#include "gromacs/mdlib/vsite.h"
+#include "gromacs/mdrun/mdmodules.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/forceoutput.h"
+#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/mdrunoptions.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/logger.h"
+#include "gromacs/utility/mdmodulesnotifiers.h"
#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/strconvert.h"
#include "utility.h"
// TODO remove this when moving domdec into gmx namespace
+using gmx::ArrayRef;
using gmx::DdRankOrder;
using gmx::DlbOption;
using gmx::DomdecOptions;
+using gmx::RangePartitioning;
-static const char* edlbs_names[int(DlbState::nr)] = { "off", "off", "auto", "locked", "on", "on" };
-
-/* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
-#define DD_CGIBS 2
-
-/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
-#define DD_FLAG_NRCG 65535
-#define DD_FLAG_FW(d) (1 << (16 + (d)*2))
-#define DD_FLAG_BW(d) (1 << (16 + (d)*2 + 1))
+static const char* enumValueToString(DlbState enumValue)
+{
+ static constexpr gmx::EnumerationArray<DlbState, const char*> dlbStateNames = {
+ "off", "off", "auto", "locked", "on", "on"
+ };
+ return dlbStateNames[enumValue];
+}
/* The DD zone order */
static const ivec dd_zo[DD_MAXZONE] = { { 0, 0, 0 }, { 1, 0, 0 }, { 1, 1, 0 }, { 0, 1, 0 },
int ddglatnr(const gmx_domdec_t* dd, int i)
{
- int atnr;
+ int atnr = 0;
if (dd == nullptr)
{
{
gmx_fatal(FARGS,
"glatnr called with %d, which is larger than the local number of atoms (%d)",
- i, dd->comm->atomRanges.numAtomsTotal());
+ i,
+ dd->comm->atomRanges.numAtomsTotal());
}
atnr = dd->globalAtomIndices[i] + 1;
}
return atnr;
}
-gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd)
+void dd_store_state(const gmx_domdec_t& dd, t_state* state)
{
- GMX_RELEASE_ASSERT(dd.comm, "Need a valid dd.comm");
- return dd.comm->systemInfo.updateGroupingPerMoleculetype;
-}
-
-void dd_store_state(gmx_domdec_t* dd, t_state* state)
-{
- int i;
-
- if (state->ddp_count != dd->ddp_count)
+ if (state->ddp_count != dd.ddp_count)
{
gmx_incons("The MD state does not match the domain decomposition state");
}
- state->cg_gl.resize(dd->ncg_home);
- for (i = 0; i < dd->ncg_home; i++)
+ state->cg_gl.resize(dd.numHomeAtoms);
+ for (int i = 0; i < dd.numHomeAtoms; i++)
{
- state->cg_gl[i] = dd->globalAtomGroupIndices[i];
+ state->cg_gl[i] = dd.globalAtomGroupIndices[i];
}
- state->ddp_count_cg_gl = dd->ddp_count;
+ state->ddp_count_cg_gl = dd.ddp_count;
}
gmx_domdec_zones_t* domdec_zones(gmx_domdec_t* dd)
return dd.comm->atomRanges.numHomeAtoms();
}
-int dd_natoms_mdatoms(const gmx_domdec_t* dd)
+int dd_natoms_mdatoms(const gmx_domdec_t& dd)
{
/* We currently set mdatoms entries for all atoms:
* local + non-local + communicated for vsite + constraints
*/
- return dd->comm->atomRanges.numAtomsTotal();
+ return dd.comm->atomRanges.numAtomsTotal();
}
-int dd_natoms_vsite(const gmx_domdec_t* dd)
+int dd_natoms_vsite(const gmx_domdec_t& dd)
{
- return dd->comm->atomRanges.end(DDAtomRanges::Type::Vsites);
+ return dd.comm->atomRanges.end(DDAtomRanges::Type::Vsites);
}
-void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end)
+void dd_get_constraint_range(const gmx_domdec_t& dd, int* at_start, int* at_end)
{
- *at_start = dd->comm->atomRanges.start(DDAtomRanges::Type::Constraints);
- *at_end = dd->comm->atomRanges.end(DDAtomRanges::Type::Constraints);
+ *at_start = dd.comm->atomRanges.start(DDAtomRanges::Type::Constraints);
+ *at_end = dd.comm->atomRanges.end(DDAtomRanges::Type::Constraints);
}
void dd_move_x(gmx_domdec_t* dd, const matrix box, gmx::ArrayRef<gmx::RVec> x, gmx_wallcycle* wcycle)
{
- wallcycle_start(wcycle, ewcMOVEX);
+ wallcycle_start(wcycle, WallCycleCounter::MoveX);
- int nzone, nat_tot;
- gmx_domdec_comm_t* comm;
- gmx_domdec_comm_dim_t* cd;
- rvec shift = { 0, 0, 0 };
- gmx_bool bPBC, bScrew;
+ rvec shift = { 0, 0, 0 };
- comm = dd->comm;
+ gmx_domdec_comm_t* comm = dd->comm;
- nzone = 1;
- nat_tot = comm->atomRanges.numHomeAtoms();
+ int nzone = 1;
+ int nat_tot = comm->atomRanges.numHomeAtoms();
for (int d = 0; d < dd->ndim; d++)
{
- bPBC = (dd->ci[dd->dim[d]] == 0);
- bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
+ const bool bPBC = (dd->ci[dd->dim[d]] == 0);
+ const bool bScrew = (bPBC && dd->unitCellInfo.haveScrewPBC && dd->dim[d] == XX);
if (bPBC)
{
copy_rvec(box[dd->dim[d]], shift);
}
- cd = &comm->cd[d];
+ gmx_domdec_comm_dim_t* cd = &comm->cd[d];
for (const gmx_domdec_ind_t& ind : cd->ind)
{
DDBufferAccess<gmx::RVec> sendBufferAccess(comm->rvecBuffer, ind.nsend[nzone + 1]);
nzone += nzone;
}
- wallcycle_stop(wcycle, ewcMOVEX);
+ wallcycle_stop(wcycle, WallCycleCounter::MoveX);
}
void dd_move_f(gmx_domdec_t* dd, gmx::ForceWithShiftForces* forceWithShiftForces, gmx_wallcycle* wcycle)
{
- wallcycle_start(wcycle, ewcMOVEF);
+ wallcycle_start(wcycle, WallCycleCounter::MoveF);
gmx::ArrayRef<gmx::RVec> f = forceWithShiftForces->force();
gmx::ArrayRef<gmx::RVec> fshift = forceWithShiftForces->shiftForces();
/* Determine which shift vector we need */
ivec vis = { 0, 0, 0 };
vis[dd->dim[d]] = 1;
- const int is = IVEC2IS(vis);
+ const int is = gmx::ivecToShiftIndex(vis);
/* Loop over the pulses */
const gmx_domdec_comm_dim_t& cd = comm.cd[d];
}
nzone /= 2;
}
- wallcycle_stop(wcycle, ewcMOVEF);
+ wallcycle_stop(wcycle, WallCycleCounter::MoveF);
}
/* Convenience function for extracting a real buffer from an rvec buffer
void dd_atom_spread_real(gmx_domdec_t* dd, real v[])
{
- int nzone, nat_tot;
- gmx_domdec_comm_t* comm;
- gmx_domdec_comm_dim_t* cd;
-
- comm = dd->comm;
+ gmx_domdec_comm_t* comm = dd->comm;
- nzone = 1;
- nat_tot = comm->atomRanges.numHomeAtoms();
+ int nzone = 1;
+ int nat_tot = comm->atomRanges.numHomeAtoms();
for (int d = 0; d < dd->ndim; d++)
{
- cd = &comm->cd[d];
+ gmx_domdec_comm_dim_t* cd = &comm->cd[d];
for (const gmx_domdec_ind_t& ind : cd->ind)
{
/* Note: We provision for RVec instead of real, so a factor of 3
void dd_atom_sum_real(gmx_domdec_t* dd, real v[])
{
- int nzone, nat_tot;
- gmx_domdec_comm_t* comm;
- gmx_domdec_comm_dim_t* cd;
-
- comm = dd->comm;
+ gmx_domdec_comm_t* comm = dd->comm;
- nzone = comm->zones.n / 2;
- nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
+ int nzone = comm->zones.n / 2;
+ int nat_tot = comm->atomRanges.end(DDAtomRanges::Type::Zones);
for (int d = dd->ndim - 1; d >= 0; d--)
{
- cd = &comm->cd[d];
+ gmx_domdec_comm_dim_t* cd = &comm->cd[d];
for (int p = cd->numPulses() - 1; p >= 0; p--)
{
const gmx_domdec_ind_t& ind = cd->ind[p];
real dd_cutoff_twobody(const gmx_domdec_t* dd)
{
- real r_mb;
-
- r_mb = dd_cutoff_multibody(dd);
+ const real r_mb = dd_cutoff_multibody(dd);
return std::max(dd->comm->systemInfo.cutoff, r_mb);
}
return pmeRanks;
}
-static int gmx_ddcoord2pmeindex(const t_commrec* cr, int x, int y, int z)
+static int gmx_ddcoord2pmeindex(const gmx_domdec_t& dd, int x, int y, int z)
{
- gmx_domdec_t* dd;
- ivec coords;
- int slab;
+ ivec coords;
- dd = cr->dd;
- coords[XX] = x;
- coords[YY] = y;
- coords[ZZ] = z;
- slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
+ coords[XX] = x;
+ coords[YY] = y;
+ coords[ZZ] = z;
+ const int slab = ddindex2pmeindex(dd.comm->ddRankSetup, dd_index(dd.numCells, coords));
return slab;
}
}
else
{
- if (cr->dd->comm->ddRankSetup.usePmeOnlyRanks)
+ const DDRankSetup& rankSetup = cr->dd->comm->ddRankSetup;
+ if (rankSetup.rankOrder != DdRankOrder::pp_pme && rankSetup.usePmeOnlyRanks)
{
- nodeid = ddindex + gmx_ddcoord2pmeindex(cr, x, y, z);
+ nodeid = ddindex + gmx_ddcoord2pmeindex(*cr->dd, x, y, z);
}
else
{
else
{
/* The slab corresponds to the nodeid in the PME group */
- if (gmx_ddcoord2pmeindex(cr, x, y, z) == pmenodeid)
+ if (gmx_ddcoord2pmeindex(*cr->dd, x, y, z) == pmenodeid)
{
ddranks.push_back(ddcoord2simnodeid(cr, x, y, z));
}
static gmx_bool receive_vir_ener(const gmx_domdec_t* dd, gmx::ArrayRef<const int> pmeRanks, const t_commrec* cr)
{
- gmx_bool bReceive = TRUE;
+ bool bReceive = true;
const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
if (ddRankSetup.usePmeOnlyRanks)
coords[cartSetup.cartpmedim]++;
if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
{
- int rank;
+ int rank = 0;
MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
if (dd_simnode2pmenode(ddRankSetup, cartSetup, pmeRanks, cr, rank) == pmenode)
{
static void set_slb_pme_dim_f(gmx_domdec_t* dd, int dim, real** dim_f)
{
- gmx_domdec_comm_t* comm;
- int i;
-
- comm = dd->comm;
+ gmx_domdec_comm_t* comm = dd->comm;
snew(*dim_f, dd->numCells[dim] + 1);
(*dim_f)[0] = 0;
- for (i = 1; i < dd->numCells[dim]; i++)
+ for (int i = 1; i < dd->numCells[dim]; i++)
{
if (comm->slb_frac[dim])
{
*/
if (dimind == 0 || xyz[XX] == dd->ci[XX])
{
- const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
- int slab;
- if (dimind == 0)
- {
- slab = pmeindex / nso;
- }
- else
- {
- slab = pmeindex % ddpme->nslab;
- }
+ const int pmeindex = ddindex2pmeindex(ddRankSetup, i);
+ const int slab = (dimind == 0) ? (pmeindex / nso) : (pmeindex % ddpme->nslab);
ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
}
set_slb_pme_dim_f(dd, ddpme->dim, &ddpme->slb_dim_f);
}
-int dd_pme_maxshift_x(const gmx_domdec_t* dd)
+int dd_pme_maxshift_x(const gmx_domdec_t& dd)
{
- const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
+ const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
if (ddRankSetup.ddpme[0].dim == XX)
{
}
}
-int dd_pme_maxshift_y(const gmx_domdec_t* dd)
+int dd_pme_maxshift_y(const gmx_domdec_t& dd)
{
- const DDRankSetup& ddRankSetup = dd->comm->ddRankSetup;
+ const DDRankSetup& ddRankSetup = dd.comm->ddRankSetup;
if (ddRankSetup.ddpme[0].dim == YY)
{
}
}
-bool ddHaveSplitConstraints(const gmx_domdec_t& dd)
-{
- return dd.comm->systemInfo.haveSplitConstraints;
-}
-
bool ddUsesUpdateGroups(const gmx_domdec_t& dd)
{
return dd.comm->systemInfo.useUpdateGroups;
#if GMX_MPI
static void make_load_communicator(gmx_domdec_t* dd, int dim_ind, ivec loc)
{
- MPI_Comm c_row;
- int dim, i, rank;
+ MPI_Comm c_row = MPI_COMM_NULL;
ivec loc_c;
- gmx_bool bPartOfGroup = FALSE;
+ bool bPartOfGroup = false;
- dim = dd->dim[dim_ind];
+ const int dim = dd->dim[dim_ind];
copy_ivec(loc, loc_c);
- for (i = 0; i < dd->numCells[dim]; i++)
+ for (int i = 0; i < dd->numCells[dim]; i++)
{
- loc_c[dim] = i;
- rank = dd_index(dd->numCells, loc_c);
+ loc_c[dim] = i;
+ const int rank = dd_index(dd->numCells, loc_c);
if (rank == dd->rank)
{
/* This process is part of the group */
void dd_setup_dlb_resource_sharing(const t_commrec* cr, int gpu_id)
{
#if GMX_MPI
- int physicalnode_id_hash;
- gmx_domdec_t* dd;
- MPI_Comm mpi_comm_pp_physicalnode;
+ MPI_Comm mpi_comm_pp_physicalnode = MPI_COMM_NULL;
if (!thisRankHasDuty(cr, DUTY_PP) || gpu_id < 0)
{
return;
}
- physicalnode_id_hash = gmx_physicalnode_id_hash();
+ const int physicalnode_id_hash = gmx_physicalnode_id_hash();
- dd = cr->dd;
+ gmx_domdec_t* dd = cr->dd;
if (debug)
{
fprintf(debug, "dd_setup_dd_dlb_gpu_sharing:\n");
- fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank,
- physicalnode_id_hash, gpu_id);
+ fprintf(debug, "DD PP rank %d physical node hash %d gpu_id %d\n", dd->rank, physicalnode_id_hash, gpu_id);
}
/* Split the PP communicator over the physical nodes */
/* TODO: See if we should store this (before), as it's also used for
static void make_load_communicators(gmx_domdec_t gmx_unused* dd)
{
#if GMX_MPI
- int dim0, dim1, i, j;
ivec loc;
if (debug)
make_load_communicator(dd, 0, loc);
if (dd->ndim > 1)
{
- dim0 = dd->dim[0];
- for (i = 0; i < dd->numCells[dim0]; i++)
+ const int dim0 = dd->dim[0];
+ for (int i = 0; i < dd->numCells[dim0]; i++)
{
loc[dim0] = i;
make_load_communicator(dd, 1, loc);
}
if (dd->ndim > 2)
{
- dim0 = dd->dim[0];
- for (i = 0; i < dd->numCells[dim0]; i++)
+ const int dim0 = dd->dim[0];
+ for (int i = 0; i < dd->numCells[dim0]; i++)
{
- loc[dim0] = i;
- dim1 = dd->dim[1];
- for (j = 0; j < dd->numCells[dim1]; j++)
+ loc[dim0] = i;
+ const int dim1 = dd->dim[1];
+ for (int j = 0; j < dd->numCells[dim1]; j++)
{
loc[dim1] = j;
make_load_communicator(dd, 2, loc);
/*! \brief Sets up the relation between neighboring domains and zones */
static void setup_neighbor_relations(gmx_domdec_t* dd)
{
- int d, dim, m;
- ivec tmp, s;
- gmx_domdec_zones_t* zones;
+ ivec tmp, s;
GMX_ASSERT((dd->ndim >= 0) && (dd->ndim <= DIM), "Must have valid number of dimensions for DD");
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
- dim = dd->dim[d];
+ const int dim = dd->dim[d];
copy_ivec(dd->ci, tmp);
tmp[dim] = (tmp[dim] + 1) % dd->numCells[dim];
dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
if (debug)
{
- fprintf(debug, "DD rank %d neighbor ranks in dir %d are + %d - %d\n", dd->rank, dim,
- dd->neighbor[d][0], dd->neighbor[d][1]);
+ fprintf(debug,
+ "DD rank %d neighbor ranks in dir %d are + %d - %d\n",
+ dd->rank,
+ dim,
+ dd->neighbor[d][0],
+ dd->neighbor[d][1]);
}
}
int nizone = (1 << std::max(dd->ndim - 1, 0));
assert(nizone >= 1 && nizone <= DD_MAXIZONE);
- zones = &dd->comm->zones;
+ gmx_domdec_zones_t* zones = &dd->comm->zones;
for (int i = 0; i < nzone; i++)
{
- m = 0;
+ int m = 0;
clear_ivec(zones->shift[i]);
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
}
zones->n = nzone;
for (int i = 0; i < nzone; i++)
{
- for (d = 0; d < DIM; d++)
+ for (int d = 0; d < DIM; d++)
{
s[d] = dd->ci[d] - zones->shift[i][d];
if (s[d] < 0)
*/
iZone.jZoneRange = gmx::Range<int>(std::min(ddNonbondedZonePairRanges[iZoneIndex][1], nzone),
std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
- for (dim = 0; dim < DIM; dim++)
+ for (int dim = 0; dim < DIM; dim++)
{
if (dd->numCells[dim] == 1)
{
static void make_pp_communicator(const gmx::MDLogger& mdlog,
gmx_domdec_t* dd,
t_commrec gmx_unused* cr,
- bool gmx_unused reorder)
+ bool gmx_unused reorder)
{
#if GMX_MPI
gmx_domdec_comm_t* comm = dd->comm;
/* Set up cartesian communication for the particle-particle part */
GMX_LOG(mdlog.info)
.appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
- dd->numCells[XX], dd->numCells[YY], dd->numCells[ZZ]);
+ dd->numCells[XX],
+ dd->numCells[YY],
+ dd->numCells[ZZ]);
ivec periods;
for (int i = 0; i < DIM; i++)
{
periods[i] = TRUE;
}
- MPI_Comm comm_cart;
- MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder),
- &comm_cart);
+ MPI_Comm comm_cart = MPI_COMM_NULL;
+ MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder), &comm_cart);
/* We overwrite the old communicator with the new cartesian one */
cr->mpi_comm_mygroup = comm_cart;
}
/* Get the rank of the DD master,
* above we made sure that the master node is a PP node.
*/
- int rank;
- if (MASTER(cr))
- {
- rank = dd->rank;
- }
- else
- {
- rank = 0;
- }
+ int rank = MASTER(cr) ? dd->rank : 0;
MPI_Allreduce(&rank, &dd->masterrank, 1, MPI_INT, MPI_SUM, dd->mpi_comm_all);
}
else if (cartSetup.bCartesianPP)
buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
}
/* Communicate the ddindex to simulation nodeid index */
- MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
- cr->mpi_comm_mysim);
+ MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
/* Determine the master coordinates and rank.
* The DD master should be the same node as the master of this sim.
#endif
GMX_LOG(mdlog.info)
- .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n", dd->rank,
- dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
+ .appendTextFormatted("Domain decomposition rank %d, coordinates %d %d %d\n",
+ dd->rank,
+ dd->ci[XX],
+ dd->ci[YY],
+ dd->ci[ZZ]);
if (debug)
{
- fprintf(debug, "Domain decomposition rank %d, coordinates %d %d %d\n\n", dd->rank,
- dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
+ fprintf(debug,
+ "Domain decomposition rank %d, coordinates %d %d %d\n\n",
+ dd->rank,
+ dd->ci[XX],
+ dd->ci[YY],
+ dd->ci[ZZ]);
}
}
buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
}
/* Communicate the ddindex to simulation nodeid index */
- MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
- cr->mpi_comm_mysim);
+ MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
}
#else
GMX_UNUSED_VALUE(dd);
static CartesianRankSetup split_communicator(const gmx::MDLogger& mdlog,
t_commrec* cr,
- bool gmx_unused reorder,
- const DDRankSetup& ddRankSetup,
- ivec ddCellIndex,
- std::vector<int>* pmeRanks)
+ const DdRankOrder ddRankOrder,
+ bool gmx_unused reorder,
+ const DDRankSetup& ddRankSetup,
+ ivec ddCellIndex,
+ std::vector<int>* pmeRanks)
{
CartesianRankSetup cartSetup;
- const DdRankOrder ddRankOrder = ddRankSetup.rankOrder;
-
cartSetup.bCartesianPP = (ddRankOrder == DdRankOrder::cartesian);
cartSetup.bCartesianPP_PME = false;
.appendTextFormatted(
"Number of PME-only ranks (%d) is not a multiple of nx*ny (%d*%d) or "
"nx*nz (%d*%d)",
- ddRankSetup.numRanksDoingPme, numDDCells[XX], numDDCells[YY],
- numDDCells[XX], numDDCells[ZZ]);
+ ddRankSetup.numRanksDoingPme,
+ numDDCells[XX],
+ numDDCells[YY],
+ numDDCells[XX],
+ numDDCells[ZZ]);
GMX_LOG(mdlog.info)
.appendText("Will not use a Cartesian communicator for PP <-> PME\n");
}
if (cartSetup.bCartesianPP_PME)
{
#if GMX_MPI
- int rank;
+ int rank = 0;
ivec periods;
GMX_LOG(mdlog.info)
.appendTextFormatted(
"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d",
- cartSetup.ntot[XX], cartSetup.ntot[YY], cartSetup.ntot[ZZ]);
+ cartSetup.ntot[XX],
+ cartSetup.ntot[YY],
+ cartSetup.ntot[ZZ]);
for (int i = 0; i < DIM; i++)
{
periods[i] = TRUE;
}
- MPI_Comm comm_cart;
- MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder),
- &comm_cart);
+ MPI_Comm comm_cart = MPI_COMM_NULL;
+ MPI_Cart_create(cr->mpi_comm_mysim, DIM, cartSetup.ntot, periods, static_cast<int>(reorder), &comm_cart);
MPI_Comm_rank(comm_cart, &rank);
if (MASTER(cr) && rank != 0)
{
MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, ddCellIndex);
GMX_LOG(mdlog.info)
- .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n", cr->sim_nodeid,
- ddCellIndex[XX], ddCellIndex[YY], ddCellIndex[ZZ]);
+ .appendTextFormatted("Cartesian rank %d, coordinates %d %d %d\n",
+ cr->sim_nodeid,
+ ddCellIndex[XX],
+ ddCellIndex[YY],
+ ddCellIndex[ZZ]);
if (ddCellIndex[cartSetup.cartpmedim] < numDDCells[cartSetup.cartpmedim])
{
}
/* Split the sim communicator into PP and PME only nodes */
- MPI_Comm_split(cr->mpi_comm_mysim, getThisRankDuties(cr),
- dd_index(cartSetup.ntot, ddCellIndex), &cr->mpi_comm_mygroup);
+ MPI_Comm_split(cr->mpi_comm_mysim,
+ getThisRankDuties(cr),
+ dd_index(cartSetup.ntot, ddCellIndex),
+ &cr->mpi_comm_mygroup);
#else
GMX_UNUSED_VALUE(ddCellIndex);
#endif
*/
static CartesianRankSetup makeGroupCommunicators(const gmx::MDLogger& mdlog,
const DDSettings& ddSettings,
+ const DdRankOrder ddRankOrder,
const DDRankSetup& ddRankSetup,
t_commrec* cr,
ivec ddCellIndex,
if (ddRankSetup.usePmeOnlyRanks)
{
/* Split the communicator into a PP and PME part */
- cartSetup = split_communicator(mdlog, cr, ddSettings.useCartesianReorder, ddRankSetup,
- ddCellIndex, pmeRanks);
+ cartSetup = split_communicator(
+ mdlog, cr, ddRankOrder, ddSettings.useCartesianReorder, ddRankSetup, ddCellIndex, pmeRanks);
}
else
{
dd->pme_receive_vir_ener = receive_vir_ener(dd, pmeRanks, cr);
if (debug)
{
- fprintf(debug, "My pme_nodeid %d receive ener %s\n", dd->pme_nodeid,
+ fprintf(debug,
+ "My pme_nodeid %d receive ener %s\n",
+ dd->pme_nodeid,
gmx::boolToString(dd->pme_receive_vir_ener));
}
}
static real* get_slb_frac(const gmx::MDLogger& mdlog, const char* dir, int nc, const char* size_string)
{
- real * slb_frac, tot;
- int i, n;
- double dbl;
-
- slb_frac = nullptr;
+ real* slb_frac = nullptr;
if (nc > 1 && size_string != nullptr)
{
GMX_LOG(mdlog.info).appendTextFormatted("Using static load balancing for the %s direction", dir);
snew(slb_frac, nc);
- tot = 0;
- for (i = 0; i < nc; i++)
+ real tot = 0;
+ for (int i = 0; i < nc; i++)
{
- dbl = 0;
+ double dbl = 0;
+ int n = 0;
sscanf(size_string, "%20lf%n", &dbl, &n);
if (dbl == 0)
{
gmx_fatal(FARGS,
"Incorrect or not enough DD cell size entries for direction %s: '%s'",
- dir, size_string);
+ dir,
+ size_string);
}
slb_frac[i] = dbl;
size_string += n;
}
/* Normalize */
std::string relativeCellSizes = "Relative cell sizes:";
- for (i = 0; i < nc; i++)
+ for (int i = 0; i < nc; i++)
{
slb_frac[i] /= tot;
relativeCellSizes += gmx::formatString(" %5.3f", slb_frac[i]);
return slb_frac;
}
-static int multi_body_bondeds_count(const gmx_mtop_t* mtop)
+static int multi_body_bondeds_count(const gmx_mtop_t& mtop)
{
- int n = 0;
- gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
- int nmol;
- while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(iloop, &nmol))
+ int n = 0;
+ for (const auto ilists : IListRange(mtop))
{
- for (auto& ilist : extractILists(*ilists, IF_BOND))
+ for (auto& ilist : extractILists(ilists.list(), IF_BOND))
{
if (NRAL(ilist.functionType) > 2)
{
- n += nmol * (ilist.iatoms.size() / ilistStride(ilist));
+ n += ilists.nmol() * (ilist.iatoms.size() / ilistStride(ilist));
}
}
}
static int dd_getenv(const gmx::MDLogger& mdlog, const char* env_var, int def)
{
- char* val;
- int nst;
-
- nst = def;
- val = getenv(env_var);
+ int nst = def;
+ char* val = getenv(env_var);
if (val)
{
if (sscanf(val, "%20d", &nst) <= 0)
return nst;
}
-static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
+static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec& inputrec, const gmx::MDLogger& mdlog)
{
- if (ir->pbcType == PbcType::Screw
+ if (inputrec.pbcType == PbcType::Screw
&& (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
{
- gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
- c_pbcTypeNames[ir->pbcType].c_str());
+ gmx_fatal(FARGS,
+ "With pbc=%s can only do domain decomposition in the x-direction",
+ c_pbcTypeNames[inputrec.pbcType].c_str());
}
- if (ir->nstlist == 0)
+ if (inputrec.nstlist == 0)
{
gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
}
- if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
+ if (inputrec.comm_mode == ComRemovalAlgorithm::Angular && inputrec.pbcType != PbcType::No)
{
GMX_LOG(mdlog.warning)
.appendText(
* \param [in] dlbOption Enum value for the DLB option.
* \param [in] bRecordLoad True if the load balancer is recording load information.
* \param [in] mdrunOptions Options for mdrun.
- * \param [in] ir Pointer mdrun to input parameters.
+ * \param [in] inputrec Input parameters for mdrun.
* \returns DLB initial/startup state.
*/
static DlbState determineInitialDlbState(const gmx::MDLogger& mdlog,
DlbOption dlbOption,
gmx_bool bRecordLoad,
const gmx::MdrunOptions& mdrunOptions,
- const t_inputrec* ir)
+ const t_inputrec& inputrec)
{
DlbState dlbState = DlbState::offCanTurnOn;
}
/* Unsupported integrators */
- if (!EI_DYNAMICS(ir->eI))
+ if (!EI_DYNAMICS(inputrec.eI))
{
- auto reasonStr = gmx::formatString(
- "it is only supported with dynamics, not with integrator '%s'.", EI(ir->eI));
+ auto reasonStr =
+ gmx::formatString("it is only supported with dynamics, not with integrator '%s'.",
+ enumValueToString(inputrec.eI));
return forceDlbOffOrBail(dlbState, reasonStr, mdlog);
}
"ensured.",
mdlog);
default:
- gmx_fatal(FARGS, "Death horror: undefined case (%d) for load balancing choice",
+ gmx_fatal(FARGS,
+ "Death horror: undefined case (%d) for load balancing choice",
static_cast<int>(dlbState));
}
}
return comm;
}
-/* Returns whether mtop contains constraints and/or vsites */
-static bool systemHasConstraintsOrVsites(const gmx_mtop_t& mtop)
+static void setupUpdateGroups(const gmx::MDLogger& mdlog,
+ const gmx_mtop_t& mtop,
+ ArrayRef<const RangePartitioning> updateGroupingsPerMoleculeType,
+ const bool useUpdateGroups,
+ const real maxUpdateGroupRadius,
+ DDSystemInfo* systemInfo)
{
- auto ilistLoop = gmx_mtop_ilistloop_init(mtop);
- int nmol;
- while (const InteractionLists* ilists = gmx_mtop_ilistloop_next(ilistLoop, &nmol))
- {
- if (!extractILists(*ilists, IF_CONSTRAINT | IF_VSITE).empty())
- {
- return true;
- }
- }
-
- return false;
-}
-
-static void setupUpdateGroups(const gmx::MDLogger& mdlog,
- const gmx_mtop_t& mtop,
- const t_inputrec& inputrec,
- const real cutoffMargin,
- DDSystemInfo* systemInfo)
-{
- /* When we have constraints and/or vsites, it is beneficial to use
- * update groups (when possible) to allow independent update of groups.
- */
- if (!systemHasConstraintsOrVsites(mtop))
- {
- /* No constraints or vsites, atoms can be updated independently */
- return;
- }
-
- systemInfo->updateGroupingPerMoleculetype = gmx::makeUpdateGroups(mtop);
- systemInfo->useUpdateGroups = (!systemInfo->updateGroupingPerMoleculetype.empty()
- && getenv("GMX_NO_UPDATEGROUPS") == nullptr);
+ systemInfo->updateGroupingsPerMoleculeType = updateGroupingsPerMoleculeType;
+ systemInfo->useUpdateGroups = useUpdateGroups;
+ systemInfo->maxUpdateGroupRadius = maxUpdateGroupRadius;
if (systemInfo->useUpdateGroups)
{
for (const auto& molblock : mtop.molblock)
{
numUpdateGroups += molblock.nmol
- * systemInfo->updateGroupingPerMoleculetype[molblock.type].numBlocks();
+ * systemInfo->updateGroupingsPerMoleculeType[molblock.type].numBlocks();
}
- systemInfo->maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
- mtop, systemInfo->updateGroupingPerMoleculetype, maxReferenceTemperature(inputrec));
-
- /* To use update groups, the large domain-to-domain cutoff distance
- * should be compatible with the box size.
- */
- systemInfo->useUpdateGroups = (atomToAtomIntoDomainToDomainCutoff(*systemInfo, 0) < cutoffMargin);
-
- if (systemInfo->useUpdateGroups)
- {
- GMX_LOG(mdlog.info)
- .appendTextFormatted(
- "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
- "nm\n",
- numUpdateGroups, mtop.natoms / static_cast<double>(numUpdateGroups),
- systemInfo->maxUpdateGroupRadius);
- }
- else
- {
- GMX_LOG(mdlog.info)
- .appendTextFormatted(
- "The combination of rlist and box size prohibits the use of update "
- "groups\n");
- systemInfo->updateGroupingPerMoleculetype.clear();
- }
+ GMX_LOG(mdlog.info)
+ .appendTextFormatted(
+ "Using update groups, nr %d, average size %.1f atoms, max. radius %.3f "
+ "nm\n",
+ numUpdateGroups,
+ mtop.natoms / static_cast<double>(numUpdateGroups),
+ systemInfo->maxUpdateGroupRadius);
}
}
/* Returns whether molecules are always whole, i.e. not broken by PBC */
static bool moleculesAreAlwaysWhole(const gmx_mtop_t& mtop,
const bool useUpdateGroups,
- gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingPerMoleculetype)
+ gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingsPerMoleculeType)
{
if (useUpdateGroups)
{
- GMX_RELEASE_ASSERT(updateGroupingPerMoleculetype.size() == mtop.moltype.size(),
+ GMX_RELEASE_ASSERT(updateGroupingsPerMoleculeType.size() == mtop.moltype.size(),
"Need one grouping per moltype");
for (size_t mol = 0; mol < mtop.moltype.size(); mol++)
{
- if (updateGroupingPerMoleculetype[mol].numBlocks() > 1)
+ if (updateGroupingsPerMoleculeType[mol].numBlocks() > 1)
{
return false;
}
}
/*! \brief Generate the simulation system information */
-static DDSystemInfo getSystemInfo(const gmx::MDLogger& mdlog,
- DDRole ddRole,
- MPI_Comm communicator,
- const DomdecOptions& options,
- const gmx_mtop_t& mtop,
- const t_inputrec& ir,
- const matrix box,
- gmx::ArrayRef<const gmx::RVec> xGlobal)
+static DDSystemInfo getSystemInfo(const gmx::MDLogger& mdlog,
+ DDRole ddRole,
+ MPI_Comm communicator,
+ const DomdecOptions& options,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
+ const matrix box,
+ ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
+ const bool useUpdateGroups,
+ const real maxUpdateGroupRadius,
+ gmx::ArrayRef<const gmx::RVec> xGlobal)
{
const real tenPercentMargin = 1.1;
DDSystemInfo systemInfo;
- /* We need to decide on update groups early, as this affects communication distances */
- systemInfo.useUpdateGroups = false;
- if (ir.cutoff_scheme == ecutsVERLET)
- {
- real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
- setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
- }
+ setupUpdateGroups(
+ mdlog, mtop, updateGroupingPerMoleculeType, useUpdateGroups, maxUpdateGroupRadius, &systemInfo);
systemInfo.moleculesAreAlwaysWhole = moleculesAreAlwaysWhole(
- mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingPerMoleculetype);
+ mtop, systemInfo.useUpdateGroups, systemInfo.updateGroupingsPerMoleculeType);
systemInfo.haveInterDomainBondeds =
(!systemInfo.moleculesAreAlwaysWhole || mtop.bIntermolecularInteractions);
systemInfo.haveInterDomainMultiBodyBondeds =
- (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(&mtop) > 0);
+ (systemInfo.haveInterDomainBondeds && multi_body_bondeds_count(mtop) > 0);
if (systemInfo.useUpdateGroups)
{
- systemInfo.haveSplitConstraints = false;
- systemInfo.haveSplitSettles = false;
+ systemInfo.mayHaveSplitConstraints = false;
+ systemInfo.mayHaveSplitSettles = false;
}
else
{
- systemInfo.haveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
- || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
- systemInfo.haveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
+ systemInfo.mayHaveSplitConstraints = (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0
+ || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0);
+ systemInfo.mayHaveSplitSettles = (gmx_mtop_ftype_count(mtop, F_SETTLE) > 0);
}
if (ir.rlist == 0)
*/
constexpr real c_chanceThatAtomMovesBeyondDomain = 1e-12;
const real limitForAtomDisplacement = minCellSizeForAtomDisplacement(
- mtop, ir, systemInfo.updateGroupingPerMoleculetype, c_chanceThatAtomMovesBeyondDomain);
+ mtop, ir, systemInfo.updateGroupingsPerMoleculeType, c_chanceThatAtomMovesBeyondDomain);
GMX_LOG(mdlog.info).appendTextFormatted("Minimum cell size due to atom displacement: %.3f nm", limitForAtomDisplacement);
systemInfo.cellsizeLimit = std::max(systemInfo.cellsizeLimit, limitForAtomDisplacement);
}
else
{
- real r_2b, r_mb;
+ real r_2b = 0;
+ real r_mb = 0;
if (ddRole == DDRole::Master)
{
- dd_bonded_cg_distance(mdlog, &mtop, &ir, xGlobal, box,
- options.checkBondedInteractions, &r_2b, &r_mb);
+ dd_bonded_cg_distance(mdlog, mtop, ir, xGlobal, box, options.ddBondedChecking, &r_2b, &r_mb);
}
gmx_bcast(sizeof(r_2b), &r_2b, communicator);
gmx_bcast(sizeof(r_mb), &r_mb, communicator);
}
systemInfo.constraintCommunicationRange = 0;
- if (systemInfo.haveSplitConstraints && options.constraintCommunicationRange <= 0)
+ if (systemInfo.mayHaveSplitConstraints && options.constraintCommunicationRange <= 0)
{
/* There is a cell size limit due to the constraints (P-LINCS) */
systemInfo.constraintCommunicationRange = gmx::constr_r_max(mdlog, &mtop, &ir);
{
if (options.numCells[XX] <= 0 && (ddGridSetup.numDomains[XX] == 0))
{
- char buf[STRLEN];
- gmx_bool bC = (systemInfo.haveSplitConstraints
- && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
- sprintf(buf, "Change the number of ranks or mdrun option %s%s%s", !bC ? "-rdd" : "-rcon",
- ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
- bC ? " or your LINCS settings" : "");
-
- gmx_fatal_collective(FARGS, communicator, ddRole == DDRole::Master,
+ const bool bC = (systemInfo.mayHaveSplitConstraints
+ && systemInfo.constraintCommunicationRange > systemInfo.minCutoffForMultiBody);
+ std::string message =
+ gmx::formatString("Change the number of ranks or mdrun option %s%s%s",
+ !bC ? "-rdd" : "-rcon",
+ ddSettings.initialDlbState != DlbState::offUser ? " or -dds" : "",
+ bC ? " or your LINCS settings" : "");
+
+ gmx_fatal_collective(FARGS,
+ communicator,
+ ddRole == DDRole::Master,
"There is no domain decomposition for %d ranks that is compatible "
"with the given box and a minimum cell size of %g nm\n"
"%s\n"
"Look in the log file for details on the domain decomposition",
- numNodes - ddGridSetup.numPmeOnlyRanks, cellsizeLimit, buf);
+ numNodes - ddGridSetup.numPmeOnlyRanks,
+ cellsizeLimit,
+ message.c_str());
}
const real acs = average_cellsize_min(ddbox, ddGridSetup.numDomains);
else
{
gmx_fatal_collective(
- FARGS, communicator, ddRole == DDRole::Master,
+ FARGS,
+ communicator,
+ ddRole == DDRole::Master,
"The initial cell size (%f) is smaller than the cell size limit (%f), change "
"options -dd, -rdd or -rcon, see the log file for details",
- acs, cellsizeLimit);
+ acs,
+ cellsizeLimit);
}
}
ddGridSetup.numDomains[XX] * ddGridSetup.numDomains[YY] * ddGridSetup.numDomains[ZZ];
if (numNodes - numPPRanks != ddGridSetup.numPmeOnlyRanks)
{
- gmx_fatal_collective(FARGS, communicator, ddRole == DDRole::Master,
+ gmx_fatal_collective(FARGS,
+ communicator,
+ ddRole == DDRole::Master,
"The size of the domain decomposition grid (%d) does not match the "
"number of PP ranks (%d). The total number of ranks is %d",
- numPPRanks, numNodes - ddGridSetup.numPmeOnlyRanks, numNodes);
+ numPPRanks,
+ numNodes - ddGridSetup.numPmeOnlyRanks,
+ numNodes);
}
if (ddGridSetup.numPmeOnlyRanks > numPPRanks)
{
- gmx_fatal_collective(FARGS, communicator, ddRole == DDRole::Master,
+ gmx_fatal_collective(FARGS,
+ communicator,
+ ddRole == DDRole::Master,
"The number of separate PME ranks (%d) is larger than the number of "
"PP ranks (%d), this is not supported.",
- ddGridSetup.numPmeOnlyRanks, numPPRanks);
+ ddGridSetup.numPmeOnlyRanks,
+ numPPRanks);
}
}
/*! \brief Set the cell size and interaction limits, as well as the DD grid */
static DDRankSetup getDDRankSetup(const gmx::MDLogger& mdlog,
int numNodes,
+ const DdRankOrder rankOrder,
const DDGridSetup& ddGridSetup,
const t_inputrec& ir)
{
GMX_LOG(mdlog.info)
.appendTextFormatted("Domain decomposition grid %d x %d x %d, separate PME ranks %d",
- ddGridSetup.numDomains[XX], ddGridSetup.numDomains[YY],
- ddGridSetup.numDomains[ZZ], ddGridSetup.numPmeOnlyRanks);
+ ddGridSetup.numDomains[XX],
+ ddGridSetup.numDomains[YY],
+ ddGridSetup.numDomains[ZZ],
+ ddGridSetup.numPmeOnlyRanks);
DDRankSetup ddRankSetup;
+ ddRankSetup.rankOrder = rankOrder;
+
ddRankSetup.numPPRanks = numNodes - ddGridSetup.numPmeOnlyRanks;
copy_ivec(ddGridSetup.numDomains, ddRankSetup.numPPCells);
}
GMX_LOG(mdlog.info)
.appendTextFormatted("PME domain decomposition: %d x %d x %d",
- ddRankSetup.npmenodes_x, ddRankSetup.npmenodes_y, 1);
+ ddRankSetup.npmenodes_x,
+ ddRankSetup.npmenodes_y,
+ 1);
}
else
{
const DDSystemInfo& systemInfo,
const DDGridSetup& ddGridSetup,
const int numPPRanks,
- const gmx_mtop_t* mtop,
- const t_inputrec* ir,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
const gmx_ddbox_t& ddbox)
{
gmx_domdec_comm_t* comm = dd->comm;
* but that is not yet available here. But this anyhow only
* affect performance up to the second dd_partition_system call.
*/
- const int homeAtomCountEstimate = mtop->natoms / numPPRanks;
+ const int homeAtomCountEstimate = mtop.natoms / numPPRanks;
comm->updateGroupsCog = std::make_unique<gmx::UpdateGroupsCog>(
- *mtop, systemInfo.updateGroupingPerMoleculetype, maxReferenceTemperature(*ir),
- homeAtomCountEstimate);
+ mtop, systemInfo.updateGroupingsPerMoleculeType, maxReferenceTemperature(ir), homeAtomCountEstimate);
}
/* Set the DD setup given by ddGridSetup */
fprintf(debug,
"Bonded atom communication beyond the cut-off: %s\n"
"cellsize limit %f\n",
- gmx::boolToString(systemInfo.filterBondedCommunication), comm->cellsize_limit);
+ gmx::boolToString(systemInfo.filterBondedCommunication),
+ comm->cellsize_limit);
}
if (ddRole == DDRole::Master)
}
}
-void dd_init_bondeds(FILE* fplog,
- gmx_domdec_t* dd,
- const gmx_mtop_t& mtop,
- const gmx::VirtualSitesHandler* vsite,
- const t_inputrec* ir,
- gmx_bool bBCheck,
- gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
-{
- gmx_domdec_comm_t* comm;
-
- dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
-
- comm = dd->comm;
-
- if (comm->systemInfo.filterBondedCommunication)
- {
- /* Communicate atoms beyond the cut-off for bonded interactions */
- comm->bondedLinks = makeBondedLinks(mtop, cginfo_mb);
- }
- else
- {
- /* Only communicate atoms based on cut-off */
- comm->bondedLinks = nullptr;
- }
-}
-
static void writeSettings(gmx::TextWriter* log,
gmx_domdec_t* dd,
- const gmx_mtop_t* mtop,
- const t_inputrec* ir,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
gmx_bool bDynLoadBal,
real dlb_scale,
const gmx_ddbox_t* ddbox)
{
- gmx_domdec_comm_t* comm;
- int d;
- ivec np;
- real limit, shrink;
-
- comm = dd->comm;
+ gmx_domdec_comm_t* comm = dd->comm;
if (bDynLoadBal)
{
log->writeString("The maximum number of communication pulses is:");
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), comm->cd[d].np_dlb);
}
comm->cellsize_limit);
log->writeLineFormatted("The requested allowed shrink of DD cells (option -dds) is: %.2f", dlb_scale);
log->writeString("The allowed shrink of domain decomposition cells is:");
- for (d = 0; d < DIM; d++)
+ for (int d = 0; d < DIM; d++)
{
if (dd->numCells[d] > 1)
{
- if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
- {
- shrink = 0;
- }
- else
- {
- shrink = comm->cellsize_min_dlb[d]
- / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
- }
+ const real shrink =
+ (d >= ddbox->npbcdim && dd->numCells[d] == 2)
+ ? 0
+ : comm->cellsize_min_dlb[d]
+ / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
}
}
}
else
{
+ ivec np;
set_dd_cell_sizes_slb(dd, ddbox, setcellsizeslbPULSE_ONLY, np);
log->writeString("The initial number of communication pulses is:");
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
log->writeStringFormatted(" %c %d", dim2char(dd->dim[d]), np[dd->dim[d]]);
}
log->ensureLineBreak();
log->writeString("The initial domain decomposition cell size is:");
- for (d = 0; d < DIM; d++)
+ for (int d = 0; d < DIM; d++)
{
if (dd->numCells[d] > 1)
{
}
const bool haveInterDomainVsites =
- (countInterUpdategroupVsites(*mtop, comm->systemInfo.updateGroupingPerMoleculetype) != 0);
+ (countInterUpdategroupVsites(mtop, comm->systemInfo.updateGroupingsPerMoleculeType) != 0);
if (comm->systemInfo.haveInterDomainBondeds || haveInterDomainVsites
- || comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
+ || comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
{
std::string decompUnits;
if (comm->systemInfo.useUpdateGroups)
log->writeLineFormatted("The maximum allowed distance for %s involved in interactions is:",
decompUnits.c_str());
- log->writeLineFormatted("%40s %-7s %6.3f nm", "non-bonded interactions", "",
- comm->systemInfo.cutoff);
+ log->writeLineFormatted(
+ "%40s %-7s %6.3f nm", "non-bonded interactions", "", comm->systemInfo.cutoff);
+ real limit = 0;
if (bDynLoadBal)
{
limit = dd->comm->cellsize_limit;
"deformation)");
}
limit = dd->comm->cellsize_min[XX];
- for (d = 1; d < DIM; d++)
+ for (int d = 1; d < DIM; d++)
{
limit = std::min(limit, dd->comm->cellsize_min[d]);
}
if (comm->systemInfo.haveInterDomainBondeds)
{
- log->writeLineFormatted("%40s %-7s %6.3f nm", "two-body bonded interactions", "(-rdd)",
+ log->writeLineFormatted("%40s %-7s %6.3f nm",
+ "two-body bonded interactions",
+ "(-rdd)",
std::max(comm->systemInfo.cutoff, comm->cutoff_mbody));
- log->writeLineFormatted("%40s %-7s %6.3f nm", "multi-body bonded interactions",
+ log->writeLineFormatted("%40s %-7s %6.3f nm",
+ "multi-body bonded interactions",
"(-rdd)",
(comm->systemInfo.filterBondedCommunication || isDlbOn(dd->comm))
? comm->cutoff_mbody
{
log->writeLineFormatted("%40s %-7s %6.3f nm", "virtual site constructions", "(-rcon)", limit);
}
- if (comm->systemInfo.haveSplitConstraints || comm->systemInfo.haveSplitSettles)
+ if (comm->systemInfo.mayHaveSplitConstraints || comm->systemInfo.mayHaveSplitSettles)
{
std::string separation =
- gmx::formatString("atoms separated by up to %d constraints", 1 + ir->nProjOrder);
+ gmx::formatString("atoms separated by up to %d constraints", 1 + ir.nProjOrder);
log->writeLineFormatted("%40s %-7s %6.3f nm\n", separation.c_str(), "(-rcon)", limit);
}
log->ensureLineBreak();
static void logSettings(const gmx::MDLogger& mdlog,
gmx_domdec_t* dd,
- const gmx_mtop_t* mtop,
- const t_inputrec* ir,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
real dlb_scale,
const gmx_ddbox_t* ddbox)
{
static void set_cell_limits_dlb(const gmx::MDLogger& mdlog,
gmx_domdec_t* dd,
real dlb_scale,
- const t_inputrec* ir,
+ const t_inputrec& inputrec,
const gmx_ddbox_t* ddbox)
{
- gmx_domdec_comm_t* comm;
- int d, dim, npulse, npulse_d_max, npulse_d;
- gmx_bool bNoCutOff;
+ int npulse = 0;
+ int npulse_d_max = 0;
+ int npulse_d = 0;
- comm = dd->comm;
+ gmx_domdec_comm_t* comm = dd->comm;
- bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
+ bool bNoCutOff = (inputrec.rvdw == 0 || inputrec.rcoulomb == 0);
/* Determine the maximum number of comm. pulses in one dimension */
{
/* See if we can do with less pulses, based on dlb_scale */
npulse_d_max = 0;
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
- dim = dd->dim[d];
+ int dim = dd->dim[d];
npulse_d = static_cast<int>(
1
+ dd->numCells[dim] * comm->systemInfo.cutoff
}
/* This env var can override npulse */
- d = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
- if (d > 0)
+ const int ddPulseEnv = dd_getenv(mdlog, "GMX_DD_NPULSE", 0);
+ if (ddPulseEnv > 0)
{
- npulse = d;
+ npulse = ddPulseEnv;
}
comm->maxpulse = 1;
- comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
- for (d = 0; d < dd->ndim; d++)
+ comm->bVacDLBNoLimit = (inputrec.pbcType == PbcType::No);
+ for (int d = 0; d < dd->ndim; d++)
{
comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
}
comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
/* Set the minimum cell size for each DD dimension */
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
if (comm->bVacDLBNoLimit || comm->cd[d].np_dlb * comm->cellsize_limit >= comm->systemInfo.cutoff)
{
return dd.comm->systemInfo.moleculesAreAlwaysWhole;
}
-gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
+bool dd_bonded_molpbc(const gmx_domdec_t& dd, PbcType pbcType)
{
/* If each molecule is a single charge group
* or we use domain decomposition for each periodic dimension,
* we do not need to take pbc into account for the bonded interactions.
*/
- return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
- && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
- && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
+ return (pbcType != PbcType::No && dd.comm->systemInfo.haveInterDomainBondeds
+ && !(dd.numCells[XX] > 1 && dd.numCells[YY] > 1
+ && (dd.numCells[ZZ] > 1 || pbcType == PbcType::XY)));
}
/*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
static void set_ddgrid_parameters(const gmx::MDLogger& mdlog,
gmx_domdec_t* dd,
real dlb_scale,
- const gmx_mtop_t* mtop,
- const t_inputrec* ir,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& inputrec,
const gmx_ddbox_t* ddbox)
{
gmx_domdec_comm_t* comm = dd->comm;
DDRankSetup& ddRankSetup = comm->ddRankSetup;
- if (EEL_PME(ir->coulombtype) || EVDW_PME(ir->vdwtype))
+ if (EEL_PME(inputrec.coulombtype) || EVDW_PME(inputrec.vdwtype))
{
init_ddpme(dd, &ddRankSetup.ddpme[0], 0);
if (ddRankSetup.npmedecompdim >= 2)
ddRankSetup.numRanksDoingPme = 0;
if (dd->pme_nodeid >= 0)
{
- gmx_fatal_collective(FARGS, dd->mpi_comm_all, DDMASTER(dd),
+ gmx_fatal_collective(FARGS,
+ dd->mpi_comm_all,
+ DDMASTER(dd),
"Can not have separate PME ranks without PME electrostatics");
}
}
}
if (!isDlbDisabled(comm))
{
- set_cell_limits_dlb(mdlog, dd, dlb_scale, ir, ddbox);
+ set_cell_limits_dlb(mdlog, dd, dlb_scale, inputrec, ddbox);
}
- logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
+ logSettings(mdlog, dd, mtop, inputrec, dlb_scale, ddbox);
- real vol_frac;
- if (ir->pbcType == PbcType::No)
- {
- vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
- }
- else
- {
- vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
- / static_cast<double>(dd->nnodes);
- }
+ const real vol_frac = (inputrec.pbcType == PbcType::No)
+ ? (1 - 1 / static_cast<double>(dd->nnodes))
+ : ((1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
+ / static_cast<double>(dd->nnodes));
if (debug)
{
fprintf(debug, "Volume fraction for all DD zones: %f\n", vol_frac);
}
- int natoms_tot = mtop->natoms;
+ int natoms_tot = mtop.natoms;
dd->ga2la = new gmx_ga2la_t(natoms_tot, static_cast<int>(vol_frac * natoms_tot));
}
ddSettings.recordLoad = (wallcycle_have_counter() && recload > 0);
}
- ddSettings.initialDlbState = determineInitialDlbState(mdlog, options.dlbOption,
- ddSettings.recordLoad, mdrunOptions, &ir);
+ ddSettings.initialDlbState =
+ determineInitialDlbState(mdlog, options.dlbOption, ddSettings.recordLoad, mdrunOptions, ir);
GMX_LOG(mdlog.info)
.appendTextFormatted("Dynamic load balancing: %s",
- edlbs_names[static_cast<int>(ddSettings.initialDlbState)]);
+ enumValueToString(ddSettings.initialDlbState));
return ddSettings;
}
gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
+gmx_domdec_t::~gmx_domdec_t() = default;
+
namespace gmx
{
{
public:
//! Constructor
- Impl(const MDLogger& mdlog,
- t_commrec* cr,
- const DomdecOptions& options,
- const MdrunOptions& mdrunOptions,
- const gmx_mtop_t& mtop,
- const t_inputrec& ir,
- const matrix box,
- ArrayRef<const RVec> xGlobal);
+ Impl(const MDLogger& mdlog,
+ t_commrec* cr,
+ const DomdecOptions& options,
+ const MdrunOptions& mdrunOptions,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
+ const MDModulesNotifiers& notifiers,
+ const matrix box,
+ ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
+ bool useUpdateGroups,
+ real maxUpdateGroupRadius,
+ ArrayRef<const RVec> xGlobal,
+ bool useGpuForNonbonded,
+ bool useGpuForPme);
//! Build the resulting DD manager
gmx_domdec_t* build(LocalAtomSetManager* atomSets);
const gmx_mtop_t& mtop_;
//! User input values from the tpr file
const t_inputrec& ir_;
+ //! Notifiers of MDModules, for callbacks during domain decomposition setup
+ const MDModulesNotifiers& notifiers_;
//! }
//! Internal objects used in constructing DD
//! }
};
-DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog,
- t_commrec* cr,
- const DomdecOptions& options,
- const MdrunOptions& mdrunOptions,
- const gmx_mtop_t& mtop,
- const t_inputrec& ir,
- const matrix box,
- ArrayRef<const RVec> xGlobal) :
- mdlog_(mdlog),
- cr_(cr),
- options_(options),
- mtop_(mtop),
- ir_(ir)
+DomainDecompositionBuilder::Impl::Impl(const MDLogger& mdlog,
+ t_commrec* cr,
+ const DomdecOptions& options,
+ const MdrunOptions& mdrunOptions,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
+ const MDModulesNotifiers& notifiers,
+ const matrix box,
+ ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
+ const bool useUpdateGroups,
+ const real maxUpdateGroupRadius,
+ ArrayRef<const RVec> xGlobal,
+ bool useGpuForNonbonded,
+ bool useGpuForPme) :
+ mdlog_(mdlog), cr_(cr), options_(options), mtop_(mtop), ir_(ir), notifiers_(notifiers)
{
GMX_LOG(mdlog_.info).appendTextFormatted("\nInitializing Domain Decomposition on %d ranks", cr_->sizeOfDefaultCommunicator);
srand(1 + cr_->rankInDefaultCommunicator);
}
- systemInfo_ = getSystemInfo(mdlog_, MASTER(cr_) ? DDRole::Master : DDRole::Agent,
- cr->mpiDefaultCommunicator, options_, mtop_, ir_, box, xGlobal);
+ systemInfo_ = getSystemInfo(mdlog_,
+ MASTER(cr_) ? DDRole::Master : DDRole::Agent,
+ cr->mpiDefaultCommunicator,
+ options_,
+ mtop_,
+ ir_,
+ box,
+ updateGroupingPerMoleculeType,
+ useUpdateGroups,
+ maxUpdateGroupRadius,
+ xGlobal);
const int numRanksRequested = cr_->sizeOfDefaultCommunicator;
const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
- checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype),
- options_.numPmeRanks, checkForLargePrimeFactors);
+
+
+ /* Checks for ability to use PME-only ranks */
+ auto separatePmeRanksPermitted = checkForSeparatePmeRanks(
+ notifiers_, options_, numRanksRequested, useGpuForNonbonded, useGpuForPme);
+
+ /* Checks for validity of requested Ranks setup */
+ checkForValidRankCountRequests(numRanksRequested,
+ EEL_PME(ir_.coulombtype) | EVDW_PME(ir_.vdwtype),
+ options_.numPmeRanks,
+ separatePmeRanksPermitted,
+ checkForLargePrimeFactors);
// DD grid setup uses a more different cell size limit for
// automated setup than the one in systemInfo_. The latter is used
// in set_dd_limits() to configure DLB, for example.
const real gridSetupCellsizeLimit =
- getDDGridSetupCellSizeLimit(mdlog_, !isDlbDisabled(ddSettings_.initialDlbState),
- options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
- ddGridSetup_ =
- getDDGridSetup(mdlog_, MASTER(cr_) ? DDRole::Master : DDRole::Agent,
- cr->mpiDefaultCommunicator, numRanksRequested, options_, ddSettings_,
- systemInfo_, gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
- checkDDGridSetup(ddGridSetup_, MASTER(cr_) ? DDRole::Master : DDRole::Agent,
- cr->mpiDefaultCommunicator, cr->sizeOfDefaultCommunicator, options_,
- ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
+ getDDGridSetupCellSizeLimit(mdlog_,
+ !isDlbDisabled(ddSettings_.initialDlbState),
+ options_.dlbScaling,
+ ir_,
+ systemInfo_.cellsizeLimit);
+ ddGridSetup_ = getDDGridSetup(mdlog_,
+ MASTER(cr_) ? DDRole::Master : DDRole::Agent,
+ cr->mpiDefaultCommunicator,
+ numRanksRequested,
+ options_,
+ ddSettings_,
+ systemInfo_,
+ gridSetupCellsizeLimit,
+ mtop_,
+ ir_,
+ separatePmeRanksPermitted,
+ box,
+ xGlobal,
+ &ddbox_);
+ checkDDGridSetup(ddGridSetup_,
+ MASTER(cr_) ? DDRole::Master : DDRole::Agent,
+ cr->mpiDefaultCommunicator,
+ cr->sizeOfDefaultCommunicator,
+ options_,
+ ddSettings_,
+ systemInfo_,
+ gridSetupCellsizeLimit,
+ ddbox_);
cr_->npmenodes = ddGridSetup_.numPmeOnlyRanks;
- ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, ddGridSetup_, ir_);
- ddRankSetup_ = getDDRankSetup(mdlog_, cr_->sizeOfDefaultCommunicator, options_.rankOrder,
- ddGridSetup_, ir_);
++ ddRankSetup_ = getDDRankSetup(
++ mdlog_, cr_->sizeOfDefaultCommunicator, options_.rankOrder, ddGridSetup_, ir_);
/* Generate the group communicator, also decides the duty of each rank */
- cartSetup_ = makeGroupCommunicators(mdlog_, ddSettings_, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
+ cartSetup_ = makeGroupCommunicators(
+ mdlog_, ddSettings_, options_.rankOrder, ddRankSetup_, cr_, ddCellIndex_, &pmeRanks_);
}
gmx_domdec_t* DomainDecompositionBuilder::Impl::build(LocalAtomSetManager* atomSets)
dd->comm->ddRankSetup = ddRankSetup_;
dd->comm->cartesianRankSetup = cartSetup_;
- set_dd_limits(mdlog_, MASTER(cr_) ? DDRole::Master : DDRole::Agent, dd, options_, ddSettings_,
- systemInfo_, ddGridSetup_, ddRankSetup_.numPPRanks, &mtop_, &ir_, ddbox_);
+ set_dd_limits(mdlog_,
+ MASTER(cr_) ? DDRole::Master : DDRole::Agent,
+ dd,
+ options_,
+ ddSettings_,
+ systemInfo_,
+ ddGridSetup_,
+ ddRankSetup_.numPPRanks,
+ mtop_,
+ ir_,
+ ddbox_);
setupGroupCommunication(mdlog_, ddSettings_, pmeRanks_, cr_, mtop_.natoms, dd);
if (thisRankHasDuty(cr_, DUTY_PP))
{
- set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, &mtop_, &ir_, &ddbox_);
+ set_ddgrid_parameters(mdlog_, dd, options_.dlbScaling, mtop_, ir_, &ddbox_);
setup_neighbor_relations(dd);
}
/* Set overallocation to avoid frequent reallocation of arrays */
- set_over_alloc_dd(TRUE);
+ set_over_alloc_dd(true);
dd->atomSets = atomSets;
+ dd->localTopologyChecker = std::make_unique<LocalTopologyChecker>(
+ mdlog_, cr_, mtop_, dd->comm->systemInfo.useUpdateGroups);
+
return dd;
}
-DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger& mdlog,
- t_commrec* cr,
- const DomdecOptions& options,
- const MdrunOptions& mdrunOptions,
- const gmx_mtop_t& mtop,
- const t_inputrec& ir,
- const matrix box,
- ArrayRef<const RVec> xGlobal) :
- impl_(new Impl(mdlog, cr, options, mdrunOptions, mtop, ir, box, xGlobal))
+DomainDecompositionBuilder::DomainDecompositionBuilder(const MDLogger& mdlog,
+ t_commrec* cr,
+ const DomdecOptions& options,
+ const MdrunOptions& mdrunOptions,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
+ const MDModulesNotifiers& notifiers,
+ const matrix box,
+ ArrayRef<const RangePartitioning> updateGroupingPerMoleculeType,
+ const bool useUpdateGroups,
+ const real maxUpdateGroupRadius,
+ ArrayRef<const RVec> xGlobal,
+ const bool useGpuForNonbonded,
+ const bool useGpuForPme) :
+ impl_(new Impl(mdlog,
+ cr,
+ options,
+ mdrunOptions,
+ mtop,
+ ir,
+ notifiers,
+ box,
+ updateGroupingPerMoleculeType,
+ useUpdateGroups,
+ maxUpdateGroupRadius,
+ xGlobal,
+ useGpuForNonbonded,
+ useGpuForPme))
{
}
static gmx_bool test_dd_cutoff(const t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
{
gmx_ddbox_t ddbox;
- int d, dim, np;
- real inv_cell_size;
- int LocallyLimited;
+ int LocallyLimited = 0;
const auto* dd = cr->dd;
LocallyLimited = 0;
- for (d = 0; d < dd->ndim; d++)
+ for (int d = 0; d < dd->ndim; d++)
{
- dim = dd->dim[d];
+ const int dim = dd->dim[d];
- inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
+ real inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
if (dd->unitCellInfo.ddBoxIsDynamic)
{
inv_cell_size *= DD_PRES_SCALE_MARGIN;
}
- np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
+ const int np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
{
/* If DLB is not active yet, we don't need to check the grid jumps.
* Actually we shouldn't, because then the grid jump data is not set.
*/
- if (isDlbOn(dd->comm) && check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
+ if (isDlbOn(dd->comm) && gmx::check_grid_jump(0, dd, cutoffRequested, &ddbox, FALSE))
{
LocallyLimited = 1;
}
return TRUE;
}
-gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
+bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested)
{
- gmx_bool bCutoffAllowed;
-
- bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
+ bool bCutoffAllowed = test_dd_cutoff(cr, box, x, cutoffRequested);
if (bCutoffAllowed)
{
for (int pulse = cr.dd->gpuHaloExchange[d].size(); pulse < cr.dd->comm->cd[d].numPulses(); pulse++)
{
cr.dd->gpuHaloExchange[d].push_back(std::make_unique<gmx::GpuHaloExchange>(
- cr.dd, d, cr.mpi_comm_mysim, deviceStreamManager.context(),
+ cr.dd,
+ d,
+ cr.mpi_comm_mygroup,
+ deviceStreamManager.context(),
deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
- deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal), pulse, wcycle));
+ deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal),
+ pulse,
+ wcycle));
}
}
}
}
}
- const auto& updateGrouping = dd.comm->systemInfo.updateGroupingPerMoleculetype[molblock.type];
+const gmx::LocalTopologyChecker& dd_localTopologyChecker(const gmx_domdec_t& dd)
+{
+ return *dd.localTopologyChecker;
+}
+
+gmx::LocalTopologyChecker* dd_localTopologyChecker(gmx_domdec_t* dd)
+{
+ return dd->localTopologyChecker.get();
+}
+
+void dd_init_local_state(const gmx_domdec_t& dd, const t_state* state_global, t_state* state_local)
+{
+ std::array<int, 5> buf;
+
+ if (DDMASTER(dd))
+ {
+ buf[0] = state_global->flags;
+ buf[1] = state_global->ngtc;
+ buf[2] = state_global->nnhpres;
+ buf[3] = state_global->nhchainlength;
+ buf[4] = state_global->dfhist ? state_global->dfhist->nlambda : 0;
+ }
+ dd_bcast(&dd, buf.size() * sizeof(int), buf.data());
+
+ init_gtc_state(state_local, buf[1], buf[2], buf[3]);
+ init_dfhist_state(state_local, buf[4]);
+ state_local->flags = buf[0];
+}
++
+ void putUpdateGroupAtomsInSamePeriodicImage(const gmx_domdec_t& dd,
+ const gmx_mtop_t& mtop,
+ const matrix box,
+ gmx::ArrayRef<gmx::RVec> positions)
+ {
+ int atomOffset = 0;
+ for (const gmx_molblock_t& molblock : mtop.molblock)
+ {
++ const auto& updateGrouping = dd.comm->systemInfo.updateGroupingsPerMoleculeType[molblock.type];
+
+ for (int mol = 0; mol < molblock.nmol; mol++)
+ {
+ for (int g = 0; g < updateGrouping.numBlocks(); g++)
+ {
+ const auto& block = updateGrouping.block(g);
+ const int atomBegin = atomOffset + block.begin();
+ const int atomEnd = atomOffset + block.end();
+ for (int a = atomBegin + 1; a < atomEnd; a++)
+ {
+ // Make sure that atoms in the same update group
+ // are in the same periodic image after restarts.
+ for (int d = DIM - 1; d >= 0; d--)
+ {
+ while (positions[a][d] - positions[atomBegin][d] > 0.5_real * box[d][d])
+ {
+ positions[a] -= box[d];
+ }
+ while (positions[a][d] - positions[atomBegin][d] < -0.5_real * box[d][d])
+ {
+ positions[a] += box[d];
+ }
+ }
+ }
+ }
+ atomOffset += updateGrouping.fullRange().end();
+ }
+ }
+ }
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/math/vectypes.h"
-#include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/real.h"
-struct cginfo_mb_t;
struct gmx_domdec_t;
struct gmx_ddbox_t;
struct gmx_domdec_zones_t;
struct gmx_localtop_t;
struct gmx_mtop_t;
-struct t_block;
-struct t_blocka;
struct t_commrec;
struct t_forcerec;
struct t_inputrec;
namespace gmx
{
+struct AtomInfoWithinMoleculeBlock;
class DeviceStreamManager;
class ForceWithShiftForces;
+class LocalTopologyChecker;
class MDLogger;
class RangePartitioning;
class VirtualSitesHandler;
+template<typename>
+class ArrayRef;
} // namespace gmx
/*! \brief Returns the global topology atom number belonging to local atom index i.
*/
int ddglatnr(const gmx_domdec_t* dd, int i);
-/*! \brief Returns a list of update group partitioning for each molecule type or empty when update groups are not used */
-gmx::ArrayRef<const gmx::RangePartitioning> getUpdateGroupingPerMoleculetype(const gmx_domdec_t& dd);
-
/*! \brief Store the global cg indices of the home cgs in state,
*
* This means it can be reset, even after a new DD partitioning.
*/
-void dd_store_state(struct gmx_domdec_t* dd, t_state* state);
+void dd_store_state(const gmx_domdec_t& dd, t_state* state);
/*! \brief Returns a pointer to the gmx_domdec_zones_t struct */
struct gmx_domdec_zones_t* domdec_zones(struct gmx_domdec_t* dd);
int dd_numHomeAtoms(const gmx_domdec_t& dd);
/*! \brief Returns the atom range in the local state for atoms that need to be present in mdatoms */
-int dd_natoms_mdatoms(const gmx_domdec_t* dd);
+int dd_natoms_mdatoms(const gmx_domdec_t& dd);
/*! \brief Returns the atom range in the local state for atoms involved in virtual sites */
-int dd_natoms_vsite(const gmx_domdec_t* dd);
+int dd_natoms_vsite(const gmx_domdec_t& dd);
/*! \brief Sets the atom range for atom in the local state for atoms received in constraints communication */
-void dd_get_constraint_range(const gmx_domdec_t* dd, int* at_start, int* at_end);
+void dd_get_constraint_range(const gmx_domdec_t& dd, int* at_start, int* at_end);
/*! \libinternal \brief Struct for passing around the number of PME domains */
struct NumPmeDomains
std::vector<int> get_pme_ddranks(const t_commrec* cr, int pmenodeid);
/*! \brief Returns the maximum shift for coordinate communication in PME, dim x */
-int dd_pme_maxshift_x(const gmx_domdec_t* dd);
+int dd_pme_maxshift_x(const gmx_domdec_t& dd);
/*! \brief Returns the maximum shift for coordinate communication in PME, dim y */
-int dd_pme_maxshift_y(const gmx_domdec_t* dd);
-
-/*! \brief Return whether constraints, not including settles, cross domain boundaries */
-bool ddHaveSplitConstraints(const gmx_domdec_t& dd);
+int dd_pme_maxshift_y(const gmx_domdec_t& dd);
/*! \brief Return whether update groups are used */
bool ddUsesUpdateGroups(const gmx_domdec_t& dd);
-/*! \brief Initialize data structures for bonded interactions */
-void dd_init_bondeds(FILE* fplog,
- gmx_domdec_t* dd,
- const gmx_mtop_t& mtop,
- const gmx::VirtualSitesHandler* vsite,
- const t_inputrec* ir,
- gmx_bool bBCheck,
- gmx::ArrayRef<cginfo_mb_t> cginfo_mb);
-
/*! \brief Returns whether molecules are always whole, i.e. not broken by PBC */
bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd);
/*! \brief Returns if we need to do pbc for calculating bonded interactions */
-gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType);
+bool dd_bonded_molpbc(const gmx_domdec_t& dd, PbcType pbcType);
/*! \brief Change the DD non-bonded communication cut-off.
*
* \param[in] x Position vector, used for computing the dimensions of the system
* \param[in] cutoffRequested The requested atom to atom cut-off distance, usually the pair-list cutoff distance
*/
-gmx_bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested);
+bool change_dd_cutoff(t_commrec* cr, const matrix box, gmx::ArrayRef<const gmx::RVec> x, real cutoffRequested);
/*! \brief Set up communication for averaging GPU wait times over domains
*
const matrix box,
gmx::ArrayRef<gmx::RVec> x0,
gmx::ArrayRef<gmx::RVec> x1,
- gmx_bool bX1IsCoord);
+ bool bX1IsCoord);
/*! \brief Communicates the coordinates involved in virtual sites */
void dd_move_x_vsites(const gmx_domdec_t& dd, const matrix box, rvec* x);
+/*! \brief Communicates the positions and velocities involved in virtual sites */
+void dd_move_x_and_v_vsites(const gmx_domdec_t& dd, const matrix box, rvec* x, rvec* v);
/*! \brief Returns the local atom count array for all constraints
*
*/
gmx::ArrayRef<const int> dd_constraints_nlocalatoms(const gmx_domdec_t* dd);
-/* In domdec_top.c */
-
-/*! \brief Print error output when interactions are missing */
-[[noreturn]] void dd_print_missing_interactions(const gmx::MDLogger& mdlog,
- t_commrec* cr,
- int local_count,
- const gmx_mtop_t* top_global,
- const gmx_localtop_t* top_local,
- gmx::ArrayRef<const gmx::RVec> x,
- const matrix box);
-
-/*! \brief Generate and store the reverse topology */
-void dd_make_reverse_top(FILE* fplog,
- gmx_domdec_t* dd,
- const gmx_mtop_t* mtop,
- const gmx::VirtualSitesHandler* vsite,
- const t_inputrec* ir,
- gmx_bool bBCheck);
-
-/*! \brief Generate the local topology and virtual site data */
-void dd_make_local_top(struct gmx_domdec_t* dd,
- struct gmx_domdec_zones_t* zones,
- int npbcdim,
- matrix box,
- rvec cellsize_min,
- const ivec npulse,
- t_forcerec* fr,
- rvec* cgcm_or_x,
- const gmx_mtop_t& top,
- gmx_localtop_t* ltop);
-
-/*! \brief Sort ltop->ilist when we are doing free energy. */
-void dd_sort_local_top(gmx_domdec_t* dd, const t_mdatoms* mdatoms, gmx_localtop_t* ltop);
-
-/*! \brief Construct local state */
-void dd_init_local_state(struct gmx_domdec_t* dd, const t_state* state_global, t_state* local_state);
+/*! \brief Const getter for the local topology checker
+ *
+ * \returns Const handle to local topology checker */
+const gmx::LocalTopologyChecker& dd_localTopologyChecker(const gmx_domdec_t& dd);
-/*! \brief Generate a list of links between atoms that are linked by bonded interactions
+/*! \brief Getter for the local topology checker
*
- * Also stores whether atoms are linked in \p cginfo_mb.
- */
-t_blocka* makeBondedLinks(const gmx_mtop_t& mtop, gmx::ArrayRef<cginfo_mb_t> cginfo_mb);
-
-/*! \brief Calculate the maximum distance involved in 2-body and multi-body bonded interactions */
-void dd_bonded_cg_distance(const gmx::MDLogger& mdlog,
- const gmx_mtop_t* mtop,
- const t_inputrec* ir,
- gmx::ArrayRef<const gmx::RVec> x,
- const matrix box,
- gmx_bool bBCheck,
- real* r_2b,
- real* r_mb);
+ * \returns Handle to local topology checker */
+gmx::LocalTopologyChecker* dd_localTopologyChecker(gmx_domdec_t* dd);
+
+/*! \brief Construct local state */
+void dd_init_local_state(const gmx_domdec_t& dd, const t_state* state_global, t_state* local_state);
/*! \brief Construct the GPU halo exchange object(s).
*
*/
void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces);
+ /*! \brief Wraps the \c positions so that atoms from the same
+ * update group share the same periodic image wrt \c box.
+ *
+ * When DD and update groups are in use, the simulation master rank
+ * should call this to ensure that, e.g. when restarting a simulation
+ * that did not use update groups, the coordinates satisfy the new
+ * requirements.
+ *
+ * This function can probably be removed when even single-rank
+ * simulations use domain decomposition, because then the choice of
+ * whether update groups are used is probably going to be the same
+ * regardless of the rank count.
+ *
+ * \param[in] dd The DD manager
+ * \param[in] mtop The system topology
+ * \param[in] box The global system box
+ * \param[in] positions The global system positions
+ */
+ void putUpdateGroupAtomsInSamePeriodicImage(const gmx_domdec_t& dd,
+ const gmx_mtop_t& mtop,
+ const matrix box,
+ gmx::ArrayRef<gmx::RVec> positions);
+
#endif
struct BalanceRegion;
+ namespace gmx
+ {
+ enum class DdRankOrder : int;
+ }
+ // namespace gmx
+
+
//! Indices to communicate in a dimension
struct gmx_domdec_ind_t
{
std::vector<gmx_cgsort_t> moved;
/**< Integer buffer for sorting */
std::vector<int> intBuffer;
+ /**< Int64 buffer for sorting */
+ std::vector<int64_t> int64Buffer;
} gmx_domdec_sort_t;
/*! \brief Manages atom ranges and order for the local state atom vectors */
offTemporarilyLocked, /**< DLB is off and temporarily can't turn on */
onCanTurnOff, /**< DLB is on and can turn off when slow */
onUser, /**< DLB is permanently on per user request */
- nr /**< The number of DLB states */
+ Count /**< The number of DLB states */
};
/*! \brief The PME domain decomposition for one dimension */
//! True when update groups are used
bool useUpdateGroups = false;
//! Update atom grouping for each molecule type
- std::vector<gmx::RangePartitioning> updateGroupingPerMoleculetype;
+ gmx::ArrayRef<const gmx::RangePartitioning> updateGroupingsPerMoleculeType;
//! The maximum radius over all update groups
real maxUpdateGroupRadius;
real cellsizeLimit = 0;
//! Can atoms connected by constraints be assigned to different domains?
- bool haveSplitConstraints = false;
+ bool mayHaveSplitConstraints = false;
//! Can atoms connected by settles be assigned to different domains?
- bool haveSplitSettles = false;
+ bool mayHaveSplitSettles = false;
//! Estimated communication range needed for constraints
real constraintCommunicationRange = 0;
/*! \brief Information on how the DD ranks are set up */
struct DDRankSetup
{
+ /**< The rank ordering */
+ gmx::DdRankOrder rankOrder;
+
/**< The number of particle-particle (non PME-only) ranks */
int numPPRanks = 0;
/**< The DD PP grid */
/**< Cut-off for multi-body interactions, also 2-body bonded when \p cutoff_mody > \p cutoff */
real cutoff_mbody = 0;
/**< The minimum guaranteed cell-size, Cartesian indexing */
- rvec cellsize_min = {};
+ gmx::RVec cellsize_min = { 0, 0, 0 };
/**< The minimum guaranteed cell-size with dlb=auto */
- rvec cellsize_min_dlb = {};
+ gmx::RVec cellsize_min_dlb = { 0, 0, 0 };
/**< The lower limit for the DD cell size with DLB */
real cellsize_limit = 0;
/**< Effectively no NB cut-off limit with DLB for systems without PBC? */
- gmx_bool bVacDLBNoLimit = false;
+ bool bVacDLBNoLimit = false;
/** With PME load balancing we set limits on DLB */
- gmx_bool bPMELoadBalDLBLimits = false;
+ bool bPMELoadBalDLBLimits = false;
/** DLB needs to take into account that we want to allow this maximum
* cut-off (for PME load balancing), this could limit cell boundaries.
*/
real PMELoadBal_max_cutoff = 0;
/**< box lower corner, required with dim's without pbc and -gcom */
- rvec box0 = {};
+ gmx::RVec box0 = { 0, 0, 0 };
/**< box size, required with dim's without pbc and -gcom */
- rvec box_size = {};
+ gmx::RVec box_size = { 0, 0, 0 };
/**< The DD cell lower corner, in triclinic space */
- rvec cell_x0 = {};
+ gmx::RVec cell_x0 = { 0, 0, 0 };
/**< The DD cell upper corner, in triclinic space */
- rvec cell_x1 = {};
+ gmx::RVec cell_x1 = { 0, 0, 0 };
/**< The old \p cell_x0, to check cg displacements */
- rvec old_cell_x0 = {};
+ gmx::RVec old_cell_x0 = { 0, 0, 0 };
/**< The old \p cell_x1, to check cg displacements */
- rvec old_cell_x1 = {};
+ gmx::RVec old_cell_x1 = { 0, 0, 0 };
/** The communication setup and charge group boundaries for the zones */
gmx_domdec_zones_t zones;
* dynamic load balancing.
*/
/**< Zone limits for dim 1 with staggered grids */
- gmx_ddzone_t zone_d1[2];
+ std::array<gmx_ddzone_t, 2> zone_d1;
/**< Zone limits for dim 2 with staggered grids */
gmx_ddzone_t zone_d2[2][2];
/** The coordinate/force communication setup and indices */
- gmx_domdec_comm_dim_t cd[DIM];
+ std::array<gmx_domdec_comm_dim_t, DIM> cd;
/** Restricts the maximum number of cells to communicate with in one dimension
*
* Dynamic load balancing is not permitted to change sizes if it
int64_t master_cg_ddp_count = 0;
/** The number of cg's received from the direct neighbors */
- int zone_ncg1[DD_MAXZONE] = { 0 };
+ std::array<int, DD_MAXZONE> zone_ncg1 = { 0 };
/** The atom ranges in the local state */
DDAtomRanges atomRanges;
/* Cycle counters over nstlist steps */
/**< Total cycles counted */
- float cycl[ddCyclNr] = {};
+ std::array<float, ddCyclNr> cycl = { 0 };
/**< The number of cycle recordings */
- int cycl_n[ddCyclNr] = {};
+ std::array<int, ddCyclNr> cycl_n = { 0 };
/**< The maximum cycle count */
- float cycl_max[ddCyclNr] = {};
+ std::array<float, ddCyclNr> cycl_max = { 0 };
/**< Total flops counted */
double flop = 0.0;
/**< The number of flop recordings */
/**< Max \p load_sum over the ranks */
double load_max = 0.0;
/**< Was load balancing limited, per DD dim */
- ivec load_lim = {};
+ gmx::IVec load_lim = { 0, 0, 0 };
/**< Total time on PP done during PME overlap time */
double load_mdf = 0.0;
/**< Total time on our PME-only rank */
#include "gromacs/listed_forces/bonded.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/units.h"
+#include "gromacs/math/utilities.h"
#include "gromacs/math/vec.h"
#include "gromacs/math/vecdump.h"
#include "gromacs/pbcutil/pbc.h"
static int calc_Nbin(real phi, int multiplicity, real core_frac)
{
- static const real r360 = 360 * DEG2RAD;
+ static const real r360 = 360 * gmx::c_deg2Rad;
real rot_width, core_width, core_offset, low, hi;
int bin;
/* with multiplicity 3 and core_frac 0.5
{
low = ((bin - 1) * rot_width) + core_offset;
hi = ((bin - 1) * rot_width) + core_offset + core_width;
- low *= DEG2RAD;
- hi *= DEG2RAD;
+ low *= gmx::c_deg2Rad;
+ hi *= gmx::c_deg2Rad;
if ((phi > low) && (phi < hi))
{
return bin;
multiplicity[k] = 3;
}
- low_ana_dih_trans(TRUE, fn_trans, TRUE, fn_histo, maxchi, dih, nlist, dlist, nframes, nangles,
- grpname, multiplicity, time, bRb, 0.5, oenv);
+ low_ana_dih_trans(
+ TRUE, fn_trans, TRUE, fn_histo, maxchi, dih, nlist, dlist, nframes, nangles, grpname, multiplicity, time, bRb, 0.5, oenv);
sfree(dlist);
sfree(multiplicity);
}
int (*calc_bin)(real, int, real);
real dt;
- if (1 <= nframes)
+ if (nframes <= 1)
{
return;
}
if (bAll)
{
/* print cuml rotamer vs time */
- print_one(oenv, "chiproduct", dlist[i].name, "chi product for",
- "cumulative rotamer", nframes, time, chi_prtrj);
+ print_one(oenv, "chiproduct", dlist[i].name, "chi product for", "cumulative rotamer", nframes, time, chi_prtrj);
}
/* make a histogram pf culm. rotamer occupancy too */
for (i = ix = 0; (ix < n3); i++, ix += 3)
{
- ang[i] = bond_angle(x_s[index[ix]], x_s[index[ix + 1]], x_s[index[ix + 2]], pbc, r_ij, r_kj,
- &costh, &t1, &t2);
+ ang[i] = bond_angle(
+ x_s[index[ix]], x_s[index[ix + 1]], x_s[index[ix + 2]], pbc, r_ij, r_kj, &costh, &t1, &t2);
}
if (debug)
{
- fprintf(debug, "Angle[0]=%g, costh=%g, index0 = %d, %d, %d\n", ang[0], costh, index[0],
- index[1], index[2]);
+ fprintf(debug, "Angle[0]=%g, costh=%g, index0 = %d, %d, %d\n", ang[0], costh, index[0], index[1], index[2]);
pr_rvec(debug, 0, "rij", r_ij, DIM, TRUE);
pr_rvec(debug, 0, "rkj", r_kj, DIM, TRUE);
}
for (i = 0; i < nangles; i++)
{
- angle = angles[i] * RAD2DEG;
+ angle = angles[i] * gmx::c_rad2Deg;
if (angle > 135 && angle < 225)
{
for (i = ix = 0; (ix < n4); i++, ix += 4)
{
- aaa = dih_angle(x_s[index[ix]], x_s[index[ix + 1]], x_s[index[ix + 2]], x_s[index[ix + 3]],
- pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3);
+ aaa = dih_angle(x_s[index[ix]],
+ x_s[index[ix + 1]],
+ x_s[index[ix + 2]],
+ x_s[index[ix + 3]],
+ pbc,
+ r_ij,
+ r_kj,
+ r_kl,
+ m,
+ n,
+ &t1,
+ &t2,
+ &t3);
ang[i] = aaa; /* not taking into account ryckaert bellemans yet */
}
dx = npoints / (maxx - minx);
if (debug)
{
- fprintf(debug, "Histogramming: ndata=%d, nhisto=%d, minx=%g,maxx=%g,dx=%g\n", ndata,
- npoints, minx, maxx, dx);
+ fprintf(debug, "Histogramming: ndata=%d, nhisto=%d, minx=%g,maxx=%g,dx=%g\n", ndata, npoints, minx, maxx, dx);
}
for (i = 0; (i < ndata); i++)
{
"0000000000000000000000000000000000000000000000000000000000000",
"0000000000000000000000000000000000000000000000000000000000000",
"0000000000000000000000000000000000000000000000000000000000000" };
-#define NPP asize(map)
- int x, y;
+ int x, y;
-#define INDEX(ppp) (((static_cast<int>(360 + (ppp)*RAD2DEG)) % 360) / 6)
+#define INDEX(ppp) (((static_cast<int>(360 + (ppp)*gmx::c_rad2Deg)) % 360) / 6)
x = INDEX(phi);
y = INDEX(psi);
#undef INDEX
}
else if (debug)
{
- fprintf(debug, "Res. %d has imcomplete occupancy or bfacs > %g\n",
- dlist[i].resnr, bfac_max);
+ fprintf(debug, "Res. %d has imcomplete occupancy or bfacs > %g\n", dlist[i].resnr, bfac_max);
}
}
else
normalize_histo(nbin, his_aa[Dih][i], (360.0 / nbin), normhisto);
}
- residue_name = rt->nameFromResidueIndex(i).c_str();
+ std::string residueName = rt->nameFromResidueIndex(i);
+ residue_name = residueName.c_str();
switch (Dih)
{
case edPhi:
break;
default:
sprintf(hisfile, "histo-chi%d%s", Dih - NONCHI + 1, residue_name);
- sprintf(title, "\\xc\\f{}\\s%d\\N Distribution for %s", Dih - NONCHI + 1,
- residue_name);
+ sprintf(title, "\\xc\\f{}\\s%d\\N Distribution for %s", Dih - NONCHI + 1, residue_name);
}
std::strcpy(hhisfile, hisfile);
std::strcat(hhisfile, ".xvg");
Psi = dlist[i].j0[edPsi];
for (j = 0; (j < nf); j++)
{
- phi = RAD2DEG * dih[Phi][j];
- psi = RAD2DEG * dih[Psi][j];
+ phi = gmx::c_rad2Deg * dih[Phi][j];
+ psi = gmx::c_rad2Deg * dih[Psi][j];
fprintf(fp, "%10g %10g\n", phi, psi);
if (bViol)
{
- fprintf(gp, "%d\n", static_cast<int>(!bAllowed(dih[Phi][j], RAD2DEG * dih[Psi][j])));
+ fprintf(gp,
+ "%d\n",
+ static_cast<int>(!bAllowed(dih[Phi][j], gmx::c_rad2Deg * dih[Psi][j])));
}
if (bOm)
{
- omega = RAD2DEG * dih[Om][j];
+ omega = gmx::c_rad2Deg * dih[Om][j];
mat[static_cast<int>(((phi * NMAT) / 360) + gmx::exactDiv(NMAT, 2))]
[static_cast<int>(((psi * NMAT) / 360) + gmx::exactDiv(NMAT, 2))] += omega;
}
lo += 180;
hi += 180;
nlevels = 20;
- write_xpm3(fp, 0, "Omega/Ramachandran Plot", "Deg", "Phi", "Psi", NMAT, NMAT, axis,
- axis, mat, lo, 180.0, hi, rlo, rmid, rhi, &nlevels);
+ write_xpm3(fp,
+ 0,
+ "Omega/Ramachandran Plot",
+ "Deg",
+ "Phi",
+ "Psi",
+ NMAT,
+ NMAT,
+ axis,
+ axis,
+ mat,
+ lo,
+ 180.0,
+ hi,
+ rlo,
+ rmid,
+ rhi,
+ &nlevels);
gmx_ffclose(fp);
for (j = 0; (j < NMAT); j++)
{
if ((has_dihedral(edChi1, &(dlist[i]))) && (has_dihedral(edChi2, &(dlist[i]))))
{
sprintf(fn, "ramaX1X2%s.xvg", dlist[i].name);
- fp = rama_file(fn, "\\8c\\4\\s1\\N-\\8c\\4\\s2\\N Ramachandran Plot",
- "\\8c\\4\\s1\\N (deg)", "\\8c\\4\\s2\\N (deg)", oenv);
+ fp = rama_file(fn,
+ "\\8c\\4\\s1\\N-\\8c\\4\\s2\\N Ramachandran Plot",
+ "\\8c\\4\\s1\\N (deg)",
+ "\\8c\\4\\s2\\N (deg)",
+ oenv);
Xi1 = dlist[i].j0[edChi1];
Xi2 = dlist[i].j0[edChi2];
for (j = 0; (j < nf); j++)
{
- fprintf(fp, "%10g %10g\n", RAD2DEG * dih[Xi1][j], RAD2DEG * dih[Xi2][j]);
+ fprintf(fp, "%10g %10g\n", gmx::c_rad2Deg * dih[Xi1][j], gmx::c_rad2Deg * dih[Xi2][j]);
}
xvgrclose(fp);
}
/* must correspond with enum in pp2shift.h:38 */
char* leg[edMax];
-#define NLEG asize(leg)
leg[0] = gmx_strdup("Phi");
leg[1] = gmx_strdup("Psi");
z0 *= 10.0; /* nm -> angstrom */
for (i = 0; (i < 10); i++)
{
- gmx_fprintf_pdb_atomline(fp, epdbATOM, atoms->nr + 1 + i, "CA", ' ', "LEG", ' ',
- atoms->nres + 1, ' ', x0, y0, z0 + (1.2 * i), 0.0, -0.1 * i,
+ gmx_fprintf_pdb_atomline(fp,
+ PdbRecordType::Atom,
+ atoms->nr + 1 + i,
+ "CA",
+ ' ',
+ "LEG",
+ ' ',
+ atoms->nres + 1,
+ ' ',
+ x0,
+ y0,
+ z0 + (1.2 * i),
+ 0.0,
+ -0.1 * i,
"");
}
gmx_ffclose(fp);
npargs = asize(pa);
ppa = add_acf_pargs(&npargs, pa);
- if (!parse_common_args(&argc, argv, PCA_CAN_VIEW | PCA_CAN_TIME, NFILE, fnm, npargs, ppa,
- asize(desc), desc, asize(bugs), bugs, &oenv))
+ if (!parse_common_args(
+ &argc, argv, PCA_CAN_VIEW | PCA_CAN_TIME, NFILE, fnm, npargs, ppa, asize(desc), desc, asize(bugs), bugs, &oenv))
{
sfree(ppa);
return 0;
snew(dih, ndih);
/* COMPUTE ALL DIHEDRALS! */
- read_ang_dih(ftp2fn(efTRX, NFILE, fnm), FALSE, TRUE, FALSE, bPBC, 1, &idum, &nf, &time, isize,
- index, &trans_frac, &aver_angle, dih, oenv);
+ read_ang_dih(
+ ftp2fn(efTRX, NFILE, fnm), FALSE, TRUE, FALSE, bPBC, 1, &idum, &nf, &time, isize, index, &trans_frac, &aver_angle, dih, oenv);
dt = (time[nf - 1] - time[0]) / (nf - 1); /* might want this for corr or n. transit*/
if (bCorr)
}
/* put angles in -M_PI to M_PI ! and correct phase factor for phi and psi
- * pass nactdih instead of ndih to low_ana_dih_trans and get_chi_product_traj
+ * pass nactdih instead of ndih to low_ana_dih_trans
* to prevent accessing off end of arrays when maxchi < 5 or 6. */
nactdih = reset_em_all(nlist, dlist, nf, dih, maxchi);
}
/* Histogramming & J coupling constants & calc of S2 order params */
- histogramming(log, nbin, &rt, nf, maxchi, dih, nlist, dlist, index, bPhi, bPsi, bOmega, bChi,
- bNormHisto, bSSHisto, ftp2fn(efDAT, NFILE, fnm), bfac_max, &atoms, bDo_jc,
- opt2fn("-jc", NFILE, fnm), oenv);
+ histogramming(log,
+ nbin,
+ &rt,
+ nf,
+ maxchi,
+ dih,
+ nlist,
+ dlist,
+ index,
+ bPhi,
+ bPsi,
+ bOmega,
+ bChi,
+ bNormHisto,
+ bSSHisto,
+ ftp2fn(efDAT, NFILE, fnm),
+ bfac_max,
+ &atoms,
+ bDo_jc,
+ opt2fn("-jc", NFILE, fnm),
+ oenv);
/* transitions
*
}
- low_ana_dih_trans(bDo_ot, opt2fn("-ot", NFILE, fnm), bDo_oh, opt2fn("-oh", NFILE, fnm), maxchi, dih,
- nlist, dlist, nf, nactdih, grpname, multiplicity, time, FALSE, core_frac, oenv);
+ low_ana_dih_trans(bDo_ot,
+ opt2fn("-ot", NFILE, fnm),
+ bDo_oh,
+ opt2fn("-oh", NFILE, fnm),
+ maxchi,
+ dih,
+ nlist,
+ dlist,
+ nf,
+ nactdih,
+ grpname,
+ multiplicity,
+ time,
+ FALSE,
+ core_frac,
+ oenv);
/* Order parameters */
- order_params(log, opt2fn("-o", NFILE, fnm), maxchi, nlist, dlist, ftp2fn_null(efPDB, NFILE, fnm),
- bfac_init, &atoms, x, pbcType, box, bPhi, bPsi, bChi, oenv);
+ order_params(log,
+ opt2fn("-o", NFILE, fnm),
+ maxchi,
+ nlist,
+ dlist,
+ ftp2fn_null(efPDB, NFILE, fnm),
+ bfac_init,
+ &atoms,
+ x,
+ pbcType,
+ box,
+ bPhi,
+ bPsi,
+ bChi,
+ oenv);
/* Print ramachandran maps! */
if (bRama)
}
mk_chi_lookup(chi_lookup, maxchi, nlist, dlist);
- get_chi_product_traj(dih, nf, nlist, maxchi, dlist, time, chi_lookup, multiplicity, FALSE,
- bNormHisto, core_frac, bAll, opt2fn("-cp", NFILE, fnm), oenv);
+ get_chi_product_traj(dih,
+ nf,
- nactdih,
++ nlist,
+ maxchi,
+ dlist,
+ time,
+ chi_lookup,
+ multiplicity,
+ FALSE,
+ bNormHisto,
+ core_frac,
+ bAll,
+ opt2fn("-cp", NFILE, fnm),
+ oenv);
for (i = 0; i < nlist; i++)
{
/* Correlation comes last because it messes up the angles */
if (bCorr)
{
- do_dihcorr(opt2fn("-corr", NFILE, fnm), nf, ndih, dih, dt, nlist, dlist, time, maxchi, bPhi,
- bPsi, bChi, bOmega, oenv);
+ do_dihcorr(opt2fn("-corr", NFILE, fnm), nf, ndih, dih, dt, nlist, dlist, time, maxchi, bPhi, bPsi, bChi, bOmega, oenv);
}
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/domdec/gpuhaloexchange.h"
#include "gromacs/domdec/localatomsetmanager.h"
+#include "gromacs/domdec/makebondedlinks.h"
#include "gromacs/domdec/partition.h"
+#include "gromacs/domdec/reversetopology.h"
#include "gromacs/ewald/ewald_utils.h"
+#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_gpu_program.h"
#include "gromacs/ewald/pme_only.h"
#include "gromacs/ewald/pme_pp_comm_gpu.h"
#include "gromacs/hardware/printhardware.h"
#include "gromacs/imd/imd.h"
#include "gromacs/listed_forces/disre.h"
-#include "gromacs/listed_forces/gpubonded.h"
+#include "gromacs/listed_forces/listed_forces_gpu.h"
#include "gromacs/listed_forces/listed_forces.h"
#include "gromacs/listed_forces/orires.h"
#include "gromacs/math/functions.h"
#include "gromacs/utility/keyvaluetree.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/loggerbuilder.h"
-#include "gromacs/utility/mdmodulenotification.h"
+#include "gromacs/utility/mdmodulesnotifiers.h"
#include "gromacs/utility/physicalnodecommunicator.h"
#include "gromacs/utility/pleasecite.h"
#include "gromacs/utility/programcontext.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/mpiinfo.h"
#include "isimulator.h"
#include "membedholder.h"
{
DevelopmentFeatureFlags devFlags;
- // Some builds of GCC 5 give false positive warnings that these
- // getenv results are ignored when clearly they are used.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-result"
-
devFlags.enableGpuBufferOps =
GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
- devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr;
+ devFlags.enableGpuHaloExchange = GMX_MPI && GMX_GPU_CUDA && getenv("GMX_GPU_DD_COMMS") != nullptr;
devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE;
- devFlags.enableGpuPmePPComm =
- GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
+ devFlags.enableGpuPmePPComm = GMX_MPI && GMX_GPU_CUDA && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
+
+ // Direct GPU comm path is being used with CUDA_AWARE_MPI
+ // make sure underlying MPI implementation is CUDA-aware
+ if (!GMX_THREAD_MPI && (devFlags.enableGpuPmePPComm || devFlags.enableGpuHaloExchange))
+ {
+ const bool haveDetectedCudaAwareMpi =
+ (checkMpiCudaAwareSupport() == CudaAwareMpiStatus::Supported);
+ const bool forceCudaAwareMpi = (getenv("GMX_FORCE_CUDA_AWARE_MPI") != nullptr);
+
+ if (!haveDetectedCudaAwareMpi && forceCudaAwareMpi)
+ {
+ // CUDA-aware support was not detected in the MPI library, but the user has forced its use
+ GMX_LOG(mdlog.warning)
+ .asParagraph()
+ .appendTextFormatted(
+ "This run has forced use of 'CUDA-aware MPI'. "
+ "But, GROMACS cannot determine if underlying MPI "
+ "is CUDA-aware. GROMACS recommends use of latest openMPI version "
+ "for CUDA-aware support. "
+ "If you observe failures at runtime, try unsetting "
+ "GMX_FORCE_CUDA_AWARE_MPI environment variable.");
+ }
+
+ if (haveDetectedCudaAwareMpi || forceCudaAwareMpi)
+ {
+ devFlags.usingCudaAwareMpi = true;
+ GMX_LOG(mdlog.warning)
+ .asParagraph()
+ .appendTextFormatted(
+ "Using CUDA-aware MPI for 'GPU halo exchange' or 'GPU PME-PP "
+ "communications' feature.");
+ }
+ else
+ {
+ if (devFlags.enableGpuHaloExchange)
+ {
+ GMX_LOG(mdlog.warning)
+ .asParagraph()
+ .appendTextFormatted(
+ "GMX_GPU_DD_COMMS environment variable detected, but the 'GPU "
+ "halo exchange' feature will not be enabled as GROMACS couldn't "
+ "detect CUDA_aware support in underlying MPI implementation.");
+ devFlags.enableGpuHaloExchange = false;
+ }
+ if (devFlags.enableGpuPmePPComm)
+ {
+ GMX_LOG(mdlog.warning)
+ .asParagraph()
+ .appendText(
+ "GMX_GPU_PME_PP_COMMS environment variable detected, but the "
+ "'GPU PME-PP communications' feature will not be enabled as "
+ "GROMACS couldn't "
+ "detect CUDA_aware support in underlying MPI implementation.");
+ devFlags.enableGpuPmePPComm = false;
+ }
-#pragma GCC diagnostic pop
+ GMX_LOG(mdlog.warning)
+ .asParagraph()
+ .appendTextFormatted(
+ "GROMACS recommends use of latest OpenMPI version for CUDA-aware "
+ "support. "
+ "If you are certain about CUDA-aware support in your MPI library, "
+ "you can force it's use by setting environment variable "
+ " GMX_FORCE_CUDA_AWARE_MPI.");
+ }
+ }
if (devFlags.enableGpuBufferOps)
{
{
try
{
- auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner*>(arg);
+ const auto* masterMdrunner = reinterpret_cast<const gmx::Mdrunner*>(arg);
/* copy the arg list to make sure that it's thread-local. This
doesn't copy pointed-to items, of course; fnm, cr and fplog
are reset in the call below, all others should be const. */
#if GMX_THREAD_MPI
/* now spawn new threads that start mdrunner_start_fn(), while
the main thread returns. Thread affinity is handled later. */
- if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn,
- static_cast<const void*>(this))
+ if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, mdrunner_start_fn, static_cast<const void*>(this))
!= TMPI_SUCCESS)
{
GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads"));
t_commrec* cr,
t_inputrec* ir,
int nstlist_cmdline,
- const gmx_mtop_t* mtop,
+ const gmx_mtop_t& mtop,
const matrix box,
bool makeGpuPairList,
const gmx::CpuInfo& cpuinfo)
{
// We checked the cut-offs in grompp, but double-check here.
// We have PME+LJcutoff kernels for rcoulomb>rvdw.
- if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT)
+ if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == VanDerWaalsType::Cut)
{
GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw,
"With Verlet lists and PME we should have rcoulomb>=rvdw");
"With Verlet lists and no PME rcoulomb and rvdw should be identical");
}
/* For NVE simulations, we will retain the initial list buffer */
- if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO))
+ if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0
+ && !(EI_MD(ir->eI) && ir->etc == TemperatureCoupling::No))
{
/* Update the Verlet buffer size for the current run setup */
VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType);
const real rlist_new =
- calcVerletBufferSize(*mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
+ calcVerletBufferSize(mtop, det(box), *ir, ir->nstlist, ir->nstlist - 1, -1, listSetup);
if (rlist_new != ir->rlist)
{
{
fprintf(fplog,
"\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
- ir->rlist, rlist_new, listSetup.cluster_size_i, listSetup.cluster_size_j);
+ ir->rlist,
+ rlist_new,
+ listSetup.cluster_size_i,
+ listSetup.cluster_size_j);
}
ir->rlist = rlist_new;
}
if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
{
- gmx_fatal(FARGS, "Can not set nstlist without %s",
+ gmx_fatal(FARGS,
+ "Can not set nstlist without %s",
!EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
}
if (EI_DYNAMICS(ir->eI))
{
/* Set or try nstlist values */
- increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo);
+ increaseNstlist(fplog, cr, ir, nstlist_cmdline, &mtop, box, makeGpuPairList, cpuinfo);
}
}
{
sprintf(sbuf_msg,
"Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
- gmx_step_str(nsteps_cmdline, sbuf_steps), fabs(nsteps_cmdline * ir->delta_t));
+ gmx_step_str(nsteps_cmdline, sbuf_steps),
+ fabs(nsteps_cmdline * ir->delta_t));
}
else
{
- sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
+ sprintf(sbuf_msg,
+ "Overriding nsteps with value passed on the command line: %s steps",
gmx_step_str(nsteps_cmdline, sbuf_steps));
}
static void finish_run(FILE* fplog,
const gmx::MDLogger& mdlog,
const t_commrec* cr,
- const t_inputrec* inputrec,
+ const t_inputrec& inputrec,
t_nrnb nrnb[],
- gmx_wallcycle_t wcycle,
+ gmx_wallcycle* wcycle,
gmx_walltime_accounting_t walltime_accounting,
nonbonded_verlet_t* nbv,
const gmx_pme_t* pme,
Further, we only report performance for dynamical integrators,
because those are the only ones for which we plan to
consider doing any optimizations. */
- bool printReport = EI_DYNAMICS(inputrec->eI) && SIMMASTER(cr);
+ bool printReport = EI_DYNAMICS(inputrec.eI) && SIMMASTER(cr);
if (printReport && !walltime_accounting_get_valid_finish(walltime_accounting))
{
nrnbTotalStorage = std::make_unique<t_nrnb>();
nrnb_tot = nrnbTotalStorage.get();
#if GMX_MPI
- MPI_Allreduce(nrnb->n, nrnb_tot->n, eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
+ MPI_Allreduce(nrnb->n.data(), nrnb_tot->n.data(), eNRNB, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
#endif
}
else
{
#if GMX_MPI
/* reduce elapsed_time over all MPI ranks in the current simulation */
- MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM,
- cr->mpi_comm_mysim);
+ MPI_Allreduce(&elapsed_time, &elapsed_time_over_all_ranks, 1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
elapsed_time_over_all_ranks /= cr->nnodes;
/* Reduce elapsed_time_over_all_threads over all MPI ranks in the
* current simulation. */
- MPI_Allreduce(&elapsed_time_over_all_threads, &elapsed_time_over_all_threads_over_all_ranks,
- 1, MPI_DOUBLE, MPI_SUM, cr->mpi_comm_mysim);
+ MPI_Allreduce(&elapsed_time_over_all_threads,
+ &elapsed_time_over_all_threads_over_all_ranks,
+ 1,
+ MPI_DOUBLE,
+ MPI_SUM,
+ cr->mpi_comm_mysim);
#endif
}
else
* to the code that handled the thread region, so that there's a
* mechanism to keep cycle counting working during the transition
* to task parallelism. */
- int nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);
- int nthreads_pme = gmx_omp_nthreads_get(emntPME);
- wallcycle_scale_by_num_threads(wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP),
- nthreads_pp, nthreads_pme);
+ int nthreads_pp = gmx_omp_nthreads_get(ModuleMultiThread::Nonbonded);
+ int nthreads_pme = gmx_omp_nthreads_get(ModuleMultiThread::Pme);
+ wallcycle_scale_by_num_threads(
+ wcycle, thisRankHasDuty(cr, DUTY_PME) && !thisRankHasDuty(cr, DUTY_PP), nthreads_pp, nthreads_pme);
auto cycle_sum(wallcycle_sum(cr, wcycle));
if (printReport)
{
- auto nbnxn_gpu_timings =
+ auto* nbnxn_gpu_timings =
(nbv != nullptr && nbv->useGpu()) ? Nbnxm::gpu_get_timings(nbv->gpu_nbv) : nullptr;
gmx_wallclock_gpu_pme_t pme_gpu_timings = {};
{
pme_gpu_get_timings(pme, &pme_gpu_timings);
}
- wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
- elapsed_time_over_all_ranks, wcycle, cycle_sum, nbnxn_gpu_timings,
+ wallcycle_print(fplog,
+ mdlog,
+ cr->nnodes,
+ cr->npmenodes,
+ nthreads_pp,
+ nthreads_pme,
+ elapsed_time_over_all_ranks,
+ wcycle,
+ cycle_sum,
+ nbnxn_gpu_timings,
&pme_gpu_timings);
- if (EI_DYNAMICS(inputrec->eI))
+ if (EI_DYNAMICS(inputrec.eI))
{
- delta_t = inputrec->delta_t;
+ delta_t = inputrec.delta_t;
}
if (fplog)
{
- print_perf(fplog, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
+ print_perf(fplog,
+ elapsed_time_over_all_threads_over_all_ranks,
+ elapsed_time_over_all_ranks,
walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
- delta_t, nbfs, mflop);
+ delta_t,
+ nbfs,
+ mflop);
}
if (bWriteStat)
{
- print_perf(stderr, elapsed_time_over_all_threads_over_all_ranks, elapsed_time_over_all_ranks,
+ print_perf(stderr,
+ elapsed_time_over_all_threads_over_all_ranks,
+ elapsed_time_over_all_ranks,
walltime_accounting_get_nsteps_done_since_reset(walltime_accounting),
- delta_t, nbfs, mflop);
+ delta_t,
+ nbfs,
+ mflop);
}
}
}
int Mdrunner::mdrunner()
{
- matrix box;
- t_forcerec* fr = nullptr;
- real ewaldcoeff_q = 0;
- real ewaldcoeff_lj = 0;
- int nChargePerturbed = -1, nTypePerturbed = 0;
- gmx_wallcycle_t wcycle;
- gmx_walltime_accounting_t walltime_accounting = nullptr;
- MembedHolder membedHolder(filenames.size(), filenames.data());
+ matrix box;
+ std::unique_ptr<t_forcerec> fr;
+ real ewaldcoeff_q = 0;
+ real ewaldcoeff_lj = 0;
+ int nChargePerturbed = -1, nTypePerturbed = 0;
+ gmx_walltime_accounting_t walltime_accounting = nullptr;
+ MembedHolder membedHolder(filenames.size(), filenames.data());
/* CAUTION: threads may be started later on in this function, so
cr doesn't reflect the final parallel state right now */
gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo_);
- std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo_->deviceInfoList, hw_opt.gpuIdsAvailable);
- const int numDevicesToUse = gmx::ssize(gpuIdsToUse);
+ std::vector<int> availableDevices =
+ makeListOfAvailableDevices(hwinfo_->deviceInfoList, hw_opt.devicesSelectedByUser);
+ const int numAvailableDevices = gmx::ssize(availableDevices);
// Print citation requests after all software/hardware printing
pleaseCiteGromacs(fplog);
/* Read (nearly) all data required for the simulation
* and keep the partly serialized tpr contents to send to other ranks later
*/
- applyGlobalSimulationState(*inputHolder_.get(), partialDeserializedTpr.get(),
- globalState.get(), inputrec.get(), &mtop);
+ applyGlobalSimulationState(
+ *inputHolder_.get(), partialDeserializedTpr.get(), globalState.get(), inputrec.get(), &mtop);
}
/* Check and update the hardware options for internal consistency */
- checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks,
- inputrec.get());
+ checkAndUpdateHardwareOptions(
+ mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks, inputrec.get());
if (GMX_THREAD_MPI && isSimulationMasterRank)
{
// the number of GPUs to choose the number of ranks.
auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi(
- nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded,
+ nonbondedTarget,
+ numAvailableDevices > 0,
+ userGpuTaskAssignment,
+ emulateGpuNonbonded,
canUseGpuForNonbonded,
gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
hw_opt.nthreads_tmpi);
- useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
- useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo_,
- *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
+ useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(useGpuForNonbonded,
+ pmeTarget,
+ numAvailableDevices,
+ userGpuTaskAssignment,
+ *hwinfo_,
+ *inputrec,
+ hw_opt.nthreads_tmpi,
+ domdecOptions.numPmeRanks);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
* TODO Over-writing the user-supplied value here does
* prevent any possible subsequent checks from working
* correctly. */
- hw_opt.nthreads_tmpi =
- get_nthreads_mpi(hwinfo_, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
- inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
+ hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo_,
+ &hw_opt,
+ numAvailableDevices,
+ useGpuForNonbonded,
+ useGpuForPme,
+ inputrec.get(),
+ mtop,
+ mdlog,
+ membedHolder.doMembed());
// Now start the threads for thread MPI.
spawnThreads(hw_opt.nthreads_tmpi);
// On non-master ranks, allocate the object that will receive data in the following call.
inputrec = std::make_unique<t_inputrec>();
}
- init_parallel(cr->mpiDefaultCommunicator, MASTER(cr), inputrec.get(), &mtop,
+ init_parallel(cr->mpiDefaultCommunicator,
+ MASTER(cr),
+ inputrec.get(),
+ &mtop,
partialDeserializedTpr.get());
}
GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
partialDeserializedTpr.reset(nullptr);
+ GMX_RELEASE_ASSERT(
+ !inputrec->useConstantAcceleration,
+ "Linear acceleration has been removed in GROMACS 2022, and was broken for many years "
+ "before that. Use GROMACS 4.5 or earlier if you need this feature.");
+
// Now the number of ranks is known to all ranks, and each knows
// the inputrec read by the master rank. The ranks can now all run
// the task-deciding functions and will agree on the result
// without needing to communicate.
- const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM));
+ const bool useDomainDecomposition =
+ (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == IntegrationAlgorithm::NM));
// Note that these variables describe only their own node.
//
// assignment.
auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(
- nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
- gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
- useGpuForPme = decideWhetherToUseGpusForPme(
- useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo_, *inputrec,
- cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected);
- useGpuForBonded = decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
- bondedTarget, *inputrec, mtop,
- domdecOptions.numPmeRanks, gpusWereDetected);
+ nonbondedTarget,
+ userGpuTaskAssignment,
+ emulateGpuNonbonded,
+ canUseGpuForNonbonded,
+ gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI),
+ gpusWereDetected);
+ useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded,
+ pmeTarget,
+ userGpuTaskAssignment,
+ *hwinfo_,
+ *inputrec,
+ cr->sizeOfDefaultCommunicator,
+ domdecOptions.numPmeRanks,
+ gpusWereDetected);
+ useGpuForBonded = decideWhetherToUseGpusForBonded(
+ useGpuForNonbonded, useGpuForPme, bondedTarget, *inputrec, mtop, domdecOptions.numPmeRanks, gpusWereDetected);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
const DevelopmentFeatureFlags devFlags =
manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
- const bool useModularSimulator =
- checkUseModularSimulator(false, inputrec.get(), doRerun, mtop, ms, replExParams,
- nullptr, doEssentialDynamics, membedHolder.doMembed());
+ const bool useModularSimulator = checkUseModularSimulator(false,
+ inputrec.get(),
+ doRerun,
+ mtop,
+ ms,
+ replExParams,
+ nullptr,
+ doEssentialDynamics,
+ membedHolder.doMembed());
// Build restraints.
// TODO: hide restraint implementation details from Mdrunner.
// TODO: Error handling
mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
- // now that the MdModules know their options, they know which callbacks to sign up to
+ // now that the MDModules know their options, they know which callbacks to sign up to
mdModules_->subscribeToSimulationSetupNotifications();
- const auto& mdModulesNotifier = mdModules_->notifier().simulationSetupNotifications_;
+ const auto& setupNotifier = mdModules_->notifiers().simulationSetupNotifier_;
+ // Notify MdModules of existing logger
+ setupNotifier.notify(mdlog);
+
+ // Notify MdModules of internal parameters, saved into KVT
if (inputrec->internalParameters != nullptr)
{
- mdModulesNotifier.notify(*inputrec->internalParameters);
+ setupNotifier.notify(*inputrec->internalParameters);
+ }
+
+ // Let MdModules know the .tpr filename
+ {
+ gmx::MdRunInputFilename mdRunInputFilename = { ftp2fn(efTPR, filenames.size(), filenames.data()) };
+ setupNotifier.notify(mdRunInputFilename);
}
if (fplog != nullptr)
if (SIMMASTER(cr))
{
/* In rerun, set velocities to zero if present */
- if (doRerun && ((globalState->flags & (1 << estV)) != 0))
+ if (doRerun && ((globalState->flags & enumValueToBitMask(StateEntry::V)) != 0))
{
// rerun does not use velocities
GMX_LOG(mdlog.info)
{
clear_rvec(globalState->v[i]);
}
- globalState->flags &= ~(1 << estV);
+ globalState->flags &= ~enumValueToBitMask(StateEntry::V);
}
/* now make sure the state is initialized and propagated */
/* NM and TPI parallelize over force/energy calculations, not atoms,
* so we need to initialize and broadcast the global state.
*/
- if (inputrec->eI == eiNM || inputrec->eI == eiTPI)
+ if (inputrec->eI == IntegrationAlgorithm::NM || inputrec->eI == IntegrationAlgorithm::TPI)
{
if (!MASTER(cr))
{
globalState = std::make_unique<t_state>();
}
- broadcastStateWithoutDynamics(cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr),
- globalState.get());
+ broadcastStateWithoutDynamics(
+ cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr), globalState.get());
}
/* A parallel command line option consistency check that we can
#endif
}
- if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
+ if (doRerun && (EI_ENERGY_MINIMIZATION(inputrec->eI) || IntegrationAlgorithm::NM == inputrec->eI))
{
gmx_fatal(FARGS,
"The .mdp file specified an energy mininization or normal mode algorithm, and "
"these are not compatible with mdrun -rerun");
}
- if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
- {
- if (domdecOptions.numPmeRanks > 0)
- {
- gmx_fatal_collective(FARGS, cr->mpiDefaultCommunicator, MASTER(cr),
- "PME-only ranks are requested, but the system does not use PME "
- "for electrostatics or LJ");
- }
-
- domdecOptions.numPmeRanks = 0;
- }
-
- if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0)
- {
- /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can
- * improve performance with many threads per GPU, since our OpenMP
- * scaling is bad, but it's difficult to automate the setup.
- */
- domdecOptions.numPmeRanks = 0;
- }
- if (useGpuForPme)
- {
- if (domdecOptions.numPmeRanks < 0)
- {
- domdecOptions.numPmeRanks = 0;
- // TODO possibly print a note that one can opt-in for a separate PME GPU rank?
- }
- else
- {
- GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1,
- "PME GPU decomposition is not supported");
- }
- }
-
/* NMR restraints must be initialized before load_checkpoint,
* since with time averaging the history is added to t_state.
* For proper consistency check we therefore need to extend
/* This needs to be called before read_checkpoint to extend the state */
t_disresdata* disresdata;
snew(disresdata, 1);
- init_disres(fplog, &mtop, inputrec.get(), DisResRunMode::MDRun,
+ init_disres(fplog,
+ mtop,
+ inputrec.get(),
+ DisResRunMode::MDRun,
MASTER(cr) ? DDRole::Master : DDRole::Agent,
- PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mysim, ms, disresdata,
- globalState.get(), replExParams.exchangeInterval > 0);
+ PAR(cr) ? NumRanks::Multiple : NumRanks::Single,
+ cr->mpi_comm_mysim,
+ ms,
+ disresdata,
+ globalState.get(),
+ replExParams.exchangeInterval > 0);
- t_oriresdata* oriresdata;
- snew(oriresdata, 1);
- init_orires(fplog, &mtop, inputrec.get(), cr, ms, globalState.get(), oriresdata);
+ std::unique_ptr<t_oriresdata> oriresData;
+ if (gmx_mtop_ftype_count(mtop, F_ORIRES) > 0)
+ {
+ oriresData = std::make_unique<t_oriresdata>(fplog, mtop, *inputrec, cr, ms, globalState.get());
+ }
- auto deform = prepareBoxDeformation(
- globalState != nullptr ? globalState->box : box, MASTER(cr) ? DDRole::Master : DDRole::Agent,
- PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mygroup, *inputrec);
+ auto deform = prepareBoxDeformation(globalState != nullptr ? globalState->box : box,
+ MASTER(cr) ? DDRole::Master : DDRole::Agent,
+ PAR(cr) ? NumRanks::Multiple : NumRanks::Single,
+ cr->mpi_comm_mygroup,
+ *inputrec);
#if GMX_FAHCORE
/* We have to remember the generation's first step before reading checkpoint.
// Finish applying initial simulation state information from external sources on all ranks.
// Reconcile checkpoint file data with Mdrunner state established up to this point.
- applyLocalState(*inputHolder_.get(), logFileHandle, cr, domdecOptions.numCells,
- inputrec.get(), globalState.get(), &observablesHistory,
- mdrunOptions.reproducible, mdModules_->notifier(),
- modularSimulatorCheckpointData.get(), useModularSimulator);
+ applyLocalState(*inputHolder_.get(),
+ logFileHandle,
+ cr,
+ domdecOptions.numCells,
+ inputrec.get(),
+ globalState.get(),
+ &observablesHistory,
+ mdrunOptions.reproducible,
+ mdModules_->notifiers(),
+ modularSimulatorCheckpointData.get(),
+ useModularSimulator);
// TODO: (#3652) Synchronize filesystem state, SimulationInput contents, and program
// invariants
// on all code paths.
gmx_bcast(sizeof(box), box, cr->mpiDefaultCommunicator);
}
- if (inputrec->cutoff_scheme != ecutsVERLET)
+ if (inputrec->cutoff_scheme != CutoffScheme::Verlet)
{
gmx_fatal(FARGS,
"This group-scheme .tpr file can no longer be run by mdrun. Please update to the "
* increase rlist) tries to check if the newly chosen value fits with the DD scheme. As this is
* run before any DD scheme is set up, this check is never executed. See #3334 for more details.
*/
- prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box,
+ prepare_verlet_scheme(fplog,
+ cr,
+ inputrec.get(),
+ nstlist_cmdline,
+ mtop,
+ box,
useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
*hwinfo_->cpuInfo);
+ // We need to decide on update groups early, as this affects
+ // inter-domain communication distances.
+ auto updateGroupingsPerMoleculeType = makeUpdateGroupingsPerMoleculeType(mtop);
+ const real maxUpdateGroupRadius = computeMaxUpdateGroupRadius(
+ mtop, updateGroupingsPerMoleculeType, maxReferenceTemperature(*inputrec));
+ const real cutoffMargin = std::sqrt(max_cutoff2(inputrec->pbcType, box)) - inputrec->rlist;
+ UpdateGroups updateGroups = makeUpdateGroups(mdlog,
+ std::move(updateGroupingsPerMoleculeType),
+ maxUpdateGroupRadius,
+ useDomainDecomposition,
+ systemHasConstraintsOrVsites(mtop),
+ cutoffMargin);
+
// This builder is necessary while we have multi-part construction
// of DD. Before DD is constructed, we use the existence of
// the builder object to indicate that further construction of DD
if (useDomainDecomposition)
{
ddBuilder = std::make_unique<DomainDecompositionBuilder>(
- mdlog, cr, domdecOptions, mdrunOptions, mtop, *inputrec, box,
- positionsFromStatePointer(globalState.get()));
+ mdlog,
+ cr,
+ domdecOptions,
+ mdrunOptions,
+ mtop,
+ *inputrec,
+ mdModules_->notifiers(),
+ box,
+ updateGroups.updateGroupingPerMoleculeType(),
+ updateGroups.useUpdateGroups(),
+ updateGroups.maxUpdateGroupRadius(),
+ positionsFromStatePointer(globalState.get()),
+ useGpuForNonbonded,
+ useGpuForPme);
}
else
{
// Produce the task assignment for this rank - done after DD is constructed
GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
- gpuIdsToUse, userGpuTaskAssignment, *hwinfo_, simulationCommunicator, physicalNodeComm,
- nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
- useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
+ availableDevices,
+ userGpuTaskAssignment,
+ *hwinfo_,
+ simulationCommunicator,
+ physicalNodeComm,
+ nonbondedTarget,
+ pmeTarget,
+ bondedTarget,
+ updateTarget,
+ useGpuForNonbonded,
+ useGpuForPme,
+ thisRankHasDuty(cr, DUTY_PP),
// TODO cr->duty & DUTY_PME should imply that a PME
// algorithm is active, but currently does not.
EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
ddBuilder.reset(nullptr);
// Note that local state still does not exist yet.
}
+ // Ensure that all atoms within the same update group are in the
+ // same periodic image. Otherwise, a simulation that did not use
+ // update groups (e.g. a single-rank simulation) cannot always be
+ // correctly restarted in a way that does use update groups
+ // (e.g. a multi-rank simulation).
+ if (isSimulationMasterRank)
+ {
+ const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
+ if (useUpdateGroups)
+ {
+ putUpdateGroupAtomsInSamePeriodicImage(*cr->dd, mtop, globalState->box, globalState->x);
+ }
+ }
// The GPU update is decided here because we need to know whether the constraints or
- // SETTLEs can span accross the domain borders (i.e. whether or not update groups are
+ // SETTLEs can span across the domain borders (i.e. whether or not update groups are
// defined). This is only known after DD is initialized, hence decision on using GPU
// update is done so late.
try
{
- const bool useUpdateGroups = cr->dd ? ddUsesUpdateGroups(*cr->dd) : false;
const bool haveFrozenAtoms = inputrecFrozenAtoms(inputrec.get());
- useGpuForUpdate = decideWhetherToUseGpuForUpdate(
- useDomainDecomposition, useUpdateGroups, pmeRunMode, domdecOptions.numPmeRanks > 0,
- useGpuForNonbonded, updateTarget, gpusWereDetected, *inputrec, mtop,
- doEssentialDynamics, gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
- replExParams.exchangeInterval > 0, haveFrozenAtoms, doRerun, devFlags, mdlog);
+ useGpuForUpdate = decideWhetherToUseGpuForUpdate(useDomainDecomposition,
+ updateGroups.useUpdateGroups(),
+ pmeRunMode,
+ domdecOptions.numPmeRanks > 0,
+ useGpuForNonbonded,
+ updateTarget,
+ gpusWereDetected,
+ *inputrec,
+ mtop,
+ doEssentialDynamics,
+ gmx_mtop_ftype_count(mtop, F_ORIRES) > 0,
+ haveFrozenAtoms,
+ doRerun,
+ devFlags,
+ mdlog);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
MdrunScheduleWorkload runScheduleWork;
- bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(
- devFlags, havePPDomainDecomposition(cr), useGpuForNonbonded, useModularSimulator,
- doRerun, EI_ENERGY_MINIMIZATION(inputrec->eI));
+ bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(devFlags,
+ havePPDomainDecomposition(cr),
+ useGpuForNonbonded,
+ useModularSimulator,
+ doRerun,
+ EI_ENERGY_MINIMIZATION(inputrec->eI));
// Also populates the simulation constant workload description.
- runScheduleWork.simulationWork = createSimulationWorkload(
- *inputrec, disableNonbondedCalculation, devFlags, useGpuForNonbonded, pmeRunMode,
- useGpuForBonded, useGpuForUpdate, useGpuDirectHalo);
+ runScheduleWork.simulationWork = createSimulationWorkload(*inputrec,
+ disableNonbondedCalculation,
+ devFlags,
+ havePPDomainDecomposition(cr),
+ useGpuForNonbonded,
+ pmeRunMode,
+ useGpuForBonded,
+ useGpuForUpdate,
+ useGpuDirectHalo);
std::unique_ptr<DeviceStreamManager> deviceStreamManager = nullptr;
// where appropriate.
if (!userGpuTaskAssignment.empty())
{
- gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse);
+ gpuTaskAssignments.logPerformanceHints(mdlog, numAvailableDevices);
}
if (PAR(cr))
.appendTextFormatted(
"This is simulation %d out of %d running as a composite GROMACS\n"
"multi-simulation job. Setup for this simulation:\n",
- ms->simulationIndex_, ms->numSimulations_);
+ ms->simulationIndex_,
+ ms->numSimulations_);
}
GMX_LOG(mdlog.warning)
- .appendTextFormatted("Using %d MPI %s\n", cr->nnodes,
+ .appendTextFormatted("Using %d MPI %s\n",
+ cr->nnodes,
# if GMX_THREAD_MPI
cr->nnodes == 1 ? "thread" : "threads"
# else
// the OpenMP support.
gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, FALSE);
/* Check and update the number of OpenMP threads requested */
- checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_,
- pmeRunMode, mtop, *inputrec);
-
- gmx_omp_nthreads_init(mdlog, cr, hwinfo_->nthreads_hw_avail, physicalNodeComm.size_,
- hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
-
- // Enable FP exception detection, but not in
- // Release mode and not for compilers with known buggy FP
- // exception support (clang with any optimization) or suspected
- // buggy FP exception support (gcc 7.* with optimization).
-#if !defined NDEBUG \
- && !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \
- && defined __OPTIMIZE__)
- const bool bEnableFPE = true;
-#else
- const bool bEnableFPE = false;
-#endif
+ checkAndUpdateRequestedNumOpenmpThreads(
+ &hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_, pmeRunMode, mtop, *inputrec);
+
+ gmx_omp_nthreads_init(mdlog,
+ cr,
+ hwinfo_->nthreads_hw_avail,
+ physicalNodeComm.size_,
+ hw_opt.nthreads_omp,
+ hw_opt.nthreads_omp_pme,
+ !thisRankHasDuty(cr, DUTY_PP));
+
+ const bool bEnableFPE = gmxShouldEnableFPExceptions();
// FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run()
if (bEnableFPE)
{
}
/* Now that we know the setup is consistent, check for efficiency */
- check_resource_division_efficiency(hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(),
- mdrunOptions.ntompOptionIsSet, cr, mdlog);
+ check_resource_division_efficiency(
+ hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(), mdrunOptions.ntompOptionIsSet, cr, mdlog);
/* getting number of PP/PME threads on this MPI / tMPI rank.
PME: env variable should be read only on one node to make sure it is
identical everywhere;
*/
- const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
- : gmx_omp_nthreads_get(emntPME);
- checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology,
- physicalNodeComm, mdlog);
+ const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP)
+ ? gmx_omp_nthreads_get(ModuleMultiThread::Nonbonded)
+ : gmx_omp_nthreads_get(ModuleMultiThread::Pme);
+ checkHardwareOversubscription(
+ numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology, physicalNodeComm, mdlog);
// Enable Peer access between GPUs where available
// Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
&& (runScheduleWork.simulationWork.useGpuHaloExchange
|| runScheduleWork.simulationWork.useGpuPmePpCommunication))
{
- setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
+ setupGpuDevicePeerAccess(gpuTaskAssignments.deviceIdsAssigned(), mdlog);
}
if (hw_opt.threadAffinity != ThreadAffinity::Off)
gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, TRUE);
int numThreadsOnThisNode, intraNodeThreadOffset;
- analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
- &intraNodeThreadOffset);
+ analyzeThreadsOnThisNode(
+ physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode, &intraNodeThreadOffset);
/* Set the CPU affinity */
- gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo_->hardwareTopology, numThreadsOnThisRank,
- numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
+ gmx_set_thread_affinity(mdlog,
+ cr,
+ &hw_opt,
+ *hwinfo_->hardwareTopology,
+ numThreadsOnThisRank,
+ numThreadsOnThisNode,
+ intraNodeThreadOffset,
+ nullptr);
}
if (mdrunOptions.timingOptions.resetStep > -1)
"The -resetstep functionality is deprecated, and may be removed in a "
"future version.");
}
- wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
+ std::unique_ptr<gmx_wallcycle> wcycle =
+ wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr);
if (PAR(cr))
{
/* Master synchronizes its value of reset_counters with all nodes
* including PME only nodes */
- int64_t reset_counters = wcycle_get_reset_counters(wcycle);
+ int64_t reset_counters = wcycle_get_reset_counters(wcycle.get());
gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim);
- wcycle_set_reset_counters(wcycle, reset_counters);
+ wcycle_set_reset_counters(wcycle.get(), reset_counters);
}
// Membrane embedding must be initialized before we call init_forcerec()
- membedHolder.initializeMembed(fplog, filenames.size(), filenames.data(), &mtop, inputrec.get(),
- globalState.get(), cr, &mdrunOptions.checkpointOptions.period);
+ membedHolder.initializeMembed(fplog,
+ filenames.size(),
+ filenames.data(),
+ &mtop,
+ inputrec.get(),
+ globalState.get(),
+ cr,
+ &mdrunOptions.checkpointOptions.period);
const bool thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
std::unique_ptr<MDAtoms> mdAtoms;
std::unique_ptr<VirtualSitesHandler> vsite;
- std::unique_ptr<GpuBonded> gpuBonded;
t_nrnb nrnb;
if (thisRankHasDuty(cr, DUTY_PP))
{
- mdModulesNotifier.notify(*cr);
- mdModulesNotifier.notify(&atomSets);
- mdModulesNotifier.notify(inputrec->pbcType);
- mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
+ setupNotifier.notify(*cr);
+ setupNotifier.notify(&atomSets);
+ setupNotifier.notify(mtop);
+ setupNotifier.notify(inputrec->pbcType);
+ setupNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
/* Initiate forcerecord */
- fr = new t_forcerec;
+ fr = std::make_unique<t_forcerec>();
fr->forceProviders = mdModules_->initForceProviders();
- init_forcerec(fplog, mdlog, fr, inputrec.get(), &mtop, cr, box,
+ init_forcerec(fplog,
+ mdlog,
+ runScheduleWork.simulationWork,
+ fr.get(),
+ *inputrec,
+ mtop,
+ cr,
+ box,
opt2fn("-table", filenames.size(), filenames.data()),
opt2fn("-tablep", filenames.size(), filenames.data()),
- opt2fns("-tableb", filenames.size(), filenames.data()), pforce);
+ opt2fns("-tableb", filenames.size(), filenames.data()),
+ pforce);
// Dirty hack, for fixing disres and orires should be made mdmodules
fr->fcdata->disres = disresdata;
- fr->fcdata->orires = oriresdata;
+ fr->fcdata->orires.swap(oriresData);
// Save a handle to device stream manager to use elsewhere in the code
// TODO: Forcerec is not a correct place to store it.
"GPU PP-PME stream should be valid in order to use GPU PME-PP direct "
"communications.");
fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
- cr->mpi_comm_mysim, cr->dd->pme_nodeid, deviceStreamManager->context(),
+ cr->mpi_comm_mysim,
+ cr->dd->pme_nodeid,
+ &cr->dd->pmeForceReceiveBuffer,
+ deviceStreamManager->context(),
deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
}
- fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo_,
+ fr->nbv = Nbnxm::init_nb_verlet(mdlog,
+ *inputrec,
+ *fr,
+ cr,
+ *hwinfo_,
runScheduleWork.simulationWork.useGpuNonbonded,
- deviceStreamManager.get(), &mtop, box, wcycle);
+ deviceStreamManager.get(),
+ mtop,
+ box,
+ wcycle.get());
// TODO: Move the logic below to a GPU bonded builder
if (runScheduleWork.simulationWork.useGpuBonded)
{
GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
"GPU device stream manager should be valid in order to use GPU "
"version of bonded forces.");
- gpuBonded = std::make_unique<GpuBonded>(
- mtop.ffparams, fr->ic->epsfac * fr->fudgeQQ, deviceStreamManager->context(),
- deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle);
- fr->gpuBonded = gpuBonded.get();
+ fr->listedForcesGpu = std::make_unique<ListedForcesGpu>(
+ mtop.ffparams,
+ fr->ic->epsfac * fr->fudgeQQ,
+ deviceStreamManager->context(),
+ deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)),
+ wcycle.get());
}
/* Initialize the mdAtoms structure.
}
/* Initialize the virtual site communication */
- vsite = makeVirtualSitesHandler(mtop, cr, fr->pbcType);
+ vsite = makeVirtualSitesHandler(
+ mtop, cr, fr->pbcType, updateGroups.updateGroupingPerMoleculeType());
calc_shifts(box, fr->shift_vec);
constructVirtualSitesGlobal(mtop, globalState->x);
}
}
+ // Make the DD reverse topology, now that any vsites that are present are available
+ if (DOMAINDECOMP(cr))
+ {
+ dd_make_reverse_top(fplog, cr->dd, mtop, vsite.get(), *inputrec, domdecOptions.ddBondedChecking);
+ }
if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
{
const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme
? &deviceStreamManager->context()
: nullptr;
- const DeviceStream* pmeStream =
+ const DeviceStream* pmeStream =
runScheduleWork.simulationWork.useGpuPme
- ? &deviceStreamManager->stream(DeviceStreamType::Pme)
- : nullptr;
-
- pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec.get(),
- nChargePerturbed != 0, nTypePerturbed != 0,
- mdrunOptions.reproducible, ewaldcoeff_q, ewaldcoeff_lj,
- gmx_omp_nthreads_get(emntPME), pmeRunMode, nullptr,
- deviceContext, pmeStream, pmeGpuProgram.get(), mdlog);
+ ? &deviceStreamManager->stream(DeviceStreamType::Pme)
+ : nullptr;
+
+ pmedata = gmx_pme_init(cr,
+ getNumPmeDomains(cr->dd),
+ inputrec.get(),
+ nChargePerturbed != 0,
+ nTypePerturbed != 0,
+ mdrunOptions.reproducible,
+ ewaldcoeff_q,
+ ewaldcoeff_lj,
+ gmx_omp_nthreads_get(ModuleMultiThread::Pme),
+ pmeRunMode,
+ nullptr,
+ deviceContext,
+ pmeStream,
+ pmeGpuProgram.get(),
+ mdlog);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
if (thisRankHasDuty(cr, DUTY_PP))
{
/* Assumes uniform use of the number of OpenMP threads */
- walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
+ walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(ModuleMultiThread::Default));
if (inputrec->bPull)
{
/* Initialize pull code */
- pull_work = init_pull(fplog, inputrec->pull.get(), inputrec.get(), &mtop, cr, &atomSets,
+ pull_work = init_pull(fplog,
+ inputrec->pull.get(),
+ inputrec.get(),
+ mtop,
+ cr,
+ &atomSets,
inputrec->fepvals->init_lambda);
if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
{
if (inputrec->bRot)
{
/* Initialize enforced rotation code */
- enforcedRotation = init_rot(fplog, inputrec.get(), filenames.size(), filenames.data(),
- cr, &atomSets, globalState.get(), &mtop, oenv, mdrunOptions,
+ enforcedRotation = init_rot(fplog,
+ inputrec.get(),
+ filenames.size(),
+ filenames.data(),
+ cr,
+ &atomSets,
+ globalState.get(),
+ mtop,
+ oenv,
+ mdrunOptions,
startingBehavior);
}
t_swap* swap = nullptr;
- if (inputrec->eSwapCoords != eswapNO)
+ if (inputrec->eSwapCoords != SwapType::No)
{
/* Initialize ion swapping code */
- swap = init_swapcoords(fplog, inputrec.get(),
+ swap = init_swapcoords(fplog,
+ inputrec.get(),
opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
- &mtop, globalState.get(), &observablesHistory, cr, &atomSets,
- oenv, mdrunOptions, startingBehavior);
+ mtop,
+ globalState.get(),
+ &observablesHistory,
+ cr,
+ &atomSets,
+ oenv,
+ mdrunOptions,
+ startingBehavior);
}
/* Let makeConstraints know whether we have essential dynamics constraints. */
- auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr,
- ms, &nrnb, wcycle, fr->bMolPBC);
+ auto constr = makeConstraints(mtop,
+ *inputrec,
+ pull_work,
+ doEssentialDynamics,
+ fplog,
+ cr,
+ updateGroups.useUpdateGroups(),
+ ms,
+ &nrnb,
+ wcycle.get(),
+ fr->bMolPBC);
/* Energy terms and groups */
gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
// cos acceleration is only supported by md, but older tpr
// files might still combine it with other integrators
- GMX_RELEASE_ASSERT(inputrec->cos_accel == 0.0 || inputrec->eI == eiMD,
+ GMX_RELEASE_ASSERT(inputrec->cos_accel == 0.0 || inputrec->eI == IntegrationAlgorithm::MD,
"cos_acceleration is only supported by integrator=md");
/* Kinetic energy data */
- gmx_ekindata_t ekind;
- init_ekindata(fplog, &mtop, &(inputrec->opts), &ekind, inputrec->cos_accel);
+ gmx_ekindata_t ekind(inputrec->opts.ngtc,
+ inputrec->cos_accel,
+ gmx_omp_nthreads_get(ModuleMultiThread::Update));
/* Set up interactive MD (IMD) */
- auto imdSession =
- makeImdSession(inputrec.get(), cr, wcycle, &enerd, ms, &mtop, mdlog,
- MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(),
- filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior);
+ auto imdSession = makeImdSession(inputrec.get(),
+ cr,
+ wcycle.get(),
+ &enerd,
+ ms,
+ mtop,
+ mdlog,
+ MASTER(cr) ? globalState->x : gmx::ArrayRef<gmx::RVec>(),
+ filenames.size(),
+ filenames.data(),
+ oenv,
+ mdrunOptions.imdOptions,
+ startingBehavior);
if (DOMAINDECOMP(cr))
{
GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
- /* This call is not included in init_domain_decomposition mainly
- * because fr->cginfo_mb is set later.
+ /* This call is not included in init_domain_decomposition
+ * because fr->atomInfoForEachMoleculeBlock is set later.
*/
- dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec.get(),
- domdecOptions.checkBondedInteractions, fr->cginfo_mb);
+ makeBondedLinks(cr->dd, mtop, fr->atomInfoForEachMoleculeBlock);
}
if (runScheduleWork.simulationWork.useGpuBufferOps)
{
fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique<gmx::GpuForceReduction>(
deviceStreamManager->context(),
- deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal), wcycle);
+ deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal),
+ wcycle.get());
fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique<gmx::GpuForceReduction>(
deviceStreamManager->context(),
- deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal), wcycle);
+ deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal),
+ wcycle.get());
}
std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
&& ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME))
|| runScheduleWork.simulationWork.useGpuBufferOps))
{
- GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
- ? GpuApiCallBehavior::Async
- : GpuApiCallBehavior::Sync;
+ GpuApiCallBehavior transferKind =
+ (inputrec->eI == IntegrationAlgorithm::MD && !doRerun && !useModularSimulator)
+ ? GpuApiCallBehavior::Async
+ : GpuApiCallBehavior::Sync;
GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
"GPU device stream manager should be initialized to use GPU.");
stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
- *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle);
+ *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle.get());
fr->stateGpu = stateGpu.get();
}
simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv));
- simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle));
+ simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle.get()));
simulatorBuilder.add(ConstraintsParam(
- constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
- vsite.get()));
+ constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, vsite.get()));
// TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly.
- simulatorBuilder.add(LegacyInput(static_cast<int>(filenames.size()), filenames.data(),
- inputrec.get(), fr));
+ simulatorBuilder.add(LegacyInput(
+ static_cast<int>(filenames.size()), filenames.data(), inputrec.get(), fr.get()));
simulatorBuilder.add(ReplicaExchangeParameters(replExParams));
simulatorBuilder.add(InteractiveMD(imdSession.get()));
- simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifier()));
+ simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifiers()));
simulatorBuilder.add(CenterOfMassPulling(pull_work));
// Todo move to an MDModule
simulatorBuilder.add(IonSwapping(swap));
- simulatorBuilder.add(TopologyData(&mtop, mdAtoms.get()));
+ simulatorBuilder.add(TopologyData(mtop, mdAtoms.get()));
simulatorBuilder.add(BoxDeformationHandle(deform.get()));
simulatorBuilder.add(std::move(modularSimulatorCheckpointData));
{
GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
/* do PME only */
- walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
- gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec.get(), pmeRunMode,
+ walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(ModuleMultiThread::Pme));
+ gmx_pmeonly(pmedata,
+ cr,
+ &nrnb,
+ wcycle.get(),
+ walltime_accounting,
+ inputrec.get(),
+ pmeRunMode,
+ runScheduleWork.simulationWork.useGpuPmePpCommunication,
deviceStreamManager.get());
}
- wallcycle_stop(wcycle, ewcRUN);
+ wallcycle_stop(wcycle.get(), WallCycleCounter::Run);
/* Finish up, write some stuff
* if rerunMD, don't write last frame again
*/
- finish_run(fplog, mdlog, cr, inputrec.get(), &nrnb, wcycle, walltime_accounting,
- fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
+ finish_run(fplog,
+ mdlog,
+ cr,
+ *inputrec,
+ &nrnb,
+ wcycle.get(),
+ walltime_accounting,
+ fr ? fr->nbv.get() : nullptr,
+ pmedata,
+ EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
- // clean up cycle counter
- wallcycle_destroy(wcycle);
deviceStreamManager.reset(nullptr);
// Free PME data
mdAtoms.reset(nullptr);
globalState.reset(nullptr);
mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
- gpuBonded.reset(nullptr);
- /* Free pinned buffers in *fr */
- delete fr;
- fr = nullptr;
+ fr.reset(nullptr); // destruct forcerec before gpu
// TODO convert to C++ so we can get rid of these frees
sfree(disresdata);
- sfree(oriresdata);
if (!hwinfo_->deviceInfoList.empty())
{
{
physicalNodeComm.barrier();
}
- releaseDevice(deviceInfo);
+
+ if (!devFlags.usingCudaAwareMpi)
+ {
+ // Don't reset GPU in case of CUDA-AWARE MPI
+ // UCX creates CUDA buffers which are cleaned-up as part of MPI_Finalize()
+ // resetting the device before MPI_Finalize() results in crashes inside UCX
+ releaseDevice(deviceInfo);
+ }
/* Does what it says */
print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());