- grep -i "error" docs/sphinx-*.log | tee sphinxDiagnostics.log || true
- grep -i "warning" docs/sphinx-*.log | tee -a sphinxDiagnostics.log || true
- if [ -s pythonErrors.log ] ; then echo "Found Python Errors during build"; exit 1; fi
- - if [ -s sphinxErrors.log ] ; then echo "Found errors during Sphinx documentation build"; cat sphinxDiagnostics.log; exit 1; fi
+ - if [ -s sphinxErrors.log ] ; then echo "Found errors during Sphinx documentation build"; cat sphinxErrors.log; exit 1; fi
+ - if [ -s sphinxDiagnostics.log ] ; then echo "Found diagnostic warnings during Sphinx documentation build"; cat sphinxDiagnostics.log; exit 1; fi
- cd ..
artifacts:
name: docs-artifacts-$CI_COMMIT_REF_SLUG
\\\\.gitattributes
INSTALL-dev
cmake/FindCUDA\\\\.cmake
- cmake/FindCUDA
- # both below are needed for CI not to include the build directories
- ccache
- build-package)
+ cmake/FindCUDA)
set(CPACK_SOURCE_IGNORE_FILES ${FILES_NOT_INCLUDED_IN_SOURCE_PACKAGE})
# Get the list of directories added with gmx_cpack_add_generated_source_directory()
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
+include(gmxTestICCNextGen)
+
# Manage setup of the different FFT libraries we can use in Gromacs.
set(PKG_FFT "")
set(PKG_FFT_LIBS "")
# stuff...
set(MKL_MANUALLY FALSE)
if (GMX_FFT_LIBRARY STREQUAL "MKL" AND
- NOT (CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER "11"))
+ NOT ((CMAKE_C_COMPILER_ID MATCHES "Intel" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER "11")
+ OR GMX_ICC_NEXTGEN))
# The user will have to provide the set of magic libraries in
# MKL_LIBRARIES (see below), which we cache (non-advanced), so that they
# don't have to keep specifying it, and can easily see that
find_package(MPI)
if(MPI_C_FOUND)
set(MPI_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS})
+ separate_arguments(MPI_COMPILE_FLAGS)
set(MPI_LINKER_FLAGS ${MPI_C_LINK_FLAGS})
+ separate_arguments(MPI_LINKER_FLAGS)
include_directories(SYSTEM ${MPI_C_INCLUDE_PATH})
list(APPEND GMX_COMMON_LIBRARIES ${MPI_C_LIBRARIES})
endif()
--- /dev/null
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2020, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+
+# CMake detects ICC NextGen (based on LLVM) as Clang
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ include(CheckCXXSourceCompiles)
+ check_cxx_source_compiles(
+ "int main() { return __INTEL_LLVM_COMPILER; }"
+ GMX_ICC_NEXTGEN)
+endif()
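For reference, a minimal standalone C++ probe of the same macro that the CMake check above keys on; this is an editorial sketch, not part of the change, and assumes that ``__INTEL_LLVM_COMPILER`` expands to a version-like integer and is defined only by the LLVM-based Intel compilers:

    // Report whether the compiler in use is an LLVM-based Intel compiler,
    // i.e. whether the macro probed by gmxTestICCNextGen.cmake is defined.
    #include <cstdio>

    int main()
    {
    #if defined(__INTEL_LLVM_COMPILER)
        std::printf("ICC NextGen detected, version macro = %d\n", __INTEL_LLVM_COMPILER);
    #else
        std::printf("Not an LLVM-based Intel compiler\n");
    #endif
        return 0;
    }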
if(NOT DEFINED MPI_IN_PLACE_COMPILE_OK)
MESSAGE(STATUS "Checking for MPI_IN_PLACE")
- set(CMAKE_REQUIRED_FLAGS ${MPI_COMPILE_FLAGS})
+ if(CMAKE_VERSION VERSION_LESS 3.12)
+ foreach(_FLAG ${MPI_COMPILE_FLAGS})
+ set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${_FLAG}")
+ endforeach()
+ else()
+ list(JOIN MPI_COMPILE_FLAGS " " CMAKE_REQUIRED_FLAGS)
+ endif()
set(CMAKE_REQUIRED_INCLUDES ${MPI_INCLUDE_PATH})
set(CMAKE_REQUIRED_LIBRARIES ${MPI_LIBRARIES})
check_cxx_source_compiles(
else()
gmx_add_sphinx_source_files(FILES
dev-manual/releng/index.rst
- dev-manual/releng/jenkins-howto.rst
- dev-manual/releng/jenkins-ui.rst
)
endif()
* Provide information on what changes in the build system (or other parts of
the repository) need special care to not break Jenkins builds.
-Separate page documents how to interact with the Jenkins UI for these builds:
-:doc:`releng/jenkins-ui`.
-:doc:`releng/jenkins-howto` has information on how to do common things with
-Jenkins builds.
-
.. todo:: Add a link to a wiki page about general Jenkins documentation, once
there is more of that.
We are currently switching our build and testing system to use Gitlab
and the integrated CI system, with information for the general system found
-at `https://docs.gitlab.com/ee/ci/yaml/`_. The new configuration for
-the builds and tests can be found in the file ``.gitlab-ci.yml``, with
-the templates for configuring is found in the files in the
+in the official `Gitlab documentation <https://docs.gitlab.com/ee/ci/yaml/>`_.
+The new configuration for the builds and tests can be found in the file
+``.gitlab-ci.yml``, with the templates for configuring it found in the files in the
``admin/ci-templates/`` directory. This section is going to be extended
with individual build information as it becomes available. For now we are
using a combination of building with the previous system on Jenkins
Fixes that affect portability
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Fix compiler errors with Intel compiler
+"""""""""""""""""""""""""""""""""""""""
+
+Fix compiler error with Intel compiler 2019 update 5 and 2020 initial release.
+Compilation was failing with ``mcpcom: core dumped`` for the file :file:`pullutil.cpp`.
Miscellaneous
^^^^^^^^^^^^^
Fixes where mdrun could behave incorrectly
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Fix fatal error with mdrun -multidir with more than 1 rank per simulation
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+:issue:`3296`
+
+Fix deadlock in mdrun runs with multiple ranks and separate PME ranks
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+When multiple PP ranks as well as separate PME ranks are used, mdrun could
+deadlock before starting the PP-PME balancing.
+
+:issue:`3335`
+
+Avoid mdrun assertion failure when running with shells and update on a GPU
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+A check for shells has been added in the mdrun task assignment code,
+so that mdrun falls back to CPU or produces a clear error message
+when attempting to run with shells and update on a GPU.
+
+:issue:`3303`
+
+Allow large prime factors in the mdrun MPI rank count
+"""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The domain decomposition would refuse to run with large prime factors
+in the MPI rank count even when the grid was specified by the user.
+
+:issue:`3336`
+
Fixes for ``gmx`` tools
^^^^^^^^^^^^^^^^^^^^^^^
Fixes that affect portability
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Add support for ICC NextGen
+"""""""""""""""""""""""""""
+
+Add support for the Intel compiler based on LLVM technology.
+To compile GROMACS with this compiler use ``CXX=icpc CXXFLAGS=-qnextgen cmake``.
Miscellaneous
^^^^^^^^^^^^^
Can be set to "auto", "cpu", "gpu."
Defaults to "auto," which currently always uses the CPU.
Setting "gpu" requires that a compatible CUDA GPU is available,
- the simulation is run as a single thread-MPI thread
- and that the |Gromacs| binary is not compiled with real MPI.
+ and that the simulation uses a single rank.
Update and constraints on a GPU are currently not supported
- with free-energy, domain decomposition, virtual sites,
- Ewald surface correction, replica exchange, the pull code,
+ with domain decomposition, free-energy, virtual sites,
+ Ewald surface correction, replica exchange, constraint pulling,
orientation restraints and computational electrophysiology.
- It is possible to extend the ``-update`` functionality by
- setting the ``GMX_FORCE_UPDATE_DEFAULT_GPU`` flag to change
- the default path to use the GPU update if the simulation is
- compatible.
``-gpu_id``
A string that specifies the ID numbers of the GPUs that
|Gromacs| now allows the offloading of the bonded part of the PP
workload to a CUDA-compatible GPU. This is treated as part of the PP
work, and requires that the short-ranged non-bonded task also runs on
-a GPU. It is an advantage usually only when the CPU is relatively weak
-compared with the GPU, perhaps because its workload is too large for
-the available cores. This would likely be the case for free-energy
-calculations.
+a GPU. Offloading the bonded interactions is typically advantageous when
+the amount of CPU resources per GPU is relatively low (either because the
+CPU is weak or because few CPU cores are assigned to a GPU in a run), or
+when other computation is running on the CPU.
+A typical case for the latter is free-energy calculations.
.. _gmx-gpu-update:
.. TODO again, extend this and add some actual useful information concerning performance etc...
|Gromacs| makes it possible to also perform the coordinate update and (if requested)
-constraint calculation on a CUDA-compatible GPU. This allows to having all (compatible)
-parts of a simulation step on the GPU, so that no unnecessary transfers are needed between
-GPU and CPU. This currently only works with single domain cases, and needs to be explicitly
-requested by the user. It is possible to change the default behaviour by setting the
+constraint calculation on a CUDA-compatible GPU. This allows executing all
+(supported) computation of a simulation step on the GPU.
+This feature is supported only in single-domain runs (unless the experimental
+GPU domain decomposition feature is used), and needs to be explicitly requested by the user.
+This is a new parallelization mode where all force and coordinate
+data can be "GPU resident" for a number of steps, typically between neighbor searching steps.
+This has the benefit that there is less coupling between the CPU host and the GPU and,
+on typical MD steps, data does not need to be transferred between the CPU and the GPU.
+In this scheme it is, however, still possible for part of the computation to be
+executed on the CPU concurrently with the GPU computation.
+This helps to support the broad range of |Gromacs| features, not all of which are
+ported to GPUs. At the same time, it also allows improving performance by making
+use of the otherwise mostly idle CPU. It can often be advantageous to move the bonded
+or PME calculation back to the CPU, but the details of this will depend on the
+relative performance of the CPU cores paired in a simulation with a GPU.
+
+It is possible to change the default behaviour by setting the
``GMX_FORCE_UPDATE_DEFAULT_GPU`` environment variable to a non-zero value. In this
case simulations will try to run all parts by default on the GPU, and will only fall
back to the CPU based calculation if the simulation is not compatible.
-Using this pathway is usually advantageous if a strong GPU is used with a weak CPU.
+Using this parallelization mode is typically advantageous in cases where a fast GPU is
+used with a weak CPU, in particular if only a single simulation is assigned to a GPU.
+However, in typical throughput cases where multiple runs are assigned to each GPU,
+offloading everything, especially without moving some of the work back to the CPU,
+can perform worse than the parallelization mode where only the force computation is offloaded.
+
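As an aside, a minimal sketch of how an opt-in flag such as ``GMX_FORCE_UPDATE_DEFAULT_GPU`` can be probed using the non-zero-value convention described above; the helper name is hypothetical and the actual mdrun code may simply test whether the variable is set:

    // Hypothetical helper: treat an unset variable or a value of "0" as disabled.
    #include <cstdlib>
    #include <cstring>

    static bool envFlagIsEnabled(const char* name)
    {
        const char* value = std::getenv(name);
        return value != nullptr && std::strcmp(value, "0") != 0;
    }

    // e.g. envFlagIsEnabled("GMX_FORCE_UPDATE_DEFAULT_GPU")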
Assigning tasks to GPUs
.......................
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the.
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2005,2006,2007,2008,2009 by the GROMACS development team.
+ * Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
+ * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
+ * Copyright (c) 2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
systemInfo_ = getSystemInfo(mdlog_, cr_, options_, mtop_, ir_, box, xGlobal);
- const int numRanksRequested = cr_->nnodes;
- checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype), options_.numPmeRanks);
+ const int numRanksRequested = cr_->nnodes;
+ const bool checkForLargePrimeFactors = (options_.numCells[0] <= 0);
+ checkForValidRankCountRequests(numRanksRequested, EEL_PME(ir_.coulombtype),
+ options_.numPmeRanks, checkForLargePrimeFactors);
// DD grid setup uses a different cell size limit for
// automated setup than the one in systemInfo_. The latter is used
return cellSizeLimit;
}
-void checkForValidRankCountRequests(const int numRanksRequested, const bool usingPme, const int numPmeRanksRequested)
+void checkForValidRankCountRequests(const int numRanksRequested,
+ const bool usingPme,
+ const int numPmeRanksRequested,
+ const bool checkForLargePrimeFactors)
{
int numPPRanksRequested = numRanksRequested;
if (usingPme && numPmeRanksRequested > 0)
// Once the rank count is large enough, it becomes worth
// suggesting improvements to the user.
const int minPPRankCountToCheckForLargePrimeFactors = 13;
- if (numPPRanksRequested >= minPPRankCountToCheckForLargePrimeFactors)
+ if (checkForLargePrimeFactors && numPPRanksRequested >= minPPRankCountToCheckForLargePrimeFactors)
{
const int largestDivisor = largest_divisor(numPPRanksRequested);
/* Check if the largest divisor is more than numPPRanks ^ (2/3) */
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* Issues a fatal error if there are more PME ranks than PP, or if the
* count of PP ranks has a prime factor that is too large to be likely
* to have good performance. */
-void checkForValidRankCountRequests(int numRanksRequested, bool usingPme, int numPmeRanksRequested);
+void checkForValidRankCountRequests(int numRanksRequested,
+ bool usingPme,
+ int numPmeRanksRequested,
+ bool checkForLargePrimeFactors);
/*! \brief Return the minimum cell size (in nm) required for DD */
real getDDGridSetupCellSizeLimit(const gmx::MDLogger& mdlog,
gmx_bool bTriggerOnDLB; /**< trigger balancing only on DD DLB */
gmx_bool bBalance; /**< are we in the balancing phase, i.e. trying different setups? */
int nstage; /**< the current maximum number of stages */
+ bool startupTimeDelayElapsed; /**< Has the c_startupTimeDelay elapsed indicating that the balancing can start. */
real cut_spacing; /**< the minimum cutoff / PME grid spacing ratio */
real rcut_vdw; /**< Vdw cutoff (does not change) */
int stage; /**< the current stage */
- int cycles_n; /**< step cycle counter cumulative count */
- double cycles_c; /**< step cycle counter cumulative cycles */
- double startTime; /**< time stamp when the balancing was started (relative to the UNIX epoch start).*/
+ int cycles_n; /**< step cycle counter cumulative count */
+ double cycles_c; /**< step cycle counter cumulative cycles */
+ double startTime; /**< time stamp when the balancing was started on the master rank (relative to the UNIX epoch start).*/
};
/* TODO The code in this file should call this getter, rather than
pme_lb->end = 0;
pme_lb->elimited = epmelblimNO;
- pme_lb->cycles_n = 0;
- pme_lb->cycles_c = 0;
- pme_lb->startTime = gmx_gettime();
+ pme_lb->cycles_n = 0;
+ pme_lb->cycles_c = 0;
+ // only master ranks do timing
+ if (!PAR(cr) || (DOMAINDECOMP(cr) && DDMASTER(cr->dd)))
+ {
+ pme_lb->startTime = gmx_gettime();
+ }
if (!wallcycle_have_counter())
{
* We also want to skip a number of steps and seconds while
* the CPU and GPU, when used, performance stabilizes.
*/
+ if (!PAR(cr) || (DOMAINDECOMP(cr) && DDMASTER(cr->dd)))
+ {
+ pme_lb->startupTimeDelayElapsed = (gmx_gettime() - pme_lb->startTime >= c_startupTimeDelay);
+ }
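+ // Share the master rank's result so that all ranks take the same branch below.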
+ if (DOMAINDECOMP(cr))
+ {
+ dd_bcast(cr->dd, sizeof(bool), &pme_lb->startupTimeDelayElapsed);
+ }
+
if (pme_lb->cycles_n == 0 || step_rel < c_numFirstTuningIntervalSkip * ir.nstlist
- || gmx_gettime() - pme_lb->startTime < c_startupTimeDelay)
+ || !pme_lb->startupTimeDelayElapsed)
{
*bPrinting = FALSE;
-
return;
}
/* Sanity check, we expect nstlist cycle counts */
*/
else if (step_rel >= c_numFirstTuningIntervalSkipWithSepPme * ir.nstlist)
{
+ GMX_ASSERT(DOMAINDECOMP(cr), "Domain decomposition should be active here");
if (DDMASTER(cr->dd))
{
/* If PME rank load is too high, start tuning. If
if (bX)
{
/* Communicate the coordinates */
- pme_dd_sendrecv(atc, FALSE, i, pme->bufv[buf_pos], scount * sizeof(rvec),
- atc->xBuffer[local_pos], rcount * sizeof(rvec));
+ pme_dd_sendrecv(atc, FALSE, i, pme->bufv + buf_pos, scount * sizeof(rvec),
+ atc->xBuffer.data() + local_pos, rcount * sizeof(rvec));
}
/* Communicate the coefficients */
pme_dd_sendrecv(atc, FALSE, i, pme->bufr + buf_pos, scount * sizeof(real),
local_pos += atc->slabCommSetup[i].rcount;
}
}
+ GMX_ASSERT(local_pos == atc->numAtoms(), "After receiving we should have numAtoms coordinates");
}
void dd_pmeredist_f(struct gmx_pme_t* pme, PmeAtomComm* atc, gmx::ArrayRef<gmx::RVec> f, gmx_bool bAddF)
if (scount > 0 || rcount > 0)
{
/* Communicate the forces */
- pme_dd_sendrecv(atc, TRUE, i, atc->f[local_pos], scount * sizeof(rvec),
- pme->bufv[buf_pos], rcount * sizeof(rvec));
+ pme_dd_sendrecv(atc, TRUE, i, atc->f.data() + local_pos, scount * sizeof(rvec),
+ pme->bufv + buf_pos, rcount * sizeof(rvec));
local_pos += scount;
}
atc->slabCommSetup[commnode].buf_index = buf_pos;
gmx_fatal_mpi_va(f_errno, file, line, bMaster, bFinalize, fmt, ap);
va_end(ap);
}
-
-void simulationBarrier(const t_commrec* cr)
-{
- if (PAR(cr))
- {
-#if GMX_MPI
- MPI_Barrier(cr->mpi_comm_mysim);
-#endif
- }
-}
* for all processes.
*/
-//! Make a barrier across all ranks of this simulation
-void simulationBarrier(const t_commrec* cr);
-
#endif
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013-2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017, The GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
warning_note(wi,
"Removing center of mass motion in the presence of position restraints might "
- "cause artifacts");
+ "cause artifacts. When you are using position restraints to equilibrate a "
+ "macro-molecule, the artifacts are usually negligible.");
}
if (ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_tol > 0 && ir->nstlist > 1
userGpuTaskAssignment = parseUserTaskAssignmentString(hw_opt.userGpuTaskAssignment);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
- auto nonbondedTarget = findTaskTarget(nbpu_opt);
- auto pmeTarget = findTaskTarget(pme_opt);
- auto pmeFftTarget = findTaskTarget(pme_fft_opt);
- auto bondedTarget = findTaskTarget(bonded_opt);
- auto updateTarget = findTaskTarget(update_opt);
- PmeRunMode pmeRunMode = PmeRunMode::None;
+ auto nonbondedTarget = findTaskTarget(nbpu_opt);
+ auto pmeTarget = findTaskTarget(pme_opt);
+ auto pmeFftTarget = findTaskTarget(pme_fft_opt);
+ auto bondedTarget = findTaskTarget(bonded_opt);
+ auto updateTarget = findTaskTarget(update_opt);
FILE* fplog = nullptr;
// If we are appending, we don't write log output because we need
useGpuForNonbonded, useGpuForPme, bondedTarget, canUseGpuForBonded,
EVDW_PME(inputrec->vdwtype), EEL_PME_EWALD(inputrec->coulombtype),
domdecOptions.numPmeRanks, gpusWereDetected);
-
- pmeRunMode = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU);
- if (pmeRunMode == PmeRunMode::GPU)
- {
- if (pmeFftTarget == TaskTarget::Cpu)
- {
- pmeRunMode = PmeRunMode::Mixed;
- }
- }
- else if (pmeFftTarget == TaskTarget::Gpu)
- {
- gmx_fatal(FARGS,
- "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
- "on CPU you should not be using -pmefft.");
- }
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
+ const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec);
+
// Initialize development feature flags that are enabled by environment variables
// and report those features that are enabled.
const DevelopmentFeatureFlags devFlags =
// Produce the task assignment for this rank.
GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
GpuTaskAssignments gpuTaskAssignments = gpuTaskAssignmentsBuilder.build(
- gpuIdsToUse, userGpuTaskAssignment, *hwinfo, cr, ms, physicalNodeComm, nonbondedTarget,
- pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded, useGpuForPme,
- thisRankHasDuty(cr, DUTY_PP),
+ gpuIdsToUse, userGpuTaskAssignment, *hwinfo, communicator, physicalNodeComm,
+ nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
+ useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
// TODO cr->duty & DUTY_PME should imply that a PME
// algorithm is active, but currently does not.
EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
- const bool printHostName = (cr->nnodes > 1);
- gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode);
-
- // If the user chose a task assignment, give them some hints
- // where appropriate.
- if (!userGpuTaskAssignment.empty())
- {
- gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
- }
-
// Get the device handles for the modules, nullptr when no task is assigned.
gmx_device_info_t* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
gmx_device_info_t* pmeDeviceInfo = gpuTaskAssignments.initPmeDevice();
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
+ const bool printHostName = (cr->nnodes > 1);
+ gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
+
+ // If the user chose a task assignment, give them some hints
+ // where appropriate.
+ if (!userGpuTaskAssignment.empty())
+ {
+ gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
+ }
+
if (PAR(cr))
{
/* After possible communicator splitting in make_dd_communicators.
}
}
-/*! \brief Count the different particle types in a system
- *
- * Routine prints a warning to stderr in case an unknown particle type
- * is encountered.
- * \param[in] fplog Print what we have found if not NULL
- * \param[in] mtop Molecular topology.
- * \returns Array holding the number of particles of a type
- */
-std::array<int, eptNR> countPtypes(FILE* fplog, const gmx_mtop_t* mtop)
-{
- std::array<int, eptNR> nptype = { { 0 } };
- /* Count number of shells, and find their indices */
- for (int i = 0; (i < eptNR); i++)
- {
- nptype[i] = 0;
- }
-
- gmx_mtop_atomloop_block_t aloopb = gmx_mtop_atomloop_block_init(mtop);
- int nmol;
- const t_atom* atom;
- while (gmx_mtop_atomloop_block_next(aloopb, &atom, &nmol))
- {
- switch (atom->ptype)
- {
- case eptAtom:
- case eptVSite:
- case eptShell: nptype[atom->ptype] += nmol; break;
- default:
- fprintf(stderr, "Warning unsupported particle type %d in countPtypes",
- static_cast<int>(atom->ptype));
- }
- }
- if (fplog)
- {
- /* Print the number of each particle type */
- int n = 0;
- for (const auto& i : nptype)
- {
- if (i != 0)
- {
- fprintf(fplog, "There are: %d %ss\n", i, ptype_str[n]);
- }
- n++;
- }
- }
- return nptype;
-}
-
gmx_shellfc_t* init_shell_flexcon(FILE* fplog, const gmx_mtop_t* mtop, int nflexcon, int nstcalcenergy, bool usingDomainDecomposition)
{
gmx_shellfc_t* shfc;
#define NBT asize(bondtypes)
const gmx_ffparams_t* ffparams;
- std::array<int, eptNR> n = countPtypes(fplog, mtop);
- nshell = n[eptShell];
+ const std::array<int, eptNR> numParticles = gmx_mtop_particletype_count(*mtop);
+ if (fplog)
+ {
+ /* Print the number of each particle type */
+ int pType = 0;
+ for (const auto& n : numParticles)
+ {
+ if (n != 0)
+ {
+ fprintf(fplog, "There are: %d %ss\n", n, ptype_str[pType]);
+ }
+ pType++;
+ }
+ }
+
+ nshell = numParticles[eptShell];
if (nshell == 0 && nflexcon == 0)
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
return (isMaster && isMasterSim(ms));
}
-
-void multiSimBarrier(const gmx_multisim_t* ms)
-{
- if (isMultiSim(ms))
- {
-#if GMX_MPI
- if (ms->mpi_comm_masters != MPI_COMM_NULL)
- {
- MPI_Barrier(ms->mpi_comm_masters);
- }
-#endif
- }
-}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* This rank prints the remaining run time etc. */
bool isMasterSimMasterRank(const gmx_multisim_t* ms, bool isMaster);
-//! Make a barrier across all multi-simulation master ranks
-void multiSimBarrier(const gmx_multisim_t* ms);
-
#endif
const bool isVerbose = mdrunOptions.verbose;
const bool isDynamicBox = inputrecDynamicBox(inputrec);
// Check for polarizable models and flexible constraints
- if (ShellFCElement::doShellsOrFlexConstraints(&topologyHolder_->globalTopology(),
+ if (ShellFCElement::doShellsOrFlexConstraints(topologyHolder_->globalTopology(),
constr ? constr->numFlexibleConstraints() : 0))
{
auto shellFCElement = std::make_unique<ShellFCElement>(
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/mdrun/shellfc.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/topology/atoms.h"
+#include "gromacs/topology/mtop_util.h"
#include "energyelement.h"
#include "freeenergyperturbationelement.h"
namespace gmx
{
-bool ShellFCElement::doShellsOrFlexConstraints(const gmx_mtop_t* mtop, int nflexcon)
+bool ShellFCElement::doShellsOrFlexConstraints(const gmx_mtop_t& mtop, int nflexcon)
{
if (nflexcon != 0)
{
return true;
}
- std::array<int, eptNR> n = countPtypes(nullptr, mtop);
+ std::array<int, eptNR> n = gmx_mtop_particletype_count(mtop);
return n[eptShell] != 0;
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
void elementTeardown() override;
//! Whether either shells or flexible constraints are used
- static bool doShellsOrFlexConstraints(const gmx_mtop_t* mtop, int nflexcon);
+ static bool doShellsOrFlexConstraints(const gmx_mtop_t& mtop, int nflexcon);
private:
//! ITopologyHolderClient implementation
}
/* calculates center of mass of selection index from all coordinates x */
+// Compiler segfault with 2019_update_5 and 2020_initial
+#if defined(__INTEL_COMPILER) \
+ && ((__INTEL_COMPILER == 1900 && __INTEL_COMPILER_UPDATE >= 5) || __INTEL_COMPILER >= 1910)
+# pragma intel optimization_level 2
+#endif
void pull_calc_coms(const t_commrec* cr,
pull_t* pull,
const t_mdatoms* md,
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
/*! \cond libapi */
-/*! \addtogroup module_simd */
+/*! \ingroup module_simd */
/*! \{ */
/*! \name SIMD implementation capability definitions
return false;
}
+
+PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec)
+{
+ if (!EEL_PME(inputrec.coulombtype))
+ {
+ return PmeRunMode::None;
+ }
+
+ if (useGpuForPme)
+ {
+ if (pmeFftTarget == TaskTarget::Cpu)
+ {
+ return PmeRunMode::Mixed;
+ }
+ else
+ {
+ return PmeRunMode::GPU;
+ }
+ }
+ else
+ {
+ if (pmeFftTarget == TaskTarget::Gpu)
+ {
+ gmx_fatal(FARGS,
+ "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
+ "on CPU you should not be using -pmefft.");
+ }
+ return PmeRunMode::CPU;
+ }
+}
+
bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
const bool useGpuForPme,
const TaskTarget bondedTarget,
errorMessage +=
"Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
}
- // Since only direct GPU communications are supported with GPU update, PME should be fully offloaded in DD and PME only cases.
- if (pmeRunMode != PmeRunMode::GPU && (isDomainDecomposition || havePmeOnlyRank))
+
+ // If PME is active (i.e. not PmeRunMode::None), then GPU update requires
+ // either a single-rank run, or that PME runs fully on the GPU.
+ const bool pmeRunningOnCpu = (pmeRunMode == PmeRunMode::CPU || pmeRunMode == PmeRunMode::Mixed);
+ if (pmeRunningOnCpu && isDomainDecomposition)
+ {
+ errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
+ }
+ if (pmeRunningOnCpu && havePmeOnlyRank)
{
- errorMessage += "PME should run on GPU.\n";
+ errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
}
+
if (!gpusWereDetected)
{
errorMessage += "Compatible GPUs must have been found.\n";
// Actually all free-energy options except for mass and constraint perturbation are supported
errorMessage += "Free energy perturbations are not supported.\n";
}
+ const auto particleTypes = gmx_mtop_particletype_count(mtop);
+ if (particleTypes[eptShell] > 0)
+ {
+ errorMessage += "Shells are not supported.\n";
+ }
if (useReplicaExchange)
{
errorMessage += "Replica exchange simulations are not supported.\n";
int numPmeRanksPerSimulation,
bool gpusWereDetected);
+/*! \brief Determine PME run mode.
+ *
+ * Given the PME task assignment in \p useGpuForPme and the user-provided
+ * FFT task target in \p pmeFftTarget, returns a PME run mode for the
+ * current run. It also checks the compatibility of the two.
+ *
+ * \note Aborts the run upon incompatible values of \p useGpuForPme and \p pmeFftTarget.
+ *
+ * \param[in] useGpuForPme PME task assignment, true if PME task is mapped to the GPU.
+ * \param[in] pmeFftTarget The user's choice for -pmefft for where to assign the FFT
+ *                         work of the PME task.
+ * \param[in] inputrec     The user input record.
+ */
+PmeRunMode determinePmeRunMode(bool useGpuForPme, const TaskTarget& pmeFftTarget, const t_inputrec& inputrec);
+
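For orientation, a sketch of the intended call pattern (mirroring the call added to runner.cpp in this change; ``useGpuForPme``, ``pmeFftTarget`` and ``inputrec`` are assumed to be available in the caller):

    // Classify the PME run mode once the PME task assignment is known.
    const PmeRunMode pmeRunMode = determinePmeRunMode(useGpuForPme, pmeFftTarget, *inputrec);
    if (pmeRunMode == PmeRunMode::Mixed)
    {
        // PME spread/gather are offloaded while the FFT and solve parts stay on the CPU.
    }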
/*! \brief Decide whether the simulation will try to run bonded tasks on GPUs.
*
* \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
size_t numRanks,
bool printHostName,
bool useGpuForBonded,
- PmeRunMode pmeRunMode)
+ PmeRunMode pmeRunMode,
+ bool useGpuForUpdate)
{
size_t numGpusInUse = countUniqueGpuIdsUsed(gpuTaskAssignmentOnRanksOfThisNode);
if (numGpusInUse == 0)
{
output += gmx::formatString("PME tasks will do all aspects on the GPU\n");
}
+ output += gmx::formatString("Coordinates will be updated and constrained on the %s.",
+ useGpuForUpdate ? "GPU" : "CPU");
}
/* NOTE: this print is only for and on one physical node */
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* \param[in] numGpuTasksOnThisNode The number of GPU tasks on this node.
* \param[in] numPpRanks Number of PP ranks on this node
* \param[in] printHostName Print the hostname in the usage information
- * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the
- * GPU \param[in] pmeRunMode Describes the execution of PME tasks
+ * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on GPU
+ * \param[in] pmeRunMode Describes the execution of PME tasks
+ * \param[in] useGpuForUpdate Whether update will run on the GPU.
*
* \throws std::bad_alloc if out of memory */
void reportGpuUsage(const MDLogger& mdlog,
size_t numPpRanks,
bool printHostName,
bool useGpuForBonded,
- PmeRunMode pmeRunMode);
+ PmeRunMode pmeRunMode,
+ bool useGpuForUpdate);
} // namespace gmx
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "taskassignment.h"
+#include "config.h"
+
#include <algorithm>
#include <exception>
#include <string>
/*! \brief Return on each rank the total count over all ranks of all
* simulations. */
-int countOverAllRanks(const t_commrec* cr, const gmx_multisim_t* ms, const int countOnThisRank)
+int countOverAllRanks(MPI_Comm comm, int countOnThisRank)
{
- int countOverAllRanksValue = countOnThisRank;
- if (PAR(cr))
+ int sum;
+#if GMX_MPI
+ int numRanks;
+ MPI_Comm_size(comm, &numRanks);
+ if (numRanks > 1)
{
- // Count over the ranks of this simulation.
- gmx_sumi(1, &countOverAllRanksValue, cr);
+ MPI_Allreduce(&countOnThisRank, &sum, 1, MPI_INT, MPI_SUM, comm);
}
- if (isMultiSim(ms))
+ else
+#else
+ GMX_UNUSED_VALUE(comm);
+#endif
{
- // Count over the ranks of all simulations.
- gmx_sumi_sim(1, &countOverAllRanksValue, ms);
- if (PAR(cr))
- {
- // Propagate the information from other simulations back
- // to non-master ranks so they can all agree on future
- // behavior.
- gmx_bcast(sizeof(decltype(countOverAllRanksValue)), &countOverAllRanksValue, cr);
- }
+ sum = countOnThisRank;
+ }
+
+ return sum;
+}
+
+/*! \brief Barrier over all ranks in \p comm */
+void barrierOverAllRanks(MPI_Comm comm)
+{
+#if GMX_MPI
+ int numRanks;
+ MPI_Comm_size(comm, &numRanks);
+ if (numRanks > 1)
+ {
+ MPI_Barrier(comm);
}
- return countOverAllRanksValue;
+#else
+ GMX_UNUSED_VALUE(comm);
+#endif
}
} // namespace
GpuTaskAssignments GpuTaskAssignmentsBuilder::build(const std::vector<int>& gpuIdsToUse,
const std::vector<int>& userGpuTaskAssignment,
const gmx_hw_info_t& hardwareInfo,
- const t_commrec* cr,
- const gmx_multisim_t* ms,
+ MPI_Comm gromacsWorldComm,
const PhysicalNodeCommunicator& physicalNodeComm,
const TaskTarget nonbondedTarget,
const TaskTarget pmeTarget,
{
exceptionPtr = std::current_exception();
}
- int countOfExceptionsOnThisRank = int(bool(exceptionPtr));
- int countOfExceptionsOverAllRanks = countOverAllRanks(cr, ms, countOfExceptionsOnThisRank);
+ int countOfExceptionsOnThisRank = int(bool(exceptionPtr));
+ int countOfExceptionsOverAllRanks = countOverAllRanks(gromacsWorldComm, countOfExceptionsOnThisRank);
// Avoid all ranks spamming the error stream
//
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
}
- // TODO This implements a global barrier so that MPI runtimes can
+ // TODO Global barrier so that MPI runtimes can
// organize an orderly shutdown if one of the ranks has had to
// issue a fatal error above. When we have MPI-aware error
// handling and reporting, this should be improved (perhaps
// centralized there).
- simulationBarrier(cr);
- multiSimBarrier(ms);
- simulationBarrier(cr);
+ barrierOverAllRanks(gromacsWorldComm);
if (countOfExceptionsOverAllRanks > 0)
{
gmx_fatal(FARGS,
void GpuTaskAssignments::reportGpuUsage(const MDLogger& mdlog,
bool printHostName,
bool useGpuForBonded,
- PmeRunMode pmeRunMode)
+ PmeRunMode pmeRunMode,
+ bool useGpuForUpdate)
{
gmx::reportGpuUsage(mdlog, assignmentForAllRanksOnThisNode_, numGpuTasksOnThisNode_,
- numRanksOnThisNode_, printHostName, useGpuForBonded, pmeRunMode);
+ numRanksOnThisNode_, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
}
gmx_device_info_t* GpuTaskAssignments::initNonbondedDevice(const t_commrec* cr) const
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <vector>
#include "gromacs/utility/basedefinitions.h"
+#include "gromacs/utility/gmxmpi.h"
struct gmx_device_info_t;
struct gmx_hw_info_t;
-struct gmx_multisim_t;
struct t_commrec;
enum class PmeRunMode;
* \param[in] gpuIdsToUse The compatible GPUs that the user permitted us to use.
* \param[in] userGpuTaskAssignment The user-specified assignment of GPU tasks to device IDs.
* \param[in] hardwareInfo The detected hardware
- * \param[in] cr Communication object.
- * \param[in] ms Multi-simulation handler.
+ * \param[in] gromacsWorldComm MPI communicator for all ranks in the current GROMACS run
* \param[in] physicalNodeComm Communication object for this physical node.
* \param[in] nonbondedTarget The user's choice for mdrun -nb for where to assign
* short-ranged nonbonded interaction tasks.
GpuTaskAssignments build(const std::vector<int>& gpuIdsToUse,
const std::vector<int>& userGpuTaskAssignment,
const gmx_hw_info_t& hardwareInfo,
- const t_commrec* cr,
- const gmx_multisim_t* ms,
+ MPI_Comm gromacsWorldComm,
const PhysicalNodeCommunicator& physicalNodeComm,
TaskTarget nonbondedTarget,
TaskTarget pmeTarget,
* and in what way.
*
* \param[in] mdlog Logging object.
- * \param[in] printHostName Print the hostname in the usage information
- * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the GPU
- * \param[in] pmeRunMode Describes the execution of PME tasks
+ * \param[in] printHostName Print the hostname in the usage information.
+ * \param[in] useGpuForBonded Whether GPU PP tasks will do bonded work on the GPU.
+ * \param[in] pmeRunMode Describes the execution of PME tasks.
+ * \param[in] useGpuForUpdate Whether the update is offloaded on the GPU.
*
- * \throws std::bad_alloc if out of memory */
- void reportGpuUsage(const MDLogger& mdlog, bool printHostName, bool useGpuForBonded, PmeRunMode pmeRunMode);
+ * \throws std::bad_alloc if out of memory
+ */
+ void reportGpuUsage(const MDLogger& mdlog,
+ bool printHostName,
+ bool useGpuForBonded,
+ PmeRunMode pmeRunMode,
+ bool useGpuForUpdate);
+
/*! \brief Logs to \c mdlog information that may help a user
* learn how to let mdrun make a task assignment that runs
* faster.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2008,2009,2010.
- * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2008,2009,2010, The GROMACS development team.
+ * Copyright (c) 2012,2013,2014,2015,2016 The GROMACS development team.
* Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
return n;
}
+std::array<int, eptNR> gmx_mtop_particletype_count(const gmx_mtop_t& mtop)
+{
+ std::array<int, eptNR> count = { { 0 } };
+
+ for (const auto& molblock : mtop.molblock)
+ {
+ const t_atoms& atoms = mtop.moltype[molblock.type].atoms;
+ for (int a = 0; a < atoms.nr; a++)
+ {
+ count[atoms.atom[a].ptype] += molblock.nmol;
+ }
+ }
+
+ return count;
+}
+
static void atomcat(t_atoms* dest, const t_atoms* src, int copies, int maxres_renum, int* maxresnr)
{
int i, j, l, size;
#include <cstddef>
+#include <array>
#include <vector>
#include "gromacs/topology/topology.h"
/* Returns the total number of interactions in the system with all interaction flags that are set in \p if_flags set */
int gmx_mtop_interaction_count(const gmx_mtop_t& mtop, int unsigned if_flags);
+/* Returns the count of atoms for each particle type */
+std::array<int, eptNR> gmx_mtop_particletype_count(const gmx_mtop_t& mtop);
+
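A short usage sketch (mirroring how the shell-handling code elsewhere in this change uses the new counter; ``mtop`` is assumed to be a valid topology):

    // Count particles by type and check whether the system contains shells.
    const std::array<int, eptNR> counts     = gmx_mtop_particletype_count(mtop);
    const bool                   haveShells = (counts[eptShell] > 0);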
/* Returns a single t_atoms struct for the whole system */
t_atoms gmx_mtop_global_atoms(const gmx_mtop_t* mtop);
"Jean Baudrillard" },
{ "Install our Free Energy Patents app! There is energy all around us; and it's free! "
"Free energy is everywhere, and all around you, just waiting to be extracted! Over "
- "100+ free energy patents!"
+ "100+ free energy patents!",
"Mind and Miracle Productions on Twitter, spamming a FEP thread" },
{ "\"A slow sort of country!\" said the Queen. \"Now, HERE, you see, it "
"takes all the running YOU can do, to keep in the same place. If you want "
- "to get somewhere else, you must run at least twice as fast as that!\""
+ "to get somewhere else, you must run at least twice as fast as that!\"",
"Lewis Carroll" },
{ "More than 10000000 total errors detected. I'm not reporting any more. "
"Final error counts will be inaccurate. Go fix your program!",