# The GROMACS convention is that these are the version number of the next
# release that is going to be made from this branch.
-set(GMX_VERSION_MAJOR 2016)
-set(GMX_VERSION_PATCH 2)
+set(GMX_VERSION_MAJOR 2017)
+set(GMX_VERSION_PATCH 0)
# The suffix, on the other hand, is used mainly for betas and release
# candidates, where it signifies the most recent such release from
# this branch; it will be empty before the first such release, as well
# here. The important thing is to minimize the chance of third-party
# code being able to dynamically link with a version of libgromacs
# that might not work.
-set(LIBRARY_SOVERSION_MAJOR 2)
-set(LIBRARY_SOVERSION_MINOR 1)
+set(LIBRARY_SOVERSION_MAJOR 3)
+set(LIBRARY_SOVERSION_MINOR 0)
set(LIBRARY_VERSION ${LIBRARY_SOVERSION_MAJOR}.${LIBRARY_SOVERSION_MINOR}.0)
#####################################################################
# each release. It's hard to test because it is only used for
# REGRESSIONTEST_DOWNLOAD, which doesn't work until that tarball has
# been placed on the server.
- set(REGRESSIONTEST_MD5SUM "5f49cfc4f04a34f117340cf9b3e5f8a2" CACHE INTERNAL "MD5 sum of the regressiontests tarball")
+ set(REGRESSIONTEST_MD5SUM "366438549270d005fa6def6e56ca0256" CACHE INTERNAL "MD5 sum of the regressiontests tarball")
math(EXPR GMX_VERSION_NUMERIC
"${GMX_VERSION_MAJOR}*10000 + ${GMX_VERSION_PATCH}")
Quick and dirty installation
----------------------------
1. Get the latest version of your C and C++ compilers.
-2. Check that you have CMake version |GMX_CMAKE_MINIMUM_REQUIRED_VERSION| or later.
+2. Check that you have CMake version |CMAKE_MINIMUM_REQUIRED_VERSION| or later.
3. Get and unpack the latest version of the |Gromacs| tarball.
4. Make a separate build directory and change to it.
5. Run ``cmake`` with the path to the source as an argument
hardware, libraries, and compilers are only going to continue to get
more complex.
+ Quick and dirty cluster installation
+ ------------------------------------
+
+ On a cluster where users are expected to be running across multiple
+ nodes using MPI, make one installation similar to the above, and
+ another using an MPI wrapper compiler and which is `building only
+ mdrun`_, because that is the only component of |Gromacs| that uses
+ MPI.
+
Typical installation
--------------------
As above, and with further details below, but you should consider
* ``-DCMAKE_C_COMPILER=xxx`` equal to the name of the C99 `Compiler`_ you wish to use (or the environment variable ``CC``)
* ``-DCMAKE_CXX_COMPILER=xxx`` equal to the name of the C++98 `compiler`_ you wish to use (or the environment variable ``CXX``)
- * ``-DGMX_MPI=on`` to build using `MPI support`_
+ * ``-DGMX_MPI=on`` to build using `MPI support`_ (generally good to combine with `building only mdrun`_)
* ``-DGMX_GPU=on`` to build using nvcc to run using NVIDIA `CUDA GPU acceleration`_ or an OpenCL_ GPU
* ``-DGMX_USE_OPENCL=on`` to build with OpenCL_ support enabled. ``GMX_GPU`` must also be set.
* ``-DGMX_SIMD=xxx`` to specify the level of `SIMD support`_ of the node on which |Gromacs| will run
frequently provides the best performance.
You should strive to use the most recent version of your
-compiler. Minimum supported compiler versions are
-* GNU (gcc) 4.6
-* Intel (icc) 14
-* LLVM (clang) 3.4
+compiler. Since we require full C++11 support the minimum supported
+compiler versions are
+* GNU (gcc) 4.8.1
+* Intel (icc) 15.0
+* LLVM (clang) 3.3
* Microsoft (MSVC) 2015
Other compilers may work (Cray, Pathscale, older clang) but do
not offer competitive performance. We recommend against PGI because
On Linux, both the Intel and clang compiler use the libstdc++ which
comes with gcc as the default C++ library. For |Gromacs|, we require
-the compiler to support libstc++ version 4.6.1 or higher. To select a
+the compiler to support libstc++ version 4.8.1 or higher. To select a
particular libstdc++ library, use:
* For Intel: ``-DGMX_STDLIB_CXX_FLAGS=-gcc-name=/path/to/gcc/binary``
the vendor's default or recommended compiler, and check for
specialized information below.
+For updated versions of gcc to add to your Linux OS, see
+
+* Ubuntu: `Ubuntu toolchain ppa page`_
+* RHEL/CentOS: `EPEL page`_ or the RedHat Developer Toolset
+
Compiling with parallelization options
--------------------------------------
GPU support
^^^^^^^^^^^
|Gromacs| has excellent support for NVIDIA GPUs supported via CUDA.
-NVIDIA's CUDA_ version |REQUIRED_CUDA_VERSION| software development kit is required,
-and the latest version is strongly encouraged. NVIDIA GPUs with at
+On Linux with gcc, NVIDIA's CUDA_ version |REQUIRED_CUDA_VERSION|
+software development kit is required, and the latest
+version is strongly encouraged. Using Intel or Microsoft compilers
+requires version 7.0 and 8.0, respectively. NVIDIA GPUs with at
least NVIDIA compute capability |REQUIRED_CUDA_COMPUTE_CAPABILITY| are
required, e.g. Fermi, Kepler, Maxwell or Pascal cards. You are strongly recommended to
get the latest CUDA version and driver supported by your hardware, but
-----
|Gromacs| builds with the CMake build system, requiring at least
-version |GMX_CMAKE_MINIMUM_REQUIRED_VERSION|. You can check whether
+version |CMAKE_MINIMUM_REQUIRED_VERSION|. You can check whether
CMake is installed, and what version it is, with ``cmake
--version``. If you need to install CMake, then first check whether
your platform's package management system provides a suitable version,
* that you build FFTW from the source code.
If you build FFTW from source yourself, get the most recent version
- and follow the `FFTW installation guide`_. Note that we have recently
- contributed new SIMD optimization for several extra platforms to
- FFTW, which will appear in FFTW-3.3.5 (for now it is available in the
- FFTW repository on github, or you can find a very unofficial prerelease
- version at ftp://ftp.gromacs.org/pub/contrib ).
- Choose the precision for FFTW (i.e. single/float vs. double) to
- match whether you will later use mixed or double precision for
- |Gromacs|. There is no need to compile FFTW with
- threading or MPI support, but it does no harm. On x86 hardware,
- compile with *both* ``--enable-sse2`` and ``--enable-avx`` for
- FFTW-3.3.4 and earlier. As of FFTW-3.3.5 you should also add
- ``--enable-avx2``. FFTW will create a fat library with codelets
- for all different instruction sets, and pick the fastest supported
- one at runtime. On IBM Power8, you definitely want the upcoming
- FFTW-3.3.5 and to compile it with ``--enable-vsx`` for SIMD support. If you are
- using a Cray, there is a special modified (commercial) version of
- FFTs using the FFTW interface which can be slightly faster.
+ and follow the `FFTW installation guide`_. Choose the precision for
+ FFTW (i.e. single/float vs. double) to match whether you will later
+ use mixed or double precision for |Gromacs|. There is no need to
+ compile FFTW with threading or MPI support, but it does no harm. On
+ x86 hardware, compile with *both* ``--enable-sse2`` and
+ ``--enable-avx`` for FFTW-3.3.4 and earlier. From FFTW-3.3.5, you
+ should also add ``--enable-avx2`` also. On Intel chipsets supporting
+ 512-wide AVX, including KNL, add ``--enable-avx512`` also. FFTW will
+ create a fat library with codelets for all different instruction sets,
+ and pick the fastest supported one at runtime. On IBM Power8, you
+ definitely want FFTW-3.3.5 and to compile it with ``--enable-vsx`` for
+ SIMD support. If you are using a Cray, there is a special modified
+ (commercial) version of FFTs using the FFTW interface which can be
+ slightly faster.
Using MKL
^^^^^^^^^
OpenCL GPU acceleration
^^^^^^^^^^^^^^^^^^^^^^^
- To build Gromacs with OpenCL support enabled, an OpenCL_ SDK
- (e.g. `from AMD <http://developer.amd.com/appsdk>`_) must be installed
- in a path found in ``CMAKE_PREFIX_PATH`` (or via the environment
- variables ``AMDAPPSDKROOT`` or ``CUDA_PATH``), and the following CMake
- flags must be set
+
+ The primary target of the |Gromacs| OpenCL support is accelerating simulations
+ on AMD hardware, both discrete GPUs and APUs (integrated CPU+GPU chips).
+ The |Gromacs| OpenCL on NVIDIA GPUs works, but performance
+ and other limitations make it less practical (for details see the user guide).
+
+ To build |Gromacs| with OpenCL_ support enabled, two components are
+ required: the OpenCL_ headers and the wrapper library that acts
+ as a client driver loader (so-called ICD loader).
+ The additional, runtime-only dependency is the vendor-specific GPU driver
+ for the device targeted. This also contains the OpenCL_ compiler.
+ As the GPU compute kernels are compiled on-demand at run time,
+ this vendor-specific compiler and driver is not needed for building |Gromacs|.
+ The former, compile-time dependencies are standard components,
+ hence stock versions can be obtained from most Linux distribution
+ repositories (e.g. ``opencl-headers`` and ``ocl-icd-libopencl1`` on Debian/Ubuntu).
+ Only the compatibility with the required OpenCL_ version |REQUIRED_OPENCL_MIN_VERSION|
+ needs to be ensured.
+ Alternatively, the headers and library can also be obtained from vendor SDKs
+ (e.g. `from AMD <http://developer.amd.com/appsdk>`_),
+ which must be installed in a path found in ``CMAKE_PREFIX_PATH`` (or via the environment
+ variables ``AMDAPPSDKROOT`` or ``CUDA_PATH``).
+
+ To trigger an OpenCL_ build the following CMake flags must be set
::
cmake .. -DGMX_GPU=ON -DGMX_USE_OPENCL=ON
- Building |Gromacs| OpenCL support for a CUDA_ GPU works, but see the
- known limitations in the user guide. If you want to
- do so anyway, because NVIDIA OpenCL support is part of the CUDA
- package, a C++ compiler supported by your CUDA installation is
- required.
-
On Mac OS, an AMD GPU can be used only with OS version 10.10.4 and
higher; earlier OS versions are known to run incorrectly.
Building only mdrun
^^^^^^^^^^^^^^^^^^^
- Past versions of the build system offered "mdrun" and "install-mdrun"
- targets (similarly for other programs too) to build and install only
- the mdrun program, respectively. Such a build is useful when the
- configuration is only relevant for mdrun (such as with
- parallelization options for MPI, SIMD, GPUs, or on BlueGene or Cray),
- or the length of time for the compile-link-install cycle is relevant
- when developing.
This is now supported with the ``cmake`` option
- ``-DGMX_BUILD_MDRUN_ONLY=ON``, which will build a cut-down version of
- ``libgromacs`` and/or the mdrun program.
+ ``-DGMX_BUILD_MDRUN_ONLY=ON``, which will build a different version of
+ ``libgromacs`` and the ``mdrun`` program.
Naturally, now ``make install`` installs only those
products. By default, mdrun-only builds will default to static linking
against |Gromacs| libraries, because this is generally a good idea for
- the targets for which an mdrun-only build is desirable. If you re-use
- a build tree and change to the mdrun-only build, then you will inherit
- the setting for ``BUILD_SHARED_LIBS`` from the old build, and will be
- warned that you may wish to manage ``BUILD_SHARED_LIBS`` yourself.
+ the targets for which an mdrun-only build is desirable.
Installing |Gromacs|
--------------------
run an MPI program is called ``srun``.
The ``make check`` target also runs integration-style tests that may run
- with MPI if ``GMX_MPI=ON`` was set. To make these work, you may need to
- set the CMake variables ``MPIEXEC``, ``MPIEXEC_NUMPROC_FLAG``, ``NUMPROC``,
+ with MPI if ``GMX_MPI=ON`` was set. To make these work with various possible
+ MPI libraries, you may need to
+ set the CMake variables ``MPIEXEC``, ``MPIEXEC_NUMPROC_FLAG``,
``MPIEXEC_PREFLAGS`` and ``MPIEXEC_POSTFLAGS`` so that
``mdrun-mpi-test_mpi`` would run on multiple ranks via the shell command
${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${NUMPROC} ${MPIEXEC_PREFLAGS} \
mdrun-mpi-test_mpi ${MPIEXEC_POSTFLAGS} -otherflags
- Typically, one might use variable values ``mpirun``, ``-np``, ``2``, ``''``,
- ``''`` respectively, in order to run on two ranks.
+ A typical example for SLURM is
+
+ ::
+
+ cmake .. -DGMX_MPI=on -DMPIEXEC=srun -DMPIEXEC_NUMPROC_FLAG=-n -DMPIEXEC_PREFLAGS= -DMPIEXEC_POSTFLAGS=
Testing |Gromacs| for performance
it works because we have tested it. We do test on Linux, Windows, and
Mac with a range of compilers and libraries for a range of our
configuration options. Every commit in our git source code repository
-is currently tested on x86 with gcc versions ranging from 4.6 through
-5.2, and versions 16 of the Intel compiler as well as Clang
-version 3.4 through 3.8. For this, we use a variety of GNU/Linux
+is currently tested on x86 with a number of gcc versions ranging from 4.8.1
+through 6.1, versions 16 of the Intel compiler, and Clang
+versions 3.4 through 3.8. For this, we use a variety of GNU/Linux
flavors and versions as well as recent versions of Windows. Under
Windows, we test both MSVC 2015 and version 16 of the Intel compiler.
For details, you can
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/mdatoms.h"
#include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdlib/mdsetup.h"
#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_grid.h"
#include "gromacs/mdlib/nsgrid.h"
#include "gromacs/topology/block.h"
#include "gromacs/topology/idef.h"
#include "gromacs/topology/ifunc.h"
+#include "gromacs/topology/mtop_lookup.h"
#include "gromacs/topology/mtop_util.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/basedefinitions.h"
#define GMX_DD_NNODES_SENDRECV 4
+ /* We check if to turn on DLB at the first and every 100 DD partitionings.
+ * With large imbalance DLB will turn on at the first step, so we can
+ * make the interval so large that the MPI overhead of the check is negligible.
+ */
+ static const int c_checkTurnDlbOnInterval = 100;
+ /* We need to check if DLB results in worse performance and then turn it off.
+ * We check this more often then for turning DLB on, because the DLB can scale
+ * the domains very rapidly, so if unlucky the load imbalance can go up quickly
+ * and furthermore, we are already synchronizing often with DLB, so
+ * the overhead of the MPI Bcast is not that high.
+ */
+ static const int c_checkTurnDlbOffInterval = 20;
+
+ /* Forward declaration */
+ static void dd_dlb_set_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd, gmx_bool bValue);
+
+
/*
#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
static bool dlbIsOn(const gmx_domdec_comm_t *comm)
{
- return (comm->dlbState == edlbsOn);
+ return (comm->dlbState == edlbsOnCanTurnOff ||
+ comm->dlbState == edlbsOnForever);
}
static void vec_rvec_init(vec_rvec_t *v)
gmx_incons("The state does not the domain decomposition state");
}
- state->ncg_gl = dd->ncg_home;
- if (state->ncg_gl > state->cg_gl_nalloc)
- {
- state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
- srenew(state->cg_gl, state->cg_gl_nalloc);
- }
- for (i = 0; i < state->ncg_gl; i++)
+ state->cg_gl.resize(dd->ncg_home);
+ for (i = 0; i < dd->ncg_home; i++)
{
state->cg_gl[i] = dd->index_gl[i];
}
}
}
+int dd_natoms_mdatoms(const gmx_domdec_t *dd)
+{
+ /* We currently set mdatoms entries for all atoms:
+ * local + non-local + communicated for vsite + constraints
+ */
+
+ return dd->comm->nat[ddnatNR - 1];
+}
+
int dd_natoms_vsite(const gmx_domdec_t *dd)
{
return dd->comm->nat[ddnatVSITE];
cgs_gl = &dd->comm->cgs_gl;
- ncg_home = state_local->ncg_gl;
- cg = state_local->cg_gl;
+ ncg_home = state_local->cg_gl.size();
+ cg = state_local->cg_gl.data();
nat_home = 0;
for (i = 0; i < ncg_home; i++)
{
}
static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
- rvec *lv, rvec *v)
+ const rvec *lv, rvec *v)
{
gmx_domdec_master_t *ma;
int n, i, c, a, nalloc = 0;
if (!DDMASTER(dd))
{
#if GMX_MPI
- MPI_Send(lv, dd->nat_home*sizeof(rvec), MPI_BYTE, DDMASTERRANK(dd),
- dd->rank, dd->mpi_comm_all);
+ MPI_Send(const_cast<void *>(static_cast<const void *>(lv)), dd->nat_home*sizeof(rvec), MPI_BYTE,
+ DDMASTERRANK(dd), dd->rank, dd->mpi_comm_all);
#endif
}
else
}
static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
- rvec *lv, rvec *v)
+ const rvec *lv, rvec *v)
{
gmx_domdec_master_t *ma;
int *rcounts = NULL, *disps = NULL;
}
}
-void dd_collect_vec(gmx_domdec_t *dd,
- t_state *state_local, rvec *lv, rvec *v)
+void dd_collect_vec(gmx_domdec_t *dd,
+ t_state *state_local,
+ const PaddedRVecVector *localVector,
+ rvec *v)
{
dd_collect_cg(dd, state_local);
+ const rvec *lv = as_rvec_array(localVector->data());
+
if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
{
dd_collect_vec_sendrecv(dd, lv, v);
}
}
+void dd_collect_vec(gmx_domdec_t *dd,
+ t_state *state_local,
+ const PaddedRVecVector *localVector,
+ PaddedRVecVector *vector)
+{
+ dd_collect_vec(dd, state_local, localVector, as_rvec_array(vector->data()));
+}
+
void dd_collect_state(gmx_domdec_t *dd,
t_state *state_local, t_state *state)
switch (est)
{
case estX:
- dd_collect_vec(dd, state_local, state_local->x, state->x);
+ dd_collect_vec(dd, state_local, &state_local->x, &state->x);
break;
case estV:
- dd_collect_vec(dd, state_local, state_local->v, state->v);
+ dd_collect_vec(dd, state_local, &state_local->v, &state->v);
break;
case est_SDX_NOTSUPPORTED:
break;
case estCGP:
- dd_collect_vec(dd, state_local, state_local->cg_p, state->cg_p);
+ dd_collect_vec(dd, state_local, &state_local->cg_p, &state->cg_p);
break;
case estDISRE_INITF:
case estDISRE_RM3TAV:
}
}
-static void dd_realloc_state(t_state *state, rvec **f, int nalloc)
+static void dd_resize_state(t_state *state, PaddedRVecVector *f, int natoms)
{
int est;
if (debug)
{
- fprintf(debug, "Reallocating state: currently %d, required %d, allocating %d\n", state->nalloc, nalloc, over_alloc_dd(nalloc));
+ fprintf(debug, "Resizing state: currently %d, required %d\n", state->natoms, natoms);
}
- state->nalloc = over_alloc_dd(nalloc);
-
for (est = 0; est < estNR; est++)
{
if (EST_DISTR(est) && (state->flags & (1<<est)))
switch (est)
{
case estX:
- srenew(state->x, state->nalloc + 1);
+ state->x.resize(natoms + 1);
break;
case estV:
- srenew(state->v, state->nalloc + 1);
+ state->v.resize(natoms + 1);
break;
case est_SDX_NOTSUPPORTED:
break;
case estCGP:
- srenew(state->cg_p, state->nalloc + 1);
+ state->cg_p.resize(natoms + 1);
break;
case estDISRE_INITF:
case estDISRE_RM3TAV:
/* No reallocation required */
break;
default:
- gmx_incons("Unknown state entry encountered in dd_realloc_state");
+ gmx_incons("Unknown state entry encountered in dd_resize_state");
}
}
}
if (f != NULL)
{
- srenew(*f, state->nalloc);
+ (*f).resize(natoms + 1);
}
}
-static void dd_check_alloc_ncg(t_forcerec *fr, t_state *state, rvec **f,
- int nalloc)
+static void dd_check_alloc_ncg(t_forcerec *fr,
+ t_state *state,
+ PaddedRVecVector *f,
+ int numChargeGroups)
{
- if (nalloc > fr->cg_nalloc)
+ if (numChargeGroups > fr->cg_nalloc)
{
if (debug)
{
- fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, nalloc, over_alloc_dd(nalloc));
+ fprintf(debug, "Reallocating forcerec: currently %d, required %d, allocating %d\n", fr->cg_nalloc, numChargeGroups, over_alloc_dd(numChargeGroups));
}
- fr->cg_nalloc = over_alloc_dd(nalloc);
+ fr->cg_nalloc = over_alloc_dd(numChargeGroups);
srenew(fr->cginfo, fr->cg_nalloc);
if (fr->cutoff_scheme == ecutsGROUP)
{
srenew(fr->cg_cm, fr->cg_nalloc);
}
}
- if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
+ if (fr->cutoff_scheme == ecutsVERLET)
{
/* We don't use charge groups, we use x in state to set up
* the atom communication.
*/
- dd_realloc_state(state, f, nalloc);
+ dd_resize_state(state, f, numChargeGroups);
}
}
static void dd_distribute_dfhist(gmx_domdec_t *dd, df_history_t *dfhist)
{
- int i;
+ if (dfhist == NULL)
+ {
+ return;
+ }
+
dd_bcast(dd, sizeof(int), &dfhist->bEquil);
dd_bcast(dd, sizeof(int), &dfhist->nlambda);
dd_bcast(dd, sizeof(real), &dfhist->wl_delta);
dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_minvar);
dd_bcast(dd, sizeof(real)*nlam, dfhist->sum_variance);
- for (i = 0; i < nlam; i++)
+ for (int i = 0; i < nlam; i++)
{
dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_p[i]);
dd_bcast(dd, sizeof(real)*nlam, dfhist->accum_m[i]);
static void dd_distribute_state(gmx_domdec_t *dd, t_block *cgs,
t_state *state, t_state *state_local,
- rvec **f)
+ PaddedRVecVector *f)
{
int i, j, nh;
copy_mat(state->boxv, state_local->boxv);
copy_mat(state->svir_prev, state_local->svir_prev);
copy_mat(state->fvir_prev, state_local->fvir_prev);
- copy_df_history(&state_local->dfhist, &state->dfhist);
+ if (state->dfhist != NULL)
+ {
+ copy_df_history(state_local->dfhist, state->dfhist);
+ }
for (i = 0; i < state_local->ngtc; i++)
{
for (j = 0; j < nh; j++)
}
}
}
- dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda);
+ dd_bcast(dd, ((efptNR)*sizeof(real)), state_local->lambda.data());
dd_bcast(dd, sizeof(int), &state_local->fep_state);
dd_bcast(dd, sizeof(real), &state_local->veta);
dd_bcast(dd, sizeof(real), &state_local->vol0);
dd_bcast(dd, sizeof(state_local->boxv), state_local->boxv);
dd_bcast(dd, sizeof(state_local->svir_prev), state_local->svir_prev);
dd_bcast(dd, sizeof(state_local->fvir_prev), state_local->fvir_prev);
- dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi);
- dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi);
- dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral);
- dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi);
- dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi);
+ dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_xi.data());
+ dd_bcast(dd, ((state_local->ngtc*nh)*sizeof(double)), state_local->nosehoover_vxi.data());
+ dd_bcast(dd, state_local->ngtc*sizeof(double), state_local->therm_integral.data());
+ dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_xi.data());
+ dd_bcast(dd, ((state_local->nnhpres*nh)*sizeof(double)), state_local->nhpres_vxi.data());
/* communicate df_history -- required for restarting from checkpoint */
- dd_distribute_dfhist(dd, &state_local->dfhist);
+ dd_distribute_dfhist(dd, state_local->dfhist);
+
+ dd_resize_state(state_local, f, dd->nat_home);
- if (dd->nat_home > state_local->nalloc)
- {
- dd_realloc_state(state_local, f, dd->nat_home);
- }
for (i = 0; i < estNR; i++)
{
if (EST_DISTR(i) && (state_local->flags & (1<<i)))
switch (i)
{
case estX:
- dd_distribute_vec(dd, cgs, state->x, state_local->x);
+ dd_distribute_vec(dd, cgs, as_rvec_array(state->x.data()), as_rvec_array(state_local->x.data()));
break;
case estV:
- dd_distribute_vec(dd, cgs, state->v, state_local->v);
+ dd_distribute_vec(dd, cgs, as_rvec_array(state->v.data()), as_rvec_array(state_local->v.data()));
break;
case est_SDX_NOTSUPPORTED:
break;
case estCGP:
- dd_distribute_vec(dd, cgs, state->cg_p, state_local->cg_p);
+ dd_distribute_vec(dd, cgs, as_rvec_array(state->cg_p.data()), as_rvec_array(state_local->cg_p.data()));
break;
case estDISRE_INITF:
case estDISRE_RM3TAV:
char fname[STRLEN], buf[22];
FILE *out;
int i, ii, resnr, c;
- char *atomname, *resname;
+ const char *atomname, *resname;
real b;
gmx_domdec_t *dd;
fprintf(out, "TITLE %s\n", title);
gmx_write_pdb_box(out, dd->bScrewPBC ? epbcSCREW : epbcXYZ, box);
+ int molb = 0;
for (i = 0; i < natoms; i++)
{
ii = dd->gatindex[i];
- gmx_mtop_atominfo_global(mtop, ii, &atomname, &resnr, &resname);
+ mtopGetAtomAndResidueName(mtop, ii, &molb, &atomname, &resnr, &resname, nullptr);
if (i < dd->comm->nat[ddnatZONE])
{
c = 0;
}
static void rebuild_cgindex(gmx_domdec_t *dd,
- const int *gcgs_index, t_state *state)
+ const int *gcgs_index, const t_state *state)
{
- int nat, i, *ind, *dd_cg_gl, *cgindex, cg_gl;
+ int * gmx_restrict dd_cg_gl = dd->index_gl;
+ int * gmx_restrict cgindex = dd->cgindex;
+ int nat = 0;
- ind = state->cg_gl;
- dd_cg_gl = dd->index_gl;
- cgindex = dd->cgindex;
- nat = 0;
+ /* Copy back the global charge group indices from state
+ * and rebuild the local charge group to atom index.
+ */
cgindex[0] = nat;
- for (i = 0; i < state->ncg_gl; i++)
+ for (unsigned int i = 0; i < state->cg_gl.size(); i++)
{
cgindex[i] = nat;
- cg_gl = ind[i];
+ int cg_gl = state->cg_gl[i];
dd_cg_gl[i] = cg_gl;
nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
}
- cgindex[i] = nat;
+ cgindex[state->cg_gl.size()] = nat;
- dd->ncg_home = state->ncg_gl;
+ dd->ncg_home = state->cg_gl.size();
dd->nat_home = nat;
set_zones_ncg_home(dd);
/* This error should never be triggered under normal
* circumstances, but you never know ...
*/
- gmx_fatal(FARGS, "Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
+ gmx_fatal(FARGS, "step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with fewer ranks might avoid this issue.",
gmx_step_str(step, buf),
dim2char(dim), dd->ci[XX], dd->ci[YY], dd->ci[ZZ]);
}
if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
{
char buf[22];
- gmx_fatal(FARGS, "Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
+ gmx_fatal(FARGS, "step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
gmx_step_str(step, buf),
dim2char(dim), ddbox->box_size[dim], ddbox->skew_fac[dim],
ncd, comm->cellsize_min[dim]);
comm->cellsize_min[dim])
{
char buf[22];
- gmx_fatal(FARGS, "Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
+ gmx_fatal(FARGS, "step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
gmx_step_str(step, buf), dim2char(dim),
comm->cell_x1[dim] - comm->cell_x0[dim],
ddbox->skew_fac[dim],
if (pos_d >= limit1[d])
{
cg_move_error(fplog, dd, step, cg, d, 1,
- cg_cm != state->x, limitd[d],
+ cg_cm != as_rvec_array(state->x.data()), limitd[d],
cg_cm[cg], cm_new, pos_d);
}
dev[d] = 1;
if (pos_d < limit0[d])
{
cg_move_error(fplog, dd, step, cg, d, -1,
- cg_cm != state->x, limitd[d],
+ cg_cm != as_rvec_array(state->x.data()), limitd[d],
cg_cm[cg], cm_new, pos_d);
}
dev[d] = -1;
static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
gmx_domdec_t *dd, ivec tric_dir,
- t_state *state, rvec **f,
+ t_state *state, PaddedRVecVector *f,
t_forcerec *fr,
gmx_bool bCompact,
t_nrnb *nrnb,
cgindex,
( thread *dd->ncg_home)/nthread,
((thread+1)*dd->ncg_home)/nthread,
- fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x,
+ fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
move);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
*/
home_pos_cg =
compact_and_copy_vec_cg(dd->ncg_home, move, cgindex,
- nvec, state->x, comm, FALSE);
+ nvec, as_rvec_array(state->x.data()), comm, FALSE);
if (bCompact)
{
home_pos_cg -= *ncg_moved;
vec = 0;
home_pos_at =
compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
- nvec, vec++, state->x, comm, bCompact);
+ nvec, vec++, as_rvec_array(state->x.data()),
+ comm, bCompact);
if (bV)
{
compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
- nvec, vec++, state->v, comm, bCompact);
+ nvec, vec++, as_rvec_array(state->v.data()),
+ comm, bCompact);
}
if (bCGP)
{
compact_and_copy_vec_at(dd->ncg_home, move, cgindex,
- nvec, vec++, state->cg_p, comm, bCompact);
+ nvec, vec++, as_rvec_array(state->cg_p.data()),
+ comm, bCompact);
}
if (bCompact)
nvr += i;
}
+ dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
+ if (fr->cutoff_scheme == ecutsGROUP)
+ {
+ /* Here we resize to more than necessary and shrink later */
+ dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
+ }
+
/* Process the received charge groups */
buf_pos = 0;
for (cg = 0; cg < ncg_recv; cg++)
dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
/* Copy the state from the buffer */
- dd_check_alloc_ncg(fr, state, f, home_pos_cg+1);
if (fr->cutoff_scheme == ecutsGROUP)
{
cg_cm = fr->cg_cm;
comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
}
- if (home_pos_at+nrcg > state->nalloc)
- {
- dd_realloc_state(state, f, home_pos_at+nrcg);
- }
for (i = 0; i < nrcg; i++)
{
copy_rvec(comm->vbuf.v[buf_pos++],
dd->ncg_home = home_pos_cg;
dd->nat_home = home_pos_at;
+ if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
+ {
+ /* We overallocated before, we need to set the right size here */
+ dd_resize_state(state, f, dd->nat_home);
+ }
+
if (debug)
{
fprintf(debug,
{
case 'a': dlbState = edlbsOffCanTurnOn; break;
case 'n': dlbState = edlbsOffForever; break;
- case 'y': dlbState = edlbsOn; break;
+ case 'y': dlbState = edlbsOnForever; break;
default: gmx_incons("Unknown dlb_opt");
}
if (!EI_DYNAMICS(ir->eI))
{
- if (dlbState == edlbsOn)
+ if (dlbState == edlbsOnForever)
{
sprintf(buf, "NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n", EI(ir->eI));
dd_warning(cr, fplog, buf);
case edlbsOffForever:
break;
case edlbsOffCanTurnOn:
+ case edlbsOnCanTurnOff:
dd_warning(cr, fplog, "NOTE: reproducibility requested, will not use dynamic load balancing\n");
dlbState = edlbsOffForever;
break;
- case edlbsOn:
+ case edlbsOnForever:
dd_warning(cr, fplog, "WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
break;
default:
/* Initialize to GPU share count to 0, might change later */
comm->nrank_gpu_shared = 0;
- comm->dlbState = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
- comm->bCheckWhetherToTurnDlbOn = TRUE;
+ comm->dlbState = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
+ dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, TRUE);
+ /* To consider turning DLB on after 2*nstlist steps we need to check
+ * at partitioning count 3. Thus we need to increase the first count by 2.
+ */
+ comm->ddPartioningCountFirstDlbOff += 2;
if (fplog)
{
gmx_domdec_comm_t *comm;
real cellsize_min;
int d, nc, i;
- char buf[STRLEN];
dd = cr->dd;
comm = dd->comm;
- if (fplog)
- {
- fprintf(fplog, "At step %s the performance loss due to force load imbalance is %.1f %%\n", gmx_step_str(step, buf), dd_force_imb_perf_loss(dd)*100);
- }
-
cellsize_min = comm->cellsize_min[dd->dim[0]];
for (d = 1; d < dd->ndim; d++)
{
if (cellsize_min < comm->cellsize_limit*1.05)
{
- dd_warning(cr, fplog, "NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
+ char buf[STRLEN];
+ sprintf(buf, "step %" GMX_PRId64 " Measured %.1f %% performance load due to load imbalance, but the minimum cell size is smaller than 1.05 times the cell size limit. Will no longer try dynamic load balancing.\n", step, dd_force_imb_perf_loss(dd)*100);
/* Change DLB from "auto" to "no". */
comm->dlbState = edlbsOffForever;
return;
}
- dd_warning(cr, fplog, "NOTE: Turning on dynamic load balancing\n");
- comm->dlbState = edlbsOn;
+ char buf[STRLEN];
+ sprintf(buf, "step %" GMX_PRId64 " Turning on dynamic load balancing, because the performance loss due to load imbalance is %.1f %%.\n", step, dd_force_imb_perf_loss(dd)*100);
+ dd_warning(cr, fplog, buf);
+ comm->dlbState = edlbsOnCanTurnOff;
+
+ /* Store the non-DLB performance, so we can check if DLB actually
+ * improves performance.
+ */
+ GMX_RELEASE_ASSERT(comm->cycl_n[ddCyclStep] > 0, "When we turned on DLB, we should have measured cycles");
+ comm->cyclesPerStepBeforeDLB = comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
set_dlb_limits(dd);
}
}
+ static void turn_off_dlb(FILE *fplog, t_commrec *cr, gmx_int64_t step)
+ {
+ gmx_domdec_t *dd = cr->dd;
+
+ char buf[STRLEN];
+ sprintf(buf, "step %" GMX_PRId64 " Turning off dynamic load balancing, because it is degrading performance.\n", step);
+ dd_warning(cr, fplog, buf);
+ dd->comm->dlbState = edlbsOffCanTurnOn;
+ dd->comm->haveTurnedOffDlb = true;
+ dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
+ }
+
+ static void turn_off_dlb_forever(FILE *fplog, t_commrec *cr, gmx_int64_t step)
+ {
+ GMX_RELEASE_ASSERT(cr->dd->comm->dlbState == edlbsOffCanTurnOn, "Can only turn off DLB forever when it was in the can-turn-on state");
+ char buf[STRLEN];
+ sprintf(buf, "step %" GMX_PRId64 " Will no longer try dynamic load balancing, as it degraded performance.\n", step);
+ dd_warning(cr, fplog, buf);
+ cr->dd->comm->dlbState = edlbsOffForever;
+ }
+
static char *init_bLocalCG(const gmx_mtop_t *mtop)
{
int ncg, cg;
dd = cr->dd;
set_ddbox(dd, FALSE, cr, ir, state->box,
- TRUE, &dd->comm->cgs_gl, state->x, &ddbox);
+ TRUE, &dd->comm->cgs_gl, as_rvec_array(state->x.data()), &ddbox);
LocallyLimited = 0;
if (dd->comm->dlbState == edlbsOffCanTurnOn)
{
dd->comm->bCheckWhetherToTurnDlbOn = bValue;
+
+ if (bValue == TRUE)
+ {
+ /* Store the DD partitioning count, so we can ignore cycle counts
+ * over the next nstlist steps, which are often slower.
+ */
+ dd->comm->ddPartioningCountFirstDlbOff = dd->ddp_count;
+ }
}
}
*/
static gmx_bool dd_dlb_get_should_check_whether_to_turn_dlb_on(gmx_domdec_t *dd)
{
- const int nddp_chk_dlb = 100;
-
if (dd->comm->dlbState != edlbsOffCanTurnOn)
{
return FALSE;
}
+ if (dd->ddp_count <= dd->comm->ddPartioningCountFirstDlbOff)
+ {
+ /* We ignore the first nstlist steps at the start of the run
+ * or after PME load balancing or after turning DLB off, since
+ * these often have extra allocation or cache miss overhead.
+ */
+ return FALSE;
+ }
+
/* We should check whether we should use DLB directly after
* unlocking DLB. */
if (dd->comm->bCheckWhetherToTurnDlbOn)
dd_dlb_set_should_check_whether_to_turn_dlb_on(dd, FALSE);
return TRUE;
}
- /* We should also check whether we should use DLB every 100
+ /* We check whether we should use DLB every c_checkTurnDlbOnInterval
* partitionings (we do not do this every partioning, so that we
* avoid excessive communication). */
- if (dd->comm->n_load_have % nddp_chk_dlb == nddp_chk_dlb - 1)
+ if (dd->comm->n_load_have % c_checkTurnDlbOnInterval == c_checkTurnDlbOnInterval - 1)
{
return TRUE;
}
gmx_bool dd_dlb_is_on(const gmx_domdec_t *dd)
{
- return (dd->comm->dlbState == edlbsOn);
+ return dlbIsOn(dd->comm);
}
gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
static void setup_dd_communication(gmx_domdec_t *dd,
matrix box, gmx_ddbox_t *ddbox,
- t_forcerec *fr, t_state *state, rvec **f)
+ t_forcerec *fr,
+ t_state *state, PaddedRVecVector *f)
{
int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
int nzone, nzone_send, zone, zonei, cg0, cg1;
cg_cm = fr->cg_cm;
break;
case ecutsVERLET:
- cg_cm = state->x;
+ cg_cm = as_rvec_array(state->x.data());
break;
default:
gmx_incons("unimplemented");
}
else
{
- cg_cm = state->x;
+ cg_cm = as_rvec_array(state->x.data());
}
/* Communicate cg_cm */
if (cd->bInPlace)
switch (i)
{
case estX:
- order_vec_atom(dd->ncg_home, cgindex, cgsort, state->x, vbuf);
+ order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->x.data()), vbuf);
break;
case estV:
- order_vec_atom(dd->ncg_home, cgindex, cgsort, state->v, vbuf);
+ order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->v.data()), vbuf);
break;
case est_SDX_NOTSUPPORTED:
break;
case estCGP:
- order_vec_atom(dd->ncg_home, cgindex, cgsort, state->cg_p, vbuf);
+ order_vec_atom(dd->ncg_home, cgindex, cgsort, as_rvec_array(state->cg_p.data()), vbuf);
break;
case estLD_RNG:
case estLD_RNGI:
const gmx_mtop_t *top_global,
const t_inputrec *ir,
t_state *state_local,
- rvec **f,
+ PaddedRVecVector *f,
t_mdatoms *mdatoms,
gmx_localtop_t *top_local,
t_forcerec *fr,
gmx_int64_t step_pcoupl;
rvec cell_ns_x0, cell_ns_x1;
int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
- gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bTurnOnDLB, bLogLoad;
+ gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckWhetherToTurnDlbOn, bLogLoad;
gmx_bool bRedist, bSortCG, bResortAll;
ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
real grid_density;
}
comm->n_load_collect++;
- if (bCheckWhetherToTurnDlbOn)
+ if (dlbIsOn(comm))
{
+ if (DDMASTER(dd))
+ {
+ /* Add the measured cycles to the running average */
+ const float averageFactor = 0.1f;
+ comm->cyclesPerStepDlbExpAverage =
+ (1 - averageFactor)*comm->cyclesPerStepDlbExpAverage +
+ averageFactor*comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep];
+ }
+ if (comm->dlbState == edlbsOnCanTurnOff &&
+ dd->comm->n_load_have % c_checkTurnDlbOffInterval == c_checkTurnDlbOffInterval - 1)
+ {
+ gmx_bool turnOffDlb;
+ if (DDMASTER(dd))
+ {
+ /* If the running averaged cycles with DLB are more
+ * than before we turned on DLB, turn off DLB.
+ * We will again run and check the cycles without DLB
+ * and we can then decide if to turn off DLB forever.
+ */
+ turnOffDlb = (comm->cyclesPerStepDlbExpAverage >
+ comm->cyclesPerStepBeforeDLB);
+ }
+ dd_bcast(dd, sizeof(turnOffDlb), &turnOffDlb);
+ if (turnOffDlb)
+ {
+ /* To turn off DLB, we need to redistribute the atoms */
+ dd_collect_state(dd, state_local, state_global);
+ bMasterState = TRUE;
+ turn_off_dlb(fplog, cr, step);
+ }
+ }
+ }
+ else if (bCheckWhetherToTurnDlbOn)
+ {
+ gmx_bool turnOffDlbForever = FALSE;
+ gmx_bool turnOnDlb = FALSE;
+
/* Since the timings are node dependent, the master decides */
if (DDMASTER(dd))
{
- /* Here we check if the max PME rank load is more than 0.98
- * the max PP force load. If so, PP DLB will not help,
- * since we are (almost) limited by PME. Furthermore,
- * DLB will cause a significant extra x/f redistribution
- * cost on the PME ranks, which will then surely result
- * in lower total performance.
- * This check might be fragile, since one measurement
- * below 0.98 (although only done once every 100 DD part.)
- * could turn on DLB for the rest of the run.
+ /* If we recently turned off DLB, we want to check if
+ * performance is better without DLB. We want to do this
+ * ASAP to minimize the chance that external factors
+ * slowed down the DLB step are gone here and we
+ * incorrectly conclude that DLB was causing the slowdown.
+ * So we measure one nstlist block, no running average.
*/
- if (cr->npmenodes > 0 &&
- dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
+ if (comm->haveTurnedOffDlb &&
+ comm->cycl[ddCyclStep]/comm->cycl_n[ddCyclStep] <
+ comm->cyclesPerStepDlbExpAverage)
{
- bTurnOnDLB = FALSE;
+ /* After turning off DLB we ran nstlist steps in fewer
+ * cycles than with DLB. This likely means that DLB
+ * in not benefical, but this could be due to a one
+ * time unlucky fluctuation, so we require two such
+ * observations in close succession to turn off DLB
+ * forever.
+ */
+ if (comm->dlbSlowerPartitioningCount > 0 &&
+ dd->ddp_count < comm->dlbSlowerPartitioningCount + 10*c_checkTurnDlbOnInterval)
+ {
+ turnOffDlbForever = TRUE;
+ }
+ comm->haveTurnedOffDlb = false;
+ /* Register when we last measured DLB slowdown */
+ comm->dlbSlowerPartitioningCount = dd->ddp_count;
}
else
{
- bTurnOnDLB =
- (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
- }
- if (debug)
- {
- fprintf(debug, "step %s, imb loss %f\n",
- gmx_step_str(step, sbuf),
- dd_force_imb_perf_loss(dd));
+ /* Here we check if the max PME rank load is more than 0.98
+ * the max PP force load. If so, PP DLB will not help,
+ * since we are (almost) limited by PME. Furthermore,
+ * DLB will cause a significant extra x/f redistribution
+ * cost on the PME ranks, which will then surely result
+ * in lower total performance.
+ */
+ if (cr->npmenodes > 0 &&
+ dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
+ {
+ turnOnDlb = FALSE;
+ }
+ else
+ {
+ turnOnDlb = (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
+ }
}
}
- dd_bcast(dd, sizeof(bTurnOnDLB), &bTurnOnDLB);
- if (bTurnOnDLB)
+ struct
+ {
+ gmx_bool turnOffDlbForever;
+ gmx_bool turnOnDlb;
+ }
+ bools {
+ turnOffDlbForever, turnOnDlb
+ };
+ dd_bcast(dd, sizeof(bools), &bools);
+ if (bools.turnOffDlbForever)
+ {
+ turn_off_dlb_forever(fplog, cr, step);
+ }
+ else if (bools.turnOnDlb)
{
turn_on_dlb(fplog, cr, step);
bDoDLB = TRUE;
ncgindex_set = 0;
set_ddbox(dd, bMasterState, cr, ir, state_global->box,
- TRUE, cgs_gl, state_global->x, &ddbox);
+ TRUE, cgs_gl, as_rvec_array(state_global->x.data()), &ddbox);
get_cg_distribution(fplog, dd, cgs_gl,
- state_global->box, &ddbox, state_global->x);
+ state_global->box, &ddbox, as_rvec_array(state_global->x.data()));
dd_distribute_state(dd, cgs_gl,
state_global, state_local, f);
if (fr->cutoff_scheme == ecutsGROUP)
{
calc_cgcm(fplog, 0, dd->ncg_home,
- &top_local->cgs, state_local->x, fr->cg_cm);
+ &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
}
inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
{
/* Redetermine the cg COMs */
calc_cgcm(fplog, 0, dd->ncg_home,
- &top_local->cgs, state_local->x, fr->cg_cm);
+ &top_local->cgs, as_rvec_array(state_local->x.data()), fr->cg_cm);
}
inc_nrnb(nrnb, eNR_CGCM, dd->nat_home);
dd_set_cginfo(dd->index_gl, 0, dd->ncg_home, fr, comm->bLocalCG);
set_ddbox(dd, bMasterState, cr, ir, state_local->box,
- TRUE, &top_local->cgs, state_local->x, &ddbox);
+ TRUE, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
bRedist = dlbIsOn(comm);
}
copy_rvec(comm->box_size, ddbox.box_size);
}
set_ddbox(dd, bMasterState, cr, ir, state_local->box,
- bNStGlobalComm, &top_local->cgs, state_local->x, &ddbox);
+ bNStGlobalComm, &top_local->cgs, as_rvec_array(state_local->x.data()), &ddbox);
bBoxChanged = TRUE;
bRedist = TRUE;
0, dd->ncg_home,
comm->zones.dens_zone0,
fr->cginfo,
- state_local->x,
+ as_rvec_array(state_local->x.data()),
ncg_moved, bRedist ? comm->moved : NULL,
fr->nbv->grp[eintLocal].kernel_type,
fr->nbv->grp[eintLocal].nbat);
}
dd_sort_state(dd, fr->cg_cm, fr, state_local,
bResortAll ? -1 : ncg_home_old);
+
+ /* After sorting and compacting we set the correct size */
+ dd_resize_state(state_local, f, dd->nat_home);
+
/* Rebuild all the indices */
ga2la_clear(dd->ga2la);
ncgindex_set = 0;
/*
write_dd_pdb("dd_home",step,"dump",top_global,cr,
- -1,state_local->x,state_local->box);
+ -1,as_rvec_array(state_local->x.data()),state_local->box);
*/
wallcycle_sub_start(wcycle, ewcsDD_MAKETOP);
dd_make_local_top(dd, &comm->zones, dd->npbcdim, state_local->box,
comm->cellsize_min, np,
fr,
- fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : state_local->x,
+ fr->cutoff_scheme == ecutsGROUP ? fr->cg_cm : as_rvec_array(state_local->x.data()),
vsite, top_global, top_local);
wallcycle_sub_stop(wcycle, ewcsDD_MAKETOP);
* or constraint communication.
*/
state_local->natoms = comm->nat[ddnatNR-1];
- if (state_local->natoms > state_local->nalloc)
- {
- dd_realloc_state(state_local, f, state_local->natoms);
- }
+
+ dd_resize_state(state_local, f, state_local->natoms);
if (fr->bF_NoVirSum)
{
forcerec_set_ranges(fr, dd->ncg_home, dd->ncg_tot,
dd->nat_tot, comm->nat[ddnatCON], nat_f_novirsum);
- /* We make the all mdatoms up to nat_tot_con.
- * We could save some work by only setting invmass
- * between nat_tot and nat_tot_con.
- */
- /* This call also sets the new number of home particles to dd->nat_home */
- atoms2md(top_global, ir,
- comm->nat[ddnatCON], dd->gatindex, dd->nat_home, mdatoms);
-
- /* Now we have the charges we can sort the FE interactions */
- dd_sort_local_top(dd, mdatoms, top_local);
-
- if (vsite != NULL)
- {
- /* Now we have updated mdatoms, we can do the last vsite bookkeeping */
- split_vsites_over_threads(top_local->idef.il, top_local->idef.iparams,
- mdatoms, FALSE, vsite);
- }
+ /* Update atom data for mdatoms and several algorithms */
+ mdAlgorithmsSetupAtomData(cr, ir, top_global, top_local, fr,
+ NULL, mdatoms, vsite, NULL);
if (ir->implicit_solvent)
{
make_local_gb(cr, fr->born, ir->gb_algorithm);
}
- setup_bonded_threading(fr, &top_local->idef);
-
if (!(cr->duty & DUTY_PME))
{
/* Send the charges and/or c6/sigmas to our PME only node */
* the last vsite construction, we need to communicate the constructing
* atom coordinates again (for spreading the forces this MD step).
*/
- dd_move_x_vsites(dd, state_local->box, state_local->x);
+ dd_move_x_vsites(dd, state_local->box, as_rvec_array(state_local->x.data()));
wallcycle_sub_stop(wcycle, ewcsDD_TOPOTHER);
if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
{
- dd_move_x(dd, state_local->box, state_local->x);
+ dd_move_x(dd, state_local->box, as_rvec_array(state_local->x.data()));
write_dd_pdb("dd_dump", step, "dump", top_global, cr,
- -1, state_local->x, state_local->box);
+ -1, as_rvec_array(state_local->x.data()), state_local->box);
}
/* Store the partitioning step */
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/pull-params.h"
+#include "gromacs/options/options.h"
+#include "gromacs/options/treesupport.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/topology/block.h"
#include "gromacs/topology/ifunc.h"
#include "gromacs/topology/symtab.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/ikeyvaluetreeerror.h"
+#include "gromacs/utility/keyvaluetree.h"
+#include "gromacs/utility/keyvaluetreetransform.h"
#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/stringcompare.h"
+#include "gromacs/utility/stringutil.h"
#define MAXPTR 254
#define NOGID 255
char QMmethod[STRLEN], QMbasis[STRLEN], QMcharge[STRLEN], QMmult[STRLEN],
bSH[STRLEN], CASorbitals[STRLEN], CASelectrons[STRLEN], SAon[STRLEN],
SAoff[STRLEN], SAsteps[STRLEN], bTS[STRLEN], bOPT[STRLEN];
- char efield_x[STRLEN], efield_xt[STRLEN], efield_y[STRLEN],
- efield_yt[STRLEN], efield_z[STRLEN], efield_zt[STRLEN];
} gmx_inputrec_strings;
couple_lambda_value == ecouplamVDWQ);
}
+namespace
+{
+
+class MdpErrorHandler : public gmx::IKeyValueTreeErrorHandler
+{
+ public:
+ explicit MdpErrorHandler(warninp_t wi)
+ : wi_(wi), mapping_(nullptr)
+ {
+ }
+
+ void setBackMapping(const gmx::IKeyValueTreeBackMapping &mapping)
+ {
+ mapping_ = &mapping;
+ }
+
+ virtual bool onError(gmx::UserInputError *ex, const gmx::KeyValueTreePath &context)
+ {
+ ex->prependContext(gmx::formatString("Error in mdp option \"%s\":",
+ getOptionName(context).c_str()));
+ std::string message = gmx::formatExceptionMessageToString(*ex);
+ warning_error(wi_, message.c_str());
+ return true;
+ }
+
+ private:
+ std::string getOptionName(const gmx::KeyValueTreePath &context)
+ {
+ if (mapping_ != nullptr)
+ {
+ gmx::KeyValueTreePath path = mapping_->originalPath(context);
+ GMX_ASSERT(path.size() == 1, "Inconsistent mapping back to mdp options");
+ return path[0];
+ }
+ GMX_ASSERT(context.size() == 1, "Inconsistent context for mdp option parsing");
+ return context[0];
+ }
+
+ warninp_t wi_;
+ const gmx::IKeyValueTreeBackMapping *mapping_;
+};
+
+} // namespace
+
void get_ir(const char *mdparin, const char *mdparout,
t_inputrec *ir, t_gromppopts *opts,
warninp_t wi)
}
/* Electric fields */
- CCTYPE("Electric fields");
- CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
- CTYPE ("and a phase angle (real)");
- STYPE ("E-x", is->efield_x, NULL);
- CTYPE ("Time dependent (pulsed) electric field. Format is omega, time for pulse");
- CTYPE ("peak, and sigma (width) for pulse. Sigma = 0 removes pulse, leaving");
- CTYPE ("the field to be a cosine function.");
- STYPE ("E-xt", is->efield_xt, NULL);
- STYPE ("E-y", is->efield_y, NULL);
- STYPE ("E-yt", is->efield_yt, NULL);
- STYPE ("E-z", is->efield_z, NULL);
- STYPE ("E-zt", is->efield_zt, NULL);
+ {
+ gmx::KeyValueTreeObject convertedValues = flatKeyValueTreeFromInpFile(ninp, inp);
+ gmx::KeyValueTreeTransformer transform;
+ transform.rules()->addRule()
+ .keyMatchType("/", gmx::StringCompareType::CaseAndDashInsensitive);
+ ir->efield->initMdpTransform(transform.rules());
+ for (const auto &path : transform.mappedPaths())
+ {
+ GMX_ASSERT(path.size() == 1, "Inconsistent mapping back to mdp options");
+ mark_einp_set(ninp, inp, path[0].c_str());
+ }
+ gmx::Options options;
+ ir->efield->initMdpOptions(&options);
+ MdpErrorHandler errorHandler(wi);
+ auto result
+ = transform.transform(convertedValues, &errorHandler);
+ errorHandler.setBackMapping(result.backMapping());
+ gmx::assignOptionsFromKeyValueTree(&options, result.object(),
+ &errorHandler);
+ }
/* Ion/water position swapping ("computational electrophysiology") */
CCTYPE("Ion/water position swapping for computational electrophysiology setups");
double *nrdf_tc, *nrdf_vcm, nrdf_uc, *nrdf_vcm_sub;
ivec *dof_vcm;
gmx_mtop_atomloop_all_t aloop;
- t_atom *atom;
int mb, mol, ftype, as;
gmx_molblock_t *molb;
gmx_moltype_t *molt;
snew(nrdf2, natoms);
aloop = gmx_mtop_atomloop_all_init(mtop);
+ const t_atom *atom;
while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
{
nrdf2[i] = 0;
sfree(nrdf_vcm_sub);
}
-static void decode_cos(char *s, t_cosines *cosine)
-{
- char *t;
- char format[STRLEN], f1[STRLEN];
- double a, phi;
- int i;
-
- t = gmx_strdup(s);
- trim(t);
-
- cosine->n = 0;
- cosine->a = NULL;
- cosine->phi = NULL;
- if (strlen(t))
- {
- if (sscanf(t, "%d", &(cosine->n)) != 1)
- {
- gmx_fatal(FARGS, "Cannot parse cosine multiplicity from string '%s'", t);
- }
- if (cosine->n <= 0)
- {
- cosine->n = 0;
- }
- else
- {
- snew(cosine->a, cosine->n);
- snew(cosine->phi, cosine->n);
-
- sprintf(format, "%%*d");
- for (i = 0; (i < cosine->n); i++)
- {
- double gmx_unused canary;
-
- strcpy(f1, format);
- strcat(f1, "%lf%lf%lf");
- if (sscanf(t, f1, &a, &phi, &canary) != 2)
- {
- gmx_fatal(FARGS, "Invalid input for electric field shift: '%s'", t);
- }
- cosine->a[i] = a;
- cosine->phi[i] = phi;
- strcat(format, "%*lf%*lf");
- }
- }
- }
- sfree(t);
-}
-
static gmx_bool do_egp_flag(t_inputrec *ir, gmx_groups_t *groups,
const char *option, const char *val, int flag)
{
nSA_time = str_nelem(is->anneal_time, MAXPTR, ptr1);
if (nSA_time != k)
{
- gmx_fatal(FARGS, "Found %d annealing-time values, wanter %d\n", nSA_time, k);
+ gmx_fatal(FARGS, "Found %d annealing-time values, wanted %d\n", nSA_time, k);
}
nSA_temp = str_nelem(is->anneal_temp, MAXPTR, ptr2);
if (nSA_temp != k)
gmx_fatal(FARGS, "Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb");
}
- decode_cos(is->efield_x, &(ir->ex[XX]));
- decode_cos(is->efield_xt, &(ir->et[XX]));
- decode_cos(is->efield_y, &(ir->ex[YY]));
- decode_cos(is->efield_yt, &(ir->et[YY]));
- decode_cos(is->efield_z, &(ir->ex[ZZ]));
- decode_cos(is->efield_zt, &(ir->et[ZZ]));
-
for (i = 0; (i < grps->nr); i++)
{
sfree(gnames[i]);
rvec acc;
gmx_mtop_atomloop_block_t aloopb;
gmx_mtop_atomloop_all_t aloop;
- t_atom *atom;
ivec AbsRef;
char warn_buf[STRLEN];
bCharge = FALSE;
aloopb = gmx_mtop_atomloop_block_init(sys);
+ const t_atom *atom;
while (gmx_mtop_atomloop_block_next(aloopb, &atom, &nmol))
{
if (atom->q != 0 || atom->qB != 0)
clear_rvec(acc);
snew(mgrp, sys->groups.grps[egcACC].nr);
aloop = gmx_mtop_atomloop_all_init(sys);
+ const t_atom *atom;
while (gmx_mtop_atomloop_all_next(aloop, &i, &atom))
{
mgrp[ggrpnr(&sys->groups, egcACC, i)] += atom->m;
#include "gromacs/pulling/pull.h"
#include "gromacs/topology/block.h"
#include "gromacs/topology/invblock.h"
+#include "gromacs/topology/mtop_lookup.h"
#include "gromacs/topology/mtop_util.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
FILE *out;
int dd_ac0 = 0, dd_ac1 = 0, i, ii, resnr;
gmx_domdec_t *dd;
- char *anm, *resnm;
+ const char *anm, *resnm;
dd = NULL;
if (DOMAINDECOMP(cr))
fprintf(out, "TITLE %s\n", title);
gmx_write_pdb_box(out, -1, box);
+ int molb = 0;
for (i = start; i < start+homenr; i++)
{
if (dd != NULL)
{
ii = i;
}
- gmx_mtop_atominfo_global(mtop, ii, &anm, &resnr, &resnm);
+ mtopGetAtomAndResidueName(mtop, ii, &molb, &anm, &resnr, &resnm, nullptr);
gmx_fprintf_pdb_atomline(out, epdbATOM, ii+1, anm, ' ', resnm, ' ', resnr, ' ',
10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, 0.0, "");
}
invdt, v, vir != NULL, vir_r_m_dr,
econq, nrnb,
constr->maxwarn, &constr->warncount_lincs);
- if (!bOK && constr->maxwarn >= 0)
+ if (!bOK && constr->maxwarn < INT_MAX)
{
if (fplog != NULL)
{
idef, ir, x, xprime, nrnb,
constr->lagr, lambda, dvdlambda,
invdt, v, vir != NULL, vir_r_m_dr,
- constr->maxwarn >= 0, econq);
+ constr->maxwarn < INT_MAX, econq);
break;
case (econqVeloc):
bOK = bshakef(fplog, constr->shaked,
idef, ir, x, min_proj, nrnb,
constr->lagr, lambda, dvdlambda,
invdt, NULL, vir != NULL, vir_r_m_dr,
- constr->maxwarn >= 0, econq);
+ constr->maxwarn < INT_MAX, econq);
break;
default:
gmx_fatal(FARGS, "Internal error, SHAKE called for constraining something else than coordinates");
break;
}
- if (!bOK && constr->maxwarn >= 0)
+ if (!bOK && constr->maxwarn < INT_MAX)
{
if (fplog != NULL)
{
{
constr->maxwarn = 0;
sscanf(env, "%8d", &constr->maxwarn);
+ if (constr->maxwarn < 0)
+ {
+ constr->maxwarn = INT_MAX;
+ }
if (fplog)
{
fprintf(fplog,
constr->maxwarn);
}
}
- if (constr->maxwarn < 0 && fplog)
- {
- fprintf(fplog, "maxwarn < 0, will not stop on constraint errors\n");
- }
constr->warncount_lincs = 0;
constr->warncount_settle = 0;
/* Initialize the essential dynamics sampling.
* Put the pointer to the ED struct in constr */
constr->ed = ed;
- if (ed != NULL || state->edsamstate.nED > 0)
+ if (ed != NULL || state->edsamstate != NULL)
{
- init_edsam(mtop, ir, cr, ed, state->x, state->box, &state->edsamstate);
+ init_edsam(mtop, ir, cr, ed, as_rvec_array(state->x.data()), state->box, state->edsamstate);
}
constr->warn_mtop = mtop;
#include "nbnxn_gpu.h"
- static const bool useCuda = GMX_GPU == GMX_GPU_CUDA;
- static const bool useOpenCL = GMX_GPU == GMX_GPU_OPENCL;
-
void print_time(FILE *out,
gmx_walltime_accounting_t walltime_accounting,
gmx_int64_t step,
walltime_accounting_get_start_time_stamp(walltime_accounting));
}
-static void sum_forces(int start, int end, rvec f[], rvec flr[])
+static void sum_forces(rvec f[], const PaddedRVecVector *forceToAdd)
{
- int i;
+ /* TODO: remove this - 1 when padding is properly implemented */
+ int end = forceToAdd->size() - 1;
+ const rvec *fAdd = as_rvec_array(forceToAdd->data());
- if (gmx_debug_at)
- {
- pr_rvecs(debug, 0, "fsr", f+start, end-start);
- pr_rvecs(debug, 0, "flr", flr+start, end-start);
- }
// cppcheck-suppress unreadVariable
int gmx_unused nt = gmx_omp_nthreads_get(emntDefault);
#pragma omp parallel for num_threads(nt) schedule(static)
- for (i = start; i < end; i++)
+ for (int i = 0; i < end; i++)
{
- rvec_inc(f[i], flr[i]);
- }
-}
-
-/*
- * calc_f_el calculates forces due to an electric field.
- *
- * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
- *
- * Et[] contains the parameters for the time dependent
- * part of the field.
- * Ex[] contains the parameters for
- * the spatial dependent part of the field.
- * The function should return the energy due to the electric field
- * (if any) but for now returns 0.
- *
- * WARNING:
- * There can be problems with the virial.
- * Since the field is not self-consistent this is unavoidable.
- * For neutral molecules the virial is correct within this approximation.
- * For neutral systems with many charged molecules the error is small.
- * But for systems with a net charge or a few charged molecules
- * the error can be significant when the field is high.
- * Solution: implement a self-consistent electric field into PME.
- */
-static void calc_f_el(FILE *fp, int start, int homenr,
- real charge[], rvec f[],
- t_cosines Ex[], t_cosines Et[], double t)
-{
- rvec Ext;
- real t0;
- int i, m;
-
- for (m = 0; (m < DIM); m++)
- {
- if (Et[m].n > 0)
- {
- if (Et[m].n == 3)
- {
- t0 = Et[m].a[1];
- Ext[m] = std::cos(Et[m].a[0]*(t-t0))*std::exp(-gmx::square(t-t0)/(2.0*gmx::square(Et[m].a[2])));
- }
- else
- {
- Ext[m] = std::cos(Et[m].a[0]*t);
- }
- }
- else
- {
- Ext[m] = 1.0;
- }
- if (Ex[m].n > 0)
- {
- /* Convert the field strength from V/nm to MD-units */
- Ext[m] *= Ex[m].a[0]*FIELDFAC;
- for (i = start; (i < start+homenr); i++)
- {
- f[i][m] += charge[i]*Ext[m];
- }
- }
- else
- {
- Ext[m] = 0;
- }
- }
- if (fp != NULL)
- {
- fprintf(fp, "%10g %10g %10g %10g #FIELD\n", t,
- Ext[XX]/FIELDFAC, Ext[YY]/FIELDFAC, Ext[ZZ]/FIELDFAC);
+ rvec_inc(f[i], fAdd[i]);
}
}
wallcycle_start(wcycle, ewcPP_PMEWAITRECVF);
dvdl_q = 0;
dvdl_lj = 0;
- gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q,
+ gmx_pme_receive_f(cr, as_rvec_array(fr->f_novirsum->data()), fr->vir_el_recip, &e_q,
fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj,
&cycles_seppme);
enerd->term[F_COUL_RECIP] += e_q;
* if the constructing atoms aren't local.
*/
wallcycle_start(wcycle, ewcVSITESPREAD);
- spread_vsite_f(vsite, x, fr->f_novirsum, NULL,
+ spread_vsite_f(vsite, x, as_rvec_array(fr->f_novirsum->data()), NULL,
(flags & GMX_FORCE_VIRIAL), fr->vir_el_recip,
nrnb,
&top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr);
if (flags & GMX_FORCE_VIRIAL)
{
/* Now add the forces, this is local */
- if (fr->bDomDec)
- {
- sum_forces(0, fr->f_novirsum_n, f, fr->f_novirsum);
- }
- else
- {
- sum_forces(0, mdatoms->homenr,
- f, fr->f_novirsum);
- }
+ sum_forces(f, fr->f_novirsum);
+
if (EEL_FULL(fr->eeltype))
{
/* Add the mesh contribution to the virial */
gmx_localtop_t *top,
gmx_groups_t gmx_unused *groups,
matrix box, rvec x[], history_t *hist,
- rvec f[],
+ PaddedRVecVector *force,
tensor vir_force,
t_mdatoms *mdatoms,
gmx_enerdata_t *enerd, t_fcdata *fcd,
real *lambda, t_graph *graph,
t_forcerec *fr, interaction_const_t *ic,
gmx_vsite_t *vsite, rvec mu_tot,
- double t, FILE *field, gmx_edsam_t ed,
+ double t, gmx_edsam_t ed,
gmx_bool bBornRadii,
int flags)
{
int cg1, i, j;
- int start, homenr;
double mu[2*DIM];
gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
gmx_bool bDoForces, bUseGPU, bUseOrEmulGPU;
cycles_wait_gpu = 0;
nbv = fr->nbv;
- start = 0;
- homenr = mdatoms->homenr;
+ const int start = 0;
+ const int homenr = mdatoms->homenr;
clear_mat(vir_force);
* we do not need to worry about shifting.
*/
- int pme_flags = 0;
-
wallcycle_start(wcycle, ewcPP_PMESENDX);
bBS = (inputrec->nwall == 2);
svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
}
- if (EEL_PME(fr->eeltype))
- {
- pme_flags |= GMX_PME_DO_COULOMB;
- }
-
- if (EVDW_PME(fr->vdwtype))
- {
- pme_flags |= GMX_PME_DO_LJ;
- }
-
gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
- mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
+ lambda[efptCOUL], lambda[efptVDW],
(flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
- pme_flags, step);
+ step);
wallcycle_stop(wcycle, ewcPP_PMESENDX);
}
wallcycle_stop(wcycle, ewcROT);
}
+ /* Temporary solution until all routines take PaddedRVecVector */
+ rvec *f = as_rvec_array(force->data());
+
/* Start the force cycle counter.
* This counter is stopped after do_force_lowlevel.
* No parallel communication should occur while this counter is running,
{
if (flags & GMX_FORCE_VIRIAL)
{
- fr->f_novirsum = fr->f_novirsum_alloc;
+ fr->f_novirsum = fr->forceBufferNoVirialSummation;
}
else
{
* a separate array for forces that do not contribute
* to the pressure.
*/
- fr->f_novirsum = f;
+ fr->f_novirsum = force;
}
}
{
if (flags & GMX_FORCE_VIRIAL)
{
- if (fr->bDomDec)
- {
- clear_rvecs_omp(fr->f_novirsum_n, fr->f_novirsum);
- }
- else
- {
- clear_rvecs_omp(homenr, fr->f_novirsum+start);
- }
+ /* TODO: remove this - 1 when padding is properly implemented */
+ clear_rvecs_omp(fr->f_novirsum->size() - 1,
+ as_rvec_array(fr->f_novirsum->data()));
}
}
/* Clear the short- and long-range forces */
if (bDoForces && DOMAINDECOMP(cr))
{
- if (bUseGPU && useCuda)
+ if (bUseGPU)
{
/* We are done with the CPU compute, but the GPU local non-bonded
* kernel can still be running while we communicate the forces.
if (bUseOrEmulGPU)
{
/* wait for local forces (or calculate in emulation mode) */
- if (bUseGPU && useCuda)
+ if (bUseGPU)
{
float cycles_tmp, cycles_wait_est;
- const float cuda_api_overhead_margin = 50000.0f; /* cycles */
+ /* Measured overhead on CUDA and OpenCL with(out) GPU sharing
+ * is between 0.5 and 1.5 Mcycles. So 2 MCycles is an overestimate,
+ * but even with a step of 0.1 ms the difference is less than 1%
+ * of the step time.
+ */
+ const float gpuWaitApiOverheadMargin = 2e6f; /* cycles */
wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
{
cycles_wait_est = gmx_cycles_read() - cycleCountBeforeLocalWorkCompletes;
- if (cycles_tmp < cuda_api_overhead_margin)
+ if (cycles_tmp < gpuWaitApiOverheadMargin)
{
/* We measured few cycles, it could be that the kernel
* and transfer finished earlier and there was no actual
*/
cycles_force += cycles_wait_est;
cycles_wait_gpu += cycles_wait_est;
- }
- else if (bUseGPU && useOpenCL)
- {
- wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
- nbnxn_gpu_wait_for_gpu(nbv->gpu_nbv,
- flags, eatLocal,
- enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
- fr->fshift);
- cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
- }
- if (bUseGPU)
- {
/* now clear the GPU outputs while we finish the step on the CPU */
wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
nbnxn_gpu_clear_outputs(nbv->gpu_nbv, flags);
if (bDoForces)
{
- if (inputrecElecField(inputrec))
+ /* Compute forces due to electric field */
+ if (fr->efield != nullptr)
{
- /* Compute forces due to electric field */
- calc_f_el(MASTER(cr) ? field : NULL,
- start, homenr, mdatoms->chargeA, fr->f_novirsum,
- inputrec->ex, inputrec->et, t);
+ fr->efield->calculateForces(cr, mdatoms, fr->f_novirsum, t);
}
/* If we have NoVirSum forces, but we do not calculate the virial,
gmx_localtop_t *top,
gmx_groups_t *groups,
matrix box, rvec x[], history_t *hist,
- rvec f[],
+ PaddedRVecVector *force,
tensor vir_force,
t_mdatoms *mdatoms,
gmx_enerdata_t *enerd, t_fcdata *fcd,
real *lambda, t_graph *graph,
t_forcerec *fr, gmx_vsite_t *vsite, rvec mu_tot,
- double t, FILE *field, gmx_edsam_t ed,
+ double t, gmx_edsam_t ed,
gmx_bool bBornRadii,
int flags)
{
int cg0, cg1, i, j;
- int start, homenr;
double mu[2*DIM];
gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM;
gmx_bool bDoForces;
float cycles_pme, cycles_force;
- start = 0;
- homenr = mdatoms->homenr;
+ const int start = 0;
+ const int homenr = mdatoms->homenr;
clear_mat(vir_force);
* we do not need to worry about shifting.
*/
- int pme_flags = 0;
-
wallcycle_start(wcycle, ewcPP_PMESENDX);
bBS = (inputrec->nwall == 2);
svmul(inputrec->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
}
- if (EEL_PME(fr->eeltype))
- {
- pme_flags |= GMX_PME_DO_COULOMB;
- }
-
- if (EVDW_PME(fr->vdwtype))
- {
- pme_flags |= GMX_PME_DO_LJ;
- }
-
gmx_pme_send_coordinates(cr, bBS ? boxs : box, x,
- mdatoms->nChargePerturbed, mdatoms->nTypePerturbed, lambda[efptCOUL], lambda[efptVDW],
+ lambda[efptCOUL], lambda[efptVDW],
(flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),
- pme_flags, step);
+ step);
wallcycle_stop(wcycle, ewcPP_PMESENDX);
}
wallcycle_stop(wcycle, ewcROT);
}
+ /* Temporary solution until all routines take PaddedRVecVector */
+ rvec *f = as_rvec_array(force->data());
+
/* Start the force cycle counter.
* This counter is stopped after do_force_lowlevel.
* No parallel communication should occur while this counter is running,
{
if (flags & GMX_FORCE_VIRIAL)
{
- fr->f_novirsum = fr->f_novirsum_alloc;
- if (fr->bDomDec)
- {
- clear_rvecs(fr->f_novirsum_n, fr->f_novirsum);
- }
- else
- {
- clear_rvecs(homenr, fr->f_novirsum+start);
- }
+ fr->f_novirsum = fr->forceBufferNoVirialSummation;
+ /* TODO: remove this - 1 when padding is properly implemented */
+ clear_rvecs(fr->f_novirsum->size() - 1,
+ as_rvec_array(fr->f_novirsum->data()));
}
else
{
* a separate array for forces that do not contribute
* to the pressure.
*/
- fr->f_novirsum = f;
+ fr->f_novirsum = force;
}
}
if (bDoForces)
{
- if (inputrecElecField(inputrec))
+ /* Compute forces due to electric field */
+ if (fr->efield != nullptr)
{
- /* Compute forces due to electric field */
- calc_f_el(MASTER(cr) ? field : NULL,
- start, homenr, mdatoms->chargeA, fr->f_novirsum,
- inputrec->ex, inputrec->et, t);
+ fr->efield->calculateForces(cr, mdatoms, fr->f_novirsum, t);
}
/* Communicate the forces */
if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
(flags & GMX_FORCE_VIRIAL))
{
- dd_move_f(cr->dd, fr->f_novirsum, NULL);
+ dd_move_f(cr->dd, as_rvec_array(fr->f_novirsum->data()), NULL);
}
wallcycle_stop(wcycle, ewcMOVEF);
}
gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
gmx_localtop_t *top,
gmx_groups_t *groups,
- matrix box, rvec x[], history_t *hist,
- rvec f[],
+ matrix box, PaddedRVecVector *coordinates, history_t *hist,
+ PaddedRVecVector *force,
tensor vir_force,
t_mdatoms *mdatoms,
gmx_enerdata_t *enerd, t_fcdata *fcd,
- real *lambda, t_graph *graph,
+ std::vector<real> *lambda, t_graph *graph,
t_forcerec *fr,
gmx_vsite_t *vsite, rvec mu_tot,
- double t, FILE *field, gmx_edsam_t ed,
+ double t, gmx_edsam_t ed,
gmx_bool bBornRadii,
int flags)
{
flags &= ~GMX_FORCE_NONBONDED;
}
+ GMX_ASSERT(coordinates->size() >= static_cast<unsigned int>(fr->natoms_force + 1), "We might need 1 element extra for SIMD");
+ GMX_ASSERT(force->size() >= static_cast<unsigned int>(fr->natoms_force + 1), "We might need 1 element extra for SIMD");
+
+ rvec *x = as_rvec_array(coordinates->data());
+
switch (inputrec->cutoff_scheme)
{
case ecutsVERLET:
top,
groups,
box, x, hist,
- f, vir_force,
+ force, vir_force,
mdatoms,
enerd, fcd,
- lambda, graph,
+ lambda->data(), graph,
fr, fr->ic,
vsite, mu_tot,
- t, field, ed,
+ t, ed,
bBornRadii,
flags);
break;
top,
groups,
box, x, hist,
- f, vir_force,
+ force, vir_force,
mdatoms,
enerd, fcd,
- lambda, graph,
+ lambda->data(), graph,
fr, vsite, mu_tot,
- t, field, ed,
+ t, ed,
bBornRadii,
flags);
break;
/* constrain the current position */
constrain(NULL, TRUE, FALSE, constr, &(top->idef),
ir, cr, step, 0, 1.0, md,
- state->x, state->x, NULL,
+ as_rvec_array(state->x.data()), as_rvec_array(state->x.data()), NULL,
fr->bMolPBC, state->box,
state->lambda[efptBONDED], &dvdl_dum,
NULL, NULL, nrnb, econqCoord);
/* also may be useful if we need the ekin from the halfstep for velocity verlet */
constrain(NULL, TRUE, FALSE, constr, &(top->idef),
ir, cr, step, 0, 1.0, md,
- state->x, state->v, state->v,
+ as_rvec_array(state->x.data()), as_rvec_array(state->v.data()), as_rvec_array(state->v.data()),
fr->bMolPBC, state->box,
state->lambda[efptBONDED], &dvdl_dum,
NULL, NULL, nrnb, econqVeloc);
dvdl_dum = 0;
constrain(NULL, TRUE, FALSE, constr, &(top->idef),
ir, cr, step, -1, 1.0, md,
- state->x, savex, NULL,
+ as_rvec_array(state->x.data()), savex, NULL,
fr->bMolPBC, state->box,
state->lambda[efptBONDED], &dvdl_dum,
- state->v, NULL, nrnb, econqCoord);
+ as_rvec_array(state->v.data()), NULL, nrnb, econqCoord);
for (i = start; i < end; i++)
{
}
// TODO This can be cleaned up a lot, and move back to runner.cpp
-void finish_run(FILE *fplog, t_commrec *cr,
+void finish_run(FILE *fplog, const gmx::MDLogger &mdlog, t_commrec *cr,
t_inputrec *inputrec,
t_nrnb nrnb[], gmx_wallcycle_t wcycle,
gmx_walltime_accounting_t walltime_accounting,
{
struct gmx_wallclock_gpu_t* gputimes = use_GPU(nbv) ? nbnxn_gpu_get_timings(nbv->gpu_nbv) : NULL;
- wallcycle_print(fplog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
+ wallcycle_print(fplog, mdlog, cr->nnodes, cr->npmenodes, nthreads_pp, nthreads_pme,
elapsed_time_over_all_ranks,
wcycle, cycle_sum, gputimes);
}
}
-extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, real *lambda, double *lam0)
+extern void initialize_lambdas(FILE *fplog, t_inputrec *ir, int *fep_state, std::vector<real> *lambda, double *lam0)
{
/* this function works, but could probably use a logic rewrite to keep all the different
types of efep straight. */
- int i;
+ if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
+ {
+ return;
+ }
+
t_lambda *fep = ir->fepvals;
+ *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
+ if checkpoint is set -- a kludge is in for now
+ to prevent this.*/
- if ((ir->efep == efepNO) && (ir->bSimTemp == FALSE))
+ lambda->resize(efptNR);
+
+ for (int i = 0; i < efptNR; i++)
{
- for (i = 0; i < efptNR; i++)
+ /* overwrite lambda state with init_lambda for now for backwards compatibility */
+ if (fep->init_lambda >= 0) /* if it's -1, it was never initializd */
{
- lambda[i] = 0.0;
+ (*lambda)[i] = fep->init_lambda;
if (lam0)
{
- lam0[i] = 0.0;
+ lam0[i] = (*lambda)[i];
}
}
- return;
- }
- else
- {
- *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
- if checkpoint is set -- a kludge is in for now
- to prevent this.*/
- for (i = 0; i < efptNR; i++)
+ else
{
- /* overwrite lambda state with init_lambda for now for backwards compatibility */
- if (fep->init_lambda >= 0) /* if it's -1, it was never initializd */
- {
- lambda[i] = fep->init_lambda;
- if (lam0)
- {
- lam0[i] = lambda[i];
- }
- }
- else
+ (*lambda)[i] = fep->all_lambda[i][*fep_state];
+ if (lam0)
{
- lambda[i] = fep->all_lambda[i][*fep_state];
- if (lam0)
- {
- lam0[i] = lambda[i];
- }
+ lam0[i] = (*lambda)[i];
}
}
- if (ir->bSimTemp)
+ }
+ if (ir->bSimTemp)
+ {
+ /* need to rescale control temperatures to match current state */
+ for (int i = 0; i < ir->opts.ngtc; i++)
{
- /* need to rescale control temperatures to match current state */
- for (i = 0; i < ir->opts.ngtc; i++)
+ if (ir->opts.ref_t[i] > 0)
{
- if (ir->opts.ref_t[i] > 0)
- {
- ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
- }
+ ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
}
}
}
if (fplog != NULL)
{
fprintf(fplog, "Initial vector of lambda components:[ ");
- for (i = 0; i < efptNR; i++)
+ for (int i = 0; i < efptNR; i++)
{
- fprintf(fplog, "%10.4f ", lambda[i]);
+ fprintf(fplog, "%10.4f ", (*lambda)[i]);
}
fprintf(fplog, "]\n");
}
void init_md(FILE *fplog,
t_commrec *cr, t_inputrec *ir, const gmx_output_env_t *oenv,
double *t, double *t0,
- real *lambda, int *fep_state, double *lam0,
+ std::vector<real> *lambda, int *fep_state, double *lam0,
t_nrnb *nrnb, gmx_mtop_t *mtop,
gmx_update_t **upd,
int nfile, const t_filenm fnm[],
please_cite(fplog, "Goga2012");
}
}
- if ((ir->et[XX].n > 0) || (ir->et[YY].n > 0) || (ir->et[ZZ].n > 0))
- {
- please_cite(fplog, "Caleman2008a");
- }
init_nrnb(nrnb);
if (nfile != -1)