Merge branch 'origin/release-2020' into merge-release-2020-into-master
author Paul Bauer <paul.bauer.q@gmail.com>
Wed, 7 Oct 2020 06:55:19 +0000 (08:55 +0200)
committer Paul Bauer <paul.bauer.q@gmail.com>
Wed, 7 Oct 2020 06:55:19 +0000 (08:55 +0200)
Resolved Conflicts:
admin/gitlab-ci/archive.gitlab-ci.yml
admin/gitlab-ci/global.gitlab-ci.yml
admin/gitlab-ci/gromacs.gitlab-ci.yml
admin/gitlab-ci/lint.gitlab-ci.yml
admin/gitlab-ci/python-gmxapi.gitlab-ci.yml
admin/gitlab-ci/rules.gitlab-ci.yml
admin/gitlab-ci/sample_restraint-regression.gitlab-ci.yml
cmake/gmxVersionInfo.cmake
src/gromacs/fileio/checkpoint.cpp
src/gromacs/hardware/printhardware.cpp
src/gromacs/mdlib/md_support.cpp
src/gromacs/mdlib/trajectory_writing.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/modularsimulator/domdechelper.cpp
src/gromacs/modularsimulator/domdechelper.h
src/gromacs/modularsimulator/freeenergyperturbationelement.cpp
src/gromacs/modularsimulator/freeenergyperturbationelement.h
src/gromacs/modularsimulator/modularsimulator.cpp
src/gromacs/tools/trjcat.cpp
src/gromacs/topology/topology.h
src/gromacs/utility/fatalerror.cpp
src/gromacs/utility/futil.cpp
src/gromacs/utility/init.cpp
src/programs/mdrun/tests/CMakeLists.txt
tests/CMakeLists.txt

Change-Id: Icd5d9c78ff2cfb0598c3cb55b057487ca098a1f0

23 files changed:
1  2 
CMakeLists.txt
admin/gitlab-ci/global.gitlab-ci.yml
admin/gitlab-ci/rules.gitlab-ci.yml
cmake/gmxTestImageMagick.cmake
docs/CMakeLists.txt
docs/doxygen/suppressions.txt
docs/release-notes/index.rst
src/gromacs/fileio/checkpoint.cpp
src/gromacs/gmxpreprocess/readir.cpp
src/gromacs/hardware/printhardware.cpp
src/gromacs/mdlib/mdoutf.cpp
src/gromacs/mdlib/trajectory_writing.cpp
src/gromacs/mdrun/md.cpp
src/gromacs/mdrun/runner.cpp
src/gromacs/mdtypes/state_propagator_data_gpu.h
src/gromacs/tables/forcetable.cpp
src/gromacs/tools/trjcat.cpp
src/gromacs/topology/topology.h
src/gromacs/utility/fatalerror.cpp
src/gromacs/utility/futil.cpp
src/gromacs/utility/init.cpp
src/programs/mdrun/tests/CMakeLists.txt
tests/CMakeLists.txt

diff --combined CMakeLists.txt
index e0a70aa060ae856f9dbedc58051e55bdf931ba78,0911eb2a45f1f70ffeb8e3a24421741d524127c8..268786863e6d417f0eca4527464bfc40806a75ef
@@@ -1,9 -1,8 +1,9 @@@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
 -# Copyright (c) 2009,2010,2011,2012,2013,2014, The GROMACS development team.
 -# Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
 +# Copyright (c) 2009,2010,2011,2012,2013 by the GROMACS development team.
 +# Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 +# Copyright (c) 2019,2020, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
  # To help us fund GROMACS development, we humbly ask that you cite
  # the research papers on the package. Check out http://www.gromacs.org.
  
 -cmake_minimum_required(VERSION 3.9.6)
 -if(POLICY CMP0074) #3.12
 -    cmake_policy(SET CMP0074 NEW)
 -endif()
 +cmake_minimum_required(VERSION 3.13)
 +cmake_policy(SET CMP0074 NEW) # From CMake 3.12
  cmake_policy(SET CMP0068 NEW) # From CMake-3.9
  
  # CMake modules/macros are in a subdirectory to keep this file cleaner
  list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Platform)
  
  if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
 -    # Providing a default value >=10.9 helps to find modern C++ compatibility,
 +    # Providing a default value >=10.14 helps to find modern C++ compatibility,
      # such as by defaulting to the Clang libc++ instead of libstdc++.
 -    set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9 CACHE STRING
 -        "OS X deployment target affects default SDK version and compiler flags."
 -        FORCE)
 +    set(CMAKE_OSX_DEPLOYMENT_TARGET 10.14 CACHE STRING
 +        "OS X deployment target affects default SDK version and compiler flags.")
      # By default, limit the binary architecture to a single 64-bit build.
      set(CMAKE_OSX_ARCHITECTURES x86_64 CACHE STRING
          "OS X architecture affects the compatibility of the (potentially fat) binaries produced."
@@@ -55,7 -57,7 +55,7 @@@ endif(
  
  project(Gromacs)
  
 -set(CMAKE_CXX_STANDARD 14)
 +set(CMAKE_CXX_STANDARD 17)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)
  
@@@ -67,9 -69,6 +67,9 @@@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CM
  
  find_package(LibStdCpp)
  
 +# Python is first referenced in gmxVersionInfo, so we perform the search early
 +# to find a suitable installation for all components.
 +include(gmxPythonDiscovery)
  # Set up common version variables, as well as general information about
  # the build tree (whether the build is from a source package or from a git
  # repository).  Also declares a few functions that will be used for generating
@@@ -89,14 -88,13 +89,14 @@@ include(gmxBuildTypeProfile
  include(gmxBuildTypeTSAN)
  include(gmxBuildTypeASAN)
  include(gmxBuildTypeMSAN)
 +include(gmxBuildTypeUBSAN)
  include(gmxBuildTypeReleaseWithAssert)
  
  if(NOT CMAKE_BUILD_TYPE)
 -    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert Profile TSAN ASAN MSAN." FORCE)
 +    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert Profile TSAN ASAN MSAN UBSAN." FORCE)
      # Set the possible values of build type for cmake-gui
      set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
 -        "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert" "Profile" "TSAN" "ASAN" "MSAN")
 +        "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert" "Profile" "TSAN" "ASAN" "MSAN" "UBSAN")
  endif()
  if(CMAKE_CONFIGURATION_TYPES)
      # Add appropriate GROMACS-specific build types for the Visual
@@@ -198,19 -196,43 +198,19 @@@ option(GMX_COOL_QUOTES "Enable GROMACS 
  mark_as_advanced(GMX_COOL_QUOTES)
  gmx_add_cache_dependency(GMX_COOL_QUOTES BOOL "NOT GMX_FAHCORE" OFF)
  
 -option(GMX_USE_OPENCL "Enable OpenCL acceleration" OFF)
 -
  option(GMX_INSTALL_LEGACY_API "Install legacy headers" OFF)
  
 -# The earliest version of the CUDA toolkit that supports c++14 is 9.0
 -set(REQUIRED_CUDA_VERSION 9.0)
 -set(REQUIRED_CUDA_COMPUTE_CAPABILITY 3.0)
 -
 -# OpenCL required version: 1.2 or newer
 -set(REQUIRED_OPENCL_MIN_VERSION_MAJOR 1)
 -set(REQUIRED_OPENCL_MIN_VERSION_MINOR 2)
 -set(REQUIRED_OPENCL_MIN_VERSION ${REQUIRED_OPENCL_MIN_VERSION_MAJOR}.${REQUIRED_OPENCL_MIN_VERSION_MINOR})
 -
 -if(NOT GMX_USE_OPENCL)
 -    # CUDA detection is done only if GMX_USE_OPENCL is OFF.
 -    include(gmxManageGPU)
 -    set(GMX_USE_CUDA ${GMX_GPU})
 -    if(GMX_GPU)
 -        set(GMX_GPU_ACCELERATION_FRAMEWORK "GMX_GPU_CUDA")
 -    else()
 -        set(GMX_GPU_ACCELERATION_FRAMEWORK "GMX_GPU_NONE")
 -    endif()
 -else()
 -    #Now the OpenCL path (for both AMD and NVIDIA)
 -    if(GMX_GPU)
 -        include(gmxManageOpenCL)
 -        set(GMX_GPU_ACCELERATION_FRAMEWORK "GMX_GPU_OPENCL")
 -    else()
 -        message(FATAL_ERROR "OpenCL requested but GPU option is not enabled (try -DGMX_GPU=on) ")
 -    endif()
 -endif()
 +gmx_option_multichoice(
 +    GMX_GPU
 +    "Framework for GPU acceleration"
 +    OFF
 +    OFF CUDA OpenCL SYCL)
  
  gmx_option_multichoice(
      GMX_SIMD
      "SIMD instruction set for CPU kernels and compiler optimization"
      "AUTO"
 -    AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX2_128 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference)
 +    AUTO None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 AVX2_128 AVX_512 AVX_512_KNL MIC ARM_NEON ARM_NEON_ASIMD ARM_SVE IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference)
  
  if(GMX_TARGET_MIC)
      set(GMX_FFT_LIBRARY_DEFAULT "mkl")
@@@ -236,6 -258,12 +236,6 @@@ gmx_dependent_option
  mark_as_advanced(GMX_BUILD_OWN_FFTW)
  mark_as_advanced(GMX_DISABLE_FFTW_MEASURE)
  
 -gmx_option_multichoice(
 -    GMX_QMMM_PROGRAM
 -    "QM package for QM/MM"
 -    None
 -    none gaussian mopac gamess orca)
 -
  gmx_dependent_cache_variable(GMX_SIMD_REF_FLOAT_WIDTH  "Reference SIMD single precision width" STRING "4" "GMX_SIMD STREQUAL REFERENCE")
  gmx_dependent_cache_variable(GMX_SIMD_REF_DOUBLE_WIDTH "Reference SIMD double precision width" STRING "2" "GMX_SIMD STREQUAL REFERENCE")
  
@@@ -369,7 -397,7 +369,7 @@@ check_cxx_source_compiles("int main(){ 
  check_cxx_source_compiles("int main(){ return __builtin_clzll(1);}" HAVE_BUILTIN_CLZLL)
  if(MSVC)
      check_cxx_source_compiles("#include <intrin.h>\n int main(){unsigned long r;unsigned long i=1;_BitScanReverse(&r,i);return r;}" HAVE_BITSCANREVERSE)
-     check_cxx_source_compiles("#include <intrin.h>\n int main(){unsigned long r;unsigned __int64 i=1;_BitScanReverse(&r,i);return r;}" HAVE_BITSCANREVERSE64)
+     check_cxx_source_compiles("#include <intrin.h>\n int main(){unsigned long r;unsigned __int64 i=1;_BitScanReverse64(&r,i);return r;}" HAVE_BITSCANREVERSE64)
  elseif(CMAKE_CXX_COMPILER_ID MATCHES "XL")
      check_cxx_source_compiles("int main(){ return __cntlz4(1);}" HAVE_CNTLZ4)
      check_cxx_source_compiles("int main(){ return __cntlz8(1);}" HAVE_CNTLZ8)
@@@ -428,31 -456,6 +428,31 @@@ include(gmxManageMimic
  include(gmxManageSharedLibraries)
  
  
 +########################################################################
 +# Specify install locations
 +########################################################################
 +# Use GNUInstallDirs to set paths on multiarch systems.
 +include(GNUInstallDirs)
 +
 +set(GMX_INSTALL_DATASUBDIR "gromacs" CACHE STRING "Subdirectory for GROMACS data under CMAKE_INSTALL_DATADIR")
 +mark_as_advanced(GMX_INSTALL_DATASUBDIR)
 +
 +# Internal convenience so we do not have to join two path segments in the code
 +set(GMX_INSTALL_GMXDATADIR ${CMAKE_INSTALL_DATADIR}/${GMX_INSTALL_DATASUBDIR})
 +
 +# If the nesting level wrt. the installation root is changed,
 +# gromacs-config.cmake.cmakein needs to be adapted.
 +set(GMX_INSTALL_CMAKEDIR  ${CMAKE_INSTALL_DATAROOTDIR}/cmake)
 +
 +# TODO: Make GMXRC adapt if this is changed
 +set(GMX_INSTALL_PKGCONFIGDIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
 +
 +list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${CMAKE_INSTALL_INCLUDEDIR})
 +
 +# Binary and library suffix options
 +include(gmxManageSuffixes)
 +
 +
  ########################################################################
  # Find external packages                                               #
  ########################################################################
  #    set(XML_LIBRARIES ${LIBXML2_LIBRARIES})
  #endif()
  
 -gmx_option_trivalue(
 -    GMX_HWLOC
 -    "Use hwloc portable hardware locality library"
 -    "AUTO")
 +option(GMX_HWLOC "Use hwloc portable hardware locality library" OFF)
  
  if (GMX_HWLOC)
      # Find quietly the second time.
@@@ -567,22 -573,13 +567,22 @@@ include(gmxManageTNG
  
  include(gmxManageLmfit)
  
 +include(gmxManageMuparser)
 +
  if(GMX_GPU)
 -    # now that we have detected the dependencies, do the second configure pass
 -    gmx_gpu_setup()
 -    if (GMX_CLANG_CUDA)
 -        list(APPEND GMX_EXTRA_LIBRARIES ${GMX_CUDA_CLANG_LINK_LIBS})
 -        link_directories("${GMX_CUDA_CLANG_LINK_DIRS}")
 +
 +    string(TOUPPER "${GMX_GPU}" _gmx_gpu_uppercase)
 +    if(${_gmx_gpu_uppercase} STREQUAL "CUDA")
 +        include(gmxManageCuda)
 +    elseif(${_gmx_gpu_uppercase} STREQUAL "OPENCL")
 +        include(gmxManageOpenCL)
 +    elseif(${_gmx_gpu_uppercase} STREQUAL "SYCL")
 +        include(gmxManageSYCL)
      endif()
 +    if(NOT GMX_OPENMP)
 +        message(WARNING "To use GPU acceleration efficiently, mdrun requires OpenMP multi-threading, which is currently not enabled.")
 +    endif()
 +
  endif()
  
  if(CYGWIN)
@@@ -605,7 -602,6 +605,7 @@@ gmx_add_cache_dependency(GMX_BUILD_UNIT
  ########################################################################
  
  include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src)
 +include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/src/external)
  # Required for config.h, maybe should only be set in src/CMakeLists.txt
  include_directories(BEFORE ${CMAKE_BINARY_DIR}/src)
  
@@@ -615,11 -611,51 +615,11 @@@ gmx_test_inline_asm_gcc_x86(GMX_X86_GCC
  include(gmxSetBuildInformation)
  gmx_set_build_information()
  
 -gmx_option_multichoice(
 -    GMX_USE_RDTSCP
 -    "Use low-latency RDTSCP instruction for CPU-based timers for mdrun execution; might need to be off when compiling for heterogeneous environments)"
 -    "AUTO"
 -    OFF ON AUTO DETECT)
 +# Anything but truly ancient x86 hardware should support rdtscp, so we enable it by default.
 +# The inline assembly calling it is only ever compiled on x86, so defaulting to ON is OK.
 +option(GMX_USE_RDTSCP "Use low-latency RDTSCP instruction for x86 CPU-based timers for mdrun execution; might need to be off when compiling for heterogeneous environments" ON)
  mark_as_advanced(GMX_USE_RDTSCP)
  
 -macro(gmx_check_rdtscp)
 -    if (CPU_DETECTION_FEATURES MATCHES "rdtscp")
 -        set(HAVE_RDTSCP 1)
 -        set(RDTSCP_DETECTION_MESSAGE " - detected on the build host")
 -    else()
 -        set(RDTSCP_DETECTION_MESSAGE " - not detected on the build host")
 -    endif()
 -endmacro()
 -
 -set(HAVE_RDTSCP 0)
 -if (GMX_USE_RDTSCP STREQUAL "ON")
 -    set(HAVE_RDTSCP 1)
 -elseif(GMX_USE_RDTSCP STREQUAL "DETECT")
 -    gmx_check_rdtscp()
 -elseif(GMX_USE_RDTSCP STREQUAL "AUTO")
 -    # If the user specified automated SIMD selection, that the choice
 -    # is made based on detection on the build host. If so, then RDTSCP
 -    # should be chosen the same way.
 -    #
 -    # If the user specified an AVX SIMD level (e.g. when
 -    # cross-compiling GROMACS) then they will get our best guess, ie
 -    # that in practice AVX mostly correlates with rdtscp (and anyway
 -    # is only relevant in rather old x86 hardware).
 -    if (GMX_SIMD STREQUAL "AUTO")
 -        gmx_check_rdtscp()
 -    elseif (GMX_SIMD MATCHES "AVX")
 -        set(HAVE_RDTSCP 1)
 -    endif()
 -endif()
 -gmx_check_if_changed(HAVE_RDTSCP_CHANGED HAVE_RDTSCP)
 -if (HAVE_RDTSCP_CHANGED)
 -    if (HAVE_RDTSCP)
 -        message(STATUS "Enabling RDTSCP support${RDTSCP_DETECTION_MESSAGE}")
 -    else()
 -        message(STATUS "Disabling RDTSCP support${RDTSCP_DETECTION_MESSAGE}")
 -    endif()
 -endif()
 -
  include(gmxTestLargeFiles)
  gmx_test_large_files(GMX_LARGEFILES)
  
@@@ -655,6 -691,25 +655,6 @@@ endif(
  include(gmxManageSimd)
  gmx_manage_simd()
  
 -include(gmxManageCycleCounters)
 -gmx_manage_cycle_counters()
 -
 -# Process QM/MM Settings
 -if(${GMX_QMMM_PROGRAM} STREQUAL "GAUSSIAN")
 -    set(GMX_QMMM_GAUSSIAN 1)
 -elseif(${GMX_QMMM_PROGRAM} STREQUAL "MOPAC")
 -    set(GMX_QMMM_MOPAC 1)
 -elseif(${GMX_QMMM_PROGRAM} STREQUAL "GAMESS")
 -    set(GMX_QMMM_GAMESS 1)
 -elseif(${GMX_QMMM_PROGRAM} STREQUAL "ORCA")
 -    set(GMX_QMMM_ORCA 1)
 -elseif(${GMX_QMMM_PROGRAM} STREQUAL "NONE")
 -    # nothing to do
 -else()
 -    gmx_invalid_option_value(GMX_QMMM_PROGRAM)
 -endif()
 -
 -
  ##################################################
  # Process FFT library settings
  ##################################################
@@@ -664,9 -719,8 +664,9 @@@ include(gmxManageFFTLibraries
  include(gmxManageLinearAlgebraLibraries)
  
  include(gmxManagePluginSupport)
 +gmx_manage_plugin_support()
  
 -if (GMX_USE_PLUGINS)
 +if(GMX_USE_PLUGINS)
      if(NOT GMX_VMD_PLUGIN_PATH)
          find_package(VMD)
      endif()
@@@ -705,7 -759,13 +705,7 @@@ if(GMX_FAHCORE
    include_directories(${COREWRAP_INCLUDE_DIR})
  endif()
  
 -# Value of GMX_BUILD_HELP=AUTO tries to generate things, but will only
 -# produce warnings if that fails.
 -set(build_help_default AUTO)
 -if (SOURCE_IS_SOURCE_DISTRIBUTION OR CMAKE_CROSSCOMPILING)
 -    set(build_help_default OFF)
 -endif()
 -gmx_option_trivalue(GMX_BUILD_HELP "Build completions automatically (requires that compiled binaries can be executed on the build host) and install man pages if built (requires building the 'man' target manually)" ${build_help_default})
 +option(GMX_BUILD_HELP "Build completions (requires that compiled binaries can be executed on build host) and install man pages if built (requires building the 'man' target manually)" OFF)
  mark_as_advanced(GMX_BUILD_HELP)
  if (GMX_BUILD_HELP AND SOURCE_IS_SOURCE_DISTRIBUTION AND BUILD_IS_INSOURCE)
      message(FATAL_ERROR
@@@ -717,8 -777,8 +717,8 @@@ endif(
  # # # # # # # # # # NO MORE TESTS AFTER THIS LINE! # # # # # # # # # # #
  # these are set after everything else
  if (NOT GMX_SKIP_DEFAULT_CFLAGS)
 -    set(CMAKE_EXE_LINKER_FLAGS "${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}")
 -    set(CMAKE_SHARED_LINKER_FLAGS "${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}")
 +    set(CMAKE_EXE_LINKER_FLAGS "${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS} ${DISABLE_SYCL_CXX_FLAGS}")
 +    set(CMAKE_SHARED_LINKER_FLAGS "${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS} ${DISABLE_SYCL_CXX_FLAGS}")
  else()
      message("Recommended flags which are not added because GMX_SKIP_DEFAULT_CFLAGS=yes:")
      message("CMAKE_C_FLAGS: ${SIMD_C_FLAGS};${MPI_COMPILE_FLAGS};${EXTRA_C_FLAGS};${GMXC_CFLAGS}")
      message("CMAKE_EXE_LINKER_FLAGS: ${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS}")
      message("CMAKE_SHARED_LINKER_FLAGS: ${FFT_LINKER_FLAGS} ${MPI_LINKER_FLAGS}")
  endif()
 -
 -########################################################################
 -# Specify install locations
 -########################################################################
 -# Use GNUInstallDirs to set paths on multiarch systems.
 -include(GNUInstallDirs)
 -
 -set(GMX_INSTALL_DATASUBDIR "gromacs" CACHE STRING "Subdirectory for GROMACS data under CMAKE_INSTALL_DATADIR")
 -mark_as_advanced(GMX_INSTALL_DATASUBDIR)
 -
 -# Internal convenience so we do not have to join two path segments in the code
 -set(GMX_INSTALL_GMXDATADIR ${CMAKE_INSTALL_DATADIR}/${GMX_INSTALL_DATASUBDIR})
 -
 -# If the nesting level wrt. the installation root is changed,
 -# gromacs-config.cmake.cmakein needs to be adapted.
 -set(GMX_INSTALL_CMAKEDIR  ${CMAKE_INSTALL_DATAROOTDIR}/cmake)
 -
 -# TODO: Make GMXRC adapt if this is changed
 -set(GMX_INSTALL_PKGCONFIGDIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
 -set(GMX_INSTALL_OCLDIR       ${GMX_INSTALL_GMXDATADIR}/opencl)
 -
 -list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${CMAKE_INSTALL_INCLUDEDIR})
 -
 -# Binary and library suffix options
 -include(gmxManageSuffixes)
 +# Allow `admin` directory to be easily conveyed to nested CMake commands.
 +set(GMX_ADMIN_DIR ${CMAKE_SOURCE_DIR}/admin)
  
  ################################################################
  # Shared library load path settings
@@@ -782,11 -865,30 +782,11 @@@ if (BUILD_TESTING
  endif()
  
  # TODO: Determine control flow and defaults for package installation and testing use cases.
 -# Ref: http://redmine.gromacs.org/issues/2896
 +# Ref: https://gitlab.com/gromacs/gromacs/-/issues/2896
  option(GMX_PYTHON_PACKAGE "Configure gmxapi Python package" OFF)
  mark_as_advanced(GMX_PYTHON_PACKAGE)
  
  if (NOT GMX_BUILD_MDRUN_ONLY)
 -    # Note: Though only documented as an output variable, PYTHON_EXECUTABLE is
 -    # also effective as a CMake input variable to effectively hint the location
 -    # of the Python interpreter. This may be helpful in environments with both
 -    # Python 2 and Python 3 on the default PATH.
 -    # Ref: https://cmake.org/cmake/help/latest/module/FindPythonInterp.html
 -    if(FIND_PACKAGE_MESSAGE_DETAILS_PythonInterp)
 -        # Keep quiet on subsequent runs of cmake
 -        set(PythonInterp_FIND_QUIETLY ON)
 -    endif()
 -    # Older CMake versions might not search for Python newer than 3.7.
 -    set(Python_ADDITIONAL_VERSIONS 3.8)
 -    if(GMX_PYTHON_PACKAGE)
 -        find_package(PythonInterp 3.5 REQUIRED)
 -        # Note: PythonLibs will be found later by pybind11.
 -        # TODO: (issue #2998) When CMake >= 3.12 is required, update detection.
 -        # I.e.  find_package(Python3 3.5 COMPONENTS Interpreter Development REQUIRED)
 -    else()
 -        find_package(PythonInterp 3.5)
 -    endif()
      find_package(ImageMagick QUIET COMPONENTS convert)
      include(gmxTestImageMagick)
      GMX_TEST_IMAGEMAGICK(IMAGE_CONVERT_POSSIBLE)
      add_subdirectory(share)
      add_subdirectory(scripts)
  endif()
 +add_subdirectory(api)
  add_subdirectory(src)
  
  if (BUILD_TESTING)
@@@ -808,6 -909,13 +808,6 @@@ endif(
  
  gmx_cpack_write_config()
  
 -# Issue a warning if NVIDIA GPUs were detected, but CUDA was not found.
 -# Don't bother the user after the first configure pass.
 -if ((CUDA_NOTFOUND_AUTO AND GMX_DETECT_GPU_AVAILABLE) AND NOT GMX_GPU_DETECTION_DONE)
 -    message(WARNING "${CUDA_NOTFOUND_MESSAGE}")
 -endif()
 -set(GMX_GPU_DETECTION_DONE TRUE CACHE INTERNAL "Whether GPU detection has already been done")
 -
  #######################
  ## uninstall target
  #######################
index d6dbf6bf096c0bd6eb9a2b9d48fda75753d9e826,302a58e05efe6987ea812544f6c54b4f6f40d5d7..49056b495926395a77fdc6ffbd485d8b9aeb6923
    variables:
      KUBERNETES_CPU_LIMIT: 8
      KUBERNETES_CPU_REQUEST: 4
-     KUBERNETES_MEMORY_REQUEST: 8Gi
+     KUBERNETES_MEMORY_REQUEST: 4Gi
+     KUBERNETES_MEMORY_LIMIT: 8Gi
      KUBERNETES_EXTENDED_RESOURCE_NAME: ""
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 0
 -    CACHE_FALLBACK_KEY: "$CI_JOB_NAME-$CI_JOB_STAGE-release-2020"
 +    CACHE_FALLBACK_KEY: "$CI_JOB_NAME-$CI_JOB_STAGE-master"
      BUILD_DIR: build
      INSTALL_DIR: install
      CMAKE_GMXAPI_OPTIONS: ""
@@@ -40,7 -41,7 +41,7 @@@
  .use-cuda:
    variables:
      CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
 -    CMAKE_GPU_OPTIONS: -DGMX_GPU=ON -DGMX_USE_CUDA=ON
 +    CMAKE_GPU_OPTIONS: -DGMX_GPU=CUDA
  
  .use-mpi:
    variables:
@@@ -49,7 -50,7 +50,7 @@@
  .use-opencl:
    variables:
      CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
 -    CMAKE_GPU_OPTIONS: -DGMX_GPU=ON -DGMX_USE_OPENCL=ON
 +    CMAKE_GPU_OPTIONS: -DGMX_GPU=OpenCL
  
  # Base definition for using gcc.
  .use-gcc:base:
      - export CCACHE_DIR=${PWD}/ccache
      - export ASAN_SYMBOLIZER_PATH=/usr/local/bin/llvm-symbolizer
  
 +# Base definition for using oneAPI.
 +.use-oneapi:base:
 +  variables:
 +    # Use the HPC variants of icc and icpc so that OpenMP is active
 +    CMAKE_COMPILER_SCRIPT: -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DCMAKE_INCLUDE_PATH=/opt/intel/oneapi/compiler/latest/linux/include/sycl -DCMAKE_PREFIX_PATH=/opt/intel/oneapi/compiler/latest/linux
 +    CMAKE_EXTRA_OPTIONS: -DGMX_FFT_LIBRARY=mkl
 +  before_script:
 +    # Necessary to override gitlab default 'set -e' which breaks Intel's
 +    # setvars.sh script
 +    - set +e
 +    - source /opt/intel/oneapi/setvars.sh
 +    - mkdir -p ccache
 +    - export CCACHE_BASEDIR=${PWD}
 +    - export CCACHE_DIR=${PWD}/ccache
index 201e329599112cfc23728ccc8cbe63922d158a2f,86bf421545e17fde9f3daff974aa13e80e12a16d..7bf1648b47f83a2508677c5ea10e9a723e99854d
  .rules-element:if-post-merge-acceptance-or-mr-then-always: &if-post-merge-acceptance-or-mr-then-always
    if: '$CI_PIPELINE_SOURCE == "merge_request_event" ||
         ($CI_PIPELINE_SOURCE == "push" &&
-         ($CI_COMMIT_REF_NAME == "master" || $CI_COMMIT_REF_NAME == "release-*"))'
 -        $CI_COMMIT_REF_NAME == "release-2020")'
++        $CI_COMMIT_REF_NAME == "master")'
    when: always
  
  # Include job only for post submit push
  .rules-element:if-post-merge-acceptance-then-always: &if-post-merge-acceptance-then-always
    if: '$CI_PIPELINE_SOURCE == "push" &&
-         ($CI_COMMIT_REF_NAME == "master" || $CI_COMMIT_REF_NAME == "release-*")'
 -       $CI_COMMIT_REF_NAME == "release-2020"'
++       $CI_COMMIT_REF_NAME == "master"'
    when: always
  
  # When composing a rule set, note that the first matching rule is applied.
index 2912b85f3b61dae451d2482fb14b3879bd3874e1,bc06d5af8f8db630991c2d7cf7a340d931a03d95..b008a791706a1fe048a254414282af8921a9db44
@@@ -1,7 -1,7 +1,7 @@@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
--# Copyright (c) 2018,2019, by the GROMACS development team, led by
++# Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
@@@ -66,7 -66,7 +66,7 @@@ function(GMX_TEST_IMAGEMAGICK VARIABLE
              else()
                  set(type_ "STATUS")
              endif()
-             MESSAGE(${type_} "Could not convert sample image, ImageMagick convert can not be used. A possible way to fix it can be found here: https://alexvanderbist.com/posts/2018/fixing-imagick-error-unauthorized")
+             MESSAGE(${type_} "Could not convert sample image, ImageMagick convert can not be used. A possible way to fix it can be found here: https://alexvanderbist.com/2018/fixing-imagick-error-unauthorized")
              set(value_ OFF)
          endif()
          FILE(REMOVE_RECURSE ${TEMPDIR})
diff --combined docs/CMakeLists.txt
index 2e985b5484811767b7582a38f54b5910589f0bd5,52af462e040cabbc9ecf9b6e5eddb5e411452089..4db45f30e0642a1491eaabdd309b7826f636393b
@@@ -1,8 -1,7 +1,8 @@@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
 -# Copyright (c) 2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
 +# Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 +# Copyright (c) 2019,2020, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
@@@ -48,8 -47,9 +48,8 @@@
  # of configure time, because 1) some of the version variables are only
  # available during build time, and 2) we don't want to do all the Sphinx setup
  # during configuration to save some time when not building the content.
 -# All the generated values get put into conf-vars.py (generated from
 -# conf-vars.py.cmakein), which in turn is included by the Sphinx configuration
 -# file conf.py.
 +# All of the generated values get put into conf.py (generated from
 +# conf.cmakein.py).
  
  set(SOURCE_MD5SUM "unknown" CACHE STRING
      "MD5 sum of the source tarball, normally used only for the pre-release webpage build")
@@@ -327,21 -327,19 +327,21 @@@ if (SPHINX_FOUND
      set(SPHINX_SOURCE_FILES
          index.rst
          download.rst
 -        conf.py
          links.dat
          dev-manual/build-system.rst
          dev-manual/change-management.rst
          dev-manual/commitstyle.rst
 +        dev-manual/containers.rst
          dev-manual/documentation-generation.rst
          dev-manual/contribute.rst
          dev-manual/doxygen.rst
          dev-manual/error-handling.rst
          dev-manual/formatting.rst
 +        dev-manual/gitlab.rst
          dev-manual/gmxtree.rst
          dev-manual/includestyle.rst
          dev-manual/index.rst
 +        dev-manual/infrastructure.rst
          dev-manual/jenkins.rst
          dev-manual/known-issues.rst
          dev-manual/language-features.rst
          how-to/visualize.rst
          install-guide/index.rst
          release-notes/index.rst
 +        release-notes/2021/major/highlights.rst
 +        release-notes/2021/major/features.rst
 +        release-notes/2021/major/performance.rst
 +        release-notes/2021/major/tools.rst
 +        release-notes/2021/major/bugs-fixed.rst
 +        release-notes/2021/major/removed-functionality.rst
 +        release-notes/2021/major/deprecated-functionality.rst
 +        release-notes/2021/major/portability.rst
 +        release-notes/2021/major/miscellaneous.rst
          release-notes/2020/2020.1.rst
          release-notes/2020/2020.2.rst
          release-notes/2020/2020.3.rst
          release-notes/2020/2020.4.rst
+         release-notes/2020/2020.5.rst
          release-notes/2020/major/highlights.rst
          release-notes/2020/major/features.rst
          release-notes/2020/major/performance.rst
          set(IMAGE_CONVERT_STRING "impossible")
      endif()
  
 -    set(SPHINX_CONFIG_VARS_FILE ${SPHINX_INPUT_DIR}/conf-vars.py)
 +    set(SPHINX_CONFIG_FILE ${SPHINX_INPUT_DIR}/conf.py)
      if (GMX_PYTHON_PACKAGE)
          set(GMXAPI_PYTHON_STAGING_DIR ${CMAKE_BINARY_DIR}/python_packaging/src/gmxapi_staging)
          # TODO: Resolve circular reference. We would like to get the CMake build-time directory for
          # in this context?
      endif ()
  
 -    gmx_configure_version_file(conf-vars.py.cmakein ${SPHINX_CONFIG_VARS_FILE}
 +    gmx_configure_version_file(
 +        conf.cmakein.py ${SPHINX_CONFIG_FILE}
          EXTRA_VARS
 -            SPHINX_EXTENSION_PATH RELENG_PATH
 -            IMAGE_CONVERT_STRING
 +            CMAKE_MINIMUM_REQUIRED_VERSION
              EXPECTED_DOXYGEN_VERSION
              EXPECTED_SPHINX_VERSION
 -            CMAKE_MINIMUM_REQUIRED_VERSION REQUIRED_CUDA_VERSION
 -            REQUIRED_OPENCL_MIN_VERSION
 -            REQUIRED_CUDA_COMPUTE_CAPABILITY REGRESSIONTEST_VERSION
 -            SOURCE_MD5SUM REGRESSIONTEST_MD5SUM_STRING
 -            GMX_TNG_MINIMUM_REQUIRED_VERSION
 +            GMX_ADMIN_DIR
              GMX_LMFIT_REQUIRED_VERSION
              GMX_MANUAL_DOI_STRING
 +            GMX_TNG_MINIMUM_REQUIRED_VERSION
              GMX_SOURCE_DOI_STRING
              GMXAPI_PYTHON_STAGING_DIR
 +            IMAGE_CONVERT_STRING
 +            REGRESSIONTEST_VERSION
 +            REQUIRED_CUDA_COMPUTE_CAPABILITY
 +            REQUIRED_CUDA_VERSION
 +            REQUIRED_OPENCL_MIN_VERSION
 +            REGRESSIONTEST_MD5SUM_STRING
 +            RELENG_PATH
 +            SOURCE_MD5SUM
 +            SPHINX_EXTENSION_PATH
          COMMENT "Configuring Sphinx configuration file")
 -    gmx_add_sphinx_input_file(${SPHINX_CONFIG_VARS_FILE})
 +    gmx_add_sphinx_input_file(${SPHINX_CONFIG_FILE})
      gmx_add_sphinx_source_files(FILES ${SPHINX_SOURCE_FILES})
      if (EXISTS ${RELENG_PATH}/docs/FileList.cmake)
          include(${RELENG_PATH}/docs/FileList.cmake)
              )
      endif ()
  
 +    gmx_add_sphinx_source_files(
 +            FILES
 +            nblib/index.rst
 +            nblib/guide-to-writing-MD-programs.rst
 +    )
 +
      gmx_add_sphinx_source_files(
          FILES
          ${REFERENCEMANUAL_SPHINX_FILES_GENERAL})
  
      # Sphinx cache with pickled ReST documents
      set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
 +    set(SPHINX_CONFIG_OVERRIDES "")
 +    if (GMX_DEVELOPER_BUILD)
 +        set(SPHINX_CONFIG_OVERRIDES "-Dtodo_include_todos=1")
 +    endif()
      add_custom_target(webpage-sphinx
          DEPENDS sphinx-programs
          DEPENDS sphinx-input
              -q -b html
              -w sphinx-html.log
              -d "${SPHINX_CACHE_DIR}"
 +            ${SPHINX_CONFIG_OVERRIDES}
              "${SPHINX_INPUT_DIR}"
              "${HTML_OUTPUT_DIR}"
          WORKING_DIRECTORY
@@@ -712,7 -685,7 +713,7 @@@ set(HTML_BUILD_NOT_POSSIBLE_REASON
  set(HTML_BUILD_WARNINGS)
  
  # Next, turn it off if any of the preconditions are unsatisfied
 -if (NOT PythonInterp_FOUND)
 +if (NOT Python3_Interpreter_FOUND)
      set(HTML_BUILD_IS_POSSIBLE OFF)
      set(HTML_BUILD_NOT_POSSIBLE_REASON "Python is required")
  elseif (NOT SPHINX_FOUND)
index c5d844eefc2a5aa2f8601755a552ef7014303cfd,533e028090419083d30375477e9919af0d08a2a5..6a976c4971d17e308bdc4b0ed62a0ba137829ec0
@@@ -34,9 -34,6 +34,9 @@@ src/gromacs/nbnxm/pairlist_simd_4xm.h: 
  src/gromacs/nbnxm/kernels_simd_2xmm/kernel_common.h: warning: should include "nbnxm_simd.h"
  src/gromacs/nbnxm/kernels_simd_4xm/kernel_common.h: warning: should include "nbnxm_simd.h"
  
 +# This seems to be a false positive
 +src/gromacs/nbnxm/cuda/nbnxm_cuda_types.h: error: NbnxmGpu: is in internal file(s), but appears in public documentation
 +
  # Temporary while we change the SIMD implementation
  src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace_common.h: warning: should include "simd.h"
  
@@@ -47,10 -44,9 +47,9 @@@ src/gromacs/tables/cubicsplinetable.h: 
  src/gromacs/tables/quadraticsplinetable.h: warning: includes "simd.h" unnecessarily
  
  # These are specific to Folding@Home, and easiest to suppress here
- *: warning: includes non-local file as "corewrap.h"
  src/gmxpre.h: warning: includes non-local file as "swindirect.h"
  
 -# New external API (see https://redmine.gromacs.org/issues/2586) has some unresolved
 +# New external API (see https://gitlab.com/gromacs/gromacs/-/issues/2586) has some unresolved
  # conflicts with previous definitions of public API, installed API, and other things
  # described or implemented in check-source.py, gmxtree.py, gmxtree.rst, and others
  # TODO: resolve definitions, update testing heuristics, and activate policy checks
index 7af19b11c11b2d79e887beef302bf72e53f5ae6c,93af555709b6f6d9aa3796fc3e9043a8b2091efa..dfb12c7e5a2ae8c581cdd8be8d075811cf75af58
@@@ -8,39 -8,17 +8,39 @@@ releases of |Gromacs|. Major releases c
  functionality supported, whereas patch releases contain only fixes for
  issues identified in the corresponding major releases.
  
 -Two versions of |Gromacs| are under active maintenance, the 2020
 -series and the 2019 series. In the latter, only highly conservative
 +Two versions of |Gromacs| are under active maintenance, the 2021
 +series and the 2020 series. In the latter, only highly conservative
  fixes will be made, and only to address issues that affect scientific
  correctness. Naturally, some of those releases will be made after the
 -year 2019 ends, but we keep 2019 in the name so users understand how
 +year 2020 ends, but we keep 2020 in the name so users understand how
  up to date their version is. Such fixes will also be incorporated into
 -the 2020 release series, as appropriate. Around the time the 2021
 -release is made, the 2019 series will no longer be maintained.
 +the 2021 release series, as appropriate. Around the time the 2022
 +release is made, the 2020 series will no longer be maintained.
  
  Where issue numbers are reported in these release notes, more details
 -can be found at https://redmine.gromacs.org at that issue number.
 +can be found at https://gitlab.com/gromacs/gromacs/-/issues at that issue number.
 +
 +|Gromacs| 2021 series
 +---------------------
 +
 +.. todolist::
 +
 +Major release
 +^^^^^^^^^^^^^
 +
 +.. toctree::
 +   :maxdepth: 1
 +
 +   2021/major/highlights
 +   2021/major/features
 +   2021/major/performance
 +   2021/major/tools
 +   2021/major/bugs-fixed
 +   2021/major/deprecated-functionality
 +   2021/major/removed-functionality
 +   2021/major/portability
 +   2021/major/miscellaneous
 +
  
  |Gromacs| 2020 series
  ---------------------
@@@ -51,6 -29,7 +51,7 @@@ Patch release
  .. toctree::
     :maxdepth: 1
  
+    2020/2020.5
     2020/2020.4
     2020/2020.3
     2020/2020.2
index 39d5b2984ae703896505932f40306cd2a2eaa84e,9c6cfe42135b6bf4a7e8c98af3786d7abeed11ea..abd957f09a5fe1a30a48f7f5f2fb9a3d63784942
@@@ -41,6 -41,8 +41,6 @@@
  
  #include "checkpoint.h"
  
 -#include "config.h"
 -
  #include <cerrno>
  #include <cstdlib>
  #include <cstring>
@@@ -60,7 -62,6 +60,7 @@@
  #include "gromacs/math/vectypes.h"
  #include "gromacs/mdtypes/awh_correlation_history.h"
  #include "gromacs/mdtypes/awh_history.h"
 +#include "gromacs/mdtypes/checkpointdata.h"
  #include "gromacs/mdtypes/commrec.h"
  #include "gromacs/mdtypes/df_history.h"
  #include "gromacs/mdtypes/edsamhistory.h"
  #include "gromacs/utility/sysinfo.h"
  #include "gromacs/utility/txtdump.h"
  
- #if GMX_FAHCORE
- #    include "corewrap.h"
- #endif
  #define CPT_MAGIC1 171817
  #define CPT_MAGIC2 171819
  
 +namespace gmx
 +{
 +
 +template<typename ValueType>
 +void readKvtCheckpointValue(compat::not_null<ValueType*> value,
 +                            const std::string&           name,
 +                            const std::string&           identifier,
 +                            const KeyValueTreeObject&    kvt)
 +{
 +    const std::string key = identifier + "-" + name;
 +    if (!kvt.keyExists(key))
 +    {
 +        std::string errorMessage = "Cannot read requested checkpoint value " + key + " .";
 +        GMX_THROW(InternalError(errorMessage));
 +    }
 +    *value = kvt[key].cast<ValueType>();
 +}
 +
 +template void readKvtCheckpointValue(compat::not_null<std::int64_t*> value,
 +                                     const std::string&              name,
 +                                     const std::string&              identifier,
 +                                     const KeyValueTreeObject&       kvt);
 +template void readKvtCheckpointValue(compat::not_null<real*>   value,
 +                                     const std::string&        name,
 +                                     const std::string&        identifier,
 +                                     const KeyValueTreeObject& kvt);
 +
 +template<typename ValueType>
 +void writeKvtCheckpointValue(const ValueType&          value,
 +                             const std::string&        name,
 +                             const std::string&        identifier,
 +                             KeyValueTreeObjectBuilder kvtBuilder)
 +{
 +    kvtBuilder.addValue<ValueType>(identifier + "-" + name, value);
 +}
 +
 +template void writeKvtCheckpointValue(const std::int64_t&       value,
 +                                      const std::string&        name,
 +                                      const std::string&        identifier,
 +                                      KeyValueTreeObjectBuilder kvtBuilder);
 +template void writeKvtCheckpointValue(const real&               value,
 +                                      const std::string&        name,
 +                                      const std::string&        identifier,
 +                                      KeyValueTreeObjectBuilder kvtBuilder);
 +
 +
 +} // namespace gmx
 +
  /*! \brief Enum of values that describe the contents of a cpt file
   * whose format matches a version number
   *
@@@ -158,10 -107,9 +154,10 @@@ enum cpt
      cptv_Unknown = 17,                  /**< Version before numbering scheme */
      cptv_RemoveBuildMachineInformation, /**< remove functionality that makes mdrun builds non-reproducible */
      cptv_ComPrevStepAsPullGroupReference, /**< Allow using COM of previous step as pull group PBC reference */
 -    cptv_PullAverage, /**< Added possibility to output average pull force and position */
 -    cptv_MdModules,   /**< Added checkpointing for MdModules */
 -    cptv_Count        /**< the total number of cptv versions */
 +    cptv_PullAverage,      /**< Added possibility to output average pull force and position */
 +    cptv_MdModules,        /**< Added checkpointing for MdModules */
 +    cptv_ModularSimulator, /**< Added checkpointing for modular simulator */
 +    cptv_Count             /**< the total number of cptv versions */
  };
  
  /*! \brief Version number of the file format written to checkpoint
@@@ -1230,16 -1178,6 +1226,16 @@@ static void do_cpt_header(XDR* xd, gmx_
      {
          contents->flagsPullHistory = 0;
      }
 +
 +    if (contents->file_version >= cptv_ModularSimulator)
 +    {
 +        do_cpt_bool_err(xd, "Is modular simulator checkpoint",
 +                        &contents->isModularSimulatorCheckpoint, list);
 +    }
 +    else
 +    {
 +        contents->isModularSimulatorCheckpoint = false;
 +    }
  }
  
  static int do_cpt_footer(XDR* xd, int file_version)
@@@ -1330,11 -1268,16 +1326,11 @@@ static int do_cpt_state(XDR* xd, int ff
                      break;
                  /* The RNG entries are no longer written,
                   * the next 4 lines are only for reading old files.
 +                 * It's OK that three case statements fall through.
                   */
                  case estLD_RNG_NOTSUPPORTED:
 -                    ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
 -                    break;
                  case estLD_RNGI_NOTSUPPORTED:
 -                    ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
 -                    break;
                  case estMC_RNG_NOTSUPPORTED:
 -                    ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
 -                    break;
                  case estMC_RNGI_NOTSUPPORTED:
                      ret = do_cpte_ints(xd, part, i, sflags, 0, nullptr, list);
                      break;
@@@ -2142,7 -2085,7 +2138,7 @@@ static void do_cpt_mdmodules(in
          gmx::MdModulesCheckpointReadingDataOnMaster mdModuleCheckpointReadingDataOnMaster = {
              mdModuleCheckpointParameterTree, fileVersion
          };
 -        mdModulesNotifier.notifier_.notify(mdModuleCheckpointReadingDataOnMaster);
 +        mdModulesNotifier.checkpointingNotifications_.notify(mdModuleCheckpointReadingDataOnMaster);
      }
  }
  
@@@ -2230,103 -2173,211 +2226,103 @@@ static int do_cpt_files(XDR* xd, gmx_bo
      return 0;
  }
  
 -static void mpiBarrierBeforeRename(const bool applyMpiBarrierBeforeRename, MPI_Comm mpiBarrierCommunicator)
 -{
 -    if (applyMpiBarrierBeforeRename)
 -    {
 -#if GMX_MPI
 -        MPI_Barrier(mpiBarrierCommunicator);
 -#else
 -        GMX_RELEASE_ASSERT(false, "Should not request a barrier without MPI");
 -        GMX_UNUSED_VALUE(mpiBarrierCommunicator);
 -#endif
 -    }
 -}
 -
 -void write_checkpoint(const char*                   fn,
 -                      gmx_bool                      bNumberAndKeep,
 -                      FILE*                         fplog,
 -                      const t_commrec*              cr,
 -                      ivec                          domdecCells,
 -                      int                           nppnodes,
 -                      int                           eIntegrator,
 -                      int                           simulation_part,
 -                      gmx_bool                      bExpanded,
 -                      int                           elamstats,
 -                      int64_t                       step,
 -                      double                        t,
 -                      t_state*                      state,
 -                      ObservablesHistory*           observablesHistory,
 -                      const gmx::MdModulesNotifier& mdModulesNotifier,
 -                      bool                          applyMpiBarrierBeforeRename,
 -                      MPI_Comm                      mpiBarrierCommunicator)
 -{
 -    t_fileio* fp;
 -    char*     fntemp; /* the temporary checkpoint file name */
 -    int       npmenodes;
 -    char      buf[1024], suffix[5 + STEPSTRSIZE], sbuf[STEPSTRSIZE];
 -    t_fileio* ret;
 -
 -    if (DOMAINDECOMP(cr))
 -    {
 -        npmenodes = cr->npmenodes;
 -    }
 -    else
 -    {
 -        npmenodes = 0;
 -    }
 -
 -#if !GMX_NO_RENAME
 -    /* make the new temporary filename */
 -    snew(fntemp, std::strlen(fn) + 5 + STEPSTRSIZE);
 -    std::strcpy(fntemp, fn);
 -    fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 -    sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
 -    std::strcat(fntemp, suffix);
 -    std::strcat(fntemp, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
 -#else
 -    /* if we can't rename, we just overwrite the cpt file.
 -     * dangerous if interrupted.
 -     */
 -    snew(fntemp, std::strlen(fn));
 -    std::strcpy(fntemp, fn);
 -#endif
 -    std::string timebuf = gmx_format_current_time();
 -
 -    if (fplog)
 -    {
 -        fprintf(fplog, "Writing checkpoint, step %s at %s\n\n", gmx_step_str(step, buf), timebuf.c_str());
 -    }
 -
 -    /* Get offsets for open files */
 -    auto outputfiles = gmx_fio_get_output_file_positions();
 -
 -    fp = gmx_fio_open(fntemp, "w");
 -
 -    int flags_eks;
 +void write_checkpoint_data(t_fileio*                         fp,
 +                           CheckpointHeaderContents          headerContents,
 +                           gmx_bool                          bExpanded,
 +                           int                               elamstats,
 +                           t_state*                          state,
 +                           ObservablesHistory*               observablesHistory,
 +                           const gmx::MdModulesNotifier&     mdModulesNotifier,
 +                           std::vector<gmx_file_position_t>* outputfiles,
 +                           gmx::WriteCheckpointDataHolder*   modularSimulatorCheckpointData)
 +{
 +    headerContents.flags_eks = 0;
      if (state->ekinstate.bUpToDate)
      {
 -        flags_eks = ((1 << eeksEKIN_N) | (1 << eeksEKINH) | (1 << eeksEKINF) | (1 << eeksEKINO)
 -                     | (1 << eeksEKINSCALEF) | (1 << eeksEKINSCALEH) | (1 << eeksVSCALE)
 -                     | (1 << eeksDEKINDL) | (1 << eeksMVCOS));
 -    }
 -    else
 -    {
 -        flags_eks = 0;
 +        headerContents.flags_eks = ((1 << eeksEKIN_N) | (1 << eeksEKINH) | (1 << eeksEKINF)
 +                                    | (1 << eeksEKINO) | (1 << eeksEKINSCALEF) | (1 << eeksEKINSCALEH)
 +                                    | (1 << eeksVSCALE) | (1 << eeksDEKINDL) | (1 << eeksMVCOS));
      }
 +    headerContents.isModularSimulatorCheckpoint = !modularSimulatorCheckpointData->empty();
  
 -    energyhistory_t* enerhist  = observablesHistory->energyHistory.get();
 -    int              flags_enh = 0;
 +    energyhistory_t* enerhist = observablesHistory->energyHistory.get();
 +    headerContents.flags_enh  = 0;
      if (enerhist != nullptr && (enerhist->nsum > 0 || enerhist->nsum_sim > 0))
      {
 -        flags_enh |= (1 << eenhENERGY_N) | (1 << eenhENERGY_NSTEPS) | (1 << eenhENERGY_NSTEPS_SIM);
 +        headerContents.flags_enh |=
 +                (1 << eenhENERGY_N) | (1 << eenhENERGY_NSTEPS) | (1 << eenhENERGY_NSTEPS_SIM);
          if (enerhist->nsum > 0)
          {
 -            flags_enh |= ((1 << eenhENERGY_AVER) | (1 << eenhENERGY_SUM) | (1 << eenhENERGY_NSUM));
 +            headerContents.flags_enh |=
 +                    ((1 << eenhENERGY_AVER) | (1 << eenhENERGY_SUM) | (1 << eenhENERGY_NSUM));
          }
          if (enerhist->nsum_sim > 0)
          {
 -            flags_enh |= ((1 << eenhENERGY_SUM_SIM) | (1 << eenhENERGY_NSUM_SIM));
 +            headerContents.flags_enh |= ((1 << eenhENERGY_SUM_SIM) | (1 << eenhENERGY_NSUM_SIM));
          }
          if (enerhist->deltaHForeignLambdas != nullptr)
          {
 -            flags_enh |= ((1 << eenhENERGY_DELTA_H_NN) | (1 << eenhENERGY_DELTA_H_LIST)
 -                          | (1 << eenhENERGY_DELTA_H_STARTTIME) | (1 << eenhENERGY_DELTA_H_STARTLAMBDA));
 +            headerContents.flags_enh |=
 +                    ((1 << eenhENERGY_DELTA_H_NN) | (1 << eenhENERGY_DELTA_H_LIST)
 +                     | (1 << eenhENERGY_DELTA_H_STARTTIME) | (1 << eenhENERGY_DELTA_H_STARTLAMBDA));
          }
      }
  
 -    PullHistory* pullHist         = observablesHistory->pullHistory.get();
 -    int          flagsPullHistory = 0;
 +    PullHistory* pullHist           = observablesHistory->pullHistory.get();
 +    headerContents.flagsPullHistory = 0;
      if (pullHist != nullptr && (pullHist->numValuesInXSum > 0 || pullHist->numValuesInFSum > 0))
      {
 -        flagsPullHistory |= (1 << epullhPULL_NUMCOORDINATES);
 -        flagsPullHistory |= ((1 << epullhPULL_NUMGROUPS) | (1 << epullhPULL_NUMVALUESINXSUM)
 -                             | (1 << epullhPULL_NUMVALUESINFSUM));
 +        headerContents.flagsPullHistory |= (1 << epullhPULL_NUMCOORDINATES);
 +        headerContents.flagsPullHistory |= ((1 << epullhPULL_NUMGROUPS) | (1 << epullhPULL_NUMVALUESINXSUM)
 +                                            | (1 << epullhPULL_NUMVALUESINFSUM));
      }
  
 -    int flags_dfh;
 +    headerContents.flags_dfh = 0;
      if (bExpanded)
      {
 -        flags_dfh = ((1 << edfhBEQUIL) | (1 << edfhNATLAMBDA) | (1 << edfhSUMWEIGHTS)
 -                     | (1 << edfhSUMDG) | (1 << edfhTIJ) | (1 << edfhTIJEMP));
 +        headerContents.flags_dfh = ((1 << edfhBEQUIL) | (1 << edfhNATLAMBDA) | (1 << edfhSUMWEIGHTS)
 +                                    | (1 << edfhSUMDG) | (1 << edfhTIJ) | (1 << edfhTIJEMP));
          if (EWL(elamstats))
          {
 -            flags_dfh |= ((1 << edfhWLDELTA) | (1 << edfhWLHISTO));
 +            headerContents.flags_dfh |= ((1 << edfhWLDELTA) | (1 << edfhWLHISTO));
          }
          if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER)
              || (elamstats == elamstatsMETROPOLIS))
          {
 -            flags_dfh |= ((1 << edfhACCUMP) | (1 << edfhACCUMM) | (1 << edfhACCUMP2)
 -                          | (1 << edfhACCUMM2) | (1 << edfhSUMMINVAR) | (1 << edfhSUMVAR));
 +            headerContents.flags_dfh |= ((1 << edfhACCUMP) | (1 << edfhACCUMM) | (1 << edfhACCUMP2)
 +                                         | (1 << edfhACCUMM2) | (1 << edfhSUMMINVAR) | (1 << edfhSUMVAR));
          }
      }
 -    else
 -    {
 -        flags_dfh = 0;
 -    }
  
 -    int flags_awhh = 0;
 +    headerContents.flags_awhh = 0;
      if (state->awhHistory != nullptr && !state->awhHistory->bias.empty())
      {
 -        flags_awhh |= ((1 << eawhhIN_INITIAL) | (1 << eawhhEQUILIBRATEHISTOGRAM) | (1 << eawhhHISTSIZE)
 -                       | (1 << eawhhNPOINTS) | (1 << eawhhCOORDPOINT) | (1 << eawhhUMBRELLAGRIDPOINT)
 -                       | (1 << eawhhUPDATELIST) | (1 << eawhhLOGSCALEDSAMPLEWEIGHT)
 -                       | (1 << eawhhNUMUPDATES) | (1 << eawhhFORCECORRELATIONGRID));
 -    }
 -
 -    /* We can check many more things now (CPU, acceleration, etc), but
 -     * it is highly unlikely to have two separate builds with exactly
 -     * the same version, user, time, and build host!
 -     */
 -
 -    int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
 -
 -    edsamhistory_t* edsamhist = observablesHistory->edsamHistory.get();
 -    int             nED       = (edsamhist ? edsamhist->nED : 0);
 -
 -    swaphistory_t* swaphist    = observablesHistory->swapHistory.get();
 -    int            eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
 -
 -    CheckpointHeaderContents headerContents = { 0,
 -                                                { 0 },
 -                                                { 0 },
 -                                                { 0 },
 -                                                { 0 },
 -                                                GMX_DOUBLE,
 -                                                { 0 },
 -                                                { 0 },
 -                                                eIntegrator,
 -                                                simulation_part,
 -                                                step,
 -                                                t,
 -                                                nppnodes,
 -                                                { 0 },
 -                                                npmenodes,
 -                                                state->natoms,
 -                                                state->ngtc,
 -                                                state->nnhpres,
 -                                                state->nhchainlength,
 -                                                nlambda,
 -                                                state->flags,
 -                                                flags_eks,
 -                                                flags_enh,
 -                                                flagsPullHistory,
 -                                                flags_dfh,
 -                                                flags_awhh,
 -                                                nED,
 -                                                eSwapCoords };
 -    std::strcpy(headerContents.version, gmx_version());
 -    std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
 -    std::strcpy(headerContents.ftime, timebuf.c_str());
 -    if (DOMAINDECOMP(cr))
 -    {
 -        copy_ivec(domdecCells, headerContents.dd_nc);
 +        headerContents.flags_awhh |=
 +                ((1 << eawhhIN_INITIAL) | (1 << eawhhEQUILIBRATEHISTOGRAM) | (1 << eawhhHISTSIZE)
 +                 | (1 << eawhhNPOINTS) | (1 << eawhhCOORDPOINT) | (1 << eawhhUMBRELLAGRIDPOINT)
 +                 | (1 << eawhhUPDATELIST) | (1 << eawhhLOGSCALEDSAMPLEWEIGHT)
 +                 | (1 << eawhhNUMUPDATES) | (1 << eawhhFORCECORRELATIONGRID));
      }
  
      do_cpt_header(gmx_fio_getxdr(fp), FALSE, nullptr, &headerContents);
  
      if ((do_cpt_state(gmx_fio_getxdr(fp), state->flags, state, nullptr) < 0)
 -        || (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, nullptr) < 0)
 -        || (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, enerhist, nullptr) < 0)
 -        || (doCptPullHist(gmx_fio_getxdr(fp), FALSE, flagsPullHistory, pullHist, StatePart::pullHistory, nullptr)
 +        || (do_cpt_ekinstate(gmx_fio_getxdr(fp), headerContents.flags_eks, &state->ekinstate, nullptr) < 0)
 +        || (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, headerContents.flags_enh, enerhist, nullptr) < 0)
 +        || (doCptPullHist(gmx_fio_getxdr(fp), FALSE, headerContents.flagsPullHistory, pullHist,
 +                          StatePart::pullHistory, nullptr)
 +            < 0)
 +        || (do_cpt_df_hist(gmx_fio_getxdr(fp), headerContents.flags_dfh, headerContents.nlambda,
 +                           &state->dfhist, nullptr)
 +            < 0)
 +        || (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, headerContents.nED,
 +                           observablesHistory->edsamHistory.get(), nullptr)
 +            < 0)
 +        || (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, headerContents.flags_awhh, state->awhHistory.get(), nullptr) < 0)
 +        || (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, headerContents.eSwapCoords,
 +                             observablesHistory->swapHistory.get(), nullptr)
              < 0)
 -        || (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, nlambda, &state->dfhist, nullptr) < 0)
 -        || (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, nED, edsamhist, nullptr) < 0)
 -        || (do_cpt_awh(gmx_fio_getxdr(fp), FALSE, flags_awhh, state->awhHistory.get(), nullptr) < 0)
 -        || (do_cpt_swapstate(gmx_fio_getxdr(fp), FALSE, eSwapCoords, swaphist, nullptr) < 0)
 -        || (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, nullptr, headerContents.file_version) < 0))
 +        || (do_cpt_files(gmx_fio_getxdr(fp), FALSE, outputfiles, nullptr, headerContents.file_version) < 0))
      {
          gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
      }
          gmx::KeyValueTreeBuilder          builder;
          gmx::MdModulesWriteCheckpointData mdModulesWriteCheckpoint = { builder.rootObject(),
                                                                         headerContents.file_version };
 -        mdModulesNotifier.notifier_.notify(mdModulesWriteCheckpoint);
 +        mdModulesNotifier.checkpointingNotifications_.notify(mdModulesWriteCheckpoint);
          auto                     tree = builder.build();
          gmx::FileIOXdrSerializer serializer(fp);
          gmx::serializeKeyValueTree(tree, &serializer);
      }
  
 -    do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
 -
 -    /* we really, REALLY, want to make sure to physically write the checkpoint,
 -       and all the files it depends on, out to disk. Because we've
 -       opened the checkpoint with gmx_fio_open(), it's in our list
 -       of open files.  */
 -    ret = gmx_fio_all_output_fsync();
 -
 -    if (ret)
 +    // Checkpointing modular simulator
      {
 -        char buf[STRLEN];
 -        sprintf(buf, "Cannot fsync '%s'; maybe you are out of disk space?", gmx_fio_getname(ret));
 -
 -        if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
 -        {
 -            gmx_file(buf);
 -        }
 -        else
 -        {
 -            gmx_warning("%s", buf);
 -        }
 -    }
 -
 -    if (gmx_fio_close(fp) != 0)
 -    {
 -        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 -    }
 -
 -    /* we don't move the checkpoint if the user specified they didn't want it,
 -       or if the fsyncs failed */
 -#if !GMX_NO_RENAME
 -    if (!bNumberAndKeep && !ret)
 -    {
 -        if (gmx_fexist(fn))
 -        {
 -            /* Rename the previous checkpoint file */
 -            mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
 -
 -            std::strcpy(buf, fn);
 -            buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 -            std::strcat(buf, "_prev");
 -            std::strcat(buf, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
 -            if (!GMX_FAHCORE)
 -            {
 -                /* we copy here so that if something goes wrong between now and
 -                 * the rename below, there's always a state.cpt.
 -                 * If renames are atomic (such as in POSIX systems),
 -                 * this copying should be unneccesary.
 -                 */
 -                gmx_file_copy(fn, buf, FALSE);
 -                /* We don't really care if this fails:
 -                 * there's already a new checkpoint.
 -                 */
 -            }
 -            else
 -            {
 -                gmx_file_rename(fn, buf);
 -            }
 -        }
 -
 -        /* Rename the checkpoint file from the temporary to the final name */
 -        mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
 -
 -        if (gmx_file_rename(fntemp, fn) != 0)
 -        {
 -            gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
 -        }
 +        gmx::FileIOXdrSerializer serializer(fp);
 +        modularSimulatorCheckpointData->serialize(&serializer);
      }
 -#endif /* GMX_NO_RENAME */
 -
 -    sfree(fntemp);
  
 +    do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
+ #if GMX_FAHCORE
+     /* Always FAH checkpoint immediately after a Gromacs checkpoint.
+      *
+      * Note that it is critical that we save a FAH checkpoint directly
+      * after writing a Gromacs checkpoint.  If the program dies, either
+      * by the machine powering off suddenly or the process being
+      * killed, FAH can recover files that have only appended data by
+      * truncating them to the last recorded length.  The Gromacs
+      * checkpoint does not just append data, it is fully rewritten each
+      * time so a crash between moving the new Gromacs checkpoint file in
+      * to place and writing a FAH checkpoint is not recoverable.  Thus
+      * the time between these operations must be kept as short as
+      * possible.
+      */
+     fcCheckpoint();
+ #endif
  }
  
  static void check_int(FILE* fplog, const char* type, int p, int f, gmx_bool* mm)
@@@ -2421,12 -2553,11 +2433,12 @@@ static void check_match(FILE
          check_int(fplog, "#ranks", cr->nnodes, headerContents.nnodes, &mm);
      }
  
 -    if (cr->nnodes > 1 && reproducibilityRequested)
 +    if (cr->sizeOfDefaultCommunicator > 1 && reproducibilityRequested)
      {
 +        // TODO: These checks are incorrect (see redmine #3309)
          check_int(fplog, "#PME-ranks", cr->npmenodes, headerContents.npme, &mm);
  
 -        int npp = cr->nnodes;
 +        int npp = cr->sizeOfDefaultCommunicator;
          if (cr->npmenodes >= 0)
          {
              npp -= cr->npmenodes;
      }
  }
  
 -static void read_checkpoint(const char*                   fn,
 -                            t_fileio*                     logfio,
 -                            const t_commrec*              cr,
 -                            const ivec                    dd_nc,
 -                            int                           eIntegrator,
 -                            int*                          init_fep_state,
 -                            CheckpointHeaderContents*     headerContents,
 -                            t_state*                      state,
 -                            ObservablesHistory*           observablesHistory,
 -                            gmx_bool                      reproducibilityRequested,
 -                            const gmx::MdModulesNotifier& mdModulesNotifier)
 +static void read_checkpoint(const char*                    fn,
 +                            t_fileio*                      logfio,
 +                            const t_commrec*               cr,
 +                            const ivec                     dd_nc,
 +                            int                            eIntegrator,
 +                            int*                           init_fep_state,
 +                            CheckpointHeaderContents*      headerContents,
 +                            t_state*                       state,
 +                            ObservablesHistory*            observablesHistory,
 +                            gmx_bool                       reproducibilityRequested,
 +                            const gmx::MdModulesNotifier&  mdModulesNotifier,
 +                            gmx::ReadCheckpointDataHolder* modularSimulatorCheckpointData,
 +                            bool                           useModularSimulator)
  {
      t_fileio* fp;
      char      buf[STEPSTRSIZE];
                    fn);
      }
  
 -    if (headerContents->flags_state != state->flags)
 +    // For modular simulator, no state object is populated, so we cannot do this check here!
 +    if (headerContents->flags_state != state->flags && !useModularSimulator)
      {
          gmx_fatal(FARGS,
                    "Cannot change a simulation algorithm during a checkpoint restart. Perhaps you "
                    fn);
      }
  
 +    GMX_ASSERT(!(headerContents->isModularSimulatorCheckpoint && !useModularSimulator),
 +               "Checkpoint file was written by modular simulator, but the current simulation uses "
 +               "the legacy simulator.");
 +    GMX_ASSERT(!(!headerContents->isModularSimulatorCheckpoint && useModularSimulator),
 +               "Checkpoint file was written by legacy simulator, but the current simulation uses "
 +               "the modular simulator.");
 +
      if (MASTER(cr))
      {
          check_match(fplog, cr, dd_nc, *headerContents, reproducibilityRequested);
          cp_error();
      }
      do_cpt_mdmodules(headerContents->file_version, fp, mdModulesNotifier);
 +    if (headerContents->file_version >= cptv_ModularSimulator)
 +    {
 +        gmx::FileIOXdrSerializer serializer(fp);
 +        modularSimulatorCheckpointData->deserialize(&serializer);
 +    }
      ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents->file_version);
      if (ret)
      {
  }
  
  
 -void load_checkpoint(const char*                   fn,
 -                     t_fileio*                     logfio,
 -                     const t_commrec*              cr,
 -                     const ivec                    dd_nc,
 -                     t_inputrec*                   ir,
 -                     t_state*                      state,
 -                     ObservablesHistory*           observablesHistory,
 -                     gmx_bool                      reproducibilityRequested,
 -                     const gmx::MdModulesNotifier& mdModulesNotifier)
 +void load_checkpoint(const char*                    fn,
 +                     t_fileio*                      logfio,
 +                     const t_commrec*               cr,
 +                     const ivec                     dd_nc,
 +                     t_inputrec*                    ir,
 +                     t_state*                       state,
 +                     ObservablesHistory*            observablesHistory,
 +                     gmx_bool                       reproducibilityRequested,
 +                     const gmx::MdModulesNotifier&  mdModulesNotifier,
 +                     gmx::ReadCheckpointDataHolder* modularSimulatorCheckpointData,
 +                     bool                           useModularSimulator)
  {
      CheckpointHeaderContents headerContents;
      if (SIMMASTER(cr))
      {
          /* Read the state from the checkpoint file */
 -        read_checkpoint(fn, logfio, cr, dd_nc, ir->eI, &(ir->fepvals->init_fep_state), &headerContents,
 -                        state, observablesHistory, reproducibilityRequested, mdModulesNotifier);
 +        read_checkpoint(fn, logfio, cr, dd_nc, ir->eI, &(ir->fepvals->init_fep_state),
 +                        &headerContents, state, observablesHistory, reproducibilityRequested,
 +                        mdModulesNotifier, modularSimulatorCheckpointData, useModularSimulator);
      }
      if (PAR(cr))
      {
 -        gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr);
 -        gmx::MdModulesCheckpointReadingBroadcast broadcastCheckPointData = { *cr, headerContents.file_version };
 -        mdModulesNotifier.notifier_.notify(broadcastCheckPointData);
 +        gmx_bcast(sizeof(headerContents.step), &headerContents.step, cr->mpiDefaultCommunicator);
 +        gmx::MdModulesCheckpointReadingBroadcast broadcastCheckPointData = {
 +            cr->mpiDefaultCommunicator, PAR(cr), headerContents.file_version
 +        };
 +        mdModulesNotifier.checkpointingNotifications_.notify(broadcastCheckPointData);
      }
      ir->bContinuation = TRUE;
      if (ir->nsteps >= 0)
@@@ -2854,14 -2965,6 +2866,14 @@@ static CheckpointHeaderContents read_ch
      }
      gmx::MdModulesNotifier mdModuleNotifier;
      do_cpt_mdmodules(headerContents.file_version, fp, mdModuleNotifier);
 +    if (headerContents.file_version >= cptv_ModularSimulator)
 +    {
 +        // In the scope of the current function, we can just throw away the content
 +        // of the modular checkpoint, but we need to read it to move the file pointer
 +        gmx::FileIOXdrSerializer      serializer(fp);
 +        gmx::ReadCheckpointDataHolder modularSimulatorCheckpointData;
 +        modularSimulatorCheckpointData.deserialize(&serializer);
 +    }
      ret = do_cpt_footer(gmx_fio_getxdr(fp), headerContents.file_version);
      if (ret)
      {
index ad5ee440ff32c9b9884b837cc718702d4d2cf2a6,9c6d01670a6b6e3b7678cd6f3c066a202a4fc23b..241ef2d64d7a1274294376bca1508d9736067267
@@@ -3,8 -3,7 +3,8 @@@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
 - * Copyright (c) 2013-2020, by the GROMACS development team, led by
 + * Copyright (c) 2013,2014,2015,2016,2017, The GROMACS development team.
 + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -47,7 -46,7 +47,7 @@@
  #include <algorithm>
  #include <string>
  
 -#include "gromacs/awh/read_params.h"
 +#include "gromacs/applied_forces/awh/read_params.h"
  #include "gromacs/fileio/readinp.h"
  #include "gromacs/fileio/warninp.h"
  #include "gromacs/gmxlib/network.h"
@@@ -59,7 -58,6 +59,7 @@@
  #include "gromacs/mdrun/mdmodules.h"
  #include "gromacs/mdtypes/inputrec.h"
  #include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/mdtypes/multipletimestepping.h"
  #include "gromacs/mdtypes/pull_params.h"
  #include "gromacs/options/options.h"
  #include "gromacs/options/treesupport.h"
   * message.
   */
  
 -typedef struct t_inputrec_strings
 +struct gmx_inputrec_strings
  {
      char tcgrps[STRLEN], tau_t[STRLEN], ref_t[STRLEN], acc[STRLEN], accgrps[STRLEN], freeze[STRLEN],
              frdim[STRLEN], energy[STRLEN], user1[STRLEN], user2[STRLEN], vcm[STRLEN],
              x_compressed_groups[STRLEN], couple_moltype[STRLEN], orirefitgrp[STRLEN],
              egptable[STRLEN], egpexcl[STRLEN], wall_atomtype[STRLEN], wall_density[STRLEN],
              deform[STRLEN], QMMM[STRLEN], imd_grp[STRLEN];
 -    char   fep_lambda[efptNR][STRLEN];
 -    char   lambda_weights[STRLEN];
 -    char** pull_grp;
 -    char** rot_grp;
 -    char   anneal[STRLEN], anneal_npoints[STRLEN], anneal_time[STRLEN], anneal_temp[STRLEN];
 -    char   QMmethod[STRLEN], QMbasis[STRLEN], QMcharge[STRLEN], QMmult[STRLEN], bSH[STRLEN],
 +    char                     fep_lambda[efptNR][STRLEN];
 +    char                     lambda_weights[STRLEN];
 +    std::vector<std::string> pullGroupNames;
 +    std::vector<std::string> rotateGroupNames;
 +    char anneal[STRLEN], anneal_npoints[STRLEN], anneal_time[STRLEN], anneal_temp[STRLEN];
 +    char QMmethod[STRLEN], QMbasis[STRLEN], QMcharge[STRLEN], QMmult[STRLEN], bSH[STRLEN],
              CASorbitals[STRLEN], CASelectrons[STRLEN], SAon[STRLEN], SAoff[STRLEN], SAsteps[STRLEN];
 +};
  
 -} gmx_inputrec_strings;
 -
 -static gmx_inputrec_strings* is = nullptr;
 +static gmx_inputrec_strings* inputrecStrings = nullptr;
  
  void init_inputrec_strings()
  {
 -    if (is)
 +    if (inputrecStrings)
      {
          gmx_incons(
                  "Attempted to call init_inputrec_strings before calling done_inputrec_strings. "
                  "Only one inputrec (i.e. .mdp file) can be parsed at a time.");
      }
 -    snew(is, 1);
 +    inputrecStrings = new gmx_inputrec_strings();
  }
  
  void done_inputrec_strings()
  {
 -    sfree(is);
 -    is = nullptr;
 +    delete inputrecStrings;
 +    inputrecStrings = nullptr;
  }
  
  
@@@ -233,89 -232,6 +233,89 @@@ static void process_interaction_modifie
      }
  }
  
 +static void checkMtsRequirement(const t_inputrec& ir, const char* param, const int nstValue, warninp_t wi)
 +{
 +    GMX_RELEASE_ASSERT(ir.mtsLevels.size() >= 2, "Need at least two levels for MTS");
 +    const int mtsFactor = ir.mtsLevels.back().stepFactor;
 +    if (nstValue % mtsFactor != 0)
 +    {
 +        auto message = gmx::formatString(
 +                "With MTS, %s = %d should be a multiple of mts-factor = %d", param, nstValue, mtsFactor);
 +        warning_error(wi, message.c_str());
 +    }
 +}
 +
 +static void setupMtsLevels(gmx::ArrayRef<gmx::MtsLevel> mtsLevels,
 +                           const t_inputrec&            ir,
 +                           const t_gromppopts&          opts,
 +                           warninp_t                    wi)
 +{
 +    if (!(ir.eI == eiMD || ir.eI == eiSD1))
 +    {
 +        auto message = gmx::formatString(
 +                "Multiple time stepping is only supported with integrators %s and %s",
 +                ei_names[eiMD], ei_names[eiSD1]);
 +        warning_error(wi, message.c_str());
 +    }
 +    if (opts.numMtsLevels != 2)
 +    {
 +        warning_error(wi, "Only mts-levels = 2 is supported");
 +    }
 +    else
 +    {
 +        const std::vector<std::string> inputForceGroups = gmx::splitString(opts.mtsLevel2Forces);
 +        auto&                          forceGroups      = mtsLevels[1].forceGroups;
 +        for (const auto& inputForceGroup : inputForceGroups)
 +        {
 +            bool found     = false;
 +            int  nameIndex = 0;
 +            for (const auto& forceGroupName : gmx::mtsForceGroupNames)
 +            {
 +                if (gmx::equalCaseInsensitive(inputForceGroup, forceGroupName))
 +                {
 +                    forceGroups.set(nameIndex);
 +                    found = true;
 +                }
 +                nameIndex++;
 +            }
 +            if (!found)
 +            {
 +                auto message =
 +                        gmx::formatString("Unknown MTS force group '%s'", inputForceGroup.c_str());
 +                warning_error(wi, message.c_str());
 +            }
 +        }
 +
 +        if (mtsLevels[1].stepFactor <= 1)
 +        {
 +            gmx_fatal(FARGS, "mts-factor should be larger than 1");
 +        }
 +
 +        // Make the level 0 use the complement of the force groups of group 1
 +        mtsLevels[0].forceGroups = ~mtsLevels[1].forceGroups;
 +        mtsLevels[0].stepFactor  = 1;
 +
 +        if ((EEL_FULL(ir.coulombtype) || EVDW_PME(ir.vdwtype))
 +            && !mtsLevels[1].forceGroups[static_cast<int>(gmx::MtsForceGroups::LongrangeNonbonded)])
 +        {
 +            warning_error(wi,
 +                          "With long-range electrostatics and/or LJ treatment, the long-range part "
 +                          "has to be part of the mts-level2-forces");
 +        }
 +
 +        if (ir.nstcalcenergy > 0)
 +        {
 +            checkMtsRequirement(ir, "nstcalcenergy", ir.nstcalcenergy, wi);
 +        }
 +        checkMtsRequirement(ir, "nstenergy", ir.nstenergy, wi);
 +        checkMtsRequirement(ir, "nstlog", ir.nstlog, wi);
 +        if (ir.efep != efepNO)
 +        {
 +            checkMtsRequirement(ir, "nstdhdl", ir.fepvals->nstdhdl, wi);
 +        }
 +    }
 +}
 +
  void check_ir(const char*                   mdparin,
                const gmx::MdModulesNotifier& mdModulesNotifier,
                t_inputrec*                   ir,
              {
                  ir->nstpcouple = ir_optimal_nstpcouple(ir);
              }
 +            if (ir->useMts && ir->nstpcouple % ir->mtsLevels.back().stepFactor != 0)
 +            {
 +                warning_error(wi,
 +                              "With multiple time stepping, nstpcouple should be a mutiple of "
 +                              "mts-factor");
 +            }
          }
  
          if (ir->nstcalcenergy > 0)
          // Inquire all MdModules, if their parameters match with the energy
          // calculation frequency
          gmx::EnergyCalculationFrequencyErrors energyCalculationFrequencyErrors(ir->nstcalcenergy);
 -        mdModulesNotifier.notifier_.notify(&energyCalculationFrequencyErrors);
 +        mdModulesNotifier.preProcessingNotifications_.notify(&energyCalculationFrequencyErrors);
  
          // Emit all errors from the energy calculation frequency checks
          for (const std::string& energyFrequencyErrorMessage :
      /* TPI STUFF */
      if (EI_TPI(ir->eI))
      {
 -        sprintf(err_buf, "TPI only works with pbc = %s", epbc_names[epbcXYZ]);
 -        CHECK(ir->ePBC != epbcXYZ);
 +        sprintf(err_buf, "TPI only works with pbc = %s", c_pbcTypeNames[PbcType::Xyz].c_str());
 +        CHECK(ir->pbcType != PbcType::Xyz);
          sprintf(err_buf, "with TPI nstlist should be larger than zero");
          CHECK(ir->nstlist <= 0);
          sprintf(err_buf, "TPI does not work with full electrostatics other than PME");
          sprintf(err_buf, "The soft-core power is %d and can only be 1 or 2", fep->sc_power);
          CHECK(fep->sc_alpha != 0 && fep->sc_power != 1 && fep->sc_power != 2);
  
 -        sprintf(err_buf, "The soft-core sc-r-power is %d and can only be 6 or 48",
 +        sprintf(err_buf,
 +                "The soft-core sc-r-power is %d and can only be 6. (sc-r-power 48 is no longer "
 +                "supported.)",
                  static_cast<int>(fep->sc_r_power));
 -        CHECK(fep->sc_alpha != 0 && fep->sc_r_power != 6.0 && fep->sc_r_power != 48.0);
 +        CHECK(fep->sc_alpha != 0 && fep->sc_r_power != 6.0);
  
          sprintf(err_buf,
                  "Can't use positive delta-lambda (%g) if initial state/lambda does not start at "
      }
  
      /* PBC/WALLS */
 -    sprintf(err_buf, "walls only work with pbc=%s", epbc_names[epbcXY]);
 -    CHECK(ir->nwall && ir->ePBC != epbcXY);
 +    sprintf(err_buf, "walls only work with pbc=%s", c_pbcTypeNames[PbcType::XY].c_str());
 +    CHECK(ir->nwall && ir->pbcType != PbcType::XY);
  
      /* VACUUM STUFF */
 -    if (ir->ePBC != epbcXYZ && ir->nwall != 2)
 +    if (ir->pbcType != PbcType::Xyz && ir->nwall != 2)
      {
 -        if (ir->ePBC == epbcNONE)
 +        if (ir->pbcType == PbcType::No)
          {
              if (ir->epc != epcNO)
              {
          }
          else
          {
 -            sprintf(err_buf, "Can not have pressure coupling with pbc=%s", epbc_names[ir->ePBC]);
 +            sprintf(err_buf, "Can not have pressure coupling with pbc=%s",
 +                    c_pbcTypeNames[ir->pbcType].c_str());
              CHECK(ir->epc != epcNO);
          }
 -        sprintf(err_buf, "Can not have Ewald with pbc=%s", epbc_names[ir->ePBC]);
 +        sprintf(err_buf, "Can not have Ewald with pbc=%s", c_pbcTypeNames[ir->pbcType].c_str());
          CHECK(EEL_FULL(ir->coulombtype));
  
 -        sprintf(err_buf, "Can not have dispersion correction with pbc=%s", epbc_names[ir->ePBC]);
 +        sprintf(err_buf, "Can not have dispersion correction with pbc=%s",
 +                c_pbcTypeNames[ir->pbcType].c_str());
          CHECK(ir->eDispCorr != edispcNO);
      }
  
                  "with coulombtype = %s or coulombtype = %s\n"
                  "without periodic boundary conditions (pbc = %s) and\n"
                  "rcoulomb and rvdw set to zero",
 -                eel_names[eelCUT], eel_names[eelUSER], epbc_names[epbcNONE]);
 +                eel_names[eelCUT], eel_names[eelUSER], c_pbcTypeNames[PbcType::No].c_str());
          CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER))
 -              || (ir->ePBC != epbcNONE) || (ir->rcoulomb != 0.0) || (ir->rvdw != 0.0));
 +              || (ir->pbcType != PbcType::No) || (ir->rcoulomb != 0.0) || (ir->rvdw != 0.0));
  
          if (ir->nstlist > 0)
          {
                      "Can not remove the rotation around the center of mass with periodic "
                      "molecules");
              CHECK(ir->bPeriodicMols);
 -            if (ir->ePBC != epbcNONE)
 +            if (ir->pbcType != PbcType::No)
              {
                  warning(wi,
                          "Removing the rotation around the center of mass in a periodic system, "
          }
      }
  
 -    if (EI_STATE_VELOCITY(ir->eI) && !EI_SD(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR)
 +    if (EI_STATE_VELOCITY(ir->eI) && !EI_SD(ir->eI) && ir->pbcType == PbcType::No && ir->comm_mode != ecmANGULAR)
      {
          sprintf(warn_buf,
                  "Tumbling and flying ice-cubes: We are not removing rotation around center of mass "
      {
          if (ir->ewald_geometry == eewg3D)
          {
 -            sprintf(warn_buf, "With pbc=%s you should use ewald-geometry=%s", epbc_names[ir->ePBC],
 -                    eewg_names[eewg3DC]);
 +            sprintf(warn_buf, "With pbc=%s you should use ewald-geometry=%s",
 +                    c_pbcTypeNames[ir->pbcType].c_str(), eewg_names[eewg3DC]);
              warning(wi, warn_buf);
          }
          /* This check avoids extra pbc coding for exclusion corrections */
          sprintf(err_buf, "wall-ewald-zfac should be >= 2");
          CHECK(ir->wall_ewald_zfac < 2);
      }
 -    if ((ir->ewald_geometry == eewg3DC) && (ir->ePBC != epbcXY) && EEL_FULL(ir->coulombtype))
 +    if ((ir->ewald_geometry == eewg3DC) && (ir->pbcType != PbcType::XY) && EEL_FULL(ir->coulombtype))
      {
          sprintf(warn_buf, "With %s and ewald_geometry = %s you should use pbc = %s",
 -                eel_names[ir->coulombtype], eewg_names[eewg3DC], epbc_names[epbcXY]);
 +                eel_names[ir->coulombtype], eewg_names[eewg3DC], c_pbcTypeNames[PbcType::XY].c_str());
          warning(wi, warn_buf);
      }
      if ((ir->epsilon_surface != 0) && EEL_FULL(ir->coulombtype))
  
      if (ir->bQMMM)
      {
 -        warning_error(wi, "QMMM is currently not supported");
 -        if (!EI_DYNAMICS(ir->eI))
 -        {
 -            char buf[STRLEN];
 -            sprintf(buf, "QMMM is only supported with dynamics, not with integrator %s", ei_names[ir->eI]);
 -            warning_error(wi, buf);
 -        }
 +        warning_error(wi, "The QMMM integration you are trying to use is no longer supported");
      }
  
      if (ir->bAdress)
@@@ -1652,6 -1564,17 +1652,6 @@@ static void do_fep_params(t_inputrec* i
      }
  
  
 -    /* make it easier if sc_r_power = 48 by increasing it to the 4th power, to be in the right scale. */
 -    if (fep->sc_r_power == 48)
 -    {
 -        if (fep->sc_alpha > 0.1)
 -        {
 -            gmx_fatal(FARGS,
 -                      "sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004",
 -                      fep->sc_alpha);
 -        }
 -    }
 -
      /* now read in the weights */
      parse_n_real(weights, &nweights, &(expand->init_lambda_weights), wi);
      if (nweights == 0)
@@@ -1678,6 -1601,19 +1678,6 @@@ static void do_simtemp_params(t_inputre
      GetSimTemps(ir->fepvals->n_lambda, ir->simtempvals, ir->fepvals->all_lambda[efptTEMPERATURE]);
  }
  
 -static void convertYesNos(warninp_t /*wi*/,
 -                          gmx::ArrayRef<const std::string> inputs,
 -                          const char* /*name*/,
 -                          gmx_bool* outputs)
 -{
 -    int i = 0;
 -    for (const auto& input : inputs)
 -    {
 -        outputs[i] = gmx::equalCaseInsensitive(input, "Y", 1);
 -        ++i;
 -    }
 -}
 -
  template<typename T>
  void convertInts(warninp_t wi, gmx::ArrayRef<const std::string> inputs, const char* name, T* outputs)
  {
@@@ -1765,7 -1701,6 +1765,7 @@@ static void do_wall_params(t_inputrec* 
              gmx_fatal(FARGS, "Expected %d elements for wall_atomtype, found %zu", ir->nwall,
                        wallAtomTypes.size());
          }
 +        GMX_RELEASE_ASSERT(ir->nwall < 3, "Invalid number of walls");
          for (int i = 0; i < ir->nwall; i++)
          {
              opts->wall_atomtype[i] = gmx_strdup(wallAtomTypes[i].c_str());
@@@ -1973,30 -1908,12 +1973,30 @@@ void get_ir(const char*     mdparin
      printStringNoNewline(
              &inp, "Part index is updated automatically on checkpointing (keeps files separate)");
      ir->simulation_part = get_eint(&inp, "simulation-part", 1, wi);
 +    printStringNoNewline(&inp, "Multiple time-stepping");
 +    ir->useMts = (get_eeenum(&inp, "mts", yesno_names, wi) != 0);
 +    if (ir->useMts)
 +    {
 +        opts->numMtsLevels = get_eint(&inp, "mts-levels", 2, wi);
 +        ir->mtsLevels.resize(2);
 +        gmx::MtsLevel& mtsLevel = ir->mtsLevels[1];
 +        opts->mtsLevel2Forces   = setStringEntry(&inp, "mts-level2-forces",
 +                                               "longrange-nonbonded nonbonded pair dihedral");
 +        mtsLevel.stepFactor     = get_eint(&inp, "mts-level2-factor", 2, wi);
 +
 +        // Without a dynamical integrator MTS is not used. We still read the MTS options above
 +        // and only clear them here, so the user is not forced to remove MTS mdp options.
 +        if (!EI_DYNAMICS(ir->eI))
 +        {
 +            ir->useMts = false;
 +            ir->mtsLevels.clear();
 +        }
 +    }
      printStringNoNewline(&inp, "mode for center of mass motion removal");
      ir->comm_mode = get_eeenum(&inp, "comm-mode", ecm_names, wi);
      printStringNoNewline(&inp, "number of steps for center of mass motion removal");
      ir->nstcomm = get_eint(&inp, "nstcomm", 100, wi);
      printStringNoNewline(&inp, "group(s) for center of mass motion removal");
 -    setStringEntry(&inp, "comm-grps", is->vcm, nullptr);
 +    setStringEntry(&inp, "comm-grps", inputrecStrings->vcm, nullptr);
  
      printStringNewline(&inp, "LANGEVIN DYNAMICS OPTIONS");
      printStringNoNewline(&inp, "Friction coefficient (amu/ps) and random seed");
      printStringNoNewline(&inp, "This selects the subset of atoms for the compressed");
      printStringNoNewline(&inp, "trajectory file. You can select multiple groups. By");
      printStringNoNewline(&inp, "default, all atoms will be written.");
 -    setStringEntry(&inp, "compressed-x-grps", is->x_compressed_groups, nullptr);
 +    setStringEntry(&inp, "compressed-x-grps", inputrecStrings->x_compressed_groups, nullptr);
      printStringNoNewline(&inp, "Selection of energy groups");
 -    setStringEntry(&inp, "energygrps", is->energy, nullptr);
 +    setStringEntry(&inp, "energygrps", inputrecStrings->energy, nullptr);
  
      /* Neighbor searching */
      printStringNewline(&inp, "NEIGHBORSEARCHING PARAMETERS");
      printStringNoNewline(&inp, "nblist update frequency");
      ir->nstlist = get_eint(&inp, "nstlist", 10, wi);
      printStringNoNewline(&inp, "Periodic boundary conditions: xyz, no, xy");
 -    ir->ePBC          = get_eeenum(&inp, "pbc", epbc_names, wi);
 +    // TODO This conversion should be removed when proper std::string handling is added to get_eeenum(...), etc.
 +    std::vector<const char*> pbcTypesNamesChar;
 +    for (const auto& pbcTypeName : c_pbcTypeNames)
 +    {
 +        pbcTypesNamesChar.push_back(pbcTypeName.c_str());
 +    }
 +    ir->pbcType       = static_cast<PbcType>(get_eeenum(&inp, "pbc", pbcTypesNamesChar.data(), wi));
      ir->bPeriodicMols = get_eeenum(&inp, "periodic-molecules", yesno_names, wi) != 0;
      printStringNoNewline(&inp,
                           "Allowed energy error due to the Verlet buffer in kJ/mol/ps per atom,");
      printStringNoNewline(&inp, "Extension of the potential lookup tables beyond the cut-off");
      ir->tabext = get_ereal(&inp, "table-extension", 1.0, wi);
      printStringNoNewline(&inp, "Separate tables between energy group pairs");
 -    setStringEntry(&inp, "energygrp-table", is->egptable, nullptr);
 +    setStringEntry(&inp, "energygrp-table", inputrecStrings->egptable, nullptr);
      printStringNoNewline(&inp, "Spacing for the PME/PPPM FFT grid");
      ir->fourier_spacing = get_ereal(&inp, "fourierspacing", 0.12, wi);
      printStringNoNewline(&inp, "FFT grid size, when a value is 0 fourierspacing will be used");
      ir->opts.nhchainlength = get_eint(&inp, "nh-chain-length", 10, wi);
      ir->bPrintNHChains = (get_eeenum(&inp, "print-nose-hoover-chain-variables", yesno_names, wi) != 0);
      printStringNoNewline(&inp, "Groups to couple separately");
 -    setStringEntry(&inp, "tc-grps", is->tcgrps, nullptr);
 +    setStringEntry(&inp, "tc-grps", inputrecStrings->tcgrps, nullptr);
      printStringNoNewline(&inp, "Time constant (ps) and reference temperature (K)");
 -    setStringEntry(&inp, "tau-t", is->tau_t, nullptr);
 -    setStringEntry(&inp, "ref-t", is->ref_t, nullptr);
 +    setStringEntry(&inp, "tau-t", inputrecStrings->tau_t, nullptr);
 +    setStringEntry(&inp, "ref-t", inputrecStrings->ref_t, nullptr);
      printStringNoNewline(&inp, "pressure coupling");
      ir->epc        = get_eeenum(&inp, "pcoupl", epcoupl_names, wi);
      ir->epct       = get_eeenum(&inp, "pcoupltype", epcoupltype_names, wi);
      printStringNewline(&inp, "OPTIONS FOR QMMM calculations");
      ir->bQMMM = (get_eeenum(&inp, "QMMM", yesno_names, wi) != 0);
      printStringNoNewline(&inp, "Groups treated Quantum Mechanically");
 -    setStringEntry(&inp, "QMMM-grps", is->QMMM, nullptr);
 +    setStringEntry(&inp, "QMMM-grps", inputrecStrings->QMMM, nullptr);
      printStringNoNewline(&inp, "QM method");
 -    setStringEntry(&inp, "QMmethod", is->QMmethod, nullptr);
 +    setStringEntry(&inp, "QMmethod", inputrecStrings->QMmethod, nullptr);
      printStringNoNewline(&inp, "QMMM scheme");
 -    ir->QMMMscheme = get_eeenum(&inp, "QMMMscheme", eQMMMscheme_names, wi);
 +    const char* noQMMMSchemeName = "normal";
 +    get_eeenum(&inp, "QMMMscheme", &noQMMMSchemeName, wi);
      printStringNoNewline(&inp, "QM basisset");
 -    setStringEntry(&inp, "QMbasis", is->QMbasis, nullptr);
 +    setStringEntry(&inp, "QMbasis", inputrecStrings->QMbasis, nullptr);
      printStringNoNewline(&inp, "QM charge");
 -    setStringEntry(&inp, "QMcharge", is->QMcharge, nullptr);
 +    setStringEntry(&inp, "QMcharge", inputrecStrings->QMcharge, nullptr);
      printStringNoNewline(&inp, "QM multiplicity");
 -    setStringEntry(&inp, "QMmult", is->QMmult, nullptr);
 +    setStringEntry(&inp, "QMmult", inputrecStrings->QMmult, nullptr);
      printStringNoNewline(&inp, "Surface Hopping");
 -    setStringEntry(&inp, "SH", is->bSH, nullptr);
 +    setStringEntry(&inp, "SH", inputrecStrings->bSH, nullptr);
      printStringNoNewline(&inp, "CAS space options");
 -    setStringEntry(&inp, "CASorbitals", is->CASorbitals, nullptr);
 -    setStringEntry(&inp, "CASelectrons", is->CASelectrons, nullptr);
 -    setStringEntry(&inp, "SAon", is->SAon, nullptr);
 -    setStringEntry(&inp, "SAoff", is->SAoff, nullptr);
 -    setStringEntry(&inp, "SAsteps", is->SAsteps, nullptr);
 +    setStringEntry(&inp, "CASorbitals", inputrecStrings->CASorbitals, nullptr);
 +    setStringEntry(&inp, "CASelectrons", inputrecStrings->CASelectrons, nullptr);
 +    setStringEntry(&inp, "SAon", inputrecStrings->SAon, nullptr);
 +    setStringEntry(&inp, "SAoff", inputrecStrings->SAoff, nullptr);
 +    setStringEntry(&inp, "SAsteps", inputrecStrings->SAsteps, nullptr);
      printStringNoNewline(&inp, "Scale factor for MM charges");
 -    ir->scalefactor = get_ereal(&inp, "MMChargeScaleFactor", 1.0, wi);
 +    get_ereal(&inp, "MMChargeScaleFactor", 1.0, wi);
  
      /* Simulated annealing */
      printStringNewline(&inp, "SIMULATED ANNEALING");
      printStringNoNewline(&inp, "Type of annealing for each temperature group (no/single/periodic)");
 -    setStringEntry(&inp, "annealing", is->anneal, nullptr);
 +    setStringEntry(&inp, "annealing", inputrecStrings->anneal, nullptr);
      printStringNoNewline(&inp,
                           "Number of time points to use for specifying annealing in each group");
 -    setStringEntry(&inp, "annealing-npoints", is->anneal_npoints, nullptr);
 +    setStringEntry(&inp, "annealing-npoints", inputrecStrings->anneal_npoints, nullptr);
      printStringNoNewline(&inp, "List of times at the annealing points for each group");
 -    setStringEntry(&inp, "annealing-time", is->anneal_time, nullptr);
 +    setStringEntry(&inp, "annealing-time", inputrecStrings->anneal_time, nullptr);
      printStringNoNewline(&inp, "Temp. at each annealing point, for each group.");
 -    setStringEntry(&inp, "annealing-temp", is->anneal_temp, nullptr);
 +    setStringEntry(&inp, "annealing-temp", inputrecStrings->anneal_temp, nullptr);
  
      /* Startup run */
      printStringNewline(&inp, "GENERATE VELOCITIES FOR STARTUP RUN");
      printStringNewline(&inp, "ENERGY GROUP EXCLUSIONS");
      printStringNoNewline(
              &inp, "Pairs of energy groups for which all non-bonded interactions are excluded");
 -    setStringEntry(&inp, "energygrp-excl", is->egpexcl, nullptr);
 +    setStringEntry(&inp, "energygrp-excl", inputrecStrings->egpexcl, nullptr);
  
      /* Walls */
      printStringNewline(&inp, "WALLS");
      ir->nwall         = get_eint(&inp, "nwall", 0, wi);
      ir->wall_type     = get_eeenum(&inp, "wall-type", ewt_names, wi);
      ir->wall_r_linpot = get_ereal(&inp, "wall-r-linpot", -1, wi);
 -    setStringEntry(&inp, "wall-atomtype", is->wall_atomtype, nullptr);
 -    setStringEntry(&inp, "wall-density", is->wall_density, nullptr);
 +    setStringEntry(&inp, "wall-atomtype", inputrecStrings->wall_atomtype, nullptr);
 +    setStringEntry(&inp, "wall-density", inputrecStrings->wall_density, nullptr);
      ir->wall_ewald_zfac = get_ereal(&inp, "wall-ewald-zfac", 3, wi);
  
      /* COM pulling */
      if (ir->bPull)
      {
          snew(ir->pull, 1);
 -        is->pull_grp = read_pullparams(&inp, ir->pull, wi);
 +        inputrecStrings->pullGroupNames = read_pullparams(&inp, ir->pull, wi);
 +
 +        if (ir->useMts)
 +        {
 +            for (int c = 0; c < ir->pull->ncoord; c++)
 +            {
 +                if (ir->pull->coord[c].eType == epullCONSTRAINT)
 +                {
 +                    warning_error(wi,
 +                                  "Constraint COM pulling is not supported in combination with "
 +                                  "multiple time stepping");
 +                    break;
 +                }
 +            }
 +        }
      }
  
      /* AWH biasing
 -       NOTE: needs COM pulling input */
 +       NOTE: needs COM pulling or free energy input */
      printStringNewline(&inp, "AWH biasing");
      ir->bDoAwh = (get_eeenum(&inp, "awh", yesno_names, wi) != 0);
      if (ir->bDoAwh)
      {
 -        if (ir->bPull)
 -        {
 -            ir->awhParams = gmx::readAndCheckAwhParams(&inp, ir, wi);
 -        }
 -        else
 -        {
 -            gmx_fatal(FARGS, "AWH biasing is only compatible with COM pulling turned on");
 -        }
 +        ir->awhParams = gmx::readAwhParams(&inp, wi);
      }
  
      /* Enforced rotation */
      if (ir->bRot)
      {
          snew(ir->rot, 1);
 -        is->rot_grp = read_rotparams(&inp, ir->rot, wi);
 +        inputrecStrings->rotateGroupNames = read_rotparams(&inp, ir->rot, wi);
      }
  
      /* Interactive MD */
      ir->bIMD = FALSE;
      printStringNewline(&inp, "Group to display and/or manipulate in interactive MD session");
 -    setStringEntry(&inp, "IMD-group", is->imd_grp, nullptr);
 -    if (is->imd_grp[0] != '\0')
 +    setStringEntry(&inp, "IMD-group", inputrecStrings->imd_grp, nullptr);
 +    if (inputrecStrings->imd_grp[0] != '\0')
      {
          snew(ir->imd, 1);
          ir->bIMD = TRUE;
      printStringNoNewline(&inp, "Orientation restraints force constant and tau for time averaging");
      ir->orires_fc  = get_ereal(&inp, "orire-fc", 0.0, wi);
      ir->orires_tau = get_ereal(&inp, "orire-tau", 0.0, wi);
 -    setStringEntry(&inp, "orire-fitgrp", is->orirefitgrp, nullptr);
 +    setStringEntry(&inp, "orire-fitgrp", inputrecStrings->orirefitgrp, nullptr);
      printStringNoNewline(&inp, "Output frequency for trace(SD) and S to energy file");
      ir->nstorireout = get_eint(&inp, "nstorireout", 100, wi);
  
      /* free energy variables */
      printStringNewline(&inp, "Free energy variables");
      ir->efep = get_eeenum(&inp, "free-energy", efep_names, wi);
 -    setStringEntry(&inp, "couple-moltype", is->couple_moltype, nullptr);
 +    setStringEntry(&inp, "couple-moltype", inputrecStrings->couple_moltype, nullptr);
      opts->couple_lam0  = get_eeenum(&inp, "couple-lambda0", couple_lam, wi);
      opts->couple_lam1  = get_eeenum(&inp, "couple-lambda1", couple_lam, wi);
      opts->bCoupleIntra = (get_eeenum(&inp, "couple-intramol", yesno_names, wi) != 0);
      fep->init_fep_state = get_eint(&inp, "init-lambda-state", -1, wi);
      fep->delta_lambda   = get_ereal(&inp, "delta-lambda", 0.0, wi);
      fep->nstdhdl        = get_eint(&inp, "nstdhdl", 50, wi);
 -    setStringEntry(&inp, "fep-lambdas", is->fep_lambda[efptFEP], nullptr);
 -    setStringEntry(&inp, "mass-lambdas", is->fep_lambda[efptMASS], nullptr);
 -    setStringEntry(&inp, "coul-lambdas", is->fep_lambda[efptCOUL], nullptr);
 -    setStringEntry(&inp, "vdw-lambdas", is->fep_lambda[efptVDW], nullptr);
 -    setStringEntry(&inp, "bonded-lambdas", is->fep_lambda[efptBONDED], nullptr);
 -    setStringEntry(&inp, "restraint-lambdas", is->fep_lambda[efptRESTRAINT], nullptr);
 -    setStringEntry(&inp, "temperature-lambdas", is->fep_lambda[efptTEMPERATURE], nullptr);
 +    setStringEntry(&inp, "fep-lambdas", inputrecStrings->fep_lambda[efptFEP], nullptr);
 +    setStringEntry(&inp, "mass-lambdas", inputrecStrings->fep_lambda[efptMASS], nullptr);
 +    setStringEntry(&inp, "coul-lambdas", inputrecStrings->fep_lambda[efptCOUL], nullptr);
 +    setStringEntry(&inp, "vdw-lambdas", inputrecStrings->fep_lambda[efptVDW], nullptr);
 +    setStringEntry(&inp, "bonded-lambdas", inputrecStrings->fep_lambda[efptBONDED], nullptr);
 +    setStringEntry(&inp, "restraint-lambdas", inputrecStrings->fep_lambda[efptRESTRAINT], nullptr);
 +    setStringEntry(&inp, "temperature-lambdas", inputrecStrings->fep_lambda[efptTEMPERATURE], nullptr);
      fep->lambda_neighbors = get_eint(&inp, "calc-lambda-neighbors", 1, wi);
 -    setStringEntry(&inp, "init-lambda-weights", is->lambda_weights, nullptr);
 +    setStringEntry(&inp, "init-lambda-weights", inputrecStrings->lambda_weights, nullptr);
      fep->edHdLPrintEnergy   = get_eeenum(&inp, "dhdl-print-energy", edHdLPrintEnergy_names, wi);
      fep->sc_alpha           = get_ereal(&inp, "sc-alpha", 0.0, wi);
      fep->sc_power           = get_eint(&inp, "sc-power", 1, wi);
  
      /* Non-equilibrium MD stuff */
      printStringNewline(&inp, "Non-equilibrium MD stuff");
 -    setStringEntry(&inp, "acc-grps", is->accgrps, nullptr);
 -    setStringEntry(&inp, "accelerate", is->acc, nullptr);
 -    setStringEntry(&inp, "freezegrps", is->freeze, nullptr);
 -    setStringEntry(&inp, "freezedim", is->frdim, nullptr);
 +    setStringEntry(&inp, "acc-grps", inputrecStrings->accgrps, nullptr);
 +    setStringEntry(&inp, "accelerate", inputrecStrings->acc, nullptr);
 +    setStringEntry(&inp, "freezegrps", inputrecStrings->freeze, nullptr);
 +    setStringEntry(&inp, "freezedim", inputrecStrings->frdim, nullptr);
      ir->cos_accel = get_ereal(&inp, "cos-acceleration", 0, wi);
 -    setStringEntry(&inp, "deform", is->deform, nullptr);
 +    setStringEntry(&inp, "deform", inputrecStrings->deform, nullptr);
  
      /* simulated tempering variables */
      printStringNewline(&inp, "simulated tempering variables");
  
      /* User defined thingies */
      printStringNewline(&inp, "User defined thingies");
 -    setStringEntry(&inp, "user1-grps", is->user1, nullptr);
 -    setStringEntry(&inp, "user2-grps", is->user2, nullptr);
 +    setStringEntry(&inp, "user1-grps", inputrecStrings->user1, nullptr);
 +    setStringEntry(&inp, "user2-grps", inputrecStrings->user2, nullptr);
      ir->userint1  = get_eint(&inp, "userint1", 0, wi);
      ir->userint2  = get_eint(&inp, "userint2", 0, wi);
      ir->userint3  = get_eint(&inp, "userint3", 0, wi);
      }
  
      opts->couple_moltype = nullptr;
 -    if (strlen(is->couple_moltype) > 0)
 +    if (strlen(inputrecStrings->couple_moltype) > 0)
      {
          if (ir->efep != efepNO)
          {
 -            opts->couple_moltype = gmx_strdup(is->couple_moltype);
 +            opts->couple_moltype = gmx_strdup(inputrecStrings->couple_moltype);
              if (opts->couple_lam0 == opts->couple_lam1)
              {
                  warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
      /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */
      if (ir->efep != efepNO)
      {
 -        if (fep->delta_lambda > 0)
 +        if (fep->delta_lambda != 0)
          {
              ir->efep = efepSLOWGROWTH;
          }
          {
              ir->bExpanded = TRUE;
          }
 -        do_fep_params(ir, is->fep_lambda, is->lambda_weights, wi);
 +        do_fep_params(ir, inputrecStrings->fep_lambda, inputrecStrings->lambda_weights, wi);
          if (ir->bSimTemp) /* done after fep params */
          {
              do_simtemp_params(ir);
  
      /* WALL PARAMETERS */
  
 -    do_wall_params(ir, is->wall_atomtype, is->wall_density, opts, wi);
 +    do_wall_params(ir, inputrecStrings->wall_atomtype, inputrecStrings->wall_density, opts, wi);
  
      /* ORIENTATION RESTRAINT PARAMETERS */
  
 -    if (opts->bOrire && gmx::splitString(is->orirefitgrp).size() != 1)
 +    if (opts->bOrire && gmx::splitString(inputrecStrings->orirefitgrp).size() != 1)
      {
          warning_error(wi, "ERROR: Need one orientation restraint fit group\n");
      }
      }
  
      double gmx_unused canary;
 -    int ndeform = sscanf(is->deform, "%lf %lf %lf %lf %lf %lf %lf", &(dumdub[0][0]), &(dumdub[0][1]),
 -                         &(dumdub[0][2]), &(dumdub[0][3]), &(dumdub[0][4]), &(dumdub[0][5]), &canary);
 +    int ndeform = sscanf(inputrecStrings->deform, "%lf %lf %lf %lf %lf %lf %lf", &(dumdub[0][0]),
 +                         &(dumdub[0][1]), &(dumdub[0][2]), &(dumdub[0][3]), &(dumdub[0][4]),
 +                         &(dumdub[0][5]), &canary);
  
 -    if (strlen(is->deform) > 0 && ndeform != 6)
 +    if (strlen(inputrecStrings->deform) > 0 && ndeform != 6)
      {
 -        warning_error(
 -                wi, gmx::formatString(
 -                            "Cannot parse exactly 6 box deformation velocities from string '%s'", is->deform)
 -                            .c_str());
 +        warning_error(wi,
 +                      gmx::formatString(
 +                              "Cannot parse exactly 6 box deformation velocities from string '%s'",
 +                              inputrecStrings->deform)
 +                              .c_str());
      }
      for (i = 0; i < 3; i++)
      {
          }
      }
  
 -    sfree(dumstr[0]);
 -    sfree(dumstr[1]);
 -}
 -
 -static int search_QMstring(const char* s, int ng, const char* gn[])
 -{
 -    /* same as normal search_string, but this one searches QM strings */
 -    int i;
 +    /* Set up MTS levels, this needs to happen before checking AWH parameters */
 +    if (ir->useMts)
 +    {
 +        setupMtsLevels(ir->mtsLevels, *ir, *opts, wi);
 +    }
  
 -    for (i = 0; (i < ng); i++)
 +    if (ir->bDoAwh)
      {
 -        if (gmx_strcasecmp(s, gn[i]) == 0)
 -        {
 -            return i;
 -        }
 +        gmx::checkAwhParams(ir->awhParams, ir, wi);
      }
  
 -    gmx_fatal(FARGS, "this QM method or basisset (%s) is not implemented\n!", s);
 -} /* search_QMstring */
 +    sfree(dumstr[0]);
 +    sfree(dumstr[1]);
 +}
  
  /* We would like gn to be const as well, but C doesn't allow this */
  /* TODO this is utility functionality (search for the index of a
@@@ -3362,11 -3268,12 +3362,12 @@@ static void checkAndUpdateVcmFreezeGrou
              if (numFrozenDims == DIM)
              {
                  /* Do not remove COM motion for this fully frozen atom */
-                 if (groups->groups[SimulationAtomGroupType::MassCenterVelocityRemoval].empty())
+                 if (groups->groupNumbers[SimulationAtomGroupType::MassCenterVelocityRemoval].empty())
                  {
-                     groups->groups[SimulationAtomGroupType::MassCenterVelocityRemoval].resize(numAtoms, 0);
+                     groups->groupNumbers[SimulationAtomGroupType::MassCenterVelocityRemoval].resize(
+                             numAtoms, 0);
                  }
-                 groups->groups[SimulationAtomGroupType::MassCenterVelocityRemoval][a] = vcmRestGroup;
+                 groups->groupNumbers[SimulationAtomGroupType::MassCenterVelocityRemoval][a] = vcmRestGroup;
                  numFullyFrozenVcmAtoms++;
              }
              else if (numFrozenDims > 0)
@@@ -3463,9 -3370,9 +3464,9 @@@ void do_index(const char
  
      set_warning_line(wi, mdparin, -1);
  
 -    auto temperatureCouplingTauValues       = gmx::splitString(is->tau_t);
 -    auto temperatureCouplingReferenceValues = gmx::splitString(is->ref_t);
 -    auto temperatureCouplingGroupNames      = gmx::splitString(is->tcgrps);
 +    auto temperatureCouplingTauValues       = gmx::splitString(inputrecStrings->tau_t);
 +    auto temperatureCouplingReferenceValues = gmx::splitString(inputrecStrings->ref_t);
 +    auto temperatureCouplingGroupNames      = gmx::splitString(inputrecStrings->tcgrps);
      if (temperatureCouplingTauValues.size() != temperatureCouplingGroupNames.size()
          || temperatureCouplingReferenceValues.size() != temperatureCouplingGroupNames.size())
      {
      }
  
      /* Simulated annealing for each group. There are nr groups */
 -    auto simulatedAnnealingGroupNames = gmx::splitString(is->anneal);
 +    auto simulatedAnnealingGroupNames = gmx::splitString(inputrecStrings->anneal);
      if (simulatedAnnealingGroupNames.size() == 1
          && gmx::equalCaseInsensitive(simulatedAnnealingGroupNames[0], "N", 1))
      {
              if (bAnneal)
              {
                  /* Read the other fields too */
 -                auto simulatedAnnealingPoints = gmx::splitString(is->anneal_npoints);
 +                auto simulatedAnnealingPoints = gmx::splitString(inputrecStrings->anneal_npoints);
                  if (simulatedAnnealingPoints.size() != simulatedAnnealingGroupNames.size())
                  {
                      gmx_fatal(FARGS, "Found %zu annealing-npoints values for %zu groups\n",
                      numSimulatedAnnealingFields += ir->opts.anneal_npoints[i];
                  }
  
 -                auto simulatedAnnealingTimes = gmx::splitString(is->anneal_time);
 +                auto simulatedAnnealingTimes = gmx::splitString(inputrecStrings->anneal_time);
  
                  if (simulatedAnnealingTimes.size() != numSimulatedAnnealingFields)
                  {
                      gmx_fatal(FARGS, "Found %zu annealing-time values, wanted %zu\n",
                                simulatedAnnealingTimes.size(), numSimulatedAnnealingFields);
                  }
 -                auto simulatedAnnealingTemperatures = gmx::splitString(is->anneal_temp);
 +                auto simulatedAnnealingTemperatures = gmx::splitString(inputrecStrings->anneal_temp);
                  if (simulatedAnnealingTemperatures.size() != numSimulatedAnnealingFields)
                  {
                      gmx_fatal(FARGS, "Found %zu annealing-temp values, wanted %zu\n",
  
      if (ir->bPull)
      {
 -        make_pull_groups(ir->pull, is->pull_grp, defaultIndexGroups, gnames);
 +        make_pull_groups(ir->pull, inputrecStrings->pullGroupNames, defaultIndexGroups, gnames);
  
          make_pull_coords(ir->pull);
      }
  
      if (ir->bRot)
      {
 -        make_rotation_groups(ir->rot, is->rot_grp, defaultIndexGroups, gnames);
 +        make_rotation_groups(ir->rot, inputrecStrings->rotateGroupNames, defaultIndexGroups, gnames);
      }
  
      if (ir->eSwapCoords != eswapNO)
      /* Make indices for IMD session */
      if (ir->bIMD)
      {
 -        make_IMD_group(ir->imd, is->imd_grp, defaultIndexGroups, gnames);
 +        make_IMD_group(ir->imd, inputrecStrings->imd_grp, defaultIndexGroups, gnames);
      }
  
      gmx::IndexGroupsAndNames defaultIndexGroupsAndNames(
              *defaultIndexGroups, gmx::arrayRefFromArray(gnames, defaultIndexGroups->nr));
 -    notifier.notifier_.notify(defaultIndexGroupsAndNames);
 +    notifier.preProcessingNotifications_.notify(defaultIndexGroupsAndNames);
  
 -    auto accelerations          = gmx::splitString(is->acc);
 -    auto accelerationGroupNames = gmx::splitString(is->accgrps);
 +    auto accelerations          = gmx::splitString(inputrecStrings->acc);
 +    auto accelerationGroupNames = gmx::splitString(inputrecStrings->accgrps);
      if (accelerationGroupNames.size() * DIM != accelerations.size())
      {
          gmx_fatal(FARGS, "Invalid Acceleration input: %zu groups and %zu acc. values",
  
      convertRvecs(wi, accelerations, "anneal-time", ir->opts.acc);
  
 -    auto freezeDims       = gmx::splitString(is->frdim);
 -    auto freezeGroupNames = gmx::splitString(is->freeze);
 +    auto freezeDims       = gmx::splitString(inputrecStrings->frdim);
 +    auto freezeGroupNames = gmx::splitString(inputrecStrings->freeze);
      if (freezeDims.size() != DIM * freezeGroupNames.size())
      {
          gmx_fatal(FARGS, "Invalid Freezing input: %zu groups and %zu freeze values",
          }
      }
  
 -    auto energyGroupNames = gmx::splitString(is->energy);
 +    auto energyGroupNames = gmx::splitString(inputrecStrings->energy);
      do_numbering(natoms, groups, energyGroupNames, defaultIndexGroups, gnames,
                   SimulationAtomGroupType::EnergyOutput, restnm, egrptpALL_GENREST, bVerbose, wi);
      add_wall_energrps(groups, ir->nwall, symtab);
      ir->opts.ngener    = groups->groups[SimulationAtomGroupType::EnergyOutput].size();
 -    auto vcmGroupNames = gmx::splitString(is->vcm);
 +    auto vcmGroupNames = gmx::splitString(inputrecStrings->vcm);
      do_numbering(natoms, groups, vcmGroupNames, defaultIndexGroups, gnames,
                   SimulationAtomGroupType::MassCenterVelocityRemoval, restnm,
                   vcmGroupNames.empty() ? egrptpALL_GENREST : egrptpPART, bVerbose, wi);
      /* Now we have filled the freeze struct, so we can calculate NRDF */
      calc_nrdf(mtop, ir, gnames);
  
 -    auto user1GroupNames = gmx::splitString(is->user1);
 +    auto user1GroupNames = gmx::splitString(inputrecStrings->user1);
      do_numbering(natoms, groups, user1GroupNames, defaultIndexGroups, gnames,
                   SimulationAtomGroupType::User1, restnm, egrptpALL_GENREST, bVerbose, wi);
 -    auto user2GroupNames = gmx::splitString(is->user2);
 +    auto user2GroupNames = gmx::splitString(inputrecStrings->user2);
      do_numbering(natoms, groups, user2GroupNames, defaultIndexGroups, gnames,
                   SimulationAtomGroupType::User2, restnm, egrptpALL_GENREST, bVerbose, wi);
 -    auto compressedXGroupNames = gmx::splitString(is->x_compressed_groups);
 +    auto compressedXGroupNames = gmx::splitString(inputrecStrings->x_compressed_groups);
      do_numbering(natoms, groups, compressedXGroupNames, defaultIndexGroups, gnames,
                   SimulationAtomGroupType::CompressedPositionOutput, restnm, egrptpONE, bVerbose, wi);
 -    auto orirefFitGroupNames = gmx::splitString(is->orirefitgrp);
 +    auto orirefFitGroupNames = gmx::splitString(inputrecStrings->orirefitgrp);
      do_numbering(natoms, groups, orirefFitGroupNames, defaultIndexGroups, gnames,
                   SimulationAtomGroupType::OrientationRestraintsFit, restnm, egrptpALL_GENREST,
                   bVerbose, wi);
  
 -    /* QMMM input processing */
 -    auto qmGroupNames = gmx::splitString(is->QMMM);
 -    auto qmMethods    = gmx::splitString(is->QMmethod);
 -    auto qmBasisSets  = gmx::splitString(is->QMbasis);
 -    if (ir->eI != eiMimic)
 +    /* MiMiC QMMM input processing */
 +    auto qmGroupNames = gmx::splitString(inputrecStrings->QMMM);
 +    if (qmGroupNames.size() > 1)
      {
 -        if (qmMethods.size() != qmGroupNames.size() || qmBasisSets.size() != qmGroupNames.size())
 -        {
 -            gmx_fatal(FARGS,
 -                      "Invalid QMMM input: %zu groups %zu basissets"
 -                      " and %zu methods\n",
 -                      qmGroupNames.size(), qmBasisSets.size(), qmMethods.size());
 -        }
 -        /* group rest, if any, is always MM! */
 -        do_numbering(natoms, groups, qmGroupNames, defaultIndexGroups, gnames,
 -                     SimulationAtomGroupType::QuantumMechanics, restnm, egrptpALL_GENREST, bVerbose, wi);
 -        nr            = qmGroupNames.size(); /*atoms->grps[egcQMMM].nr;*/
 -        ir->opts.ngQM = qmGroupNames.size();
 -        snew(ir->opts.QMmethod, nr);
 -        snew(ir->opts.QMbasis, nr);
 -        for (i = 0; i < nr; i++)
 -        {
 -            /* input consists of strings: RHF CASSCF PM3 .. These need to be
 -             * converted to the corresponding enum in names.c
 -             */
 -            ir->opts.QMmethod[i] = search_QMstring(qmMethods[i].c_str(), eQMmethodNR, eQMmethod_names);
 -            ir->opts.QMbasis[i] = search_QMstring(qmBasisSets[i].c_str(), eQMbasisNR, eQMbasis_names);
 -        }
 -        auto qmMultiplicities = gmx::splitString(is->QMmult);
 -        auto qmCharges        = gmx::splitString(is->QMcharge);
 -        auto qmbSH            = gmx::splitString(is->bSH);
 -        snew(ir->opts.QMmult, nr);
 -        snew(ir->opts.QMcharge, nr);
 -        snew(ir->opts.bSH, nr);
 -        convertInts(wi, qmMultiplicities, "QMmult", ir->opts.QMmult);
 -        convertInts(wi, qmCharges, "QMcharge", ir->opts.QMcharge);
 -        convertYesNos(wi, qmbSH, "bSH", ir->opts.bSH);
 -
 -        auto CASelectrons = gmx::splitString(is->CASelectrons);
 -        auto CASorbitals  = gmx::splitString(is->CASorbitals);
 -        snew(ir->opts.CASelectrons, nr);
 -        snew(ir->opts.CASorbitals, nr);
 -        convertInts(wi, CASelectrons, "CASelectrons", ir->opts.CASelectrons);
 -        convertInts(wi, CASorbitals, "CASOrbitals", ir->opts.CASorbitals);
 -
 -        auto SAon    = gmx::splitString(is->SAon);
 -        auto SAoff   = gmx::splitString(is->SAoff);
 -        auto SAsteps = gmx::splitString(is->SAsteps);
 -        snew(ir->opts.SAon, nr);
 -        snew(ir->opts.SAoff, nr);
 -        snew(ir->opts.SAsteps, nr);
 -        convertInts(wi, SAon, "SAon", ir->opts.SAon);
 -        convertInts(wi, SAoff, "SAoff", ir->opts.SAoff);
 -        convertInts(wi, SAsteps, "SAsteps", ir->opts.SAsteps);
 +        gmx_fatal(FARGS, "Currently, having more than one QM group in MiMiC is not supported");
      }
 -    else
 -    {
 -        /* MiMiC */
 -        if (qmGroupNames.size() > 1)
 -        {
 -            gmx_fatal(FARGS, "Currently, having more than one QM group in MiMiC is not supported");
 -        }
 -        /* group rest, if any, is always MM! */
 -        do_numbering(natoms, groups, qmGroupNames, defaultIndexGroups, gnames,
 -                     SimulationAtomGroupType::QuantumMechanics, restnm, egrptpALL_GENREST, bVerbose, wi);
 +    /* group rest, if any, is always MM! */
 +    do_numbering(natoms, groups, qmGroupNames, defaultIndexGroups, gnames,
 +                 SimulationAtomGroupType::QuantumMechanics, restnm, egrptpALL_GENREST, bVerbose, wi);
 +    ir->opts.ngQM = qmGroupNames.size();
  
 -        ir->opts.ngQM = qmGroupNames.size();
 -    }
 -
 -    /* end of QMMM input */
 +    /* end of MiMiC QMMM input */
  
      if (bVerbose)
      {
      nr = groups->groups[SimulationAtomGroupType::EnergyOutput].size();
      snew(ir->opts.egp_flags, nr * nr);
  
 -    bExcl = do_egp_flag(ir, groups, "energygrp-excl", is->egpexcl, EGP_EXCL);
 +    bExcl = do_egp_flag(ir, groups, "energygrp-excl", inputrecStrings->egpexcl, EGP_EXCL);
      if (bExcl && ir->cutoff_scheme == ecutsVERLET)
      {
          warning_error(wi, "Energy group exclusions are currently not supported");
          warning(wi, "Can not exclude the lattice Coulomb energy between energy groups");
      }
  
 -    bTable = do_egp_flag(ir, groups, "energygrp-table", is->egptable, EGP_TABLE);
 +    bTable = do_egp_flag(ir, groups, "energygrp-table", inputrecStrings->egptable, EGP_TABLE);
      if (bTable && !(ir->vdwtype == evdwUSER) && !(ir->coulombtype == eelUSER)
          && !(ir->coulombtype == eelPMEUSER) && !(ir->coulombtype == eelPMEUSERSWITCH))
      {
@@@ -4189,9 -4154,6 +4190,9 @@@ static void check_combination_rules(con
  
  void triple_check(const char* mdparin, t_inputrec* ir, gmx_mtop_t* sys, warninp_t wi)
  {
 +    // Not meeting MTS requirements should have resulted in a fatal error, so we can assert here
 +    gmx::assertMtsRequirements(*ir);
 +
      char                      err_buf[STRLEN];
      int                       i, m, c, nmol;
      bool                      bCharge, bAcc;
@@@ -4493,7 -4455,7 +4494,7 @@@ void double_check(t_inputrec* ir, matri
      char        warn_buf[STRLEN];
      const char* ptr;
  
 -    ptr = check_box(ir->ePBC, box);
 +    ptr = check_box(ir->pbcType, box);
      if (ptr)
      {
          warning_error(wi, ptr);
          ir->LincsWarnAngle = 90.0;
      }
  
 -    if (ir->ePBC != epbcNONE)
 +    if (ir->pbcType != PbcType::No)
      {
          if (ir->nstlist == 0)
          {
                      "With nstlist=0 atoms are only put into the box at step 0, therefore drifting "
                      "atoms might cause the simulation to crash.");
          }
 -        if (gmx::square(ir->rlist) >= max_cutoff2(ir->ePBC, box))
 +        if (gmx::square(ir->rlist) >= max_cutoff2(ir->pbcType, box))
          {
              sprintf(warn_buf,
                      "ERROR: The cut-off length is longer than half the shortest box vector or "
index 616b7b9e09d474bdd053358c63988e9bbbf4c1c1,43f70234094375cc7ed9b0a909b057ee30f5a93e..0720a12bbe9398a344acffd7eb8f6d24ad491268
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
-  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+  * Copyright (c) 2012,2013,2014,2015,2016, The GROMACS development team.
   * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
@@@ -44,8 -44,8 +44,8 @@@
  #include <string>
  #include <vector>
  
 -#include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/hardware/cpuinfo.h"
 +#include "gromacs/hardware/device_management.h"
  #include "gromacs/hardware/hardwaretopology.h"
  #include "gromacs/hardware/hw_info.h"
  #include "gromacs/hardware/identifyavx512fmaunits.h"
  #include "gromacs/utility/stringutil.h"
  #include "gromacs/utility/sysinfo.h"
  
+ #include "architecture.h"
  //! Constant used to help minimize preprocessed code
 -static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE;
 +static constexpr bool bGPUBinary = (GMX_GPU != 0);
  
  /*! \internal \brief
   * Returns the GPU information text, one GPU per line.
   */
 -static std::string sprint_gpus(const gmx_gpu_info_t& gpu_info)
 +static std::string sprint_gpus(const std::vector<std::unique_ptr<DeviceInformation>>& deviceInfoList)
  {
 -    char                     stmp[STRLEN];
 -    std::vector<std::string> gpuStrings;
 -    for (int i = 0; i < gpu_info.n_dev; i++)
 +    std::vector<std::string> gpuStrings(0);
 +    for (const auto& deviceInfo : deviceInfoList)
      {
 -        get_gpu_device_info_string(stmp, gpu_info, i);
 -        gpuStrings.push_back(gmx::formatString("    %s", stmp));
 +        gpuStrings.emplace_back("    " + getDeviceInformationString(*deviceInfo));
      }
      return gmx::joinStrings(gpuStrings, "\n");
  }
@@@ -80,7 -84,7 +82,7 @@@
     and runtime CPU do not match. */
  static void check_use_of_rdtscp_on_this_cpu(const gmx::MDLogger& mdlog, const gmx::CpuInfo& cpuInfo)
  {
 -    bool binaryUsesRdtscp = HAVE_RDTSCP;
 +    bool binaryUsesRdtscp = GMX_USE_RDTSCP;
  
      const char* programName = gmx::getProgramContext().displayName();
  
@@@ -143,21 -147,14 +145,21 @@@ static std::string detected_hardware_st
          s += gmx::formatString(" %d cores,", hwinfo->ncore_tot);
      }
      s += gmx::formatString(" %d logical cores", hwinfo->nhwthread_tot);
 -    if (hwinfo->gpu_info.bDetectGPUs)
 +    if (canPerformDeviceDetection(nullptr))
      {
          s += gmx::formatString(", %d compatible GPU%s", hwinfo->ngpu_compatible_tot,
                                 hwinfo->ngpu_compatible_tot == 1 ? "" : "s");
      }
      else if (bGPUBinary)
      {
 -        s += gmx::formatString(" (GPU detection deactivated)");
 +        if (isDeviceDetectionEnabled())
 +        {
 +            s += gmx::formatString(" (GPU detection failed)");
 +        }
 +        else
 +        {
 +            s += gmx::formatString(" (GPU detection deactivated)");
 +        }
      }
      s += gmx::formatString("\n");
  
          }
      }
  
 -    if (bGPUBinary && hwinfo->gpu_info.n_dev > 0)
 +    if (bGPUBinary && !hwinfo->deviceInfoList.empty())
      {
          s += gmx::formatString("  GPU info:\n");
 -        s += gmx::formatString("    Number of GPUs detected: %d\n", hwinfo->gpu_info.n_dev);
 -        s += sprint_gpus(hwinfo->gpu_info) + "\n";
 +        s += gmx::formatString("    Number of GPUs detected: %d\n",
 +                               static_cast<int>(hwinfo->deviceInfoList.size()));
 +        s += sprint_gpus(hwinfo->deviceInfoList) + "\n";
      }
      return s;
  }
@@@ -387,6 -383,9 +389,9 @@@ void gmx_print_detected_hardware(FILE
          gmx::simdCheck(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min), fplog, warnToStdErr);
      }
  
-     /* For RDTSCP we only check on our local node and skip the MPI reduction */
-     check_use_of_rdtscp_on_this_cpu(mdlog, cpuInfo);
+     /* For RDTSCP we only check on our local node and skip the MPI reduction, only on x86 */
+     if (gmx::c_architecture == gmx::Architecture::X86)
+     {
+         check_use_of_rdtscp_on_this_cpu(mdlog, cpuInfo);
+     }
  }
index b1a9c7db977f959590b6a6d8f03219e76c88954e,0825e0ab7e33c6f292bc136ab8129b2a6cfde965..6ba96c89503b6d69924eb544949906a3898e968d
@@@ -1,8 -1,7 +1,8 @@@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
 - * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
 + * Copyright (c) 2013,2014,2015,2016,2017 The GROMACS development team.
 + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -37,8 -36,6 +37,8 @@@
  
  #include "mdoutf.h"
  
 +#include "config.h"
 +
  #include "gromacs/commandline/filenm.h"
  #include "gromacs/domdec/collect.h"
  #include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/mdlib/trajectory_writing.h"
  #include "gromacs/mdrunutility/handlerestart.h"
  #include "gromacs/mdrunutility/multisim.h"
 +#include "gromacs/mdtypes/awh_history.h"
  #include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/df_history.h"
 +#include "gromacs/mdtypes/edsamhistory.h"
 +#include "gromacs/mdtypes/energyhistory.h"
  #include "gromacs/mdtypes/imdoutputprovider.h"
  #include "gromacs/mdtypes/inputrec.h"
  #include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/mdtypes/mdrunoptions.h"
 +#include "gromacs/mdtypes/observableshistory.h"
  #include "gromacs/mdtypes/state.h"
 +#include "gromacs/mdtypes/swaphistory.h"
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/topology/topology.h"
 +#include "gromacs/utility/baseversion.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/pleasecite.h"
 +#include "gromacs/utility/programcontext.h"
  #include "gromacs/utility/smalloc.h"
 +#include "gromacs/utility/sysinfo.h"
  
  struct gmx_mdoutf
  {
      FILE*                         fp_dhdl;
      int                           natoms_global;
      int                           natoms_x_compressed;
 -    SimulationGroups*             groups; /* for compressed position writing */
 +    const SimulationGroups*       groups; /* for compressed position writing */
      gmx_wallcycle_t               wcycle;
      rvec*                         f_global;
      gmx::IMDOutputProvider*       outputProvider;
      const gmx::MdModulesNotifier* mdModulesNotifier;
      bool                          simulationsShareState;
 -    MPI_Comm                      mpiCommMasters;
 +    MPI_Comm                      mastersComm;
  };
  
  
@@@ -108,7 -96,7 +108,7 @@@ gmx_mdoutf_t init_mdoutf(FILE
                           gmx::IMDOutputProvider*       outputProvider,
                           const gmx::MdModulesNotifier& mdModulesNotifier,
                           const t_inputrec*             ir,
 -                         gmx_mtop_t*                   top_global,
 +                         const gmx_mtop_t*             top_global,
                           const gmx_output_env_t*       oenv,
                           gmx_wallcycle_t               wcycle,
                           const gmx::StartingBehavior   startingBehavior,
      of->simulationsShareState = simulationsShareState;
      if (of->simulationsShareState)
      {
 -        of->mpiCommMasters = ms->mpi_comm_masters;
 +        of->mastersComm = ms->mastersComm_;
      }
  
      if (MASTER(cr))
@@@ -266,257 -254,19 +266,257 @@@ gmx_wallcycle_t mdoutf_get_wcycle(gmx_m
      return of->wcycle;
  }
  
 -void mdoutf_write_to_trajectory_files(FILE*                    fplog,
 -                                      const t_commrec*         cr,
 -                                      gmx_mdoutf_t             of,
 -                                      int                      mdof_flags,
 -                                      int                      natoms,
 -                                      int64_t                  step,
 -                                      double                   t,
 -                                      t_state*                 state_local,
 -                                      t_state*                 state_global,
 -                                      ObservablesHistory*      observablesHistory,
 -                                      gmx::ArrayRef<gmx::RVec> f_local)
 +static void mpiBarrierBeforeRename(const bool applyMpiBarrierBeforeRename, MPI_Comm mpiBarrierCommunicator)
 +{
 +    if (applyMpiBarrierBeforeRename)
 +    {
 +#if GMX_MPI
 +        MPI_Barrier(mpiBarrierCommunicator);
 +#else
 +        GMX_RELEASE_ASSERT(false, "Should not request a barrier without MPI");
 +        GMX_UNUSED_VALUE(mpiBarrierCommunicator);
 +#endif
 +    }
 +}
 +/*! \brief Write a checkpoint to the filename
 + *
 + * Appends the _step<step>.cpt with bNumberAndKeep, otherwise moves
 + * the previous checkpoint filename with suffix _prev.cpt.
 + */
 +static void write_checkpoint(const char*                     fn,
 +                             gmx_bool                        bNumberAndKeep,
 +                             FILE*                           fplog,
 +                             const t_commrec*                cr,
 +                             ivec                            domdecCells,
 +                             int                             nppnodes,
 +                             int                             eIntegrator,
 +                             int                             simulation_part,
 +                             gmx_bool                        bExpanded,
 +                             int                             elamstats,
 +                             int64_t                         step,
 +                             double                          t,
 +                             t_state*                        state,
 +                             ObservablesHistory*             observablesHistory,
 +                             const gmx::MdModulesNotifier&   mdModulesNotifier,
 +                             gmx::WriteCheckpointDataHolder* modularSimulatorCheckpointData,
 +                             bool                            applyMpiBarrierBeforeRename,
 +                             MPI_Comm                        mpiBarrierCommunicator)
 +{
 +    t_fileio* fp;
 +    char*     fntemp; /* the temporary checkpoint file name */
 +    int       npmenodes;
 +    char      buf[1024], suffix[5 + STEPSTRSIZE], sbuf[STEPSTRSIZE];
 +    t_fileio* ret;
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        npmenodes = cr->npmenodes;
 +    }
 +    else
 +    {
 +        npmenodes = 0;
 +    }
 +
 +#if !GMX_NO_RENAME
 +    /* make the new temporary filename */
 +    snew(fntemp, std::strlen(fn) + 5 + STEPSTRSIZE);
 +    std::strcpy(fntemp, fn);
 +    fntemp[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +    sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
 +    std::strcat(fntemp, suffix);
 +    std::strcat(fntemp, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
 +#else
 +    /* if we can't rename, we just overwrite the cpt file.
 +     * dangerous if interrupted.
 +     */
 +    snew(fntemp, std::strlen(fn));
 +    std::strcpy(fntemp, fn);
 +#endif
 +    std::string timebuf = gmx_format_current_time();
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Writing checkpoint, step %s at %s\n\n", gmx_step_str(step, buf), timebuf.c_str());
 +    }
 +
 +    /* Get offsets for open files */
 +    auto outputfiles = gmx_fio_get_output_file_positions();
 +
 +    fp = gmx_fio_open(fntemp, "w");
 +
 +    /* We can check many more things now (CPU, acceleration, etc), but
 +     * it is highly unlikely to have two separate builds with exactly
 +     * the same version, user, time, and build host!
 +     */
 +
 +    int nlambda = (state->dfhist ? state->dfhist->nlambda : 0);
 +
 +    edsamhistory_t* edsamhist = observablesHistory->edsamHistory.get();
 +    int             nED       = (edsamhist ? edsamhist->nED : 0);
 +
 +    swaphistory_t* swaphist    = observablesHistory->swapHistory.get();
 +    int            eSwapCoords = (swaphist ? swaphist->eSwapCoords : eswapNO);
 +
 +    CheckpointHeaderContents headerContents = { 0,
 +                                                { 0 },
 +                                                { 0 },
 +                                                { 0 },
 +                                                { 0 },
 +                                                GMX_DOUBLE,
 +                                                { 0 },
 +                                                { 0 },
 +                                                eIntegrator,
 +                                                simulation_part,
 +                                                step,
 +                                                t,
 +                                                nppnodes,
 +                                                { 0 },
 +                                                npmenodes,
 +                                                state->natoms,
 +                                                state->ngtc,
 +                                                state->nnhpres,
 +                                                state->nhchainlength,
 +                                                nlambda,
 +                                                state->flags,
 +                                                0,
 +                                                0,
 +                                                0,
 +                                                0,
 +                                                0,
 +                                                nED,
 +                                                eSwapCoords,
 +                                                false };
 +    std::strcpy(headerContents.version, gmx_version());
 +    std::strcpy(headerContents.fprog, gmx::getProgramContext().fullBinaryPath());
 +    std::strcpy(headerContents.ftime, timebuf.c_str());
 +    if (DOMAINDECOMP(cr))
 +    {
 +        copy_ivec(domdecCells, headerContents.dd_nc);
 +    }
 +
 +    write_checkpoint_data(fp, headerContents, bExpanded, elamstats, state, observablesHistory,
 +                          mdModulesNotifier, &outputfiles, modularSimulatorCheckpointData);
 +
 +    /* we really, REALLY, want to make sure to physically write the checkpoint,
 +       and all the files it depends on, out to disk. Because we've
 +       opened the checkpoint with gmx_fio_open(), it's in our list
 +       of open files.  */
 +    ret = gmx_fio_all_output_fsync();
 +
 +    if (ret)
 +    {
 +        char buf[STRLEN];
 +        sprintf(buf, "Cannot fsync '%s'; maybe you are out of disk space?", gmx_fio_getname(ret));
 +
 +        if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == nullptr)
 +        {
 +            gmx_file(buf);
 +        }
 +        else
 +        {
 +            gmx_warning("%s", buf);
 +        }
 +    }
 +
 +    if (gmx_fio_close(fp) != 0)
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    /* we don't move the checkpoint if the user specified they didn't want it,
 +       or if the fsyncs failed */
 +#if !GMX_NO_RENAME
 +    if (!bNumberAndKeep && !ret)
 +    {
 +        if (gmx_fexist(fn))
 +        {
 +            /* Rename the previous checkpoint file */
 +            mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
 +
 +            std::strcpy(buf, fn);
 +            buf[std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +            std::strcat(buf, "_prev");
 +            std::strcat(buf, fn + std::strlen(fn) - std::strlen(ftp2ext(fn2ftp(fn))) - 1);
 +            if (!GMX_FAHCORE)
 +            {
 +                /* we copy here so that if something goes wrong between now and
 +                 * the rename below, there's always a state.cpt.
 +                 * If renames are atomic (such as in POSIX systems),
 +                 * this copying should be unneccesary.
 +                 */
 +                gmx_file_copy(fn, buf, FALSE);
 +                /* We don't really care if this fails:
 +                 * there's already a new checkpoint.
 +                 */
 +            }
 +            else
 +            {
 +                gmx_file_rename(fn, buf);
 +            }
 +        }
 +
 +        /* Rename the checkpoint file from the temporary to the final name */
 +        mpiBarrierBeforeRename(applyMpiBarrierBeforeRename, mpiBarrierCommunicator);
 +
 +        if (gmx_file_rename(fntemp, fn) != 0)
 +        {
 +            gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
 +        }
 +    }
 +#endif /* GMX_NO_RENAME */
 +
 +    sfree(fntemp);
 +
 +#if GMX_FAHCORE
 +    /*code for alternate checkpointing scheme.  moved from top of loop over
 +       steps */
 +    fcRequestCheckPoint();
 +    if (fcCheckPointParallel(cr->nodeid, NULL, 0) == 0)
 +    {
 +        gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step);
 +    }
 +#endif /* end GMX_FAHCORE block */
 +}
 +
 +void mdoutf_write_checkpoint(gmx_mdoutf_t                    of,
 +                             FILE*                           fplog,
 +                             const t_commrec*                cr,
 +                             int64_t                         step,
 +                             double                          t,
 +                             t_state*                        state_global,
 +                             ObservablesHistory*             observablesHistory,
 +                             gmx::WriteCheckpointDataHolder* modularSimulatorCheckpointData)
 +{
 +    fflush_tng(of->tng);
 +    fflush_tng(of->tng_low_prec);
 +    /* Write the checkpoint file.
 +     * When simulations share the state, an MPI barrier is applied before
 +     * renaming old and new checkpoint files to minimize the risk of
 +     * checkpoint files getting out of sync.
 +     */
 +    ivec one_ivec = { 1, 1, 1 };
 +    write_checkpoint(of->fn_cpt, of->bKeepAndNumCPT, fplog, cr,
 +                     DOMAINDECOMP(cr) ? cr->dd->numCells : one_ivec,
 +                     DOMAINDECOMP(cr) ? cr->dd->nnodes : cr->nnodes, of->eIntegrator,
 +                     of->simulation_part, of->bExpanded, of->elamstats, step, t, state_global,
 +                     observablesHistory, *(of->mdModulesNotifier), modularSimulatorCheckpointData,
 +                     of->simulationsShareState, of->mastersComm);
 +}
 +
 +void mdoutf_write_to_trajectory_files(FILE*                           fplog,
 +                                      const t_commrec*                cr,
 +                                      gmx_mdoutf_t                    of,
 +                                      int                             mdof_flags,
 +                                      int                             natoms,
 +                                      int64_t                         step,
 +                                      double                          t,
 +                                      t_state*                        state_local,
 +                                      t_state*                        state_global,
 +                                      ObservablesHistory*             observablesHistory,
 +                                      gmx::ArrayRef<const gmx::RVec>  f_local,
 +                                      gmx::WriteCheckpointDataHolder* modularSimulatorCheckpointData)
  {
 -    rvec* f_global;
 +    const rvec* f_global;
  
      if (DOMAINDECOMP(cr))
      {
              if (mdof_flags & (MDOF_X | MDOF_X_COMPRESSED))
              {
                  auto globalXRef = MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>();
 -                dd_collect_vec(cr->dd, state_local, state_local->x, globalXRef);
 +                dd_collect_vec(cr->dd, state_local->ddp_count, state_local->ddp_count_cg_gl,
 +                               state_local->cg_gl, state_local->x, globalXRef);
              }
              if (mdof_flags & MDOF_V)
              {
                  auto globalVRef = MASTER(cr) ? state_global->v : gmx::ArrayRef<gmx::RVec>();
 -                dd_collect_vec(cr->dd, state_local, state_local->v, globalVRef);
 +                dd_collect_vec(cr->dd, state_local->ddp_count, state_local->ddp_count_cg_gl,
 +                               state_local->cg_gl, state_local->v, globalVRef);
              }
          }
          f_global = of->f_global;
          if (mdof_flags & MDOF_F)
          {
 -            dd_collect_vec(cr->dd, state_local, f_local,
 -                           gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec*>(f_global), f_local.size()));
 +            dd_collect_vec(
 +                    cr->dd, state_local->ddp_count, state_local->ddp_count_cg_gl, state_local->cg_gl, f_local,
 +                    gmx::arrayRefFromArray(reinterpret_cast<gmx::RVec*>(of->f_global), f_local.size()));
          }
      }
      else
      {
          if (mdof_flags & MDOF_CPT)
          {
 -            fflush_tng(of->tng);
 -            fflush_tng(of->tng_low_prec);
 -            /* Write the checkpoint file.
 -             * When simulations share the state, an MPI barrier is applied before
 -             * renaming old and new checkpoint files to minimize the risk of
 -             * checkpoint files getting out of sync.
 -             */
 -            ivec one_ivec = { 1, 1, 1 };
 -            write_checkpoint(of->fn_cpt, of->bKeepAndNumCPT, fplog, cr,
 -                             DOMAINDECOMP(cr) ? cr->dd->nc : one_ivec,
 -                             DOMAINDECOMP(cr) ? cr->dd->nnodes : cr->nnodes, of->eIntegrator,
 -                             of->simulation_part, of->bExpanded, of->elamstats, step, t,
 -                             state_global, observablesHistory, *(of->mdModulesNotifier),
 -                             of->simulationsShareState, of->mpiCommMasters);
 +            mdoutf_write_checkpoint(of, fplog, cr, step, t, state_global, observablesHistory,
 +                                    modularSimulatorCheckpointData);
          }
  
          if (mdof_flags & (MDOF_X | MDOF_V | MDOF_F))
                                 nullptr, nullptr);
              }
          }
+ #if GMX_FAHCORE
+         /* Write a FAH checkpoint after writing any other data.  We may end up
+            checkpointing twice but it's fast so it's ok. */
+         if ((mdof_flags & ~MDOF_CPT))
+         {
+             fcCheckpoint();
+         }
+ #endif
      }
  }
  
index d9ef7191ede87741b866305ab8c0225f337cfe29,3d99d2353dbc6f8dc236158aa0e63ed0e7024801..f0f9f25fdab8ffac7e4e13e6184a841fede8865b
@@@ -1,8 -1,7 +1,8 @@@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
-  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 - * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
++ * Copyright (c) 2013,2014,2015,2016,2017, The GROMACS development team.
 + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -44,7 -43,6 +44,7 @@@
  #include "gromacs/mdlib/mdoutf.h"
  #include "gromacs/mdlib/stat.h"
  #include "gromacs/mdlib/update.h"
 +#include "gromacs/mdtypes/checkpointdata.h"
  #include "gromacs/mdtypes/commrec.h"
  #include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/mdtypes/inputrec.h"
  #include "gromacs/topology/topology.h"
  #include "gromacs/utility/smalloc.h"
  
 -void do_md_trajectory_writing(FILE*                    fplog,
 -                              t_commrec*               cr,
 -                              int                      nfile,
 -                              const t_filenm           fnm[],
 -                              int64_t                  step,
 -                              int64_t                  step_rel,
 -                              double                   t,
 -                              t_inputrec*              ir,
 -                              t_state*                 state,
 -                              t_state*                 state_global,
 -                              ObservablesHistory*      observablesHistory,
 -                              const gmx_mtop_t*        top_global,
 -                              t_forcerec*              fr,
 -                              gmx_mdoutf_t             outf,
 -                              const gmx::EnergyOutput& energyOutput,
 -                              gmx_ekindata_t*          ekind,
 -                              gmx::ArrayRef<gmx::RVec> f,
 -                              gmx_bool                 bCPT,
 -                              gmx_bool                 bRerunMD,
 -                              gmx_bool                 bLastStep,
 -                              gmx_bool                 bDoConfOut,
 -                              gmx_bool                 bSumEkinhOld)
 +void do_md_trajectory_writing(FILE*                          fplog,
 +                              t_commrec*                     cr,
 +                              int                            nfile,
 +                              const t_filenm                 fnm[],
 +                              int64_t                        step,
 +                              int64_t                        step_rel,
 +                              double                         t,
 +                              t_inputrec*                    ir,
 +                              t_state*                       state,
 +                              t_state*                       state_global,
 +                              ObservablesHistory*            observablesHistory,
 +                              const gmx_mtop_t*              top_global,
 +                              t_forcerec*                    fr,
 +                              gmx_mdoutf_t                   outf,
 +                              const gmx::EnergyOutput&       energyOutput,
 +                              gmx_ekindata_t*                ekind,
 +                              gmx::ArrayRef<const gmx::RVec> f,
 +                              gmx_bool                       bCPT,
 +                              gmx_bool                       bRerunMD,
 +                              gmx_bool                       bLastStep,
 +                              gmx_bool                       bDoConfOut,
 +                              gmx_bool                       bSumEkinhOld)
  {
      int   mdof_flags;
      rvec* x_for_confout = nullptr;
          mdof_flags |= MDOF_LAMBDA_COMPRESSED;
      }
  
- #if GMX_FAHCORE
-     if (bLastStep)
-     {
-         /* Enforce writing positions and velocities at end of run */
-         mdof_flags |= (MDOF_X | MDOF_V);
-     }
-     if (MASTER(cr))
-     {
-         fcReportProgress(ir->nsteps, step);
-     }
- #    if defined(__native_client__)
-     fcCheckin(MASTER(cr));
- #    endif
-     /* sync bCPT and fc record-keeping */
-     if (bCPT && MASTER(cr))
-     {
-         fcRequestCheckPoint();
-     }
- #endif
      if (mdof_flags != 0)
      {
          wallcycle_start(mdoutf_get_wcycle(outf), ewcTRAJ);
                  energyOutput.fillEnergyHistory(observablesHistory->energyHistory.get());
              }
          }
 +        // The current function is only called by legacy code, while
 +        // mdoutf_write_to_trajectory_files is also called from modular simulator. Create a dummy
 +        // modular simulator checkpointing object for compatibility.
 +        gmx::WriteCheckpointDataHolder checkpointDataHolder;
          // Note that part of the following code is duplicated in StatePropagatorData::trajectoryWriterTeardown.
          // This duplication is needed while both legacy and modular code paths are in use.
          // TODO: Remove duplication asap, make sure to keep in sync in the meantime.
 -        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step, t,
 -                                         state, state_global, observablesHistory, f);
 +        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step, t, state,
 +                                         state_global, observablesHistory, f, &checkpointDataHolder);
          if (bLastStep && step_rel == ir->nsteps && bDoConfOut && MASTER(cr) && !bRerunMD)
          {
              if (fr->bMolPBC && state == state_global)
              if (fr->bMolPBC && !ir->bPeriodicMols)
              {
                  /* Make molecules whole only for confout writing */
 -                do_pbc_mtop(ir->ePBC, state->box, top_global, x_for_confout);
 +                do_pbc_mtop(ir->pbcType, state->box, top_global, x_for_confout);
              }
              write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm), *top_global->name, top_global,
 -                                x_for_confout, state_global->v.rvec_array(), ir->ePBC, state->box);
 +                                x_for_confout, state_global->v.rvec_array(), ir->pbcType, state->box);
              if (fr->bMolPBC && state == state_global)
              {
                  sfree(x_for_confout);
          }
          wallcycle_stop(mdoutf_get_wcycle(outf), ewcTRAJ);
      }
+ #if GMX_FAHCORE
+     /* FAHCORE builds only: on the master rank, pass the global box and
+      * coordinates to fcWriteVisFrame(). The PBC setting is read from
+      * ir->pbcType, the same t_inputrec member used by the do_pbc_mtop() and
+      * write_sto_conf_mtop() calls in this function; ir->ePBC is the
+      * pre-rename spelling that came in from the release-2020 side.
+      * NOTE(review): confirm fcWriteVisFrame() accepts the PbcType value.
+      */
+     if (MASTER(cr))
+     {
+         fcWriteVisFrame(ir->pbcType, state_global->box, top_global, state_global->x.rvec_array());
+     }
+ #endif
  }
diff --combined src/gromacs/mdrun/md.cpp
index bf7aa914beecb3a6b20348dcadd6c8c84070a851,5ca5064229efa6182a30e4e2782847d41c16de22..96d0666e03ed29fbfaad97d38f6235e40691f753
  
  #include <algorithm>
  #include <memory>
 +#include <numeric>
  
 -#include "gromacs/awh/awh.h"
 +#include "gromacs/applied_forces/awh/awh.h"
  #include "gromacs/commandline/filenm.h"
  #include "gromacs/domdec/collect.h"
  #include "gromacs/domdec/dlbtiming.h"
  #include "gromacs/domdec/domdec.h"
  #include "gromacs/domdec/domdec_network.h"
  #include "gromacs/domdec/domdec_struct.h"
 +#include "gromacs/domdec/gpuhaloexchange.h"
  #include "gromacs/domdec/mdsetup.h"
  #include "gromacs/domdec/partition.h"
  #include "gromacs/essentialdynamics/edsam.h"
 -#include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_load_balancing.h"
 +#include "gromacs/ewald/pme_pp.h"
  #include "gromacs/fileio/trxio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
 +#include "gromacs/gpu_utils/device_stream_manager.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/imd/imd.h"
 -#include "gromacs/listed_forces/manage_threading.h"
 +#include "gromacs/listed_forces/listed_forces.h"
  #include "gromacs/math/functions.h"
 -#include "gromacs/math/utilities.h"
 +#include "gromacs/math/invertmatrix.h"
  #include "gromacs/math/vec.h"
  #include "gromacs/math/vectypes.h"
  #include "gromacs/mdlib/checkpointhandler.h"
  #include "gromacs/mdlib/compute_io.h"
  #include "gromacs/mdlib/constr.h"
 +#include "gromacs/mdlib/coupling.h"
  #include "gromacs/mdlib/ebin.h"
  #include "gromacs/mdlib/enerdata_utils.h"
  #include "gromacs/mdlib/energyoutput.h"
@@@ -87,7 -83,6 +87,7 @@@
  #include "gromacs/mdlib/force.h"
  #include "gromacs/mdlib/force_flags.h"
  #include "gromacs/mdlib/forcerec.h"
 +#include "gromacs/mdlib/freeenergyparameters.h"
  #include "gromacs/mdlib/md_support.h"
  #include "gromacs/mdlib/mdatoms.h"
  #include "gromacs/mdlib/mdoutf.h"
  #include "gromacs/mdlib/tgroup.h"
  #include "gromacs/mdlib/trajectory_writing.h"
  #include "gromacs/mdlib/update.h"
 -#include "gromacs/mdlib/update_constrain_cuda.h"
 +#include "gromacs/mdlib/update_constrain_gpu.h"
  #include "gromacs/mdlib/vcm.h"
  #include "gromacs/mdlib/vsite.h"
  #include "gromacs/mdrunutility/handlerestart.h"
  #include "gromacs/mdtypes/df_history.h"
  #include "gromacs/mdtypes/energyhistory.h"
  #include "gromacs/mdtypes/fcdata.h"
 +#include "gromacs/mdtypes/forcebuffers.h"
  #include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/mdtypes/group.h"
  #include "gromacs/mdtypes/inputrec.h"
  #include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/mdtypes/mdatom.h"
  #include "gromacs/mdtypes/mdrunoptions.h"
 +#include "gromacs/mdtypes/multipletimestepping.h"
  #include "gromacs/mdtypes/observableshistory.h"
  #include "gromacs/mdtypes/pullhistory.h"
  #include "gromacs/mdtypes/simulation_workload.h"
  #include "gromacs/mdtypes/state.h"
  #include "gromacs/mdtypes/state_propagator_data_gpu.h"
 -#include "gromacs/modularsimulator/energyelement.h"
 +#include "gromacs/modularsimulator/energydata.h"
  #include "gromacs/nbnxm/gpu_data_mgmt.h"
  #include "gromacs/nbnxm/nbnxm.h"
 -#include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/pulling/output.h"
  #include "gromacs/pulling/pull.h"
  #include "replicaexchange.h"
  #include "shellfc.h"
  
- #if GMX_FAHCORE
- #    include "corewrap.h"
- #endif
  using gmx::SimulationSignaller;
  
  void gmx::LegacySimulator::do_md()
      // will go away eventually.
      t_inputrec*  ir = inputrec;
      int64_t      step, step_rel;
 -    double       t, t0 = ir->init_t, lam0[efptNR];
 +    double       t, t0 = ir->init_t;
      gmx_bool     bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
 -    gmx_bool     bNS, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
 +    gmx_bool     bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
      gmx_bool     bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
      gmx_bool     do_ene, do_log, do_verbose;
      gmx_bool     bMasterState;
      unsigned int force_flags;
 -    tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, tmp_vir = { { 0 } },
 -           pres = { { 0 } };
 -    int                         i, m;
 -    rvec                        mu_tot;
 -    matrix                      pressureCouplingMu, M;
 -    gmx_repl_ex_t               repl_ex = nullptr;
 -    gmx_localtop_t              top;
 -    PaddedHostVector<gmx::RVec> f{};
 -    gmx_global_stat_t           gstat;
 -    t_graph*                    graph = nullptr;
 -    gmx_shellfc_t*              shellfc;
 -    gmx_bool                    bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
 -    gmx_bool                    bTemp, bPres, bTrotter;
 -    real                        dvdl_constr;
 -    std::vector<RVec>           cbuf;
 -    matrix                      lastbox;
 -    int                         lamnew = 0;
 +    tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
 +    int    i, m;
 +    rvec   mu_tot;
 +    matrix pressureCouplingMu, M;
 +    gmx_repl_ex_t     repl_ex = nullptr;
 +    gmx_global_stat_t gstat;
 +    gmx_shellfc_t*    shellfc;
 +    gmx_bool          bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
 +    gmx_bool          bTemp, bPres, bTrotter;
 +    real              dvdl_constr;
 +    std::vector<RVec> cbuf;
 +    matrix            lastbox;
 +    int               lamnew = 0;
      /* for FEP */
      int       nstfep = 0;
      double    cycles;
      int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
      bGStatEveryStep   = (nstglobalcomm == 1);
  
 -    SimulationGroups* groups = &top_global->groups;
 +    const SimulationGroups* groups = &top_global->groups;
  
      std::unique_ptr<EssentialDynamics> ed = nullptr;
      if (opt2bSet("-ei", nfile, fnm))
                    "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
      }
  
 -    initialize_lambdas(fplog, *ir, MASTER(cr), &state_global->fep_state, state_global->lambda, lam0);
 -    Update     upd(ir, deform);
 +    int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
 +    gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
 +    initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
 +    Update     upd(*ir, deform);
      const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
      const bool useReplicaExchange   = (replExParams.exchangeInterval > 0);
  
 +    const t_fcdata& fcdata = *fr->fcdata;
 +
      bool simulationsShareState = false;
      int  nstSignalComm         = nstglobalcomm;
      {
          // TODO This implementation of ensemble orientation restraints is nasty because
          // a user can't just do multi-sim with single-sim orientation restraints.
          bool usingEnsembleRestraints =
 -                (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0));
 +                (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
          bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
  
          // Replica exchange, ensemble restraints and AWH need all
      t_state*                 state;
  
  
 +    gmx_localtop_t top(top_global->ffparams);
 +
      auto mdatoms = mdAtoms->mdatoms();
  
 -    std::unique_ptr<UpdateConstrainCuda> integrator;
 +    const auto& simulationWork     = runScheduleWork->simulationWork;
 +    const bool  useGpuForPme       = simulationWork.useGpuPme;
 +    const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
 +    const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
 +    const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
  
 +    ForceBuffers f(fr->useMts, ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
 +                                       ? PinningPolicy::PinnedIfSupported
 +                                       : PinningPolicy::CannotBePinned);
      if (DOMAINDECOMP(cr))
      {
 -        dd_init_local_top(*top_global, &top);
 -
          stateInstance = std::make_unique<t_state>();
          state         = stateInstance.get();
          dd_init_local_state(cr->dd, state_global, state);
      else
      {
          state_change_natoms(state_global, state_global->natoms);
 -        f.resizeWithPadding(state_global->natoms);
          /* Copy the pointer to the global state */
          state = state_global;
  
          /* Generate and initialize new topology */
 -        mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &graph, mdAtoms, constr, vsite, shellfc);
 +        mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
  
          upd.setNumAtoms(state->natoms);
      }
  
 -    const auto& simulationWork     = runScheduleWork->simulationWork;
 -    const bool  useGpuForPme       = simulationWork.useGpuPme;
 -    const bool  useGpuForNonbonded = simulationWork.useGpuNonbonded;
 -    const bool  useGpuForBufferOps = simulationWork.useGpuBufferOps;
 -    const bool  useGpuForUpdate    = simulationWork.useGpuUpdate;
 +    std::unique_ptr<UpdateConstrainGpu> integrator;
  
      StatePropagatorDataGpu* stateGpu = fr->stateGpu;
  
 +    // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
      if (useGpuForUpdate)
      {
          GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
          GMX_RELEASE_ASSERT(
                  ir->etc != etcNOSEHOOVER,
                  "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
 -        GMX_RELEASE_ASSERT(ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN,
 -                           "Only Parrinello-Rahman and Berendsen pressure coupling are supported "
 -                           "with the GPU update.\n");
 +        GMX_RELEASE_ASSERT(
 +                ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN
 +                        || ir->epc == epcCRESCALE,
 +                "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
 +                "with the GPU update.\n");
          GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
                             "Virtual sites are not supported with the GPU update.\n");
          GMX_RELEASE_ASSERT(ed == nullptr,
                             "Essential dynamics is not supported with the GPU update.\n");
          GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(ir->pull),
                             "Constraints pulling is not supported with the GPU update.\n");
 -        GMX_RELEASE_ASSERT(fcd->orires.nr == 0,
 +        GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
                             "Orientation restraints are not supported with the GPU update.\n");
 -        GMX_RELEASE_ASSERT(ir->efep == efepNO,
 -                           "Free energy perturbations are not supported with the GPU update.");
 -        GMX_RELEASE_ASSERT(graph == nullptr, "The graph is not supported with GPU update.");
 +        GMX_RELEASE_ASSERT(
 +                ir->efep == efepNO
 +                        || (!haveFreeEnergyType(*ir, efptBONDED) && !haveFreeEnergyType(*ir, efptMASS)),
 +                "Free energy perturbation of masses and constraints are not supported with the GPU "
 +                "update.");
  
          if (constr != nullptr && constr->numConstraintsTotal() > 0)
          {
          {
              GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
          }
 -        integrator = std::make_unique<UpdateConstrainCuda>(
 -                *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
 -
 -        t_pbc pbc;
 -        set_pbc(&pbc, epbcXYZ, state->box);
 -        integrator->setPbc(&pbc);
 +        GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
 +                           "Device stream manager should be initialized in order to use GPU "
 +                           "update-constraints.");
 +        GMX_RELEASE_ASSERT(
 +                fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
 +                "Update stream should be initialized in order to use GPU "
 +                "update-constraints.");
 +        integrator = std::make_unique<UpdateConstrainGpu>(
 +                *ir, *top_global, fr->deviceStreamManager->context(),
 +                fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
 +                stateGpu->xUpdatedOnDevice());
 +
 +        integrator->setPbc(PbcType::Xyz, state->box);
      }
  
      if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
      {
          changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
      }
 -    if ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
 -    {
 -        changePinningPolicy(&f, PinningPolicy::PinnedIfSupported);
 -    }
      if (useGpuForUpdate)
      {
          changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
  
      if (MASTER(cr))
      {
 -        EnergyElement::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
 +        EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
      }
  
 -    preparePrevStepPullCom(ir, pull_work, mdatoms, state, state_global, cr,
 +    preparePrevStepPullCom(ir, pull_work, mdatoms->massT, state, state_global, cr,
                             startingBehavior != StartingBehavior::NewSimulation);
  
      // TODO: Remove this by converting AWH into a ForceProvider
          if (constr)
          {
              /* Constrain the initial coordinates and velocities */
 -            do_constrain_first(fplog, constr, ir, mdatoms, state->natoms, state->x.arrayRefWithPadding(),
 -                               state->v.arrayRefWithPadding(), state->box, state->lambda[efptBONDED]);
 +            do_constrain_first(fplog, constr, ir, mdatoms->nr, mdatoms->homenr,
 +                               state->x.arrayRefWithPadding(), state->v.arrayRefWithPadding(),
 +                               state->box, state->lambda[efptBONDED]);
          }
          if (vsite)
          {
              /* Construct the virtual sites for the initial configuration */
 -            construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, top.idef.iparams,
 -                             top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
 +            vsite->construct(state->x, ir->delta_t, {}, state->box);
          }
      }
  
          nstfep = ir->fepvals->nstdhdl;
          if (ir->bExpanded)
          {
 -            nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
 +            nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
          }
          if (useReplicaExchange)
          {
 -            nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep);
 +            nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
 +        }
 +        if (ir->bDoAwh)
 +        {
 +            nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep);
          }
      }
  
      bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
      if (PAR(cr))
      {
 -        gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
 +        gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
      }
      if (hasReadEkinState)
      {
              cglo_flags_iteration |= CGLO_STOPCM;
              cglo_flags_iteration &= ~CGLO_TEMPERATURE;
          }
 -        compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
 -                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
 -                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
 +        compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 +                        makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr,
 +                        enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
                          state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
                          cglo_flags_iteration
                                  | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
              /* At initialization, do not pass x with acceleration-correction mode
               * to avoid (incorrect) correction of the initial coordinates.
               */
 -            rvec* xPtr = nullptr;
 -            if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
 -            {
 -                xPtr = state->x.rvec_array();
 -            }
 -            process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
 +            auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
 +                                                                     : makeArrayRef(state->x);
 +            process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
              inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
          }
      }
      checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
 -                                    state->x.rvec_array(), state->box,
 +                                    makeConstArrayRef(state->x), state->box,
                                      &shouldCheckNumberOfBondedInteractions);
      if (ir->eI == eiVVAK)
      {
             kinetic energy calculation.  This minimized excess variables, but
             perhaps loses some logic?*/
  
 -        compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
 -                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
 -                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
 +        compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 +                        makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr,
 +                        enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
                          state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
      }
  
      wallcycle_start(wcycle, ewcRUN);
      print_start(fplog, cr, walltime_accounting, "mdrun");
  
- #if GMX_FAHCORE
-     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
-     int chkpt_ret = fcCheckPointParallel(cr->nodeid, NULL, 0);
-     if (chkpt_ret == 0)
-     {
-         gmx_fatal(3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0);
-     }
- #endif
      /***********************************************************
       *
       *             Loop over MD steps
      bExchanged       = FALSE;
      bNeedRepartition = FALSE;
  
 +    step     = ir->init_step;
 +    step_rel = 0;
 +
      auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
              compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
              MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
  
      const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
  
 -    step     = ir->init_step;
 -    step_rel = 0;
 -
 -    // TODO extract this to new multi-simulation module
      if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
      {
 -        if (!multisim_int_all_are_equal(ms, ir->nsteps))
 -        {
 -            GMX_LOG(mdlog.warning)
 -                    .appendText(
 -                            "Note: The number of steps is not consistent across multi "
 -                            "simulations,\n"
 -                            "but we are proceeding anyway!");
 -        }
 -        if (!multisim_int_all_are_equal(ms, ir->init_step))
 -        {
 -            if (simulationsShareState)
 -            {
 -                if (MASTER(cr))
 -                {
 -                    gmx_fatal(FARGS,
 -                              "The initial step is not consistent across multi simulations which "
 -                              "share the state");
 -                }
 -                gmx_barrier(cr);
 -            }
 -            else
 -            {
 -                GMX_LOG(mdlog.warning)
 -                        .appendText(
 -                                "Note: The initial step is not consistent across multi "
 -                                "simulations,\n"
 -                                "but we are proceeding anyway!");
 -            }
 -        }
 +        logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
      }
  
      /* and stop now if we should */
          if (ir->efep != efepNO || ir->bSimTemp)
          {
              /* find and set the current lambdas */
 -            setCurrentLambdasLocal(step, ir->fepvals, lam0, state->lambda, state->fep_state);
 +            state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
  
              bDoDHDL     = do_per_step(step, ir->fepvals->nstdhdl);
              bDoFEP      = ((ir->efep != efepNO) && do_per_step(step, nstfep));
              /* Correct the new box if it is too skewed */
              if (inputrecDynamicBox(ir))
              {
 -                if (correct_box(fplog, step, state->box, graph))
 +                if (correct_box(fplog, step, state->box))
                  {
                      bMasterState = TRUE;
                      // If update is offloaded, it should be informed about the box size change
                      if (useGpuForUpdate)
                      {
 -                        t_pbc pbc;
 -                        set_pbc(&pbc, epbcXYZ, state->box);
 -                        integrator->setPbc(&pbc);
 +                        integrator->setPbc(PbcType::Xyz, state->box);
                      }
                  }
              }
              }
          }
  
 +        // Allocate or re-size GPU halo exchange object, if necessary
 +        if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
 +        {
 +            GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
 +                               "GPU device manager has to be initialized to use GPU "
 +                               "version of halo exchange.");
 +            constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
 +        }
 +
          if (MASTER(cr) && do_log)
          {
 -            energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
 +            gmx::EnergyOutput::printHeader(fplog, step,
 +                                           t); /* can we improve the information printed here? */
          }
  
          if (ir->efep != efepNO)
              /* We need the kinetic energy at minus the half step for determining
               * the full step kinetic energy and possibly for T-coupling.*/
              /* This may not be quite working correctly yet . . . . */
 -            compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
 -                            state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
 -                            nullptr, nullptr, nullptr, nullptr, mu_tot, constr, &nullSignaller,
 +            compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 +                            makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle,
 +                            enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller,
                              state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
                              CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
              checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
 -                                            &top, state->x.rvec_array(), state->box,
 +                                            &top, makeConstArrayRef(state->x), state->box,
                                              &shouldCheckNumberOfBondedInteractions);
          }
          clear_mat(force_vir);
          force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
                         | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
                         | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
 +        if (fr->useMts && !do_per_step(step, ir->nstfout))
 +        {
 +            force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
 +        }
  
          if (shellfc)
          {
              /* Now is the time to relax the shells */
              relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir,
 -                                imdSession, pull_work, bNS, force_flags, &top, constr, enerd, fcd,
 +                                imdSession, pull_work, bNS, force_flags, &top, constr, enerd,
                                  state->natoms, state->x.arrayRefWithPadding(),
 -                                state->v.arrayRefWithPadding(), state->box, state->lambda, &state->hist,
 -                                f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph,
 -                                shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
 +                                state->v.arrayRefWithPadding(), state->box, state->lambda,
 +                                &state->hist, &f.view(), force_vir, mdatoms, nrnb, wcycle, shellfc,
 +                                fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
          }
          else
          {
               */
              do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
                       nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
 -                     f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph,
 -                     fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
 +                     &f.view(), force_vir, mdatoms, enerd, state->lambda, fr, runScheduleWork,
 +                     vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
                       (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
          }
  
                                 trotter_seq, ettTSEQ1);
              }
  
 -            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
 -                          etrtVELOCITY1, cr, constr);
 +            upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind,
 +                              M, etrtVELOCITY1, cr, constr != nullptr);
  
              wallcycle_stop(wcycle, ewcUPDATE);
 -            constrain_velocities(step, nullptr, state, shake_vir, constr, bCalcVir, do_log, do_ene);
 +            constrain_velocities(constr, do_log, do_ene, step, state, nullptr, bCalcVir, shake_vir);
              wallcycle_start(wcycle, ewcUPDATE);
              /* if VV, compute the pressure and constraints */
              /* For VV2, we strictly only need this if using pressure
              if (bGStat || do_per_step(step - 1, nstglobalcomm))
              {
                  wallcycle_stop(wcycle, ewcUPDATE);
 -                compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
 -                                state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
 -                                force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
 +                compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 +                                makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle,
 +                                enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
                                  state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
                                  (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
                                          | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
                     b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
                     EkinAveVel because it's needed for the pressure */
                  checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 -                                                top_global, &top, state->x.rvec_array(), state->box,
 -                                                &shouldCheckNumberOfBondedInteractions);
 +                                                top_global, &top, makeConstArrayRef(state->x),
 +                                                state->box, &shouldCheckNumberOfBondedInteractions);
                  if (bStopCM)
                  {
 -                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
 -                                           state->v.rvec_array());
 +                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
 +                                           makeArrayRef(state->v));
                      inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
                  }
                  wallcycle_start(wcycle, ewcUPDATE);
                      /* We need the kinetic energy at minus the half step for determining
                       * the full step kinetic energy and possibly for T-coupling.*/
                      /* This may not be quite working correctly yet . . . . */
 -                    compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(),
 -                                    state->v.rvec_array(), state->box, state->lambda[efptVDW],
 -                                    mdatoms, nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr,
 -                                    nullptr, mu_tot, constr, &nullSignaller, state->box, nullptr,
 -                                    &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
 +                    compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 +                                    makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle,
 +                                    enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller,
 +                                    state->box, nullptr, &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
                      wallcycle_start(wcycle, ewcUPDATE);
                  }
              }
              {
                  saved_conserved_quantity -= enerd->term[F_DISPCORR];
              }
 -            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
 +            /* sum up the foreign kinetic energy and dK/dl terms for vv.  currently done every step so that dhdl is correct in the .edr */
              if (ir->efep != efepNO)
              {
 -                sum_dhdl(enerd, state->lambda, *ir->fepvals);
 +                accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
              }
          }
  
          if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
              && do_per_step(step, ir->nstfout))
          {
 -            stateGpu->copyForcesFromGpu(ArrayRef<RVec>(f), AtomLocality::Local);
 +            stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
              stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
          }
          /* Now we have the energies and forces corresponding to the
           * the update.
           */
          do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global,
 -                                 observablesHistory, top_global, fr, outf, energyOutput, ekind, f,
 -                                 checkpointHandler->isCheckpointingStep(), bRerunMD, bLastStep,
 -                                 mdrunOptions.writeConfout, bSumEkinhOld);
 +                                 observablesHistory, top_global, fr, outf, energyOutput, ekind,
 +                                 f.view().force(), checkpointHandler->isCheckpointingStep(),
 +                                 bRerunMD, bLastStep, mdrunOptions.writeConfout, bSumEkinhOld);
          /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
          bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
  
              /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
              if (constr && bIfRandomize)
              {
 -                constrain_velocities(step, nullptr, state, tmp_vir, constr, bCalcVir, do_log, do_ene);
 +                constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
              }
          }
          /* Box is changed in update() when we do pressure coupling,
          if (EI_VV(ir->eI))
          {
              /* velocity half-step update */
 -            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
 -                          etrtVELOCITY2, cr, constr);
 +            upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind,
 +                              M, etrtVELOCITY2, cr, constr != nullptr);
          }
  
          /* Above, initialize just copies ekinh into ekin,
              // If the buffer ops were not offloaded this step, the forces are on the host and have to be copied
              if (!runScheduleWork->stepWork.useGpuFBufferOps)
              {
 -                stateGpu->copyForcesToGpu(ArrayRef<RVec>(f), AtomLocality::Local);
 +                stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
              }
  
              const bool doTemperatureScaling =
          }
          else
          {
 -            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
 -                          etrtPOSITION, cr, constr);
 +            /* With multiple time stepping we need to do an additional normal
 +             * update step to obtain the virial, as the actual MTS integration
 +             * uses an acceleration where the slow forces are multiplied by mtsFactor.
 +             * Using that acceleration would result in a virial where the slow
 +             * force contribution would be a factor mtsFactor too large.
 +             */
 +            if (fr->useMts && bCalcVir && constr != nullptr)
 +            {
 +                upd.update_for_constraint_virial(*ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
 +
 +                constrain_coordinates(constr, do_log, do_ene, step, state,
 +                                      upd.xp()->arrayRefWithPadding(), &dvdl_constr, bCalcVir, shake_vir);
 +            }
 +
 +            ArrayRefWithPadding<const RVec> forceCombined =
 +                    (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
 +                            ? f.view().forceMtsCombinedWithPadding()
 +                            : f.view().forceWithPadding();
 +            upd.update_coords(*ir, step, mdatoms, state, forceCombined, fcdata, ekind, M,
 +                              etrtPOSITION, cr, constr != nullptr);
  
              wallcycle_stop(wcycle, ewcUPDATE);
  
 -            constrain_coordinates(step, &dvdl_constr, state, shake_vir, &upd, constr, bCalcVir,
 -                                  do_log, do_ene);
 +            constrain_coordinates(constr, do_log, do_ene, step, state, upd.xp()->arrayRefWithPadding(),
 +                                  &dvdl_constr, bCalcVir && !fr->useMts, shake_vir);
  
 -            update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, cr, nrnb, wcycle, &upd,
 -                                  constr, do_log, do_ene);
 -            finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, constr);
 +            upd.update_sd_second_half(*ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle,
 +                                      constr, do_log, do_ene);
 +            upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
          }
  
          if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
          {
              /* erase F_EKIN and F_TEMP here? */
              /* just compute the kinetic energy at the half step to perform a trotter step */
 -            compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
 -                            state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
 -                            force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller, lastbox,
 +            compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 +                            makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, enerd,
 +                            force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, lastbox,
                              nullptr, &bSumEkinhOld, (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
              wallcycle_start(wcycle, ewcUPDATE);
              trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
              /* now we know the scaling, we can compute the positions again */
              std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
  
 -            update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd,
 -                          etrtPOSITION, cr, constr);
 +            upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind,
 +                              M, etrtPOSITION, cr, constr != nullptr);
              wallcycle_stop(wcycle, ewcUPDATE);
  
              /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
               * to numerical errors, or are they important
               * physically? I'm thinking they are just errors, but not completely sure.
               * For now, will call without actually constraining, constr=NULL*/
 -            finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, nullptr);
 +            upd.finish_update(*ir, mdatoms, state, wcycle, false);
          }
          if (EI_VV(ir->eI))
          {
              /* this factor of 2 correction is necessary
                 because half of the constraint force is removed
                 in the vv step, so we have to double it.  See
 -               the Redmine issue #1255.  It is not yet clear
 +               the Issue #1255.  It is not yet clear
                 if the factor of 2 is exact, or just a very
                 good approximation, and this will be
                 investigated.  The next step is to see if this
          if (vsite != nullptr)
          {
              wallcycle_start(wcycle, ewcVSITECONSTR);
 -            if (graph != nullptr)
 -            {
 -                shift_self(graph, state->box, state->x.rvec_array());
 -            }
 -            construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
 -                             top.idef.iparams, top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
 -
 -            if (graph != nullptr)
 -            {
 -                unshift_self(graph, state->box, state->x.rvec_array());
 -            }
 +            vsite->construct(state->x, ir->delta_t, state->v, state->box);
              wallcycle_stop(wcycle, ewcVSITECONSTR);
          }
  
                  bool                doIntraSimSignal = true;
                  SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
  
 -                compute_globals(
 -                        gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
 -                        state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
 -                        force_vir, shake_vir, total_vir, pres, mu_tot, constr, &signaller, lastbox,
 -                        &totalNumberOfBondedInteractions, &bSumEkinhOld,
 -                        (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
 -                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 -                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
 -                                | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
 -                                | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
 -                                                                         : 0));
 +                compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
 +                                makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm,
 +                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
 +                                &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld,
 +                                (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
 +                                        | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                                        | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
 +                                        | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
 +                                        | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
 +                                                                                 : 0));
                  checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
 -                                                top_global, &top, state->x.rvec_array(), state->box,
 -                                                &shouldCheckNumberOfBondedInteractions);
 +                                                top_global, &top, makeConstArrayRef(state->x),
 +                                                state->box, &shouldCheckNumberOfBondedInteractions);
                  if (!EI_VV(ir->eI) && bStopCM)
                  {
 -                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
 -                                           state->v.rvec_array());
 +                    process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
 +                                           makeArrayRef(state->v));
                      inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
  
                      // TODO: The special case of removing CM motion should be dealt with more gracefully
  
          if (ir->efep != efepNO && !EI_VV(ir->eI))
          {
 -            /* Sum up the foreign energy and dhdl terms for md and sd.
 -               Currently done every step so that dhdl is correct in the .edr */
 -            sum_dhdl(enerd, state->lambda, *ir->fepvals);
 +            /* Sum up the foreign energy and dK/dl terms for md and sd.
 +               Currently done every step so that dH/dl is correct in the .edr */
 +            accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
          }
  
          update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir,
 -                                         pressureCouplingMu, state, nrnb, &upd, !useGpuForUpdate);
 +                                         pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
  
          const bool doBerendsenPressureCoupling =
                  (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
 -        if (useGpuForUpdate && (doBerendsenPressureCoupling || doParrinelloRahman))
 +        const bool doCRescalePressureCoupling =
 +                (inputrec->epc == epcCRESCALE && do_per_step(step, inputrec->nstpcouple));
 +        if (useGpuForUpdate
 +            && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
          {
              integrator->scaleCoordinates(pressureCouplingMu);
 -            t_pbc pbc;
 -            set_pbc(&pbc, epbcXYZ, state->box);
 -            integrator->setPbc(&pbc);
 +            if (doCRescalePressureCoupling)
 +            {
 +                matrix pressureCouplingInvMu;
 +                gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
 +                integrator->scaleVelocities(pressureCouplingInvMu);
 +            }
 +            integrator->setPbc(PbcType::Xyz, state->box);
          }
  
          /* ################# END UPDATE STEP 2 ################# */
              }
              if (bCalcEner)
              {
 -                energyOutput.addDataAtEnergyStep(bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, state,
 -                                                 ir->fepvals, ir->expandedvals, lastbox, shake_vir,
 -                                                 force_vir, total_vir, pres, ekind, mu_tot, constr);
 +                energyOutput.addDataAtEnergyStep(
 +                        bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, ir->fepvals,
 +                        ir->expandedvals, lastbox,
 +                        PTCouplingArrays{ state->boxv, state->nosehoover_xi, state->nosehoover_vxi,
 +                                          state->nhpres_xi, state->nhpres_vxi },
 +                        state->fep_state, shake_vir, force_vir, total_vir, pres, ekind, mu_tot, constr);
              }
              else
              {
  
              if (doSimulatedAnnealing)
              {
 -                energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
 +                gmx::EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups,
 +                                                              &(ir->opts));
              }
              if (do_log || do_ene || do_dr || do_or)
              {
                  energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
 -                                                   do_log ? fplog : nullptr, step, t, fcd, awh.get());
 +                                                   do_log ? fplog : nullptr, step, t,
 +                                                   fr->fcdata.get(), awh.get());
 +            }
 +            if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
 +            {
 +                const bool isInitialOutput = false;
 +                printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
              }
  
              if (ir->bPull)
              /* Gets written into the state at the beginning of next loop*/
              state->fep_state = lamnew;
          }
 +        else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
 +        {
 +            state->fep_state = awh->fepLambdaState();
 +        }
          /* Print the remaining wall clock time for the run */
          if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
          {
          step++;
          step_rel++;
  
+ #if GMX_FAHCORE
+         if (MASTER(cr))
+         {
+             fcReportProgress(ir->nsteps + ir->init_step, step);
+         }
+ #endif
          resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb,
                                      fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
  
      {
          if (ir->nstcalcenergy > 0)
          {
 -            energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
 +            gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
              energyOutput.printAverages(fplog, groups);
          }
      }
index dfc399541f395bd24fcd8e2ee5ef06df0812cfb4,3c9def3eaa786fd032bbfecbde701cb33171121e..90275f50a3605c562a59e205c8c6ad41f81eae91
@@@ -64,8 -64,8 +64,8 @@@
  #include "gromacs/domdec/localatomsetmanager.h"
  #include "gromacs/domdec/partition.h"
  #include "gromacs/ewald/ewald_utils.h"
 -#include "gromacs/ewald/pme.h"
  #include "gromacs/ewald/pme_gpu_program.h"
 +#include "gromacs/ewald/pme_only.h"
  #include "gromacs/ewald/pme_pp_comm_gpu.h"
  #include "gromacs/fileio/checkpoint.h"
  #include "gromacs/fileio/gmxfio.h"
  #include "gromacs/fileio/tpxio.h"
  #include "gromacs/gmxlib/network.h"
  #include "gromacs/gmxlib/nrnb.h"
 -#include "gromacs/gpu_utils/gpu_utils.h"
 +#include "gromacs/gpu_utils/device_stream_manager.h"
  #include "gromacs/hardware/cpuinfo.h"
  #include "gromacs/hardware/detecthardware.h"
 +#include "gromacs/hardware/device_management.h"
  #include "gromacs/hardware/printhardware.h"
  #include "gromacs/imd/imd.h"
  #include "gromacs/listed_forces/disre.h"
  #include "gromacs/listed_forces/gpubonded.h"
 +#include "gromacs/listed_forces/listed_forces.h"
  #include "gromacs/listed_forces/orires.h"
  #include "gromacs/math/functions.h"
  #include "gromacs/math/utilities.h"
  #include "gromacs/mdlib/force.h"
  #include "gromacs/mdlib/forcerec.h"
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
 +#include "gromacs/mdlib/gpuforcereduction.h"
  #include "gromacs/mdlib/makeconstraints.h"
  #include "gromacs/mdlib/md_support.h"
  #include "gromacs/mdlib/mdatoms.h"
 -#include "gromacs/mdlib/membed.h"
 -#include "gromacs/mdlib/qmmm.h"
  #include "gromacs/mdlib/sighandler.h"
  #include "gromacs/mdlib/stophandler.h"
 +#include "gromacs/mdlib/tgroup.h"
  #include "gromacs/mdlib/updategroups.h"
 +#include "gromacs/mdlib/vsite.h"
  #include "gromacs/mdrun/mdmodules.h"
  #include "gromacs/mdrun/simulationcontext.h"
 +#include "gromacs/mdrun/simulationinput.h"
 +#include "gromacs/mdrun/simulationinputhandle.h"
  #include "gromacs/mdrunutility/handlerestart.h"
  #include "gromacs/mdrunutility/logging.h"
  #include "gromacs/mdrunutility/multisim.h"
  #include "gromacs/mdrunutility/printtime.h"
  #include "gromacs/mdrunutility/threadaffinity.h"
 +#include "gromacs/mdtypes/checkpointdata.h"
  #include "gromacs/mdtypes/commrec.h"
  #include "gromacs/mdtypes/enerdata.h"
  #include "gromacs/mdtypes/fcdata.h"
 +#include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/mdtypes/group.h"
  #include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/interaction_const.h"
  #include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/mdtypes/mdatom.h"
  #include "gromacs/mdtypes/mdrunoptions.h"
  #include "gromacs/mdtypes/observableshistory.h"
  #include "gromacs/mdtypes/simulation_workload.h"
  #include "gromacs/mdtypes/state.h"
  #include "gromacs/mdtypes/state_propagator_data_gpu.h"
 +#include "gromacs/modularsimulator/modularsimulator.h"
  #include "gromacs/nbnxm/gpu_data_mgmt.h"
  #include "gromacs/nbnxm/nbnxm.h"
  #include "gromacs/nbnxm/pairlist_tuning.h"
  #include "gromacs/utility/stringutil.h"
  
  #include "isimulator.h"
 +#include "membedholder.h"
  #include "replicaexchange.h"
  #include "simulatorbuilder.h"
  
- #if GMX_FAHCORE
- #    include "corewrap.h"
- #endif
  namespace gmx
  {
  
@@@ -205,14 -190,13 +201,14 @@@ static DevelopmentFeatureFlags manageDe
      // getenv results are ignored when clearly they are used.
  #pragma GCC diagnostic push
  #pragma GCC diagnostic ignored "-Wunused-result"
 -    devFlags.enableGpuBufferOps = (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr)
 -                                  && (GMX_GPU == GMX_GPU_CUDA) && useGpuForNonbonded;
 +
 +    devFlags.enableGpuBufferOps =
 +            GMX_GPU_CUDA && useGpuForNonbonded && (getenv("GMX_USE_GPU_BUFFER_OPS") != nullptr);
 +    devFlags.enableGpuHaloExchange = GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_DD_COMMS") != nullptr;
-     devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr);
+     devFlags.forceGpuUpdateDefault = (getenv("GMX_FORCE_UPDATE_DEFAULT_GPU") != nullptr) || GMX_FAHCORE;
 -    devFlags.enableGpuHaloExchange =
 -            (getenv("GMX_GPU_DD_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
      devFlags.enableGpuPmePPComm =
 -            (getenv("GMX_GPU_PME_PP_COMMS") != nullptr && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA));
 +            GMX_GPU_CUDA && GMX_THREAD_MPI && getenv("GMX_GPU_PME_PP_COMMS") != nullptr;
 +
  #pragma GCC diagnostic pop
  
      if (devFlags.enableGpuBufferOps)
              GMX_LOG(mdlog.warning)
                      .asParagraph()
                      .appendTextFormatted(
 -                            "This run uses the 'GPU halo exchange' feature, enabled by the "
 +                            "This run has requested the 'GPU halo exchange' feature, enabled by "
 +                            "the "
                              "GMX_GPU_DD_COMMS environment variable.");
          }
          else
      {
          if (pmeRunMode == PmeRunMode::GPU)
          {
 +            if (!devFlags.enableGpuBufferOps)
 +            {
 +                GMX_LOG(mdlog.warning)
 +                        .asParagraph()
 +                        .appendTextFormatted(
 +                                "Enabling GPU buffer operations required by GMX_GPU_PME_PP_COMMS "
 +                                "(equivalent with GMX_USE_GPU_BUFFER_OPS=1).");
 +                devFlags.enableGpuBufferOps = true;
 +            }
              GMX_LOG(mdlog.warning)
                      .asParagraph()
                      .appendTextFormatted(
@@@ -335,7 -309,7 +331,7 @@@ Mdrunner Mdrunner::cloneOnSpawnedThread
  
      // Copy members of master runner.
      // \todo Replace with builder when Simulation context and/or runner phases are better defined.
 -    // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375
 +    // Ref https://gitlab.com/gromacs/gromacs/-/issues/2587 and https://gitlab.com/gromacs/gromacs/-/issues/2375
      newRunner.hw_opt    = hw_opt;
      newRunner.filenames = filenames;
  
      newRunner.pforce          = pforce;
      // Give the spawned thread the newly created valid communicator
      // for the simulation.
 -    newRunner.communicator        = MPI_COMM_WORLD;
 -    newRunner.ms                  = ms;
 -    newRunner.startingBehavior    = startingBehavior;
 -    newRunner.stopHandlerBuilder_ = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 +    newRunner.libraryWorldCommunicator = MPI_COMM_WORLD;
 +    newRunner.simulationCommunicator   = MPI_COMM_WORLD;
 +    newRunner.ms                       = ms;
 +    newRunner.startingBehavior         = startingBehavior;
 +    newRunner.stopHandlerBuilder_      = std::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_);
 +    newRunner.inputHolder_             = inputHolder_;
  
      threadMpiMdrunnerAccessBarrier();
  
@@@ -399,8 -371,7 +395,8 @@@ void Mdrunner::spawnThreads(int numThre
  
      // Give the master thread the newly created valid communicator for
      // the simulation.
 -    communicator = MPI_COMM_WORLD;
 +    libraryWorldCommunicator = MPI_COMM_WORLD;
 +    simulationCommunicator   = MPI_COMM_WORLD;
      threadMpiMdrunnerAccessBarrier();
  #else
      GMX_UNUSED_VALUE(numThreadsToLaunch);
@@@ -420,18 -391,6 +416,18 @@@ static void prepare_verlet_scheme(FILE
                                    bool                makeGpuPairList,
                                    const gmx::CpuInfo& cpuinfo)
  {
 +    // We checked the cut-offs in grompp, but double-check here.
 +    // We have PME+LJcutoff kernels for rcoulomb>rvdw.
 +    if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT)
 +    {
 +        GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw,
 +                           "With Verlet lists and PME we should have rcoulomb>=rvdw");
 +    }
 +    else
 +    {
 +        GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw,
 +                           "With Verlet lists and no PME rcoulomb and rvdw should be identical");
 +    }
      /* For NVE simulations, we will retain the initial list buffer */
      if (EI_DYNAMICS(ir->eI) && ir->verletbuf_tol > 0 && !(EI_MD(ir->eI) && ir->etc == etcNO))
      {
@@@ -722,13 -681,14 +718,13 @@@ int Mdrunner::mdrunner(
  {
      matrix                    box;
      t_forcerec*               fr               = nullptr;
 -    t_fcdata*                 fcd              = nullptr;
      real                      ewaldcoeff_q     = 0;
      real                      ewaldcoeff_lj    = 0;
      int                       nChargePerturbed = -1, nTypePerturbed = 0;
      gmx_wallcycle_t           wcycle;
      gmx_walltime_accounting_t walltime_accounting = nullptr;
 -    gmx_membed_t*             membed              = nullptr;
 -    gmx_hw_info_t*            hwinfo              = nullptr;
 +    MembedHolder              membedHolder(filenames.size(), filenames.data());
 +    gmx_hw_info_t*            hwinfo = nullptr;
  
      /* CAUTION: threads may be started later on in this function, so
         cr doesn't reflect the final parallel state right now */
  
      /* TODO: inputrec should tell us whether we use an algorithm, not a file option */
      const bool doEssentialDynamics = opt2bSet("-ei", filenames.size(), filenames.data());
 -    const bool doMembed            = opt2bSet("-membed", filenames.size(), filenames.data());
      const bool doRerun             = mdrunOptions.rerun;
  
      // Handle task-assignment related user options.
      {
          fplog = gmx_fio_getfp(logFileHandle);
      }
 -    const bool       isSimulationMasterRank = findIsSimulationMasterRank(ms, communicator);
 +    const bool isSimulationMasterRank = findIsSimulationMasterRank(ms, simulationCommunicator);
      gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
      gmx::MDLogger    mdlog(logOwner.logger());
  
      // this is expressed, e.g. by expressly running detection only the
      // master rank for thread-MPI, rather than relying on the mutex
      // and reference count.
 -    PhysicalNodeCommunicator physicalNodeComm(communicator, gmx_physicalnode_id_hash());
 +    PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
      hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
  
      gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
  
 -    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->gpu_info, hw_opt.gpuIdsAvailable);
 +    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
 +    const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);
  
      // Print citation requests after all software/hardware printing
      pleaseCiteGromacs(fplog);
  
 -    // TODO Replace this by unique_ptr once t_inputrec is C++
 -    t_inputrec               inputrecInstance;
 -    t_inputrec*              inputrec = nullptr;
 -    std::unique_ptr<t_state> globalState;
 +    // Note: legacy program logic relies on checking whether these pointers are assigned.
 +    // Objects may or may not be allocated later.
 +    std::unique_ptr<t_inputrec> inputrec;
 +    std::unique_ptr<t_state>    globalState;
  
      auto partialDeserializedTpr = std::make_unique<PartialDeserializedTprFile>();
  
      if (isSimulationMasterRank)
      {
 +        // Allocate objects to be initialized by later function calls.
          /* Only the master rank has the global state */
          globalState = std::make_unique<t_state>();
 +        inputrec    = std::make_unique<t_inputrec>();
  
          /* Read (nearly) all data required for the simulation
           * and keep the partly serialized tpr contents to send to other ranks later
           */
 -        *partialDeserializedTpr = read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()),
 -                                                 &inputrecInstance, globalState.get(), &mtop);
 -        inputrec                = &inputrecInstance;
 +        applyGlobalSimulationState(*inputHolder_.get(), partialDeserializedTpr.get(),
 +                                   globalState.get(), inputrec.get(), &mtop);
      }
  
      /* Check and update the hardware options for internal consistency */
      checkAndUpdateHardwareOptions(mdlog, &hw_opt, isSimulationMasterRank, domdecOptions.numPmeRanks,
 -                                  inputrec);
 +                                  inputrec.get());
  
      if (GMX_THREAD_MPI && isSimulationMasterRank)
      {
              // the number of GPUs to choose the number of ranks.
              auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr);
              useGpuForNonbonded         = decideWhetherToUseGpusForNonbondedWithThreadMpi(
 -                    nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded,
 +                    nonbondedTarget, numDevicesToUse, userGpuTaskAssignment, emulateGpuNonbonded,
                      canUseGpuForNonbonded,
                      gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
                      hw_opt.nthreads_tmpi);
              useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
 -                    useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
 -                    *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
 +                    useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
 +                    *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
  
           * TODO Over-writing the user-supplied value here does
           * prevent any possible subsequent checks from working
           * correctly. */
 -        hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo, &hw_opt, gpuIdsToUse, useGpuForNonbonded,
 -                                                useGpuForPme, inputrec, &mtop, mdlog, doMembed);
 +        hw_opt.nthreads_tmpi =
 +                get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
 +                                 inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
  
          // Now start the threads for thread MPI.
          spawnThreads(hw_opt.nthreads_tmpi);
          // The spawned threads enter mdrunner() and execution of
          // master and spawned threads joins at the end of this block.
 -        physicalNodeComm = PhysicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
 +        physicalNodeComm =
 +                PhysicalNodeCommunicator(libraryWorldCommunicator, gmx_physicalnode_id_hash());
      }
  
 -    GMX_RELEASE_ASSERT(communicator == MPI_COMM_WORLD, "Must have valid world communicator");
 -    CommrecHandle crHandle = init_commrec(communicator, ms);
 +    GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
 +                       "Must have valid communicator unless running a multi-simulation");
 +    CommrecHandle crHandle = init_commrec(simulationCommunicator);
      t_commrec*    cr       = crHandle.get();
      GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
  
          /* now broadcast everything to the non-master nodes/threads: */
          if (!isSimulationMasterRank)
          {
 -            inputrec = &inputrecInstance;
 +            // Until now, only the master rank has a non-null pointer.
 +            // On non-master ranks, allocate the object that will receive data in the following call.
 +            inputrec = std::make_unique<t_inputrec>();
          }
 -        init_parallel(cr, inputrec, &mtop, partialDeserializedTpr.get());
 +        init_parallel(cr->mpiDefaultCommunicator, MASTER(cr), inputrec.get(), &mtop,
 +                      partialDeserializedTpr.get());
      }
      GMX_RELEASE_ASSERT(inputrec != nullptr, "All ranks should have a valid inputrec now");
      partialDeserializedTpr.reset(nullptr);
      // the inputrec read by the master rank. The ranks can now all run
      // the task-deciding functions and will agree on the result
      // without needing to communicate.
 -    //
 -    // TODO Should we do the communication in debug mode to support
 -    // having an assertion?
      const bool useDomainDecomposition = (PAR(cr) && !(EI_TPI(inputrec->eI) || inputrec->eI == eiNM));
  
      // Note that these variables describe only their own node.
                  nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
                  gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
          useGpuForPme = decideWhetherToUseGpusForPme(
 -                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec, mtop,
 -                cr->nnodes, domdecOptions.numPmeRanks, gpusWereDetected);
 +                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec,
 +                cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected);
          auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr)
                                    && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
          useGpuForBonded = decideWhetherToUseGpusForBonded(
      const DevelopmentFeatureFlags devFlags =
              manageDevelopmentFeatures(mdlog, useGpuForNonbonded, pmeRunMode);
  
 -    const bool inputIsCompatibleWithModularSimulator = ModularSimulator::isInputCompatible(
 -            false, inputrec, doRerun, mtop, ms, replExParams, nullptr, doEssentialDynamics, doMembed);
 -    const bool useModularSimulator = inputIsCompatibleWithModularSimulator
 -                                     && !(getenv("GMX_DISABLE_MODULAR_SIMULATOR") != nullptr);
 +    const bool useModularSimulator =
 +            checkUseModularSimulator(false, inputrec.get(), doRerun, mtop, ms, replExParams,
 +                                     nullptr, doEssentialDynamics, membedHolder.doMembed());
  
      // Build restraints.
      // TODO: hide restraint implementation details from Mdrunner.
  
      // TODO: Error handling
      mdModules_->assignOptionsToModules(*inputrec->params, nullptr);
 -    const auto& mdModulesNotifier = mdModules_->notifier().notifier_;
 +    // now that the MdModules know their options, they know which callbacks to sign up to
 +    mdModules_->subscribeToSimulationSetupNotifications();
 +    const auto& mdModulesNotifier = mdModules_->notifier().simulationSetupNotifications_;
  
      if (inputrec->internalParameters != nullptr)
      {
  
      if (fplog != nullptr)
      {
 -        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 +        pr_inputrec(fplog, 0, "Input Parameters", inputrec.get(), FALSE);
          fprintf(fplog, "\n");
      }
  
          }
  
          /* now make sure the state is initialized and propagated */
 -        set_state_entries(globalState.get(), inputrec, useModularSimulator);
 +        set_state_entries(globalState.get(), inputrec.get(), useModularSimulator);
      }
  
      /* NM and TPI parallelize over force/energy calculations, not atoms,
          {
              globalState = std::make_unique<t_state>();
          }
 -        broadcastStateWithoutDynamics(cr, globalState.get());
 +        broadcastStateWithoutDynamics(cr->mpiDefaultCommunicator, DOMAINDECOMP(cr), PAR(cr),
 +                                      globalState.get());
      }
  
      /* A parallel command line option consistency check that we can
      {
          if (domdecOptions.numPmeRanks > 0)
          {
 -            gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
 +            gmx_fatal_collective(FARGS, cr->mpiDefaultCommunicator, MASTER(cr),
                                   "PME-only ranks are requested, but the system does not use PME "
                                   "for electrostatics or LJ");
          }
          }
      }
  
- #if GMX_FAHCORE
-     if (MASTER(cr))
-     {
-         fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
-     }
- #endif
      /* NMR restraints must be initialized before load_checkpoint,
       * since with time averaging the history is added to t_state.
       * For proper consistency check we therefore need to extend
       * So the PME-only nodes (if present) will also initialize
       * the distance restraints.
       */
 -    snew(fcd, 1);
  
      /* This needs to be called before read_checkpoint to extend the state */
 -    init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0);
 +    t_disresdata* disresdata;
 +    snew(disresdata, 1);
 +    init_disres(fplog, &mtop, inputrec.get(), DisResRunMode::MDRun,
 +                MASTER(cr) ? DDRole::Master : DDRole::Agent,
 +                PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mysim, ms, disresdata,
 +                globalState.get(), replExParams.exchangeInterval > 0);
  
 -    init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires));
 +    t_oriresdata* oriresdata;
 +    snew(oriresdata, 1);
 +    init_orires(fplog, &mtop, inputrec.get(), cr, ms, globalState.get(), oriresdata);
  
 -    auto deform = prepareBoxDeformation(globalState->box, cr, *inputrec);
 +    auto deform = prepareBoxDeformation(
 +            globalState != nullptr ? globalState->box : box, MASTER(cr) ? DDRole::Master : DDRole::Agent,
 +            PAR(cr) ? NumRanks::Multiple : NumRanks::Single, cr->mpi_comm_mygroup, *inputrec);
  
+ #if GMX_FAHCORE
+     /* We have to remember the generation's first step before reading checkpoint.
+        This way, we can report to the F@H core both the generation's first step
+        and the restored first step, thus making it able to distinguish between
+        an interruption/resume and start of the n-th generation simulation.
+        Having this information, the F@H core can correctly calculate and report
+        the progress.
+      */
+     int gen_first_step = 0;
+     if (MASTER(cr))
+     {
+         gen_first_step = inputrec->init_step;
+     }
+ #endif
      ObservablesHistory observablesHistory = {};
  
 +    auto modularSimulatorCheckpointData = std::make_unique<ReadCheckpointDataHolder>();
      if (startingBehavior != StartingBehavior::NewSimulation)
      {
          /* Check if checkpoint file exists before doing continuation.
              inputrec->nsteps = -1;
          }
  
 -        load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr),
 -                        logFileHandle, cr, domdecOptions.numCells, inputrec, globalState.get(),
 -                        &observablesHistory, mdrunOptions.reproducible, mdModules_->notifier());
 +        // Finish applying initial simulation state information from external sources on all ranks.
 +        // Reconcile checkpoint file data with Mdrunner state established up to this point.
 +        applyLocalState(*inputHolder_.get(), logFileHandle, cr, domdecOptions.numCells,
 +                        inputrec.get(), globalState.get(), &observablesHistory,
 +                        mdrunOptions.reproducible, mdModules_->notifier(),
 +                        modularSimulatorCheckpointData.get(), useModularSimulator);
 +        // TODO: (#3652) Synchronize filesystem state, SimulationInput contents, and program
 +        //  invariants
 +        //  on all code paths.
 +        // Write checkpoint or provide hook to update SimulationInput.
 +        // If there was a checkpoint file, SimulationInput contains more information
 +        // than if there wasn't. At this point, we have synchronized the in-memory
 +        // state with the filesystem state only for restarted simulations. We should
 +        // be calling applyLocalState unconditionally and expect that the completeness
 +        // of SimulationInput is not dependent on its creation method.
  
          if (startingBehavior == StartingBehavior::RestartWithAppending && logFileHandle)
          {
          }
      }
  
+ #if GMX_FAHCORE
+     if (MASTER(cr))
+     {
+         fcRegisterSteps(inputrec->nsteps + inputrec->init_step, gen_first_step);
+     }
+ #endif
      if (mdrunOptions.numStepsCommandline > -2)
      {
          GMX_LOG(mdlog.info)
                          "file field.");
      }
      /* override nsteps with value set on the commandline */
 -    override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec);
 +    override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec.get());
  
 -    if (SIMMASTER(cr))
 +    if (isSimulationMasterRank)
      {
          copy_mat(globalState->box, box);
      }
  
      if (PAR(cr))
      {
 -        gmx_bcast(sizeof(box), box, cr);
 +        gmx_bcast(sizeof(box), box, cr->mpiDefaultCommunicator);
      }
  
      if (inputrec->cutoff_scheme != ecutsVERLET)
                    "Verlet scheme, or use an earlier version of GROMACS if necessary.");
      }
      /* Update rlist and nstlist. */
 -    prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box,
 +    /* Note: prepare_verlet_scheme is calling increaseNstlist(...), which (while attempting to
 +     * increase rlist) tries to check if the newly chosen value fits with the DD scheme. As this is
 +     * run before any DD scheme is set up, this check is never executed. See #3334 for more details.
 +     */
 +    prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box,
                            useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
                            *hwinfo->cpuInfo);
  
 -    const bool prefer1DAnd1PulseDD = (devFlags.enableGpuHaloExchange && useGpuForNonbonded);
      // This builder is necessary while we have multi-part construction
      // of DD. Before DD is constructed, we use the existence of
      // the builder object to indicate that further construction of DD
      if (useDomainDecomposition)
      {
          ddBuilder = std::make_unique<DomainDecompositionBuilder>(
 -                mdlog, cr, domdecOptions, mdrunOptions, prefer1DAnd1PulseDD, mtop, *inputrec, box,
 +                mdlog, cr, domdecOptions, mdrunOptions, mtop, *inputrec, box,
                  positionsFromStatePointer(globalState.get()));
      }
      else
      {
          /* PME, if used, is done on all nodes with 1D decomposition */
 -        cr->npmenodes = 0;
 -        cr->duty      = (DUTY_PP | DUTY_PME);
 +        cr->nnodes     = cr->sizeOfDefaultCommunicator;
 +        cr->sim_nodeid = cr->rankInDefaultCommunicator;
 +        cr->nodeid     = cr->rankInDefaultCommunicator;
 +        cr->npmenodes  = 0;
 +        cr->duty       = (DUTY_PP | DUTY_PME);
  
 -        if (inputrec->ePBC == epbcSCREW)
 +        if (inputrec->pbcType == PbcType::Screw)
          {
              gmx_fatal(FARGS, "pbc=screw is only implemented with domain decomposition");
          }
      }
  
 -    // Produce the task assignment for this rank.
 -    GpuTaskAssignmentsBuilder gpuTaskAssignmentsBuilder;
 -    GpuTaskAssignments        gpuTaskAssignments = gpuTaskAssignmentsBuilder.build(
 -            gpuIdsToUse, userGpuTaskAssignment, *hwinfo, communicator, physicalNodeComm,
 +    // Produce the task assignment for this rank - done after DD is constructed
 +    GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
 +            gpuIdsToUse, userGpuTaskAssignment, *hwinfo, simulationCommunicator, physicalNodeComm,
              nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
              useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
              // TODO cr->duty & DUTY_PME should imply that a PME
              EEL_PME(inputrec->coulombtype) && thisRankHasDuty(cr, DUTY_PME));
  
      // Get the device handles for the modules, nullptr when no task is assigned.
 -    gmx_device_info_t* nonbondedDeviceInfo = gpuTaskAssignments.initNonbondedDevice(cr);
 -    gmx_device_info_t* pmeDeviceInfo       = gpuTaskAssignments.initPmeDevice();
 +    int                deviceId   = -1;
 +    DeviceInformation* deviceInfo = gpuTaskAssignments.initDevice(&deviceId);
 +
 +    // timing enabling - TODO put this in gpu_utils (even though generally this is just option handling?)
 +    bool useTiming = true;
  
 -    // TODO Initialize GPU streams here.
 +    if (GMX_GPU_CUDA)
 +    {
 +        /* WARNING: CUDA timings are incorrect with multiple streams.
 +         *          This is the main reason why they are disabled by default.
 +         */
 +        // TODO: Consider turning on by default when we can detect nr of streams.
 +        useTiming = (getenv("GMX_ENABLE_GPU_TIMING") != nullptr);
 +    }
 +    else if (GMX_GPU_OPENCL)
 +    {
 +        useTiming = (getenv("GMX_DISABLE_GPU_TIMING") == nullptr);
 +    }
  
      // TODO Currently this is always built, yet DD partition code
      // checks if it is built before using it. Probably it should
      const bool printHostName = (cr->nnodes > 1);
      gpuTaskAssignments.reportGpuUsage(mdlog, printHostName, useGpuForBonded, pmeRunMode, useGpuForUpdate);
  
 +    const bool disableNonbondedCalculation = (getenv("GMX_NO_NONBONDED") != nullptr);
 +    if (disableNonbondedCalculation)
 +    {
 +        /* turn off non-bonded calculations */
 +        GMX_LOG(mdlog.warning)
 +                .asParagraph()
 +                .appendText(
 +                        "Found environment variable GMX_NO_NONBONDED.\n"
 +                        "Disabling nonbonded calculations.");
 +    }
 +
 +    MdrunScheduleWorkload runScheduleWork;
 +
 +    bool useGpuDirectHalo = decideWhetherToUseGpuForHalo(
 +            devFlags, havePPDomainDecomposition(cr), useGpuForNonbonded, useModularSimulator,
 +            doRerun, EI_ENERGY_MINIMIZATION(inputrec->eI));
 +
 +    // Also populates the simulation constant workload description.
 +    runScheduleWork.simulationWork = createSimulationWorkload(
 +            *inputrec, disableNonbondedCalculation, devFlags, useGpuForNonbonded, pmeRunMode,
 +            useGpuForBonded, useGpuForUpdate, useGpuDirectHalo);
 +
 +    std::unique_ptr<DeviceStreamManager> deviceStreamManager = nullptr;
 +
 +    if (deviceInfo != nullptr)
 +    {
 +        if (DOMAINDECOMP(cr) && thisRankHasDuty(cr, DUTY_PP))
 +        {
 +            dd_setup_dlb_resource_sharing(cr, deviceId);
 +        }
 +        deviceStreamManager = std::make_unique<DeviceStreamManager>(
 +                *deviceInfo, havePPDomainDecomposition(cr), runScheduleWork.simulationWork, useTiming);
 +    }
 +
      // If the user chose a task assignment, give them some hints
      // where appropriate.
      if (!userGpuTaskAssignment.empty())
      {
 -        gpuTaskAssignments.logPerformanceHints(mdlog, ssize(gpuIdsToUse));
 +        gpuTaskAssignments.logPerformanceHints(mdlog, numDevicesToUse);
      }
  
      if (PAR(cr))
                  .appendTextFormatted(
                          "This is simulation %d out of %d running as a composite GROMACS\n"
                          "multi-simulation job. Setup for this simulation:\n",
 -                        ms->sim, ms->nsim);
 +                        ms->simulationIndex_, ms->numSimulations_);
      }
      GMX_LOG(mdlog.warning)
              .appendTextFormatted("Using %d MPI %s\n", cr->nnodes,
      // Only for DD, only master PP rank needs to perform setup, and only if thread MPI plus
      // any of the GPU communication features are active.
      if (DOMAINDECOMP(cr) && MASTER(cr) && thisRankHasDuty(cr, DUTY_PP) && GMX_THREAD_MPI
 -        && (devFlags.enableGpuHaloExchange || devFlags.enableGpuPmePPComm))
 +        && (runScheduleWork.simulationWork.useGpuHaloExchange
 +            || runScheduleWork.simulationWork.useGpuPmePpCommunication))
      {
          setupGpuDevicePeerAccess(gpuIdsToUse, mdlog);
      }
          /* Master synchronizes its value of reset_counters with all nodes
           * including PME only nodes */
          int64_t reset_counters = wcycle_get_reset_counters(wcycle);
 -        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
 +        gmx_bcast(sizeof(reset_counters), &reset_counters, cr->mpi_comm_mysim);
          wcycle_set_reset_counters(wcycle, reset_counters);
      }
  
      // Membrane embedding must be initialized before we call init_forcerec()
 -    if (doMembed)
 -    {
 -        if (MASTER(cr))
 -        {
 -            fprintf(stderr, "Initializing membed");
 -        }
 -        /* Note that membed cannot work in parallel because mtop is
 -         * changed here. Fix this if we ever want to make it run with
 -         * multiple ranks. */
 -        membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec,
 -                             globalState.get(), cr, &mdrunOptions.checkpointOptions.period);
 -    }
 +    membedHolder.initializeMembed(fplog, filenames.size(), filenames.data(), &mtop, inputrec.get(),
 +                                  globalState.get(), cr, &mdrunOptions.checkpointOptions.period);
  
 -    const bool                   thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
 -    std::unique_ptr<MDAtoms>     mdAtoms;
 -    std::unique_ptr<gmx_vsite_t> vsite;
 +    const bool               thisRankHasPmeGpuTask = gpuTaskAssignments.thisRankHasPmeGpuTask();
 +    std::unique_ptr<MDAtoms> mdAtoms;
 +    std::unique_ptr<VirtualSitesHandler> vsite;
 +    std::unique_ptr<GpuBonded>           gpuBonded;
  
      t_nrnb nrnb;
      if (thisRankHasDuty(cr, DUTY_PP))
      {
          mdModulesNotifier.notify(*cr);
          mdModulesNotifier.notify(&atomSets);
 -        mdModulesNotifier.notify(PeriodicBoundaryConditionType{ inputrec->ePBC });
 +        mdModulesNotifier.notify(inputrec->pbcType);
          mdModulesNotifier.notify(SimulationTimeStep{ inputrec->delta_t });
          /* Initiate forcerecord */
          fr                 = new t_forcerec;
          fr->forceProviders = mdModules_->initForceProviders();
 -        init_forcerec(fplog, mdlog, fr, fcd, inputrec, &mtop, cr, box,
 +        init_forcerec(fplog, mdlog, fr, inputrec.get(), &mtop, cr, box,
                        opt2fn("-table", filenames.size(), filenames.data()),
                        opt2fn("-tablep", filenames.size(), filenames.data()),
 -                      opt2fns("-tableb", filenames.size(), filenames.data()), *hwinfo,
 -                      nonbondedDeviceInfo, useGpuForBonded,
 -                      pmeRunMode == PmeRunMode::GPU && !thisRankHasDuty(cr, DUTY_PME), pforce, wcycle);
 -
 -        // TODO Move this to happen during domain decomposition setup,
 -        // once stream and event handling works well with that.
 -        // TODO remove need to pass local stream into GPU halo exchange - Redmine #3093
 -        if (havePPDomainDecomposition(cr) && prefer1DAnd1PulseDD && is1DAnd1PulseDD(*cr->dd))
 +                      opt2fns("-tableb", filenames.size(), filenames.data()), pforce);
 +        // Dirty hack, for fixing disres and orires should be made mdmodules
 +        fr->fcdata->disres = disresdata;
 +        fr->fcdata->orires = oriresdata;
 +
 +        // Save a handle to device stream manager to use elsewhere in the code
 +        // TODO: Forcerec is not a correct place to store it.
 +        fr->deviceStreamManager = deviceStreamManager.get();
 +
 +        if (runScheduleWork.simulationWork.useGpuPmePpCommunication && !thisRankHasDuty(cr, DUTY_PME))
          {
 -            GMX_RELEASE_ASSERT(devFlags.enableGpuBufferOps,
 -                               "Must use GMX_USE_GPU_BUFFER_OPS=1 to use GMX_GPU_DD_COMMS=1");
 -            void* streamLocal =
 -                    Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local);
 -            void* streamNonLocal =
 -                    Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal);
 -            GMX_LOG(mdlog.warning)
 -                    .asParagraph()
 -                    .appendTextFormatted(
 -                            "NOTE: This run uses the 'GPU halo exchange' feature, enabled by the "
 -                            "GMX_GPU_DD_COMMS environment variable.");
 -            cr->dd->gpuHaloExchange = std::make_unique<GpuHaloExchange>(
 -                    cr->dd, cr->mpi_comm_mysim, streamLocal, streamNonLocal);
 +            GMX_RELEASE_ASSERT(
 +                    deviceStreamManager != nullptr,
 +                    "GPU device stream manager should be valid in order to use PME-PP direct "
 +                    "communications.");
 +            GMX_RELEASE_ASSERT(
 +                    deviceStreamManager->streamIsValid(DeviceStreamType::PmePpTransfer),
 +                    "GPU PP-PME stream should be valid in order to use GPU PME-PP direct "
 +                    "communications.");
 +            fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(
 +                    cr->mpi_comm_mysim, cr->dd->pme_nodeid, deviceStreamManager->context(),
 +                    deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
 +        }
 +
 +        fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo,
 +                                        runScheduleWork.simulationWork.useGpuNonbonded,
 +                                        deviceStreamManager.get(), &mtop, box, wcycle);
 +        // TODO: Move the logic below to a GPU bonded builder
 +        if (runScheduleWork.simulationWork.useGpuBonded)
 +        {
 +            GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
 +                               "GPU device stream manager should be valid in order to use GPU "
 +                               "version of bonded forces.");
 +            gpuBonded = std::make_unique<GpuBonded>(
 +                    mtop.ffparams, fr->ic->epsfac * fr->fudgeQQ, deviceStreamManager->context(),
 +                    deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle);
 +            fr->gpuBonded = gpuBonded.get();
          }
  
          /* Initialize the mdAtoms structure.
          }
  
          /* Initialize the virtual site communication */
 -        vsite = initVsite(mtop, cr);
 +        vsite = makeVirtualSitesHandler(mtop, cr, fr->pbcType);
  
          calc_shifts(box, fr->shift_vec);
  
          /* With periodic molecules the charge groups should be whole at start up
           * and the virtual sites should not be far from their proper positions.
           */
 -        if (!inputrec->bContinuation && MASTER(cr) && !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        if (!inputrec->bContinuation && MASTER(cr)
 +            && !(inputrec->pbcType != PbcType::No && inputrec->bPeriodicMols))
          {
              /* Make molecules whole at start of run */
 -            if (fr->ePBC != epbcNONE)
 +            if (fr->pbcType != PbcType::No)
              {
 -                do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
 +                do_pbc_first_mtop(fplog, inputrec->pbcType, box, &mtop, globalState->x.rvec_array());
              }
              if (vsite)
              {
                   * for the initial distribution in the domain decomposition
                   * and for the initial shell prediction.
                   */
 -                constructVsitesGlobal(mtop, globalState->x);
 +                constructVirtualSitesGlobal(mtop, globalState->x);
              }
          }
  
      PmeGpuProgramStorage pmeGpuProgram;
      if (thisRankHasPmeGpuTask)
      {
 -        pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
 +        GMX_RELEASE_ASSERT(
 +                (deviceStreamManager != nullptr),
 +                "GPU device stream manager should be initialized in order to use GPU for PME.");
 +        GMX_RELEASE_ASSERT((deviceInfo != nullptr),
 +                           "GPU device should be initialized in order to use GPU for PME.");
 +        pmeGpuProgram = buildPmeGpuProgram(deviceStreamManager->context());
      }
  
      /* Initiate PME if necessary,
          if (cr->npmenodes > 0)
          {
              /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
 -            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
 -            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
 +            gmx_bcast(sizeof(nChargePerturbed), &nChargePerturbed, cr->mpi_comm_mysim);
 +            gmx_bcast(sizeof(nTypePerturbed), &nTypePerturbed, cr->mpi_comm_mysim);
          }
  
          if (thisRankHasDuty(cr, DUTY_PME))
          {
              try
              {
 -                pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec, nChargePerturbed != 0,
 -                                       nTypePerturbed != 0, mdrunOptions.reproducible, ewaldcoeff_q,
 -                                       ewaldcoeff_lj, gmx_omp_nthreads_get(emntPME), pmeRunMode,
 -                                       nullptr, pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
 +                // TODO: This should be in the builder.
 +                GMX_RELEASE_ASSERT(!runScheduleWork.simulationWork.useGpuPme
 +                                           || (deviceStreamManager != nullptr),
 +                                   "Device stream manager should be valid in order to use GPU "
 +                                   "version of PME.");
 +                GMX_RELEASE_ASSERT(
 +                        !runScheduleWork.simulationWork.useGpuPme
 +                                || deviceStreamManager->streamIsValid(DeviceStreamType::Pme),
 +                        "GPU PME stream should be valid in order to use GPU version of PME.");
 +
 +                const DeviceContext* deviceContext = runScheduleWork.simulationWork.useGpuPme
 +                                                             ? &deviceStreamManager->context()
 +                                                             : nullptr;
 +                const DeviceStream* pmeStream =
 +                        runScheduleWork.simulationWork.useGpuPme
 +                                ? &deviceStreamManager->stream(DeviceStreamType::Pme)
 +                                : nullptr;
 +
 +                pmedata = gmx_pme_init(cr, getNumPmeDomains(cr->dd), inputrec.get(),
 +                                       nChargePerturbed != 0, nTypePerturbed != 0,
 +                                       mdrunOptions.reproducible, ewaldcoeff_q, ewaldcoeff_lj,
 +                                       gmx_omp_nthreads_get(emntPME), pmeRunMode, nullptr,
 +                                       deviceContext, pmeStream, pmeGpuProgram.get(), mdlog);
              }
              GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
          }
          if (inputrec->bPull)
          {
              /* Initialize pull code */
 -            pull_work = init_pull(fplog, inputrec->pull, inputrec, &mtop, cr, &atomSets,
 +            pull_work = init_pull(fplog, inputrec->pull, inputrec.get(), &mtop, cr, &atomSets,
                                    inputrec->fepvals->init_lambda);
              if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage)
              {
          if (inputrec->bRot)
          {
              /* Initialize enforced rotation code */
 -            enforcedRotation =
 -                    init_rot(fplog, inputrec, filenames.size(), filenames.data(), cr, &atomSets,
 -                             globalState.get(), &mtop, oenv, mdrunOptions, startingBehavior);
 +            enforcedRotation = init_rot(fplog, inputrec.get(), filenames.size(), filenames.data(),
 +                                        cr, &atomSets, globalState.get(), &mtop, oenv, mdrunOptions,
 +                                        startingBehavior);
          }
  
          t_swap* swap = nullptr;
          if (inputrec->eSwapCoords != eswapNO)
          {
              /* Initialize ion swapping code */
 -            swap = init_swapcoords(fplog, inputrec,
 +            swap = init_swapcoords(fplog, inputrec.get(),
                                     opt2fn_master("-swap", filenames.size(), filenames.data(), cr),
                                     &mtop, globalState.get(), &observablesHistory, cr, &atomSets,
                                     oenv, mdrunOptions, startingBehavior);
          }
  
          /* Let makeConstraints know whether we have essential dynamics constraints. */
 -        auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog,
 -                                      *mdAtoms->mdatoms(), cr, ms, &nrnb, wcycle, fr->bMolPBC);
 +        auto constr = makeConstraints(mtop, *inputrec, pull_work, doEssentialDynamics, fplog, cr,
 +                                      ms, &nrnb, wcycle, fr->bMolPBC);
  
          /* Energy terms and groups */
          gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
  
          /* Set up interactive MD (IMD) */
          auto imdSession =
 -                makeImdSession(inputrec, cr, wcycle, &enerd, ms, &mtop, mdlog,
 +                makeImdSession(inputrec.get(), cr, wcycle, &enerd, ms, &mtop, mdlog,
                                 MASTER(cr) ? globalState->x.rvec_array() : nullptr, filenames.size(),
                                 filenames.data(), oenv, mdrunOptions.imdOptions, startingBehavior);
  
              /* This call is not included in init_domain_decomposition mainly
               * because fr->cginfo_mb is set later.
               */
 -            dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
 +            dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec.get(),
                              domdecOptions.checkBondedInteractions, fr->cginfo_mb);
          }
  
 -        // TODO This is not the right place to manage the lifetime of
 -        // this data structure, but currently it's the easiest way to
 -        // make it work.
 -        MdrunScheduleWorkload runScheduleWork;
 -        // Also populates the simulation constant workload description.
 -        runScheduleWork.simulationWork = createSimulationWorkload(
 -                useGpuForNonbonded, pmeRunMode, useGpuForBonded, useGpuForUpdate,
 -                devFlags.enableGpuBufferOps, devFlags.enableGpuHaloExchange,
 -                devFlags.enableGpuPmePPComm, haveEwaldSurfaceContribution(*inputrec));
 +        if (runScheduleWork.simulationWork.useGpuBufferOps)
 +        {
 +            fr->gpuForceReduction[gmx::AtomLocality::Local] = std::make_unique<gmx::GpuForceReduction>(
 +                    deviceStreamManager->context(),
 +                    deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedLocal));
 +            fr->gpuForceReduction[gmx::AtomLocality::NonLocal] = std::make_unique<gmx::GpuForceReduction>(
 +                    deviceStreamManager->context(),
 +                    deviceStreamManager->stream(gmx::DeviceStreamType::NonBondedNonLocal));
 +        }
  
          std::unique_ptr<gmx::StatePropagatorDataGpu> stateGpu;
          if (gpusWereDetected
 -            && ((useGpuForPme && thisRankHasDuty(cr, DUTY_PME))
 +            && ((runScheduleWork.simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME))
                  || runScheduleWork.simulationWork.useGpuBufferOps))
          {
 -            const void* pmeStream = pme_gpu_get_device_stream(fr->pmedata);
 -            const void* localStream =
 -                    fr->nbv->gpu_nbv != nullptr
 -                            ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::Local)
 -                            : nullptr;
 -            const void* nonLocalStream =
 -                    fr->nbv->gpu_nbv != nullptr
 -                            ? Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv, InteractionLocality::NonLocal)
 -                            : nullptr;
 -            const void*        deviceContext = pme_gpu_get_device_context(fr->pmedata);
 -            const int          paddingSize   = pme_gpu_get_padding_size(fr->pmedata);
              GpuApiCallBehavior transferKind = (inputrec->eI == eiMD && !doRerun && !useModularSimulator)
                                                        ? GpuApiCallBehavior::Async
                                                        : GpuApiCallBehavior::Sync;
 -
 +            GMX_RELEASE_ASSERT(deviceStreamManager != nullptr,
 +                               "GPU device stream manager should be initialized to use GPU.");
              stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
 -                    pmeStream, localStream, nonLocalStream, deviceContext, transferKind, paddingSize, wcycle);
 +                    *deviceStreamManager, transferKind, pme_gpu_get_block_size(fr->pmedata), wcycle);
              fr->stateGpu = stateGpu.get();
          }
  
          GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to simulator.");
          SimulatorBuilder simulatorBuilder;
  
 +        simulatorBuilder.add(SimulatorStateData(globalState.get(), &observablesHistory, &enerd, &ekind));
 +        simulatorBuilder.add(std::move(membedHolder));
 +        simulatorBuilder.add(std::move(stopHandlerBuilder_));
 +        simulatorBuilder.add(SimulatorConfig(mdrunOptions, startingBehavior, &runScheduleWork));
 +
 +
 +        simulatorBuilder.add(SimulatorEnv(fplog, cr, ms, mdlog, oenv));
 +        simulatorBuilder.add(Profiling(&nrnb, walltime_accounting, wcycle));
 +        simulatorBuilder.add(ConstraintsParam(
 +                constr.get(), enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr,
 +                vsite.get()));
 +        // TODO: Separate `fr` to a separate add, and make the `build` handle the coupling sensibly.
 +        simulatorBuilder.add(LegacyInput(static_cast<int>(filenames.size()), filenames.data(),
 +                                         inputrec.get(), fr));
 +        simulatorBuilder.add(ReplicaExchangeParameters(replExParams));
 +        simulatorBuilder.add(InteractiveMD(imdSession.get()));
 +        simulatorBuilder.add(SimulatorModules(mdModules_->outputProvider(), mdModules_->notifier()));
 +        simulatorBuilder.add(CenterOfMassPulling(pull_work));
 +        // Todo move to an MDModule
 +        simulatorBuilder.add(IonSwapping(swap));
 +        simulatorBuilder.add(TopologyData(&mtop, mdAtoms.get()));
 +        simulatorBuilder.add(BoxDeformationHandle(deform.get()));
 +        simulatorBuilder.add(std::move(modularSimulatorCheckpointData));
 +
          // build and run simulator object based on user-input
 -        auto simulator = simulatorBuilder.build(
 -                inputIsCompatibleWithModularSimulator, fplog, cr, ms, mdlog,
 -                static_cast<int>(filenames.size()), filenames.data(), oenv, mdrunOptions,
 -                startingBehavior, vsite.get(), constr.get(),
 -                enforcedRotation ? enforcedRotation->getLegacyEnfrot() : nullptr, deform.get(),
 -                mdModules_->outputProvider(), mdModules_->notifier(), inputrec, imdSession.get(),
 -                pull_work, swap, &mtop, fcd, globalState.get(), &observablesHistory, mdAtoms.get(),
 -                &nrnb, wcycle, fr, &enerd, &ekind, &runScheduleWork, replExParams, membed,
 -                walltime_accounting, std::move(stopHandlerBuilder_), doRerun);
 +        auto simulator = simulatorBuilder.build(useModularSimulator);
          simulator->run();
  
          if (fr->pmePpCommGpu)
          GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
          /* do PME only */
          walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
 -        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode);
 +        gmx_pmeonly(pmedata, cr, &nrnb, wcycle, walltime_accounting, inputrec.get(), pmeRunMode,
 +                    deviceStreamManager.get());
      }
  
      wallcycle_stop(wcycle, ewcRUN);
      /* Finish up, write some stuff
       * if rerunMD, don't write last frame again
       */
 -    finish_run(fplog, mdlog, cr, inputrec, &nrnb, wcycle, walltime_accounting,
 +    finish_run(fplog, mdlog, cr, inputrec.get(), &nrnb, wcycle, walltime_accounting,
                 fr ? fr->nbv.get() : nullptr, pmedata, EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms));
  
      // clean up cycle counter
      wallcycle_destroy(wcycle);
  
 +    deviceStreamManager.reset(nullptr);
      // Free PME data
      if (pmedata)
      {
      }
  
      // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
 -    // before we destroy the GPU context(s) in free_gpu_resources().
 +    // before we destroy the GPU context(s)
      // Pinned buffers are associated with contexts in CUDA.
      // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
      mdAtoms.reset(nullptr);
      globalState.reset(nullptr);
      mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
 +    gpuBonded.reset(nullptr);
 +    /* Free pinned buffers in *fr */
 +    delete fr;
 +    fr = nullptr;
 +    // TODO convert to C++ so we can get rid of these frees
 +    sfree(disresdata);
 +    sfree(oriresdata);
  
 -    /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
 -    free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
 -    free_gpu(nonbondedDeviceInfo);
 -    free_gpu(pmeDeviceInfo);
 -    done_forcerec(fr, mtop.molblock.size());
 -    sfree(fcd);
 +    if (!hwinfo->deviceInfoList.empty())
 +    {
 +        /* stop the GPU profiler (only CUDA) */
 +        stopGpuProfiler();
 +    }
  
 -    if (doMembed)
 +    /* With tMPI we need to wait for all ranks to finish deallocation before
 +     * destroying the CUDA context as some tMPI ranks may be sharing
 +     * GPU and context.
 +     *
 +     * This is not a concern in OpenCL where we use one context per rank.
 +     *
 +     * Note: it is safe to not call the barrier on the ranks which do not use GPU,
 +     * but it is easier and more futureproof to call it on the whole node.
 +     *
 +     * Note that this function needs to be called even if GPUs are not used
 +     * in this run because the PME ranks have no knowledge of whether GPUs
 +     * are used or not, but all ranks need to enter the barrier below.
 +     * \todo Remove this physical node barrier after making sure
 +     * that it's not needed anymore (with a shared GPU run).
 +     */
 +    if (GMX_THREAD_MPI)
      {
 -        free_membed(membed);
 +        physicalNodeComm.barrier();
      }
 +    releaseDevice(deviceInfo);
  
      /* Does what it says */
      print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
      /* we need to join all threads. The sub-threads join when they
         exit this function, but the master thread needs to be told to
         wait for that. */
 -    if (PAR(cr) && MASTER(cr))
 +    if (MASTER(cr))
      {
          tMPI_Finalize();
      }
  #endif
      return rc;
 -}
 +} // namespace gmx
  
  Mdrunner::~Mdrunner()
  {
@@@ -1878,7 -1711,8 +1889,7 @@@ Mdrunner::Mdrunner(std::unique_ptr<MDMo
  
  Mdrunner::Mdrunner(Mdrunner&&) noexcept = default;
  
 -//NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265
 -Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default;
 +Mdrunner& Mdrunner::operator=(Mdrunner&& /*handle*/) noexcept = default;
  
  class Mdrunner::BuilderImplementation
  {
@@@ -1893,8 -1727,6 +1904,8 @@@ public
  
      void addDomdec(const DomdecOptions& options);
  
 +    void addInput(SimulationInputHandle inputHolder);
 +
      void addVerletList(int nstlist);
  
      void addReplicaExchange(const ReplicaExchangeParameters& params);
@@@ -1938,14 -1770,11 +1949,14 @@@ private
      //! Command-line override for the duration of a neighbor list with the Verlet scheme.
      int nstlist_ = 0;
  
 +    //! World communicator, used for hardware detection and task assignment
 +    MPI_Comm libraryWorldCommunicator_ = MPI_COMM_NULL;
 +
      //! Multisim communicator handle.
      gmx_multisim_t* multiSimulation_;
  
      //! mdrun communicator
 -    MPI_Comm communicator_ = MPI_COMM_NULL;
 +    MPI_Comm simulationCommunicator_ = MPI_COMM_NULL;
  
      //! Print a warning if any force is larger than this (in kJ/mol nm).
      real forceWarningThreshold_ = -1;
       * \brief Builder for simulation stop signal handler.
       */
      std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr;
 +
 +    /*!
 +     * \brief Sources for initial simulation state.
 +     *
 +     * See issue #3652 for near-term refinements to the SimulationInput interface.
 +     *
 +     * See issue #3379 for broader discussion on API aspects of simulation inputs and outputs.
 +     */
 +    SimulationInputHandle inputHolder_;
  };
  
  Mdrunner::BuilderImplementation::BuilderImplementation(std::unique_ptr<MDModules> mdModules,
                                                         compat::not_null<SimulationContext*> context) :
      mdModules_(std::move(mdModules))
  {
 -    communicator_    = context->communicator_;
 -    multiSimulation_ = context->multiSimulation_.get();
 +    libraryWorldCommunicator_ = context->libraryWorldCommunicator_;
 +    simulationCommunicator_   = context->simulationCommunicator_;
 +    multiSimulation_          = context->multiSimulation_.get();
  }
  
  Mdrunner::BuilderImplementation::~BuilderImplementation() = default;
@@@ -2049,22 -1868,11 +2060,22 @@@ Mdrunner Mdrunner::BuilderImplementatio
  
      newRunner.filenames = filenames_;
  
 -    newRunner.communicator = communicator_;
 +    newRunner.libraryWorldCommunicator = libraryWorldCommunicator_;
 +
 +    newRunner.simulationCommunicator = simulationCommunicator_;
  
      // nullptr is a valid value for the multisim handle
      newRunner.ms = multiSimulation_;
  
 +    if (inputHolder_)
 +    {
 +        newRunner.inputHolder_ = std::move(inputHolder_);
 +    }
 +    else
 +    {
 +        GMX_THROW(gmx::APIError("MdrunnerBuilder::addInput() is required before build()."));
 +    }
 +
      // \todo Clarify ownership and lifetime management for gmx_output_env_t
      // \todo Update sanity checking when output environment has clearly specified invariants.
      // Initialization and default values for oenv are not well specified in the current version.
@@@ -2180,11 -1988,6 +2191,11 @@@ void Mdrunner::BuilderImplementation::a
      stopHandlerBuilder_ = std::move(builder);
  }
  
 +void Mdrunner::BuilderImplementation::addInput(SimulationInputHandle inputHolder)
 +{
 +    inputHolder_ = std::move(inputHolder);
 +}
 +
  MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
                                   compat::not_null<SimulationContext*> context) :
      impl_{ std::make_unique<Mdrunner::BuilderImplementation>(std::move(mdModules), context) }
@@@ -2289,12 -2092,6 +2300,12 @@@ MdrunnerBuilder& MdrunnerBuilder::addSt
      return *this;
  }
  
 +MdrunnerBuilder& MdrunnerBuilder::addInput(SimulationInputHandle input)
 +{
 +    impl_->addInput(std::move(input));
 +    return *this;
 +}
 +
  MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder&&) noexcept = default;
  
  MdrunnerBuilder& MdrunnerBuilder::operator=(MdrunnerBuilder&&) noexcept = default;
index bcc6dc5c384bbad413b65489b53301e3b9d4f58b,b83d327916e3548d604648d80173497882c4f3e2..89e6d622ac270a39ac566501ac558da92de7934b
@@@ -50,6 -50,8 +50,8 @@@
  #ifndef GMX_MDTYPES_STATE_PROPAGATOR_DATA_GPU_H
  #define GMX_MDTYPES_STATE_PROPAGATOR_DATA_GPU_H
  
+ #include <tuple>
  #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  #include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/math/vectypes.h"
  
  #include "locality.h"
  
 +class DeviceContext;
 +class DeviceStream;
  class GpuEventSynchronizer;
  struct gmx_wallcycle;
  
  namespace gmx
  {
 +class DeviceStreamManager;
  
  class StatePropagatorDataGpu
  {
@@@ -87,15 -86,40 +89,15 @@@ public
       * ops are offloaded. This feature is currently not available in OpenCL and
       * hence these streams are not set in these builds.
       *
 -     * \note In CUDA, the update stream is created in the constructor as a temporary
 -     *       solution, in place until the stream manager is introduced.
 -     *       Note that this makes it impossible to construct this object in CUDA
 -     *       builds executing on a host without any CUDA-capable device available.
 -     *
 -     * \note In CUDA, \p deviceContext is unused, hence always nullptr;
 -     *       all stream arguments can also be nullptr in runs where the
 -     *       respective streams are not required.
 -     *       In OpenCL, \p deviceContext needs to be a valid device context.
 -     *       In OpenCL runs StatePropagatorDataGpu is currently only used
 -     *       with PME offload, and only on ranks with PME duty. Hence, the
 -     *       \p pmeStream argument needs to be a valid OpenCL queue object
 -     *       which must have been created in \p deviceContext.
 -     *
 -     * \todo Make a \p CommandStream visible in the CPU parts of the code so we
 -     *       will not have to pass a void*.
 -     * \todo Make a \p DeviceContext object visible in CPU parts of the code so we
 -     *       will not have to pass a void*.
 -     *
 -     *  \param[in] pmeStream       Device PME stream, nullptr allowed.
 -     *  \param[in] localStream     Device NBNXM local stream, nullptr allowed.
 -     *  \param[in] nonLocalStream  Device NBNXM non-local stream, nullptr allowed.
 -     *  \param[in] deviceContext   Device context, nullptr allowed.
 -     *  \param[in] transferKind    H2D/D2H transfer call behavior (synchronous or not).
 -     *  \param[in] paddingSize     Padding size for coordinates buffer.
 -     *  \param[in] wcycle          Wall cycle counter data.
 +     *  \param[in] deviceStreamManager         Object that owns the DeviceContext and DeviceStreams.
 +     *  \param[in] transferKind                H2D/D2H transfer call behavior (synchronous or not).
 +     *  \param[in] allocationBlockSizeDivisor  Determines padding size for coordinates buffer.
 +     *  \param[in] wcycle                      Wall cycle counter data.
       */
 -    StatePropagatorDataGpu(const void*        pmeStream,
 -                           const void*        localStream,
 -                           const void*        nonLocalStream,
 -                           const void*        deviceContext,
 -                           GpuApiCallBehavior transferKind,
 -                           int                paddingSize,
 -                           gmx_wallcycle*     wcycle);
 +    StatePropagatorDataGpu(const DeviceStreamManager& deviceStreamManager,
 +                           GpuApiCallBehavior         transferKind,
 +                           int                        allocationBlockSizeDivisor,
 +                           gmx_wallcycle*             wcycle);
  
      /*! \brief Constructor to use in PME-only rank and in tests.
       *
       *  \param[in] pmeStream       Device PME stream, nullptr is not allowed.
       *  \param[in] deviceContext   Device context, nullptr allowed for non-OpenCL builds.
       *  \param[in] transferKind    H2D/D2H transfer call behavior (synchronous or not).
 -     *  \param[in] paddingSize     Padding size for coordinates buffer.
 +     *  \param[in] allocationBlockSizeDivisor Determines padding size for coordinates buffer.
       *  \param[in] wcycle          Wall cycle counter data.
       */
 -    StatePropagatorDataGpu(const void*        pmeStream,
 -                           const void*        deviceContext,
 -                           GpuApiCallBehavior transferKind,
 -                           int                paddingSize,
 -                           gmx_wallcycle*     wcycle);
 +    StatePropagatorDataGpu(const DeviceStream*  pmeStream,
 +                           const DeviceContext& deviceContext,
 +                           GpuApiCallBehavior   transferKind,
 +                           int                  allocationBlockSizeDivisor,
 +                           gmx_wallcycle*       wcycle);
  
      //! Move constructor
      StatePropagatorDataGpu(StatePropagatorDataGpu&& other) noexcept;
       *
       *  \returns GPU positions buffer.
       */
 -    DeviceBuffer<float> getCoordinates();
 +    DeviceBuffer<RVec> getCoordinates();
  
      /*! \brief Copy positions to the GPU memory.
       *
       *
       *  \returns GPU velocities buffer.
       */
 -    DeviceBuffer<float> getVelocities();
 +    DeviceBuffer<RVec> getVelocities();
  
      /*! \brief Copy velocities to the GPU memory.
       *
       *
       *  \returns GPU force buffer.
       */
 -    DeviceBuffer<float> getForces();
 +    DeviceBuffer<RVec> getForces();
  
      /*! \brief Copy forces to the GPU memory.
       *
       *
       *  \returns The device command stream to use in update-constraints.
       */
 -    void* getUpdateStream();
 +    const DeviceStream* getUpdateStream();
  
      /*! \brief Getter for the number of local atoms.
       *
index 97b824ce78679ae2819fd58b955cbfdd132e6dc8,5d1834ee947ae31a553e6e59e03f0be2da55bbcb..2e61e90b08c0f5268cf5d934c9259d6eb4877314
  
  #include "gromacs/fileio/xvgr.h"
  #include "gromacs/math/functions.h"
 +#include "gromacs/math/multidimarray.h"
  #include "gromacs/math/units.h"
  #include "gromacs/math/utilities.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/mdspan/extensions.h"
  #include "gromacs/mdtypes/fcdata.h"
 +#include "gromacs/mdtypes/interaction_const.h"
  #include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/mdtypes/nblist.h"
  #include "gromacs/utility/arrayref.h"
@@@ -79,6 -76,9 +79,6 @@@ enu
      etabLJ6Switch,
      etabLJ12Switch,
      etabCOULSwitch,
 -    etabLJ6Encad,
 -    etabLJ12Encad,
 -    etabCOULEncad,
      etabEXPMIN,
      etabUSER,
      etabNR
@@@ -96,12 -96,27 +96,12 @@@ typedef struc
  /* This structure holds name and a flag that tells whether
     this is a Coulomb type funtion */
  static const t_tab_props tprops[etabNR] = {
 -    { "LJ6", FALSE },
 -    { "LJ12", FALSE },
 -    { "LJ6Shift", FALSE },
 -    { "LJ12Shift", FALSE },
 -    { "Shift", TRUE },
 -    { "RF", TRUE },
 -    { "RF-zero", TRUE },
 -    { "COUL", TRUE },
 -    { "Ewald", TRUE },
 -    { "Ewald-Switch", TRUE },
 -    { "Ewald-User", TRUE },
 -    { "Ewald-User-Switch", TRUE },
 -    { "LJ6Ewald", FALSE },
 -    { "LJ6Switch", FALSE },
 -    { "LJ12Switch", FALSE },
 -    { "COULSwitch", TRUE },
 -    { "LJ6-Encad shift", FALSE },
 -    { "LJ12-Encad shift", FALSE },
 -    { "COUL-Encad shift", TRUE },
 -    { "EXPMIN", FALSE },
 -    { "USER", FALSE },
 +    { "LJ6", FALSE },         { "LJ12", FALSE },      { "LJ6Shift", FALSE },
 +    { "LJ12Shift", FALSE },   { "Shift", TRUE },      { "RF", TRUE },
 +    { "RF-zero", TRUE },      { "COUL", TRUE },       { "Ewald", TRUE },
 +    { "Ewald-Switch", TRUE }, { "Ewald-User", TRUE }, { "Ewald-User-Switch", TRUE },
 +    { "LJ6Ewald", FALSE },    { "LJ6Switch", FALSE }, { "LJ12Switch", FALSE },
 +    { "COULSwitch", TRUE },   { "EXPMIN", FALSE },    { "USER", FALSE },
  };
  
  typedef struct
@@@ -583,23 -598,19 +583,23 @@@ static void set_forces(FILE* fp, int an
  static void read_tables(FILE* fp, const char* filename, int ntab, int angle, t_tabledata td[])
  {
      char     buf[STRLEN];
 -    double **yy = nullptr, start, end, dx0, dx1, ssd, vm, vp, f, numf;
 -    int      k, i, nx, nx0 = 0, ny, nny, ns;
 +    double   start, end, dx0, dx1, ssd, vm, vp, f, numf;
 +    int      k, i, nx0 = 0, nny, ns;
      gmx_bool bAllZero, bZeroV, bZeroF;
      double   tabscale;
  
      nny               = 2 * ntab + 1;
      std::string libfn = gmx::findLibraryFile(filename);
 -    nx                = read_xvg(libfn.c_str(), &yy, &ny);
 -    if (ny != nny)
 +    gmx::MultiDimArray<std::vector<double>, gmx::dynamicExtents2D> xvgData    = readXvgData(libfn);
 +    int                                                            numColumns = xvgData.extent(0);
 +    if (numColumns != nny)
      {
          gmx_fatal(FARGS, "Trying to read file %s, but nr columns = %d, should be %d", libfn.c_str(),
 -                  ny, nny);
 +                  numColumns, nny);
      }
 +    int numRows = xvgData.extent(1);
 +
 +    const auto& yy = xvgData.asView();
      if (angle == 0)
      {
          if (yy[0][0] != 0.0)
              start = -180.0;
          }
          end = 180.0;
 -        if (yy[0][0] != start || yy[0][nx - 1] != end)
 +        if (yy[0][0] != start || yy[0][numRows - 1] != end)
          {
              gmx_fatal(FARGS, "The angles in file %s should go from %f to %f instead of %f to %f\n",
 -                      libfn.c_str(), start, end, yy[0][0], yy[0][nx - 1]);
 +                      libfn.c_str(), start, end, yy[0][0], yy[0][numRows - 1]);
          }
      }
  
 -    tabscale = (nx - 1) / (yy[0][nx - 1] - yy[0][0]);
 +    tabscale = (numRows - 1) / (yy[0][numRows - 1] - yy[0][0]);
  
      if (fp)
      {
 -        fprintf(fp, "Read user tables from %s with %d data points.\n", libfn.c_str(), nx);
 +        fprintf(fp, "Read user tables from %s with %d data points.\n", libfn.c_str(), numRows);
          if (angle == 0)
          {
              fprintf(fp, "Tabscale = %g points/nm\n", tabscale);
      {
          bZeroV = TRUE;
          bZeroF = TRUE;
 -        for (i = 0; (i < nx); i++)
 +        for (i = 0; (i < numRows); i++)
          {
              if (i >= 2)
              {
  
          if (!bZeroV && bZeroF)
          {
 -            set_forces(fp, angle, nx, 1 / tabscale, yy[1 + k * 2], yy[1 + k * 2 + 1], k);
 +            set_forces(fp, angle, numRows, 1 / tabscale, yy[1 + k * 2].data(),
 +                       yy[1 + k * 2 + 1].data(), k);
          }
          else
          {
               */
              ssd = 0;
              ns  = 0;
 -            for (i = 1; (i < nx - 1); i++)
 +            for (i = 1; (i < numRows - 1); i++)
              {
                  vm = yy[1 + 2 * k][i - 1];
                  vp = yy[1 + 2 * k][i + 1];
  
      for (k = 0; (k < ntab); k++)
      {
 -        init_table(nx, nx0, tabscale, &(td[k]), TRUE);
 -        for (i = 0; (i < nx); i++)
 +        init_table(numRows, nx0, tabscale, &(td[k]), TRUE);
 +        for (i = 0; (i < numRows); i++)
          {
              td[k].x[i] = yy[0][i];
              td[k].v[i] = yy[2 * k + 1][i];
              td[k].f[i] = yy[2 * k + 2][i];
          }
      }
 -    for (i = 0; (i < ny); i++)
 -    {
 -        sfree(yy[i]);
 -    }
 -    sfree(yy);
  }
  
  static void done_tabledata(t_tabledata* td)
@@@ -781,7 -796,7 +781,7 @@@ static void fill_table(t_tabledata* td
      int    i;
      double reppow, p;
      double r1, rc, r12, r13;
 -    double r, r2, r6, rc2, rc6, rc12;
 +    double r, r2, r6, rc2;
      double expr, Vtab, Ftab;
      /* Parameters for David's function */
      double A = 0, B = 0, C = 0, A_3 = 0, B_4 = 0;
  
      if (bPotentialShift)
      {
 -        rc2 = rc * rc;
 -        rc6 = 1.0 / (rc2 * rc2 * rc2);
 +        rc2        = rc * rc;
 +        double rc6 = 1.0 / (rc2 * rc2 * rc2);
 +        double rc12;
          if (gmx_within_tol(reppow, 12.0, 10 * GMX_DOUBLE_EPS))
          {
              rc12 = rc6 * rc6;
              swi1 = 0.0;
          }
  
 -        rc6 = rc * rc * rc;
 -        rc6 = 1.0 / (rc6 * rc6);
 -
          switch (tp)
          {
              case etabLJ6:
                      Ftab = reppow * Vtab / r;
                  }
                  break;
 -            case etabLJ6Encad:
 -                if (r < rc)
 -                {
 -                    Vtab = -(r6 - 6.0 * (rc - r) * rc6 / rc - rc6);
 -                    Ftab = -(6.0 * r6 / r - 6.0 * rc6 / rc);
 -                }
 -                else /* r>rc */
 -                {
 -                    Vtab = 0;
 -                    Ftab = 0;
 -                }
 -                break;
 -            case etabLJ12Encad:
 -                if (r < rc)
 -                {
 -                    Vtab = -(r6 - 6.0 * (rc - r) * rc6 / rc - rc6);
 -                    Ftab = -(6.0 * r6 / r - 6.0 * rc6 / rc);
 -                }
 -                else /* r>rc */
 -                {
 -                    Vtab = 0;
 -                    Ftab = 0;
 -                }
 -                break;
              case etabCOUL:
                  Vtab = 1.0 / r;
                  Ftab = 1.0 / r2;
                  Vtab = expr;
                  Ftab = expr;
                  break;
 -            case etabCOULEncad:
 -                if (r < rc)
 -                {
 -                    Vtab = 1.0 / r - (rc - r) / (rc * rc) - 1.0 / rc;
 -                    Ftab = 1.0 / r2 - 1.0 / (rc * rc);
 -                }
 -                else /* r>rc */
 -                {
 -                    Vtab = 0;
 -                    Ftab = 0;
 -                }
 -                break;
              default:
                  gmx_fatal(FARGS, "Table type %d not implemented yet. (%s,%d)", tp, __FILE__, __LINE__);
          }
@@@ -1162,6 -1215,7 +1162,6 @@@ static void set_table_type(int tabsel[]
          case eelRF_ZERO: tabsel[etiCOUL] = etabRF_ZERO; break;
          case eelSWITCH: tabsel[etiCOUL] = etabCOULSwitch; break;
          case eelUSER: tabsel[etiCOUL] = etabUSER; break;
 -        case eelENCADSHIFT: tabsel[etiCOUL] = etabCOULEncad; break;
          default: gmx_fatal(FARGS, "Invalid eeltype %d", eltype);
      }
  
                  tabsel[etiLJ6]  = etabLJ6;
                  tabsel[etiLJ12] = etabLJ12;
                  break;
 -            case evdwENCADSHIFT:
 -                tabsel[etiLJ6]  = etabLJ6Encad;
 -                tabsel[etiLJ12] = etabLJ12Encad;
 -                break;
              case evdwPME:
                  tabsel[etiLJ6]  = etabLJ6Ewald;
                  tabsel[etiLJ12] = etabLJ12;
@@@ -1319,9 -1377,9 +1319,9 @@@ t_forcetable* make_tables(FILE* out, co
  
      /* Each table type (e.g. coul,lj6,lj12) requires four
       * numbers per table->n+1 data points. For performance reasons we want
 -     * the table data to be aligned to a 32-byte boundary.
 +     * the table data to be aligned to (at least) a 32-byte boundary.
       */
 -    snew_aligned(table->data, table->stride * (table->n + 1) * sizeof(real), 32);
 +    table->data.resize(table->stride * (table->n + 1) * sizeof(real));
  
      for (int k = 0; (k < etiNR); k++)
      {
          }
  
          copy2table(table->n, k * table->formatsize, table->stride, td[k].x, td[k].v, td[k].f,
 -                   scalefactor, table->data);
 +                   scalefactor, table->data.data());
  
          done_tabledata(&(td[k]));
      }
@@@ -1396,8 -1454,8 +1396,8 @@@ bondedtable_t make_bonded_table(FILE* f
      }
      tab.n     = td.nx;
      tab.scale = td.tabscale;
 -    snew(tab.data, tab.n * stride);
 -    copy2table(tab.n, 0, stride, td.x, td.v, td.f, 1.0, tab.data);
 +    tab.data.resize(tab.n * stride);
 +    copy2table(tab.n, 0, stride, td.x, td.v, td.f, 1.0, tab.data.data());
      done_tabledata(&td);
  
      return tab;
@@@ -1409,11 -1467,6 +1409,6 @@@ makeDispersionCorrectionTable(FILE* fp
      GMX_RELEASE_ASSERT(ic->vdwtype != evdwUSER || tabfn,
                         "With VdW user tables we need a table file name");
  
-     if (tabfn == nullptr)
-     {
-         return std::unique_ptr<t_forcetable>(nullptr);
-     }
      t_forcetable* fullTable = make_tables(fp, ic, tabfn, rtab, 0);
      /* Copy the contents of the table to one that has just dispersion
       * and repulsion, to improve cache performance. We want the table
      dispersionCorrectionTable->ninteractions = 2;
      dispersionCorrectionTable->stride =
              dispersionCorrectionTable->formatsize * dispersionCorrectionTable->ninteractions;
 -    snew_aligned(dispersionCorrectionTable->data,
 -                 dispersionCorrectionTable->stride * (dispersionCorrectionTable->n + 1), 32);
 +    dispersionCorrectionTable->data.resize(dispersionCorrectionTable->stride
 +                                           * (dispersionCorrectionTable->n + 1));
  
      for (int i = 0; i <= fullTable->n; i++)
      {
@@@ -1449,8 -1502,14 +1444,8 @@@ t_forcetable::t_forcetable(enum gmx_tab
      r(0),
      n(0),
      scale(0),
 -    data(nullptr),
      formatsize(0),
      ninteractions(0),
      stride(0)
  {
  }
 -
 -t_forcetable::~t_forcetable()
 -{
 -    sfree_aligned(data);
 -}
index 5b0b20b6835991a47eb321117305ecc5f5ad2c4e,0d5e1f45cbf806f19ff188a66d4e27caaa313583..b2377eafd6cf650372b3eac5a858ccb1879a069d
@@@ -3,8 -3,7 +3,8 @@@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
-  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 - * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
++ * Copyright (c) 2013,2014,2015,2016,2017, The GROMACS development team.
 + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -597,7 -596,7 +597,7 @@@ int gmx_trjcat(int argc, char* argv[]
              outFilesDemux.resize(nset);
              for (i = 0; (i < nset); i++)
              {
-                 outFilesDemux[0] = gmx::formatString("%d_%s", i, name.c_str());
+                 outFilesDemux[i] = gmx::formatString("%d_%s", i, name.c_str());
              }
          }
          do_demux(inFiles, outFilesDemux, n, val, t, dt_remd, isize, index, dt, oenv);
index 6ac65d0b39d104f9c8bbf9d38872eb81cc4d58b1,1d8f9abc219be1267815038b4a84d2f44322db76..cda4ec7e942e5bba57d907e69566725d709c82f1
@@@ -3,8 -3,7 +3,8 @@@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
-  * Copyright (c) 2011,2014,2015,2016,2018 by the GROMACS development team.
 - * Copyright (c) 2011,2014,2015,2016,2018,2019,2020, by the GROMACS development team, led by
++ * Copyright (c) 2011,2014,2015,2016,2018, The GROMACS development team.
 + * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -49,7 -48,6 +49,7 @@@
  #include "gromacs/topology/idef.h"
  #include "gromacs/topology/symtab.h"
  #include "gromacs/utility/enumerationhelpers.h"
 +#include "gromacs/utility/listoflists.h"
  #include "gromacs/utility/unique_cptr.h"
  
  enum class SimulationAtomGroupType : int
@@@ -85,10 -83,10 +85,10 @@@ struct gmx_moltype_
      /*! \brief Default copy constructor */
      gmx_moltype_t(const gmx_moltype_t&) = default;
  
 -    char**           name;  /**< Name of the molecule type            */
 -    t_atoms          atoms; /**< The atoms in this molecule           */
 -    InteractionLists ilist; /**< Interaction list with local indices  */
 -    t_blocka         excls; /**< The exclusions                       */
 +    char**                name;  /**< Name of the molecule type            */
 +    t_atoms               atoms; /**< The atoms in this molecule           */
 +    InteractionLists      ilist; /**< Interaction list with local indices  */
 +    gmx::ListOfLists<int> excls; /**< The exclusions                       */
  };
  
  /*! \brief Block of molecules of the same type, used in gmx_mtop_t */
@@@ -118,21 -116,22 +118,22 @@@ struct MoleculeBlockIndice
   */
  struct SimulationGroups
  {
-     //! Groups of particles
+     // TODO: collect groups and groupNumbers in a struct for each group type
+     //! Group numbers for each of the different SimulationAtomGroupType groups.
      gmx::EnumerationArray<SimulationAtomGroupType, AtomGroupIndices> groups;
      //! Names of groups, stored as pointer to the entries in the symbol table.
      std::vector<char**> groupNames;
-     //! Group numbers for the different SimulationAtomGroupType groups.
+     //! Indices into groups for each atom for each of the different SimulationAtomGroupType groups.
      gmx::EnumerationArray<SimulationAtomGroupType, std::vector<unsigned char>> groupNumbers;
  
      /*! \brief
-      * Number of group numbers for a single SimulationGroup.
+      * Number of atoms for which group numbers are stored for a single SimulationGroup.
       *
-      * \param[in] group Integer value for the group type.
+      * \param[in] group  The group type.
       */
      int numberOfGroupNumbers(SimulationAtomGroupType group) const
      {
 -        return gmx::ssize(groupNumbers[group]);
 +        return static_cast<int>(groupNumbers[group].size());
      }
  };
  
@@@ -177,11 -176,15 +178,11 @@@ struct gmx_mtop_t //NOLINT(clang-analyz
      std::unique_ptr<InteractionLists> intermolecular_ilist = nullptr;
      //! Number of global atoms.
      int natoms = 0;
 -    //! Parameter for residue numbering.
 -    int maxres_renum = 0;
 -    //! The maximum residue number in moltype
 -    int maxresnr = -1;
      //! Atomtype properties
      t_atomtypes atomtypes;
      //! Groups of atoms for different purposes
      SimulationGroups groups;
 -    //! The symbol table
 +    //! The legacy symbol table
      t_symtab symtab;
      //! Tells whether we have valid molecule indices
      bool haveMoleculeIndices = false;
       */
      std::vector<int> intermolecularExclusionGroup;
  
 -    /* Derived data  below */
 +    //! Maximum number of residues in molecule to trigger renumbering of residues
 +    int maxResiduesPerMoleculeToTriggerRenumber() const
 +    {
 +        return maxResiduesPerMoleculeToTriggerRenumber_;
 +    }
 +    //! Maximum residue number that is not renumbered.
 +    int maxResNumberNotRenumbered() const { return maxResNumberNotRenumbered_; }
 +    /*! \brief Finalize this data structure.
 +     *
 +     * Should be called after generating or reading mtop, to set some compute
 +     * intensive variables to avoid N^2 operations later on.
 +     *
 +     * \todo Move into a builder class, once available.
 +     */
 +    void finalize();
 +
 +    /* Derived data below */
      //! Indices for each molblock entry for fast lookup of atom properties
      std::vector<MoleculeBlockIndices> moleculeBlockIndices;
 +
 +private:
 +    //! Build the molblock indices
 +    void buildMolblockIndices();
 +    //! Maximum number of residues in molecule to trigger renumbering of residues
 +    int maxResiduesPerMoleculeToTriggerRenumber_ = 0;
 +    //! The maximum residue number in moltype that is not renumbered
 +    int maxResNumberNotRenumbered_ = -1;
  };
  
  /*! \brief
  struct gmx_localtop_t
  {
      //! Constructor used for normal operation, manages own resources.
 -    gmx_localtop_t();
 -
 -    ~gmx_localtop_t();
 +    gmx_localtop_t(const gmx_ffparams_t& ffparams);
  
      //! The interaction function definition
 -    t_idef idef;
 -    //! Atomtype properties
 -    t_atomtypes atomtypes;
 +    InteractionDefinitions idef;
      //! The exclusions
 -    t_blocka excls;
 -    //! Flag for domain decomposition so we don't free already freed memory.
 -    bool useInDomainDecomp_ = false;
 +    gmx::ListOfLists<int> excls;
  };
  
  /* The old topology struct, completely written out, used in analysis tools */
@@@ -244,8 -229,8 +245,8 @@@ typedef struct t_topolog
      t_atomtypes atomtypes;                   /* Atomtype properties                  */
      t_block     mols;                        /* The molecules                        */
      gmx_bool    bIntermolecularInteractions; /* Inter.mol. int. ?   */
 -    t_blocka    excls;                       /* The exclusions                       */
 -    t_symtab    symtab;                      /* The symbol table                     */
 +    /* Note that the exclusions are not stored in t_topology */
 +    t_symtab symtab; /* The symbol table                     */
  } t_topology;
  
  void init_top(t_topology* top);
index 04127783fd14ecc1f5e2f9346ceee5e5a5aecee4,e27f64b69276bb1d2d959d48b64f0c23b80e6462..f29b400a807e489e70b8633a3d55e85279eff8d0
@@@ -3,8 -3,7 +3,8 @@@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
-  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 - * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
++ * Copyright (c) 2013,2014,2015,2016,2017, The GROMACS development team.
 + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -65,7 -64,8 +65,7 @@@
  
  #include "errorformat.h"
  
 -static bool       bDebug = false;
 -static gmx::Mutex where_mutex;
 +static bool bDebug = false;
  
  FILE*    debug        = nullptr;
  gmx_bool gmx_debug_at = FALSE;
@@@ -192,13 -192,16 +192,16 @@@ void gmx_exit_on_fatal_error(ExitType e
      }
  #endif
  
-     if (exitType == ExitType_CleanExit)
+     if (!GMX_FAHCORE)
      {
-         std::exit(returnValue);
+         if (exitType == ExitType_CleanExit)
+         {
+             std::exit(returnValue);
+         }
+         // We cannot use std::exit() if other threads may still be executing, since that would cause
+         // destructors to be called for global objects that may still be in use elsewhere.
+         std::_Exit(returnValue);
      }
-     // We cannot use std::exit() if other threads may still be executing, since that would cause
-     // destructors to be called for global objects that may still be in use elsewhere.
-     std::_Exit(returnValue);
  }
  
  void gmx_fatal_mpi_va(int /*f_errno*/,
index b09c5403b8dc7849397d365d5cc44e0a1c311c28,c16605e90acc72bbb5c0ce2914e4dae08dbb216b..6f631efc8b3f608b01e18e76e90fb30c41d66b18
@@@ -3,8 -3,7 +3,8 @@@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
-  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 - * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
++ * Copyright (c) 2013,2014,2015,2016,2017, The GROMACS development team.
 + * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -161,36 -160,28 +161,28 @@@ static void push_ps(FILE* fp
  }
  
  #if GMX_FAHCORE
- /* don't use pipes!*/
- #    define popen fah_fopen
- #    define pclose fah_fclose
- #    define SKIP_FFOPS 1
- #else
  #    ifdef gmx_ffclose
  #        undef gmx_ffclose
  #    endif
- #    if (!HAVE_PIPES && !defined(__native_client__))
+ #endif
+ #if (!HAVE_PIPES && !defined(__native_client__))
 -static FILE* popen(const char* nm, const char* mode)
 +static FILE* popen(const char* /* nm */, const char* /* mode */)
  {
      gmx_impl("Sorry no pipes...");
  
      return NULL;
  }
  
 -static int pclose(FILE* fp)
 +static int pclose(FILE* /* fp */)
  {
      gmx_impl("Sorry no pipes...");
  
      return 0;
  }
- #    endif /* !HAVE_PIPES && !defined(__native_client__) */
- #endif     /* GMX_FAHCORE */
+ #endif /* !HAVE_PIPES && !defined(__native_client__) */
  
  int gmx_ffclose(FILE* fp)
  {
- #ifdef SKIP_FFOPS
-     return fclose(fp);
- #else
      t_pstack *ps, *tmp;
      int       ret = 0;
  
      }
  
      return ret;
- #endif
  }
  
  
@@@ -292,7 -282,7 +283,7 @@@ gmx_off_t gmx_ftell(FILE* stream
  
  int gmx_truncate(const std::string& filename, gmx_off_t length)
  {
- #if GMX_NATIVE_WINDOWS
+ #if GMX_NATIVE_WINDOWS && !GMX_FAHCORE
      FILE* fp = fopen(filename.c_str(), "rb+");
      if (fp == NULL)
      {
@@@ -418,9 -408,6 +409,6 @@@ void make_backup(const std::string& nam
  
  FILE* gmx_ffopen(const std::string& file, const char* mode)
  {
- #ifdef SKIP_FFOPS
-     return fopen(file, mode);
- #else
      FILE*    ff = nullptr;
      gmx_bool bRead;
      int      bs;
          }
      }
      return ff;
- #endif
  }
  
  namespace gmx
@@@ -614,6 -600,10 +601,10 @@@ int gmx_file_rename(const char* oldname
  #else
      if (MoveFileEx(oldname, newname, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH))
      {
+ #    if GMX_FAHCORE
+         /* This just lets the F@H checksumming system know about the rename */
+         fcRename(oldname, newname);
+ #    endif
          return 0;
      }
      else
@@@ -684,34 -674,28 +675,29 @@@ int gmx_fsync(FILE* fp
  {
      int rc = 0;
  
- #if GMX_FAHCORE
-     /* the fahcore defines its own os-independent fsync */
-     rc = fah_fsync(fp);
- #else /* GMX_FAHCORE */
      {
          int fn;
  
          /* get the file number */
- #    if HAVE_FILENO
+ #if HAVE_FILENO
          fn = fileno(fp);
- #    elif HAVE__FILENO
+ #elif HAVE__FILENO
          fn = _fileno(fp);
- #    else
+ #else
 +        GMX_UNUSED_VALUE(fp);
          fn = -1;
- #    endif
+ #endif
  
          /* do the actual fsync */
          if (fn >= 0)
          {
- #    if HAVE_FSYNC
+ #if HAVE_FSYNC
              rc = fsync(fn);
- #    elif HAVE__COMMIT
+ #elif HAVE__COMMIT
              rc = _commit(fn);
- #    endif
+ #endif
          }
      }
- #endif /* GMX_FAHCORE */
  
      /* We check for these error codes this way because POSIX requires them
         to be defined, and using anything other than macros is unlikely: */
@@@ -743,9 -727,7 +729,9 @@@ void gmx_chdir(const char* directory
  #endif
      if (rc != 0)
      {
 -        gmx_fatal(FARGS, "Cannot change directory to '%s'. Reason: %s", directory, strerror(errno));
 +        auto message = gmx::formatString("Cannot change directory to '%s'. Reason: %s", directory,
 +                                         strerror(errno));
 +        GMX_THROW(gmx::FileIOError(message));
      }
  }
  
index f23f8cae503318caafb86e60d3a93d2bf2ac197c,b9ee23851725d1f39b7fb8483162d520efb6e5c3..1f6e07414bdb765f98457fffd926bc1ee1a06b0c
@@@ -1,8 -1,7 +1,8 @@@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
-  * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
 - * Copyright (c) 2013,2014,2015,2016,2018,2019,2020, by the GROMACS development team, led by
++ * Copyright (c) 2013,2014,2015,2016,2018, The GROMACS development team.
 + * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -82,10 -81,7 +82,7 @@@ void init(int* argc, char*** argv) // N
      }
      else
      {
- #    if GMX_FAHCORE
-         fah_MPI_Init(argc, argv);
- #    else
- #        if GMX_OPENMP
+ #    if GMX_OPENMP
          /* Formally we need to use MPI_Init_thread and ask for MPI_THREAD_FUNNELED
           * level of thread support when using OpenMP. However, in practice we
           * have never seen any problems with just using MPI_Init(), and some MPI
                      "the MPI library. Keep your fingers crossed.");
              MPI_Init(argc, argv);
          }
- #        else
+ #    else
          MPI_Init(argc, argv);
- #        endif
  #    endif
      }
      // Bump the counter to record this initialization event
index dee12e276b95b8fd2fc4ac10447b86c9b42a093e,9ba03c04fc8d9d1dbc1c54c9df9ac052c2acbd0e..b051068c0670184835bd4dda10d283a2b7e00d10
@@@ -1,8 -1,7 +1,8 @@@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
 -# Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
 +# Copyright (c) 2013,2014,2015,2016,2017 The GROMACS development team.
 +# Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
  # top-level source directory and at http://www.gromacs.org.
@@@ -39,14 -38,11 +39,14 @@@ gmx_add_unit_test_library(mdrun_test_in
      energyreader.cpp
      energycomparison.cpp
      moduletest.cpp
 -    mdmodulenotification.cpp
 +    simulatorcomparison.cpp
      terminationhelper.cpp
      trajectorycomparison.cpp
      trajectoryreader.cpp
 +    # pseudo-library for code for mdrun
 +    $<TARGET_OBJECTS:mdrun_objlib>
      )
 +target_include_directories(mdrun_test_infrastructure SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
  
  # To avoid running into test timeouts, some end-to-end tests of mdrun
  # functionality are split off. This can be rearranged in future as we
  set(testname "MdrunOutputTests")
  set(exename "mdrun-output-test")
  
 -gmx_add_gtest_executable(
 -    ${exename}
 -    # files with code for tests
 -    compressed_x_output.cpp
 -    helpwriting.cpp
 -    outputfiles.cpp
 -    trajectory_writing.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 +gmx_add_gtest_executable(${exename}
 +    CPP_SOURCE_FILES
 +        compressed_x_output.cpp
 +        helpwriting.cpp
 +        outputfiles.cpp
 +        trajectory_writing.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
      )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
  gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
  set(testname "MdrunModulesTests")
  set(exename "mdrun-modules-test")
  
 -gmx_add_gtest_executable(
 -    ${exename}
 -    densityfittingmodule.cpp
 -    interactiveMD.cpp
 -    mimic.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 +gmx_add_gtest_executable(${exename}
 +    CPP_SOURCE_FILES
 +        densityfittingmodule.cpp
 +        interactiveMD.cpp
 +        mimic.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
      )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
  gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
  set(testname "MdrunIOTests")
  set(exename "mdrun-io-test")
  
 -gmx_add_gtest_executable(
 -    ${exename}
 -    exactcontinuation.cpp
 -    grompp.cpp
 -    initialconstraints.cpp
 -    termination.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 +gmx_add_gtest_executable(${exename}
 +    CPP_SOURCE_FILES
 +        exactcontinuation.cpp
 +        grompp.cpp
 +        initialconstraints.cpp
 +        termination.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
      )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
  gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
  
++# To avoid running into test timeouts, some end-to-end tests of mdrun
++# functionality are split off. This can be rearranged in future as we
++# see fit.
  set(testname "MdrunTests")
  set(exename "mdrun-test")
  
 -gmx_add_gtest_executable(
 -    ${exename}
 -    dispersion_correction.cpp
 -    orires.cpp
 -    pmetest.cpp
 -    simulator.cpp
 -    swapcoords.cpp
 -    tabulated_bonded_interactions.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 +gmx_add_gtest_executable(${exename}
 +    CPP_SOURCE_FILES
 +        ewaldsurfaceterm.cpp
 +        multiple_time_stepping.cpp
 +        orires.cpp
 +        simulator.cpp
 +        swapcoords.cpp
 +        tabulated_bonded_interactions.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
 +    )
 +target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 +gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
 +
 +
 +set(testname "MdrunPmeTests")
 +set(exename "mdrun-pme-test")
 +
 +gmx_add_gtest_executable(${exename}
 +    CPP_SOURCE_FILES
 +        pmetest.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
      )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 -gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST)
 +gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
 +
  
  set(testname "MdrunNonIntegratorTests")
  set(exename "mdrun-non-integrator-test")
  
 -gmx_add_gtest_executable(
 -    ${exename}
 -    # files with code for tests
 -    minimize.cpp
 -    nonbonded_bench.cpp
 -    normalmodes.cpp
 -    rerun.cpp
 -    simple_mdrun.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 -    )
 +gmx_add_gtest_executable(${exename}
 +    CPP_SOURCE_FILES
 +        # files with code for tests
 +        minimize.cpp
 +        nonbonded_bench.cpp
 +        normalmodes.cpp
 +        rerun.cpp
 +        simple_mdrun.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
 +        )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 -gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST)
 +gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
  
  # TPI does not support OpenMP, so we need a separate test binary
  set(testname "MdrunTpiTests")
  set(exename "mdrun-tpi-test")
  
 -gmx_add_gtest_executable(
 -    ${exename}
 -    # files with code for tests
 -    tpitest.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 -    )
 +gmx_add_gtest_executable(${exename}
 +    CPP_SOURCE_FILES
 +        # files with code for tests
 +        tpitest.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
 +        )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 -gmx_register_gtest_test(${testname} ${exename} INTEGRATION_TEST)
 +gmx_register_gtest_test(${testname} ${exename} INTEGRATION_TEST IGNORE_LEAKS)
  
  # Tests that only make sense to run with multiple ranks and/or real
  # MPI are implemented here.
  set(testname "MdrunMpiTests")
  set(exename "mdrun-mpi-test")
  
 -gmx_add_gtest_executable(
 -    ${exename} MPI
 -    # files with code for tests
 -    domain_decomposition.cpp
 -    minimize.cpp
 -    mimic.cpp
 -    multisim.cpp
 -    multisimtest.cpp
 -    pmetest.cpp
 -    replicaexchange.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 -    )
 +gmx_add_gtest_executable(${exename} MPI
 +    CPP_SOURCE_FILES
 +        # files with code for tests
 +        domain_decomposition.cpp
 +        minimize.cpp
 +        mimic.cpp
 +        multisim.cpp
 +        multisimtest.cpp
 +        replicaexchange.cpp
 +        pmetest.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
 +        )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 -gmx_register_gtest_test(${testname} ${exename} MPI_RANKS 2 OPENMP_THREADS 2 INTEGRATION_TEST)
 +gmx_register_gtest_test(${testname} ${exename} MPI_RANKS 2 OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
 +
 +# Tests that only make sense to run with multiple ranks and/or real
 +# MPI are implemented here. Special case for slow PME tests
 +set(testname "MdrunMpiPmeTests")
 +set(exename "mdrun-mpi-pme-test")
 +
 +gmx_add_gtest_executable(${exename} MPI
 +    CPP_SOURCE_FILES
 +        # files with code for tests
 +        pmetest.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
 +        )
 +target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 +gmx_register_gtest_test(${testname} ${exename} MPI_RANKS 2 OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
  
  # Slow-running tests that target testing multiple-rank coordination behaviors
  set(exename "mdrun-mpi-coordination-test")
 -gmx_add_gtest_executable(
 -    ${exename} MPI
 -    # files with code for tests
 -    periodicactions.cpp
 -    # pseudo-library for code for mdrun
 -    $<TARGET_OBJECTS:mdrun_objlib>
 -    )
 +gmx_add_gtest_executable(${exename} MPI
 +    CPP_SOURCE_FILES
 +        # files with code for tests
 +        periodicactions.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
 +        )
  target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
  
  # These tests are extremely slow without optimization or OpenMP, so only run them for
  # optimized build types (CMAKE_BUILD_TYPE matching "Rel") with OpenMP enabled.
  if (CMAKE_BUILD_TYPE MATCHES "Rel" AND GMX_OPENMP)
      set(testname "MdrunMpiCoordinationTestsOneRank")
 -    gmx_register_gtest_test(${testname} ${exename} MPI_RANKS 1 SLOW_TEST)
 +    gmx_register_gtest_test(${testname} ${exename} MPI_RANKS 1 SLOW_TEST IGNORE_LEAKS)
      set(testname "MdrunMpiCoordinationTestsTwoRanks")
 -    gmx_register_gtest_test(${testname} ${exename} MPI_RANKS 2 SLOW_TEST)
 +    gmx_register_gtest_test(${testname} ${exename} MPI_RANKS 2 SLOW_TEST IGNORE_LEAKS)
  endif()
 +
 +# Keeping the FEP tests separate for now to be able to judge runtime more easily
 +# Can be included in mdrun tests later
 +set(testname "MdrunFEPTests")
 +set(exename "mdrunfep-test")
 +
 +gmx_add_gtest_executable(${exename}
 +        CPP_SOURCE_FILES
 +        # files with code for tests
 +        freeenergy.cpp
 +        # pseudo-library for code for mdrun
 +        $<TARGET_OBJECTS:mdrun_objlib>
 +)
 +target_link_libraries(${exename} PRIVATE mdrun_test_infrastructure)
 +gmx_register_gtest_test(${testname} ${exename} OPENMP_THREADS 2 INTEGRATION_TEST IGNORE_LEAKS)
diff --combined tests/CMakeLists.txt
index e8b65a3ed88028f8fef71d73d0b7883a45b816bd,7b3efa2a6c34f7bf7c02f42641b634e613e21732..0ac5cb04dfd72a92f00f35b55e83f551f31f5939
@@@ -1,7 -1,7 +1,7 @@@
  #
  # This file is part of the GROMACS molecular simulation package.
  #
- # Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ # Copyright (c) 2012,2013,2014,2015,2016, The GROMACS development team.
  # Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
  # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  # and including many others, as listed in the AUTHORS file in the
@@@ -290,10 -290,10 +290,10 @@@ if(GMX_PHYSICAL_VALIDATION
          # End copied from regression tests.
          #
  
 -        if (NOT PYTHONINTERP_FOUND)
 +        if (NOT Python3_Interpreter_FOUND)
              message(FATAL_ERROR
                      "Python not found. Physical validation requires python. \
 -                     Install python, set PYTHON_EXECUTABLE to a valid python location, \
 +                     Install python, set Python3_ROOT_DIR or PYTHON_EXECUTABLE to a valid location, \
                       or set GMX_PHYSICAL_VALIDATION=OFF to disable the physical validation tests.")
          endif()
          #