Merge branch 'origin/release-2021' into merge-2021-into-master

author Paul Bauer <paul.bauer.q@gmail.com>

Thu, 5 Nov 2020 10:53:20 +0000 (11:53 +0100)

committer Paul Bauer <paul.bauer.q@gmail.com>

Thu, 5 Nov 2020 12:12:18 +0000 (13:12 +0100)
author Paul Bauer <paul.bauer.q@gmail.com>
Thu, 5 Nov 2020 10:53:20 +0000 (11:53 +0100)
committer Paul Bauer <paul.bauer.q@gmail.com>
Thu, 5 Nov 2020 12:12:18 +0000 (13:12 +0100)
diff --git a/.github/workflows/build_cmake.yml b/.github/workflows/build_cmake.yml

index 3dd77003094005839327a42e4083f9a4808bfe25..aa8dc8de0e6593eb561229ed99ba7a9963dad782 100644 (file)
--- a/.github/workflows/build_cmake.yml
+++ b/.github/workflows/build_cmake.yml
@@ -112,6 +112,10 @@ jobs:
            set(path_separator ";")
          endif()
          set(ENV{PATH} "$ENV{GITHUB_WORKSPACE}${path_separator}$ENV{PATH}")
+        set(openmp_use "ON")
+        if ("${{ runner.os }}" STREQUAL "Windows")
+          set(openmp_use "OFF")
+        endif()
  
          execute_process(
            COMMAND cmake
@@ -127,6 +131,7 @@ jobs:
              -D GMX_GPU=OFF
              -D GMX_SIMD=None
              -D GMX_FFT_LIBRARY=FFTPACK
+            -D GMX_OPENMP=${openmp_use}
            RESULT_VARIABLE result
          )
          if (NOT result EQUAL 0)
diff --git a/admin/ci-scripts/build-and-test-py-gmxapi-0.2.sh b/admin/ci-scripts/build-and-test-py-gmxapi-0.2.sh

index 828538828bd245a78a4ba4ab9fcaa1c48986179a..e33fb3faaa8ace41e9f44b0bda7e04d2cde1fedb 100644 (file)
--- a/admin/ci-scripts/build-and-test-py-gmxapi-0.2.sh
+++ b/admin/ci-scripts/build-and-test-py-gmxapi-0.2.sh
@@ -61,7 +61,7 @@ if [ -x `which mpiexec` ]; then
  fi
  
  # Run Python acceptance tests.
-python -m pytest python_packaging/test --junitxml=$PY_ACCEPTANCE_TEST_XML
+python -m pytest python_packaging/test --junitxml=$PY_ACCEPTANCE_TEST_XML --threads=2
  
  # Note: Multiple pytest processes getting --junitxml output file argument
  # may cause problems, so we set the option on only one of the launched processes.
diff --git a/admin/containers/buildall.sh b/admin/containers/buildall.sh

index 1d2daad833eb52ebff8f4053e4ba1e8df7055bd5..35d37d8a695e1179a95ebe46b4709371fa7e32bb 100644 (file)
--- a/admin/containers/buildall.sh
+++ b/admin/containers/buildall.sh
@@ -9,46 +9,29 @@ SCRIPT=$PWD/scripted_gmx_docker_builds.py
  # images needed, because the same one can test library,
  # thread and no MPI configurations.
  
-tag="gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --gcc 8 --cuda 11.0 --opencl --clfft --mpi openmpi \
-| docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --gcc 7 --opencl amd --clfft --mpi openmpi --ubuntu 18.04 | docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-llvm-8-tsan-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --llvm 8 --tsan | docker build -t $tag -
-
-tag="gromacs/cmake-3.15.7-llvm-8-cuda-10.0-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --llvm 8 --cuda 10.0 --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.15.7-llvm-8-cuda-11.0-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --llvm 8 --cuda 11.0 --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.15.7-llvm-9-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --llvm 9 --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-llvm-9-intelopencl-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --llvm 9 --opencl intel --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-llvm-9-amdopencl-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --llvm 9 --opencl amd --mpi openmpi --ubuntu 18.04 | docker build -t $tag -
-
-tag="gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.17.2 --oneapi 2021.1-beta09 | docker build -t $tag -
-
-tag="gromacs/ci-docs-llvm-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.17.2 --llvm --doxygen | docker build -t $tag -
+args[${#args[@]}]="--gcc 8 --cuda 11.0 --clfft --mpi openmpi"
+args[${#args[@]}]="--gcc 7 --clfft --mpi openmpi --ubuntu 18.04"
+args[${#args[@]}]="--llvm 8 --tsan"
+args[${#args[@]}]="--llvm 8 --cuda 10.0 --clfft --mpi openmpi"
+args[${#args[@]}]="--llvm 8 --cuda 11.0 --clfft --mpi openmpi"
+args[${#args[@]}]="--llvm 9 --clfft --mpi openmpi --ubuntu 18.04"
+args[${#args[@]}]="--oneapi 2021.1-beta09"
+args[${#args[@]}]="--llvm --doxygen"
+
+echo "Building the following images."
+for arg_string in "${args[@]}"; do
+  # shellcheck disable=SC2086
+  python3 -m utility $arg_string
+done
+echo
+
+for arg_string in "${args[@]}"; do
+  # shellcheck disable=SC2086
+  tag=$(python3 -m utility $arg_string)
+  tags[${#tags[@]}]=$tag
+  # shellcheck disable=SC2086
+  python3 $SCRIPT $arg_string | docker build -t $tag -
+done
  
  echo "Run the following to upload the updated images."
  echo "docker login"
diff --git a/admin/containers/scripted_gmx_docker_builds.py b/admin/containers/scripted_gmx_docker_builds.py

index a7b155ac01e83df53a8a1605be4f12c3e264201c..5464a7f17c5b62a9ac3519cb5c66e5cd1a4af9a8 100755 (executable)
--- a/admin/containers/scripted_gmx_docker_builds.py
+++ b/admin/containers/scripted_gmx_docker_builds.py
@@ -33,7 +33,8 @@
  # To help us fund GROMACS development, we humbly ask that you cite
  # the research papers on the package. Check out http://www.gromacs.org.
  
-"""
+"""Building block based Dockerfile generation for CI testing images.
+
  Generates a set of docker images used for running GROMACS CI on Gitlab.
  The images are prepared according to a selection of build configuration targets
  that hope to cover a broad enough scope of different possible systems,
@@ -44,6 +45,9 @@ analysing the logic and adding build stages as needed.
  
  Based on the example script provided by the NVidia HPCCM repository.
  
+Reference:
+    `NVidia HPC Container Maker <https://github.com/NVIDIA/hpc-container-maker>`__
+
  Authors:
      * Paul Bauer <paul.bauer.q@gmail.com>
      * Eric Irrgang <ericirrgang@gmail.com>
@@ -56,6 +60,9 @@ Usage::
      $ python3 scripted_gmx_docker_builds.py --format docker > Dockerfile && docker build .
      $ python3 scripted_gmx_docker_builds.py | docker build -
  
+See Also:
+    :file:`buildall.sh`
+
  """
  
  import argparse
@@ -80,6 +87,7 @@ _common_packages = ['build-essential',
                      'ccache',
                      'git',
                      'gnupg',
+                    'gpg-agent',
                      'libfftw3-dev',
                      'libhwloc-dev',
                      'liblapack-dev',
@@ -92,6 +100,22 @@ _common_packages = ['build-essential',
                      'wget',
                      'xsltproc']
  
+_opencl_extra_packages = [
+    'nvidia-opencl-dev',
+    # The following require apt_ppas=['ppa:intel-opencl/intel-opencl']
+    'intel-opencl-icd',
+    'ocl-icd-libopencl1',
+    'ocl-icd-opencl-dev',
+    'opencl-headers',
+    # The following require
+    #             apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
+    #             apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main']
+    'libelf1',
+    'rocm-opencl',
+    'rocm-dev',
+    'clinfo'
+]
+
  # Extra packages needed to build Python installations from source.
  _python_extra_packages = ['build-essential',
                            'ca-certificates',
@@ -134,18 +158,13 @@ _docs_extra_packages = ['autoconf',
                          'texlive-fonts-recommended',
                          'texlive-fonts-extra']
  
-# Supported Python versions for maintained branches.
-_python_versions = ['3.6.10', '3.7.7', '3.8.2']
-
  # Parse command line arguments
-parser = argparse.ArgumentParser(description='GROMACS CI image creation script', parents=[utility.parser])
+parser = argparse.ArgumentParser(description='GROMACS CI image creation script',
+                                 parents=[utility.parser])
  
  parser.add_argument('--format', type=str, default='docker',
                      choices=['docker', 'singularity'],
                      help='Container specification format (default: docker)')
-parser.add_argument('--venvs', nargs='*', type=str, default=_python_versions,
-                    help='List of Python versions ("major.minor.patch") for which to install venvs. '
-                         'Default: {}'.format(' '.join(_python_versions)))
  
  
  def base_image_tag(args) -> str:
@@ -181,6 +200,13 @@ def get_llvm_packages(args) -> typing.Iterable[str]:
          return []
  
  
+def get_opencl_packages(args) -> typing.Iterable[str]:
+    if (args.doxygen is None) and (args.oneapi is None):
+        return _opencl_extra_packages
+    else:
+        return []
+
+
  def get_compiler(args, compiler_build_stage: hpccm.Stage = None) -> bb_base:
      # Compiler
      if args.icc is not None:
@@ -247,34 +273,6 @@ def get_mpi(args, compiler):
          return None
  
  
-def get_opencl(args):
-    # Add OpenCL environment if needed
-    if (args.opencl is not None):
-        if args.opencl == 'nvidia':
-            if (args.cuda is None):
-                raise RuntimeError('Need Nvidia environment for Nvidia OpenCL image')
-
-            return hpccm.building_blocks.packages(ospackages=['nvidia-opencl-dev'])
-
-        elif args.opencl == 'intel':
-            # Note, when using oneapi, there is bundled OpenCL support, so this
-            # installation is not needed.
-            return hpccm.building_blocks.packages(
-                    apt_ppas=['ppa:intel-opencl/intel-opencl'],
-                    ospackages=['opencl-headers', 'ocl-icd-libopencl1',
-                                'ocl-icd-opencl-dev', 'intel-opencl-icd'])
-
-        elif args.opencl == 'amd':
-            # libelf1 is a necessary dependency for something in the ROCm stack,
-            # which they should set up, but seem to have omitted.
-            return hpccm.building_blocks.packages(
-                    apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
-                    apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main'],
-                    ospackages=['ocl-icd-libopencl1', 'ocl-icd-opencl-dev', 'opencl-headers', 'libelf1', 'rocm-opencl', 'rocm-dev', 'clinfo'])
-    else:
-        return None
-
-
  def get_clfft(args):
      if (args.clfft is not None):
          return hpccm.building_blocks.generic_cmake(
@@ -538,21 +536,31 @@ def build_stages(args) -> typing.Iterable[hpccm.Stage]:
      # Building blocks are chunks of container-builder instructions that can be
      # copied to any build stage with the addition operator.
      building_blocks = collections.OrderedDict()
+    building_blocks['base_packages'] = hpccm.building_blocks.packages(
+        ospackages=_common_packages)
  
      # These are the most expensive and most reusable layers, so we put them first.
      building_blocks['compiler'] = get_compiler(args, compiler_build_stage=stages.get('compiler_build'))
      building_blocks['mpi'] = get_mpi(args, building_blocks['compiler'])
+    for i, cmake in enumerate(args.cmake):
+        building_blocks['cmake' + str(i)] = hpccm.building_blocks.cmake(
+            eula=True,
+            prefix='/usr/local/cmake-{}'.format(cmake),
+            version=cmake)
  
      # Install additional packages early in the build to optimize Docker build layer cache.
-    os_packages = _common_packages + get_llvm_packages(args)
+    os_packages = list(get_llvm_packages(args)) + get_opencl_packages(args)
      if args.doxygen is not None:
          os_packages += _docs_extra_packages
      if args.oneapi is not None:
          os_packages += ['lsb-release']
-    building_blocks['ospackages'] = hpccm.building_blocks.packages(ospackages=os_packages)
+    building_blocks['extra_packages'] = hpccm.building_blocks.packages(
+        ospackages=os_packages,
+        apt_ppas=['ppa:intel-opencl/intel-opencl'],
+        apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
+        apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main']
+    )
  
-    building_blocks['cmake'] = hpccm.building_blocks.cmake(eula=True, version=args.cmake)
-    building_blocks['opencl'] = get_opencl(args)
      building_blocks['clfft'] = get_clfft(args)
  
      # Add Python environments to MPI images, only, so we don't have to worry
diff --git a/admin/containers/utility.py b/admin/containers/utility.py

index 5223e886f068698ffe0c72db43b855d7bf8a5429..3df5fdc8beef71a5c9aa18089c8434ba624a2b25 100644 (file)
--- a/admin/containers/utility.py
+++ b/admin/containers/utility.py
@@ -32,9 +32,37 @@
  # To help us fund GROMACS development, we humbly ask that you cite
  # the research papers on the package. Check out http://www.gromacs.org.
  
-"""A `utility` module helps manage the matrix of configurations for CI testing and build containers.
+"""A utility module to help manage the matrix of configurations for CI testing and build containers.
  
-Provides importable argument parser.
+When called as a standalone script, prints a Docker image name based on the
+command line arguments. The Docker image name is of the form used in the GROMACS
+CI pipeline jobs.
+
+Example::
+
+    $ python3 -m utility --llvm --doxygen
+    gromacs/ci-ubuntu-18.04-llvm-7-docs
+
+See Also:
+    :file:`buildall.sh`
+
+As a module, provides an importable argument parser and a Docker image name generator.
+
+Note that the parser is created with ``add_help=False`` to make it friendly as a
+parent parser, but this means that you must derive a new parser from it if you
+want to see the full generated command line help.
+
+Example::
+
+    import utility
+    # utility.parser does not support `-h` or `--help`
+    parser = argparse.ArgumentParser(
+        description='GROMACS CI image creation script',
+        parents=[utility.parser])
+    # ArgumentParser(add_help=True) is default, so parser supports `-h` and `--help`
+
+See Also:
+    :file:`scripted_gmx_docker_builds.py`
  
  Authors:
      * Paul Bauer <paul.bauer.q@gmail.com>
@@ -58,8 +86,9 @@ parsers for tools.
      Instead, inherit from it with the *parents* argument to :py:class:`argparse.ArgumentParser`
  """
  
-parser.add_argument('--cmake', type=str, default='3.13.0',
+parser.add_argument('--cmake', nargs='*', type=str, default=['3.13.0', '3.15.7', '3.17.2'],
                      help='Selection of CMake version to provide to base image')
+
  compiler_group = parser.add_mutually_exclusive_group()
  compiler_group.add_argument('--gcc', type=int, nargs='?', const=7, default=7,
                              help='Select GNU compiler tool chain. (Default) '
@@ -94,11 +123,63 @@ parser.add_argument('--mpi', type=str, nargs='?', const='openmpi', default=None,
  parser.add_argument('--tsan', type=str, nargs='?', const='llvm', default=None,
                      help='Build special compiler versions with TSAN OpenMP support')
  
-parser.add_argument('--opencl', type=str, nargs='?', const='nvidia', default=None,
-                    help='Provide environment for OpenCL builds')
-
  parser.add_argument('--clfft', type=str, nargs='?', const='master', default=None,
                      help='Add external clFFT libraries to the build image')
  
  parser.add_argument('--doxygen', type=str, nargs='?', const='1.8.5', default=None,
                      help='Add doxygen environment for documentation builds. Also adds other requirements needed for final docs images.')
+
+# Supported Python versions for maintained branches.
+_python_versions = ['3.6.10', '3.7.7', '3.8.2']
+parser.add_argument('--venvs', nargs='*', type=str, default=_python_versions,
+                    help='List of Python versions ("major.minor.patch") for which to install venvs. '
+                         'Default: {}'.format(' '.join(_python_versions)))
+
+
+def image_name(configuration: argparse.Namespace) -> str:
+    """Generate docker image name.
+
+    The configuration slug has the form::
+
+        <distro>-<version>-<compiler>-<major version>[-<gpusdk>-<version>][-oneapi-<version>][-<use case>]
+
+    Image name is prefixed by ``gromacs/ci-``
+
+    Arguments:
+        configuration: Docker image configuration as described by the parsed arguments.
+
+    """
+    elements = []
+    for distro in ('centos', 'ubuntu'):
+        version = getattr(configuration, distro, None)
+        if version is not None:
+            elements.append(distro + '-' + version)
+            break
+    for compiler in ('icc', 'llvm', 'gcc'):
+        version = getattr(configuration, compiler, None)
+        if version is not None:
+            elements.append(compiler + '-' + str(version).split('.')[0])
+            break
+    for gpusdk in ('cuda',):
+        version = getattr(configuration, gpusdk, None)
+        if version is not None:
+            elements.append(gpusdk + '-' + version)
+    if configuration.oneapi is not None:
+        elements.append('oneapi-' + configuration.oneapi)
+
+    # Check for special cases
+    # The following attribute keys indicate the image is built for the named
+    # special use case.
+    cases = {'doxygen': 'docs',
+             'tsan': 'tsan'}
+    for attr in cases:
+        value = getattr(configuration, attr, None)
+        if value is not None:
+            elements.append(cases[attr])
+    slug = '-'.join(elements)
+    return 'gromacs/ci-' + slug
+
+
+if __name__ == "__main__":
+    args = argparse.ArgumentParser(parents=[parser]).parse_args()
+    print(image_name(args))
diff --git a/admin/gitlab-ci/archive.gitlab-ci.yml b/admin/gitlab-ci/archive.gitlab-ci.yml

index 15877d35ae55bfa99e6ae77b8f641326f6626f1d..9d44b76a345ffdf3bea59dcb1e802eda3da8912f 100644 (file)
--- a/admin/gitlab-ci/archive.gitlab-ci.yml
+++ b/admin/gitlab-ci/archive.gitlab-ci.yml
@@ -5,9 +5,7 @@ prepare-release-version:
      - .variables:default
      - .rules:nightly-only-for-release
    cache: {}
-  # Docker image uploaded to dockerhub by user eriklindahl
-  # TODO: Get DockerFile for admin/dockerfiles
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    stage: configure-build
    variables:
      KUBERNETES_CPU_LIMIT: 1
@@ -27,7 +25,7 @@ regressiontests:prepare:
      - .variables:default
      - .rules:merge-and-post-merge-acceptance
    cache: {}
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    stage: configure-build
    variables:
      KUBERNETES_CPU_LIMIT: 1
@@ -71,7 +69,7 @@ regressiontests:package:
    cache: {}
    # Docker image uploaded to dockerhub by user eriklindahl
    # TODO: Get DockerFile for admin/dockerfiles
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    stage: release-package
    variables:
      KUBERNETES_CPU_LIMIT: 1
@@ -145,7 +143,7 @@ archive:configure:release:
      - .use-ccache
      - .before_script:default
      - .docs:build
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    variables:
      KUBERNETES_CPU_LIMIT: 4
      KUBERNETES_CPU_REQUEST: 2
@@ -208,7 +206,7 @@ archive:package:
      BUILD_DIR: build-package
  
  release-verify:
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    stage: release-verify
    extends:
      - .variables:default
diff --git a/admin/gitlab-ci/documentation.gitlab-ci.yml b/admin/gitlab-ci/documentation.gitlab-ci.yml

index 6402d8c43b4860aa61b6330b4c16986870e379f3..62b399c20a5ed19202438565c7b46ca6d8e2c47e 100644 (file)
--- a/admin/gitlab-ci/documentation.gitlab-ci.yml
+++ b/admin/gitlab-ci/documentation.gitlab-ci.yml
@@ -6,7 +6,7 @@
      - .gromacs:base:configure
      - .before_script:default
    # TODO (#3480) this should be organized more like the current documentation.py script
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    stage: configure-build
    cache: {}
    variables:
@@ -72,7 +72,7 @@
      - .before_script:default
      - .rules:nightly-only-for-release
    # TODO (#3480) this should be organized more like the current documentation.py script
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    stage: release-configure
    cache: {}
    variables:
@@ -182,7 +182,7 @@ docs:configure:
  
  .docs:build:
    # TODO (#3480) this should be organized more like the current documentation.py script
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    script:
      - cd $BUILD_DIR
      - cmake --build . --target gmx -- -j8
@@ -222,7 +222,7 @@ docs:build:
      - .gromacs:base:build
      - .before_script:default
    # TODO (#3480) this should be organized more like the current documentation.py script
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    cache: {}
    variables:
      KUBERNETES_CPU_LIMIT: 4
diff --git a/admin/gitlab-ci/gromacs.gitlab-ci.yml b/admin/gitlab-ci/gromacs.gitlab-ci.yml

index a7ec874ed8a2cc6808fd309c80573f39377cfb59..8ddef2991554c48400995b30a1dd2fe3b725577e 100644 (file)
--- a/admin/gitlab-ci/gromacs.gitlab-ci.yml
+++ b/admin/gitlab-ci/gromacs.gitlab-ci.yml
@@ -8,8 +8,9 @@ simple-build:
      - .use-clang:base
      - .rules:basic-push
    stage: pre-build
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      KUBERNETES_CPU_LIMIT: 8
      KUBERNETES_CPU_REQUEST: 4
      KUBERNETES_MEMORY_REQUEST: 8Gi
@@ -22,6 +23,7 @@ simple-build:
      COMPILER_MAJOR_VERSION: 9
      BUILD_DIR: simple-build
    script:
+    - CMAKE=${CMAKE:-$(which cmake)}
      - echo $CMAKE_COMPILER_SCRIPT
      - echo $CMAKE_EXTRA_OPTIONS
      - echo $CMAKE_SIMD_OPTIONS
@@ -38,7 +40,7 @@ simple-build:
        mkdir $BUILD_DIR ;
        fi
      - cd $BUILD_DIR
-    - cmake ..
+    - $CMAKE ..
        -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
        $CMAKE_COMPILER_SCRIPT
        $CMAKE_EXTRA_OPTIONS
@@ -52,12 +54,12 @@ simple-build:
        2>&1 | tee cmakeLog.log
      - awk '/CMake Warning/,/^--|^$/' cmakeLog.log | tee cmakeErrors.log
      - if [ -s cmakeErrors.log  ] ; then echo "Found CMake warning while processing build"; cat cmakeErrors.log ; exit 1; fi
-    - cmake --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
-    - cmake --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
+    - $CMAKE --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
+    - $CMAKE --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
      - awk '/warning/,/warning.*generated|^$/' buildLogFile.log testBuildLogFile.log
        | grep -v "CMake" | tee buildErrors.log || true
      - grep "cannot be built" buildLogFile.log testBuildLogFile.log | tee -a buildErrors.log || true
-    - cmake --build . --target install 2>&1 | tee installBuildLogFile.log
+    - $CMAKE --build . --target install 2>&1 | tee installBuildLogFile.log
      - if [ -s buildErrors.log ] ; then echo "Found compiler warning during build"; cat buildErrors.log; exit 1; fi
      - ctest -D ExperimentalTest --output-on-failure | tee ctestLog.log || true
      - awk '/The following tests FAILED/,/^Errors while running CTest|^$/'
@@ -95,6 +97,7 @@ simple-build:
      CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug"
      CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF"
    script:
+    - CMAKE=${CMAKE:-$(which cmake)}
      - echo $CMAKE_COMPILER_SCRIPT
      - echo $CMAKE_EXTRA_OPTIONS
      - echo $CMAKE_SIMD_OPTIONS
@@ -110,7 +113,7 @@ simple-build:
        mkdir $BUILD_DIR ;
        fi
      - cd $BUILD_DIR
-    - cmake ..
+    - $CMAKE ..
        -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
        $CMAKE_COMPILER_SCRIPT
        $CMAKE_EXTRA_OPTIONS
@@ -151,6 +154,7 @@ simple-build:
      RELEASE_SOURCE: release-source-from-tarball
      RELEASE_REGRESSIONTESTS: release-regressiontests-from-tarball
    script:
+    - CMAKE=${CMAKE:-$(which cmake)}
      - VERSION=`cat version.json |
        python3 -c "import json,sys; print(json.load(sys.stdin)['version'])"`
      - if [[ $GROMACS_RELEASE != "true" ]] ; then
@@ -222,20 +226,36 @@ gromacs:gcc-7:configure:
      - .use-gcc:base
      - .use-opencl
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=AVX2_256"
      CMAKE_EXTRA_OPTIONS: "-DGMX_EXTERNAL_CLFFT=ON -DGMX_INSTALL_LEGACY_API=ON"
      COMPILER_MAJOR_VERSION: 7
  
+gromacs:clang-8-cuda-10.0:configure:
+  extends:
+    - .gromacs:base:configure
+    - .use-clang:base
+    - .use-cuda
+    - .rules:merge-and-post-merge-acceptance
+  image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+    CMAKE_SIMD_OPTIONS: "-DGMX_USE_SIMD_KERNELS=off"
+    CMAKE_EXTRA_OPTIONS: "-DGMX_CLANG_CUDA=ON"
+    CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
+    COMPILER_MAJOR_VERSION: 8
+
  gromacs:gcc-8-cuda-11.0:configure:
    extends:
      - .gromacs:base:configure
      - .use-gcc:base
      - .use-cuda
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=SSE4.1"
      COMPILER_MAJOR_VERSION: 8
  
@@ -246,8 +266,9 @@ gromacs:gcc-8-cuda-11.0:configureMPI:
      - .use-cuda
      - .use-mpi
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=SSE4.1"
      COMPILER_MAJOR_VERSION: 8
  
@@ -256,8 +277,9 @@ gromacs:clang-TSAN:configure:
      - .gromacs:base:configure
      - .use-clang:base
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      COMPILER_MAJOR_VERSION: 8
      CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=TSAN"
  
@@ -276,8 +298,9 @@ gromacs:clang-9-mpi:configure:
     - .use-clang:base
     - .use-mpi
     - .rules:merge-requests
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      COMPILER_MAJOR_VERSION: 9
      CMAKE_PRECISION_OPTIONS: -DGMX_DOUBLE=ON
  
@@ -286,19 +309,21 @@ gromacs:clang-static-analyzer:configure:
      - .gromacs:base:configure
      - .use-clang:base
      - .rules:merge-requests
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
    variables:
-     CMAKE_COMPILER_SCRIPT: "-DCMAKE_CXX_COMPILER=/usr/local/libexec/c++-analyzer -DCMAKE_C_COMPILER=gcc"
-     CMAKE_EXTRA_OPTIONS: "-DGMX_CLANG_ANALYZER=ON -DGMX_OPENMP=OFF -DGMX_USE_RDTSCP=OFF -DGMX_FFT_LIBRARY=fftpack -DGMX_DEVELOPER_BUILD=ON"
-     CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=None"
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+    CMAKE_COMPILER_SCRIPT: "-DCMAKE_CXX_COMPILER=/usr/local/libexec/c++-analyzer -DCMAKE_C_COMPILER=gcc"
+    CMAKE_EXTRA_OPTIONS: "-DGMX_CLANG_ANALYZER=ON -DGMX_OPENMP=OFF -DGMX_USE_RDTSCP=OFF -DGMX_FFT_LIBRARY=fftpack -DGMX_DEVELOPER_BUILD=ON"
+    CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=None"
  
  gromacs:clang-ASAN:configure:
    extends:
      - .gromacs:base:configure
      - .use-clang:base
      - .rules:merge-requests
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      COMPILER_MAJOR_VERSION: 8
      CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=ASAN"
  
@@ -308,8 +333,9 @@ gromacs:oneapi-2021.1-beta09-opencl:configure:
     - .use-oneapi:base
     - .use-opencl
     - .rules:merge-requests
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
    variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
      COMPILER_MAJOR_VERSION: 2021
  
  gromacs:oneapi-2021.1-beta09-sycl:configure:
@@ -318,8 +344,9 @@ gromacs:oneapi-2021.1-beta09-sycl:configure:
     - .use-oneapi:base
     - .use-sycl
     - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
    variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
      COMPILER_MAJOR_VERSION: 2021
  
  gromacs:clang-UBSAN:configure:
@@ -327,8 +354,9 @@ gromacs:clang-UBSAN:configure:
      - .gromacs:base:configure
      - .use-clang:base
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      COMPILER_MAJOR_VERSION: 8
      CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=UBSAN"
  
@@ -339,8 +367,9 @@ gromacs:gcc-8-cuda-11.0:release:configure:
      - .use-mpi
      - .use-cuda
      - .rules:nightly-only-for-release
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      COMPILER_MAJOR_VERSION: 8
      RELEASE_BUILD_DIR: release-builds-gcc
      CMAKE_EXTRA_OPTIONS: "-DGMX_BUILD_MDRUN_ONLY=ON"
@@ -357,8 +386,9 @@ gromacs:gcc-7:release:configure:
      - .use-gcc:base
      - .use-opencl
      - .rules:nightly-only-for-release
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      COMPILER_MAJOR_VERSION: 7
      CMAKE_EXTRA_OPTIONS: "-DGMX_EXTERNAL_CLFFT=ON"
      RELEASE_BUILD_DIR: release-builds-gcc
@@ -374,8 +404,9 @@ gromacs:clang-9:release:configure:
      - .use-clang:base
      - .use-mpi
      - .rules:nightly-only-for-release
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      COMPILER_MAJOR_VERSION: 9
      RELEASE_BUILD_DIR: release-builds-clang
      CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=ON"
@@ -403,8 +434,9 @@ gromacs:oneapi-2021.1-beta09-opencl:release:configure:
     - .use-oneapi:base
     - .use-opencl
     - .rules:nightly-only-for-release
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
    variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
      COMPILER_MAJOR_VERSION: 2021
      RELEASE_BUILD_DIR: release-builds-oneapi
  
@@ -413,13 +445,14 @@ gromacs:oneapi-2021.1-beta09-opencl:release:configure:
  .gromacs:base:build:
    stage: build
    script:
+    - CMAKE=${CMAKE:-$(which cmake)}
      - cd $BUILD_DIR
-    - cmake --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
-    - cmake --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
+    - $CMAKE --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
+    - $CMAKE --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
      - awk '/warning/,/warning.*generated|^$/' buildLogFile.log testBuildLogFile.log
        | grep -v "CMake" | tee buildErrors.log || true
      - grep "cannot be built" buildLogFile.log testBuildLogFile.log | tee -a buildErrors.log || true
-    - cmake --build . --target install 2>&1 | tee installBuildLogFile.log
+    - $CMAKE --build . --target install 2>&1 | tee installBuildLogFile.log
      - if [ -s buildErrors.log ] ; then echo "Found compiler warning during build"; cat buildErrors.log; exit 1; fi
      - for file in `find . -mindepth 1 -name "*.o" ! -type l` ; do echo $file ; rm $file ; done 2>&1 > remove-build-objects.log
      - cd ..
@@ -432,8 +465,9 @@ gromacs:oneapi-2021.1-beta09-opencl:release:configure:
  .gromacs:static-analyzer-build:
    stage: build
    script:
+    - CMAKE=${CMAKE:-$(which cmake)}
      - cd $BUILD_DIR
-    - scan-build -o scan_html cmake --build . -- -j8 2>&1 | tee buildLogFile.log
+    - scan-build -o scan_html $CMAKE --build . -- -j8 2>&1 | tee buildLogFile.log
      - awk '/warning/,/warning.*generated|^$/' buildLogFile.log
        | grep -v "CMake" | tee buildErrors.log || true
      - grep "cannot be built" buildLogFile.log | tee -a buildErrors.log || true
@@ -451,10 +485,24 @@ gromacs:gcc-7:build:
      - .before_script:default
      - .use-ccache
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    needs:
      - job: gromacs:gcc-7:configure
-      artifacts: true
+
+gromacs:clang-8-cuda-10.0:build:
+  extends:
+    - .variables:default
+    - .gromacs:base:build
+    - .use-clang:base
+    - .use-ccache
+    - .rules:merge-and-post-merge-acceptance
+  image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+  needs:
+    - job: gromacs:clang-8-cuda-10.0:configure
  
  gromacs:gcc-8-cuda-11.0:build:
    extends:
@@ -463,7 +511,9 @@ gromacs:gcc-8-cuda-11.0:build:
      - .before_script:default
      - .use-ccache
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
+  variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
    needs:
      - job: gromacs:gcc-8-cuda-11.0:configure
  
@@ -474,7 +524,9 @@ gromacs:gcc-8-cuda-11.0:buildMPI:
      - .before_script:default
      - .use-ccache
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
+  variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
    needs:
      - job: gromacs:gcc-8-cuda-11.0:configureMPI
  
@@ -485,7 +537,9 @@ gromacs:clang-TSAN:build:
      - .use-clang:base
      - .use-ccache
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    needs:
      - job: gromacs:clang-TSAN:configure
  
@@ -496,7 +550,9 @@ gromacs:clang-ASAN:build:
      - .use-clang:base
      - .use-ccache
      - .rules:merge-requests
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    tags:
      - k8s-scilifelab
    needs:
@@ -509,7 +565,9 @@ gromacs:clang-UBSAN:build:
      - .use-clang:base
      - .use-ccache
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    tags:
      - k8s-scilifelab
    needs:
@@ -522,7 +580,9 @@ gromacs:clang-static-analyzer:build:
      - .use-clang:base
      - .use-ccache
      - .rules:merge-requests
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    tags:
      - k8s-scilifelab
    needs:
@@ -546,7 +606,9 @@ gromacs:oneapi-2021.1-beta09-opencl:build:
      - .use-ccache
      - .use-oneapi:base
      - .rules:merge-requests
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+  variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
    needs:
      - job: gromacs:oneapi-2021.1-beta09-opencl:configure
  
@@ -557,7 +619,9 @@ gromacs:oneapi-2021.1-beta09-sycl:build:
      - .use-ccache
      - .use-oneapi:base
      - .rules:merge-and-post-merge-acceptance
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+  variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
    needs:
      - job: gromacs:oneapi-2021.1-beta09-sycl:configure
  
@@ -568,7 +632,9 @@ gromacs:clang-9-mpi:build:
      - .before_script:default
      - .use-ccache
      - .rules:merge-requests
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
+  variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
    needs:
      - job: gromacs:clang-9-mpi:configure
  
@@ -581,8 +647,9 @@ gromacs:gcc-8-cuda-11.0:release:build:
      - .rules:nightly-only-for-release
    stage: release-build
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      BUILD_DIR: release-builds-gcc
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    needs:
      - job: gromacs:gcc-8-cuda-11.0:release:configure
  
@@ -595,8 +662,9 @@ gromacs:gcc-7:release:build:
      - .rules:nightly-only-for-release
    stage: release-build
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      BUILD_DIR: release-builds-gcc
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    needs:
      - job: gromacs:gcc-7:release:configure
  
@@ -609,8 +677,9 @@ gromacs:clang-9:release:build:
      - .rules:nightly-only-for-release
    stage: release-build
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      BUILD_DIR: release-builds-clang
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    needs:
      - job: gromacs:clang-9:release:configure
  
@@ -638,9 +707,10 @@ gromacs:oneapi-2021.1-beta09-opencl:release:build:
      - .rules:nightly-only-for-release
    stage: release-build
    variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
      BUILD_DIR: release-builds-oneapi
      COMPILER_MAJOR_VERSION: 2021
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
    needs:
      - job: gromacs:oneapi-2021.1-beta09-opencl:release:configure
  
@@ -656,6 +726,7 @@ gromacs:oneapi-2021.1-beta09-opencl:release:build:
    variables:
      CTEST_RUN_MODE: "ExperimentalTest"
    script:
+    - CMAKE=${CMAKE:-$(which cmake)}
      - cd $BUILD_DIR
      - export UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1:suppressions=$CI_PROJECT_DIR/admin/ubsan-suppressions.txt
      # Needed to run MPI enabled code in the docker images, until we set up different users
@@ -727,8 +798,9 @@ gromacs:gcc-7:test:
    extends:
      - .gromacs:base:test
      - .rules:merge-requests
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
      LD_LIBRARY_PATH: "/opt/rocm-3.5.0/opencl/lib"
@@ -737,12 +809,27 @@ gromacs:gcc-7:test:
    needs:
      - job: gromacs:gcc-7:build
  
+gromacs:clang-8-cuda-10.0:test:
+  extends:
+    - .gromacs:base:test
+    - .rules:post-merge-acceptance
+  image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+    KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
+    KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
+  tags:
+    - k8s-scilifelab
+  needs:
+    - job: gromacs:clang-8-cuda-10.0:build
+
  gromacs:gcc-8-cuda-11.0:test:
    extends:
      - .gromacs:base:test
      - .rules:merge-requests
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
    tags:
@@ -754,8 +841,9 @@ gromacs:gcc-8-cuda-11.0:test-gpucommupd:
    extends:
      - .gromacs:base:test
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
      GMX_GPU_DD_COMMS: 1
@@ -778,7 +866,9 @@ gromacs:clang-TSAN:test:
    extends:
      - .gromacs:base:test
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    needs:
      - job: gromacs:clang-TSAN:build
  
@@ -787,8 +877,9 @@ gromacs:clang-ASAN:test:
      - .gromacs:base:test
      - .use-clang:base
      - .rules:merge-requests
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      CTEST_RUN_MODE: "ExperimentalMemCheck"
    tags:
      - k8s-scilifelab
@@ -800,7 +891,9 @@ gromacs:clang-UBSAN:test:
      - .gromacs:base:test
      - .use-clang:base
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    tags:
      - k8s-scilifelab
    needs:
@@ -811,7 +904,9 @@ gromacs:oneapi-2021.1-beta09-opencl:test:
      - .gromacs:base:test
      - .use-oneapi:base
      - .rules:merge-requests
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+  variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
    needs:
      - job: gromacs:oneapi-2021.1-beta09-opencl:build
  
@@ -820,7 +915,9 @@ gromacs:oneapi-2021.1-beta09-sycl:test:
      - .gromacs:base:test
      - .use-oneapi:base
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+  variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
    needs:
      - job: gromacs:oneapi-2021.1-beta09-sycl:build
  
@@ -828,7 +925,9 @@ gromacs:clang-9-mpi:test:
    extends:
      - .gromacs:base:test
      - .rules:merge-requests
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
+  variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
    tags:
      - k8s-scilifelab
    needs:
@@ -838,8 +937,9 @@ gromacs:gcc-7:regressiontest:
    extends:
      - .gromacs:base:regressiontest
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
      REGRESSIONTEST_PME_RANK_NUMBER: 0
@@ -852,12 +952,32 @@ gromacs:gcc-7:regressiontest:
      - job: gromacs:gcc-7:build
      - job: regressiontests:prepare
  
+gromacs:clang-8-cuda-10.0:regressiontest:
+  extends:
+    - .gromacs:base:regressiontest
+    - .rules:post-merge-acceptance
+  image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+    KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
+    KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
+    REGRESSIONTEST_PME_RANK_NUMBER: 0
+    REGRESSIONTEST_TOTAL_RANK_NUMBER: 2
+    REGRESSIONTEST_OMP_RANK_NUMBER: 1
+  tags:
+    - k8s-scilifelab
+  needs:
+    - job: gromacs:clang-8-cuda-10.0:build
+    - job: regressiontests:prepare
+
+
  gromacs:gcc-8-cuda-11.0:regressiontest:
    extends:
      - .gromacs:base:regressiontest
      - .rules:merge-requests
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
      REGRESSIONTEST_PME_RANK_NUMBER: 0
@@ -873,8 +993,9 @@ gromacs:gcc-8-cuda-11.0:regressiontest-gpucommupd-tMPI:
    extends:
      - .gromacs:base:regressiontest
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 2
      REGRESSIONTEST_PME_RANK_NUMBER: 0
@@ -894,12 +1015,39 @@ gromacs:gcc-8-cuda-11.0:regressiontest-gpucommupd-tMPI:
      when: always
      expire_in: 1 week
  
-gromacs:gcc-8-cuda-11.0:regressiontest-gpucommupd-MPI:
+# Regression tests run with GMX_FORCE_UPDATE_DEFAULT_GPU set, requesting two nvidia.com/gpu resources.
+gromacs:gcc-8-cuda-11.0:regressiontest-upd-tMPI:
    extends:
      - .gromacs:base:regressiontest
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  # Same image and CMake as gromacs:gcc-8-cuda-11.0:build, the job this one depends on.
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
+    KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
+    KUBERNETES_EXTENDED_RESOURCE_LIMIT: 2
+    REGRESSIONTEST_PME_RANK_NUMBER: 0
+    REGRESSIONTEST_TOTAL_RANK_NUMBER: 4
+    REGRESSIONTEST_OMP_RANK_NUMBER: 1
+    GMX_FORCE_UPDATE_DEFAULT_GPU: 1
+  tags:
+    - k8s-scilifelab
+  needs:
+    - job: gromacs:gcc-8-cuda-11.0:build
+    - job: regressiontests:prepare
+  artifacts:
+    paths:
+      - regressiontests
+    when: always
+    expire_in: 1 week
+
+gromacs:gcc-8-cuda-11.0:regressiontest-gpucommupd-MPI:
+  extends:
+    - .gromacs:base:regressiontest
+    - .rules:post-merge-acceptance
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
+  variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 2
      REGRESSIONTEST_PME_RANK_NUMBER: 0
@@ -935,7 +1080,9 @@ gromacs:clang-TSAN:regressiontest:
    extends:
      - .gromacs:base:regressiontest
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    tags:
      - k8s-scilifelab
    needs:
@@ -947,7 +1094,9 @@ gromacs:clang-ASAN:regressiontest:
      - .gromacs:base:regressiontest
      - .use-clang:base
      - .rules:merge-requests
-  image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+  image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+  variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
    tags:
      - k8s-scilifelab
    needs:
@@ -958,8 +1107,9 @@ gromacs:clang-9:regressiontest:
    extends:
      - .gromacs:base:regressiontest
      - .rules:merge-requests
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      REGRESSIONTEST_DOUBLE: "-double"
      REGRESSIONTEST_PARALLEL: "-np"
    tags:
@@ -973,7 +1123,9 @@ gromacs:oneapi-2021.1-beta09-opencl:regressiontest:
      - .gromacs:base:regressiontest
      - .use-oneapi:base
      - .rules:merge-requests
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+  variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
    needs:
      - job: gromacs:oneapi-2021.1-beta09-opencl:build
      - job: regressiontests:prepare
@@ -983,7 +1135,9 @@ gromacs:oneapi-2021.1-beta09-sycl:regressiontest:
      - .gromacs:base:regressiontest
      - .use-oneapi:base
      - .rules:post-merge-acceptance
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+  variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
    needs:
      - job: gromacs:oneapi-2021.1-beta09-sycl:build
      - job: regressiontests:prepare
@@ -993,8 +1147,9 @@ gromacs:gcc-8-cuda-11.0:release:test:
      - .gromacs:base:test
      - .rules:nightly-only-for-release
    stage: release-tests
-  image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
      BUILD_DIR: release-builds-gcc
@@ -1009,8 +1164,9 @@ gromacs:gcc-7:release:test:
      - .gromacs:base:test
      - .rules:nightly-only-for-release
    stage: release-tests
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      BUILD_DIR: release-builds-gcc
      KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
@@ -1026,8 +1182,9 @@ gromacs:clang-9:release:test:
      - .gromacs:base:test
      - .rules:nightly-only-for-release
    stage: release-tests
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      BUILD_DIR: release-builds-clang
    needs:
      - job: gromacs:clang-9:release:configure
@@ -1055,8 +1212,9 @@ gromacs:oneapi-2021.1-beta09-opencl:release:test:
      - .use-oneapi:base
      - .rules:nightly-only-for-release
    stage: release-tests
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
    variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
      BUILD_DIR: release-builds-oneapi
    needs:
      - job: gromacs:oneapi-2021.1-beta09-opencl:release:configure
@@ -1067,8 +1225,9 @@ gromacs:gcc-7:release:regressiontest:
      - .gromacs:base:regressiontest
      - .rules:nightly-only-for-release
    stage: release-tests
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    variables:
+    CMAKE: /usr/local/cmake-3.13.0/bin/cmake
      BUILD_DIR: release-builds-gcc
      KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
      KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
@@ -1084,8 +1243,9 @@ gromacs:clang-9:release:regressiontest:
      - .gromacs:base:regressiontest
      - .rules:nightly-only-for-release
    stage: release-tests
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      BUILD_DIR: release-builds-clang
      REGRESSIONTEST_DOUBLE: "-double"
      REGRESSIONTEST_PARALLEL: "-np"
@@ -1120,8 +1280,9 @@ gromacs:oneapi-2021.1-beta09-opencl:release:regressiontest:
      - .use-oneapi:base
      - .rules:nightly-only-for-release
    stage: release-tests
-  image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
    variables:
+    CMAKE: /usr/local/cmake-3.17.2/bin/cmake
      BUILD_DIR: release-builds-oneapi
      REGRESSIONTEST_PME_RANK_NUMBER: 0
      REGRESSIONTEST_TOTAL_RANK_NUMBER: 2
diff --git a/admin/gitlab-ci/lint.gitlab-ci.yml b/admin/gitlab-ci/lint.gitlab-ci.yml

index c750d2ca7315aeb8a72d1d1d26d9ebff90a62373..548b9ec9a733e53c2ca589c5f18ae4f5e2fcc877 100644 (file)
--- a/admin/gitlab-ci/lint.gitlab-ci.yml
+++ b/admin/gitlab-ci/lint.gitlab-ci.yml
@@ -5,8 +5,9 @@ clang-tidy:configure-push:
      - .gromacs:base:configure
      - .use-clang:base
      - .rules:basic-push
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      COMPILER_MAJOR_VERSION: 9
      BUILD_DIR: build-clang-tidy
      CMAKE_EXTRA_OPTIONS: -DCLANG_TIDY=clang-tidy-$COMPILER_MAJOR_VERSION -DGMX_CLANG_TIDY=ON -DGMX_COMPILER_WARNINGS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
@@ -16,8 +17,9 @@ clang-tidy:configure-schedule:
      - .gromacs:base:configure
      - .use-clang:base
      - .rules:nightly-not-for-release
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      COMPILER_MAJOR_VERSION: 9
      BUILD_DIR: build-clang-tidy
      CMAKE_EXTRA_OPTIONS: -DCLANG_TIDY=clang-tidy-$COMPILER_MAJOR_VERSION -DGMX_CLANG_TIDY=ON -DGMX_COMPILER_WARNINGS=ON
@@ -29,10 +31,11 @@ clang-tidy:build:
      - .variables:default
      - .rules:nightly-not-for-release
    stage: source-check
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    needs:
      - job: clang-tidy:configure-schedule
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      BUILD_DIR: build-clang-tidy
  
  clang-tidy:test:
@@ -41,10 +44,11 @@ clang-tidy:test:
      - .variables:default
      - .rules:basic-push
    stage: source-check
-  image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-llvm-9
    needs:
      - job: clang-tidy:configure-push
    variables:
+    CMAKE: /usr/local/cmake-3.15.7/bin/cmake
      COMPILER_MAJOR_VERSION: 9
      BUILD_DIR: build-clang-tidy
      EXTRA_INSTALLS: clang-tidy-$COMPILER_MAJOR_VERSION
@@ -77,7 +81,7 @@ clang-format:
      - .rules:basic-push
    cache: {}
    stage: pre-build
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    variables:
      COMPILER_MAJOR_VERSION: 7
      KUBERNETES_CPU_LIMIT: 1
@@ -108,7 +112,7 @@ copyright-check:
      - .rules:basic-push
    cache: {}
    stage: pre-build
-  image: gromacs/ci-docs-llvm-master
+  image: gromacs/ci-ubuntu-18.04-llvm-7-docs
    variables:
      KUBERNETES_CPU_LIMIT: 1
      KUBERNETES_CPU_REQUEST: 1
@@ -153,8 +157,9 @@ check-source:
      KUBERNETES_MEMORY_REQUEST: 2Gi
      BUILD_DIR: build-docs
    script:
+    - CMAKE=${CMAKE:-$(which cmake)}
      - cd $BUILD_DIR
-    - cmake --build . --target check-source
+    - $CMAKE --build . --target check-source
      - awk '/warning.*include style.*order/,/You can use.*rst|^$/' docs/doxygen/check-source.log | tee doxygenError.txt || true
      - awk '/warning:.*includes/,/unnecessarily|^$/' docs/doxygen/check-source.log | tee -a doxygenError.txt || true
      - awk '/Traceback/,/.*rror|^$/' docs/doxygen/doxygen*log docs/doxygen/check-source.log | tee -a doxygenError.txt || true
diff --git a/admin/gitlab-ci/python-gmxapi.gitlab-ci.yml b/admin/gitlab-ci/python-gmxapi.gitlab-ci.yml

index 5715a736a136f11022ee2e128c2f7e720ded8e28..8e6b52a1d0d1a976e2b64f042841bed948b5407e 100644 (file)
--- a/admin/gitlab-ci/python-gmxapi.gitlab-ci.yml
+++ b/admin/gitlab-ci/python-gmxapi.gitlab-ci.yml
@@ -78,7 +78,7 @@ gmxapi-0.1:clang-8:py-3.8.2:
    extends:
      - .variables:default
      - .use-clang:base
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    stage: test
    variables:
      KUBERNETES_CPU_LIMIT: 2
diff --git a/admin/gitlab-ci/sample_restraint.gitlab-ci.yml b/admin/gitlab-ci/sample_restraint.gitlab-ci.yml

index 25d2fc6734400123abe52aac913dfcbc5a7a8af6..27e56e2296a0578f14ad831ff77b3f5fa048a22b 100644 (file)
--- a/admin/gitlab-ci/sample_restraint.gitlab-ci.yml
+++ b/admin/gitlab-ci/sample_restraint.gitlab-ci.yml
@@ -3,7 +3,7 @@
    extends:
      - .variables:default
      - .use-clang:base
-  image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+  image: gromacs/ci-ubuntu-18.04-gcc-7
    stage: test
    variables:
      KUBERNETES_CPU_LIMIT: 2
@@ -15,7 +15,7 @@
      EXTRA_INSTALLS: "curl libbz2-dev libffi-dev liblzma-dev libncurses5-dev libncursesw5-dev libreadline-dev libsqlite3-dev libssl-dev llvm python-openssl tk-dev zlib1g-dev"
    script:
      - source $INSTALL_DIR/bin/GMXRC
-    - source $VENVPATH/bin/activate && INSTALL_DIR=$PWD/$INSTALL_DIR OMP_NUM_THREADS=1 bash admin/ci-scripts/build-and-test-sample_restraint-2020.sh
+    - source $VENVPATH/bin/activate && INSTALL_DIR=$PWD/$INSTALL_DIR OMP_NUM_THREADS=1 bash admin/ci-scripts/build-and-test-sample_restraint-2021.sh
    artifacts:
      reports:
        junit:
diff --git a/api/gmxapi/CMakeLists.txt b/api/gmxapi/CMakeLists.txt

index 81823a89c1c6ebd13e345af0ee8b0b17d581777c..12b2fd4226304f7e2f841b7294aa3edb544dc618 100644 (file)
--- a/api/gmxapi/CMakeLists.txt
+++ b/api/gmxapi/CMakeLists.txt
@@ -66,6 +66,13 @@ if (GMX_LIB_MPI)
      # GROMACS is built against an MPI library.
      # Clarification should be possible with resolution of #3672.
      set(_gmx_mpi_type "library")
+    # Ref https://cmake.org/cmake/help/v3.13/module/FindMPI.html#variables-for-using-mpi
+    find_package(MPI COMPONENTS C)
+    if (MPI_C_FOUND)
+        target_link_libraries(gmxapi PRIVATE MPI::MPI_C)
+    else()
+        message(FATAL_ERROR "Building gmxapi for MPI-enabled GROMACS, but no MPI toolchain found.")
+    endif ()
  elseif(GMX_THREAD_MPI)
      # GROMACS is built with its internal thread-MPI implementation.
      set(_gmx_mpi_type "tmpi")
diff --git a/api/gmxapi/cpp/context.cpp b/api/gmxapi/cpp/context.cpp

index 903f3ce56d6ba6fcc24eff3359e56ffb1821928b..6d11120eae6c55e520374f3ba4a21636f41f7db0 100644 (file)
--- a/api/gmxapi/cpp/context.cpp
+++ b/api/gmxapi/cpp/context.cpp
@@ -55,6 +55,8 @@
  #include "gromacs/commandline/filenm.h"
  #include "gromacs/commandline/pargs.h"
  #include "gromacs/gmxlib/network.h"
+#include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/hw_info.h"
  #include "gromacs/mdlib/stophandler.h"
  #include "gromacs/mdrunutility/logging.h"
  #include "gromacs/mdrunutility/multisim.h"
@@ -65,7 +67,7 @@
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxmpi.h"
  #include "gromacs/utility/init.h"
-#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
  
  #include "gmxapi/mpi/resourceassignment.h"
  #include "gmxapi/exceptions.h"
@@ -229,7 +231,9 @@ Context createContext()
  }
  
  ContextImpl::ContextImpl(MpiContextManager&& mpi) noexcept(std::is_nothrow_constructible_v<gmx::LegacyMdrunOptions>) :
-    mpi_(std::move(mpi))
+    mpi_(std::move(mpi)),
+    hardwareInformation_(gmx_detect_hardware(
+            gmx::PhysicalNodeCommunicator(mpi_.communicator(), gmx_physicalnode_id_hash())))
  {
      // Confirm our understanding of the MpiContextManager invariant.
      GMX_ASSERT(mpi_.communicator() == MPI_COMM_NULL ? !GMX_LIB_MPI : GMX_LIB_MPI,
@@ -315,19 +319,27 @@ std::shared_ptr<Session> ContextImpl::launch(const Workflow& work)
          auto mdModules = std::make_unique<MDModules>();
  
          const char* desc[] = { "gmxapi placeholder text" };
-        if (options_.updateFromCommandLine(argc, argv.data(), desc) == 0)
+
+        // LegacyMdrunOptions needs to be kept alive for the life of ContextImpl,
+        // so we use a data member for now.
+        gmx::LegacyMdrunOptions& options = options_;
+        if (options.updateFromCommandLine(argc, argv.data(), desc) == 0)
          {
              return nullptr;
          }
  
          ArrayRef<const std::string> multiSimDirectoryNames =
-                opt2fnsIfOptionSet("-multidir", ssize(options_.filenames), options_.filenames.data());
+                opt2fnsIfOptionSet("-multidir", ssize(options.filenames), options.filenames.data());
+
  
          // The SimulationContext is necessary with gmxapi so that
          // resources owned by the client code can have suitable
          // lifetime. The gmx wrapper binary uses the same infrastructure,
          // but the lifetime is now trivially that of the invocation of the
          // wrapper binary.
+        //
+        // For now, this should match the communicator used for hardware
+        // detection. There's no way to assert this is true.
          auto communicator = mpi_.communicator();
          // Confirm the precondition for simulationContext().
          GMX_ASSERT(communicator == MPI_COMM_NULL ? !GMX_LIB_MPI : GMX_LIB_MPI,
@@ -335,39 +347,40 @@ std::shared_ptr<Session> ContextImpl::launch(const Workflow& work)
          SimulationContext simulationContext(communicator, multiSimDirectoryNames);
  
  
-        StartingBehavior startingBehavior = StartingBehavior::NewSimulation;
-        LogFilePtr       logFileGuard     = nullptr;
-        gmx_multisim_t*  ms               = simulationContext.multiSimulation_.get();
-        std::tie(startingBehavior, logFileGuard) =
-                handleRestart(findIsSimulationMasterRank(ms, simulationContext.simulationCommunicator_),
-                              communicator, ms, options_.mdrunOptions.appendingBehavior,
-                              ssize(options_.filenames), options_.filenames.data());
+        StartingBehavior startingBehavior        = StartingBehavior::NewSimulation;
+        LogFilePtr       logFileGuard            = nullptr;
+        gmx_multisim_t*  ms                      = simulationContext.multiSimulation_.get();
+        std::tie(startingBehavior, logFileGuard) = handleRestart(
+                findIsSimulationMasterRank(ms, simulationContext.simulationCommunicator_),
+                simulationContext.simulationCommunicator_, ms, options.mdrunOptions.appendingBehavior,
+                ssize(options.filenames), options.filenames.data());
  
          auto builder = MdrunnerBuilder(std::move(mdModules),
                                         compat::not_null<SimulationContext*>(&simulationContext));
-        builder.addSimulationMethod(options_.mdrunOptions, options_.pforce, startingBehavior);
-        builder.addDomainDecomposition(options_.domdecOptions);
+        builder.addHardwareDetectionResult(hardwareInformation_.get());
+        builder.addSimulationMethod(options.mdrunOptions, options.pforce, startingBehavior);
+        builder.addDomainDecomposition(options.domdecOptions);
          // \todo pass by value
-        builder.addNonBonded(options_.nbpu_opt_choices[0]);
+        builder.addNonBonded(options.nbpu_opt_choices[0]);
          // \todo pass by value
-        builder.addElectrostatics(options_.pme_opt_choices[0], options_.pme_fft_opt_choices[0]);
-        builder.addBondedTaskAssignment(options_.bonded_opt_choices[0]);
-        builder.addUpdateTaskAssignment(options_.update_opt_choices[0]);
-        builder.addNeighborList(options_.nstlist_cmdline);
-        builder.addReplicaExchange(options_.replExParams);
+        builder.addElectrostatics(options.pme_opt_choices[0], options.pme_fft_opt_choices[0]);
+        builder.addBondedTaskAssignment(options.bonded_opt_choices[0]);
+        builder.addUpdateTaskAssignment(options.update_opt_choices[0]);
+        builder.addNeighborList(options.nstlist_cmdline);
+        builder.addReplicaExchange(options.replExParams);
          // Need to establish run-time values from various inputs to provide a resource handle to Mdrunner
-        builder.addHardwareOptions(options_.hw_opt);
+        builder.addHardwareOptions(options.hw_opt);
  
          // \todo File names are parameters that should be managed modularly through further factoring.
-        builder.addFilenames(options_.filenames);
+        builder.addFilenames(options.filenames);
          // TODO: Remove `s` and `-cpi` from LegacyMdrunOptions before launch(). #3652
-        auto simulationInput = makeSimulationInput(options_);
+        auto simulationInput = makeSimulationInput(options);
          builder.addInput(simulationInput);
  
          // Note: The gmx_output_env_t life time is not managed after the call to parse_common_args.
          // \todo Implement lifetime management for gmx_output_env_t.
          // \todo Output environment should be configured outside of Mdrunner and provided as a resource.
-        builder.addOutputEnvironment(options_.oenv);
+        builder.addOutputEnvironment(options.oenv);
          builder.addLogFile(logFileGuard.get());
  
          // Note, creation is not mature enough to be exposed in the external API yet.
diff --git a/api/gmxapi/cpp/context_impl.h b/api/gmxapi/cpp/context_impl.h

index 4f9296d379b59c52ffa2318e5ca110c782c6bb36..8091303a56b7e7cb5ce4a2b5025f10df42c480bf 100644 (file)
--- a/api/gmxapi/cpp/context_impl.h
+++ b/api/gmxapi/cpp/context_impl.h
@@ -51,6 +51,8 @@
  #include "gmxapi/context.h"
  #include "gmxapi/session.h"
  
+struct gmx_hw_info_t;
+
  namespace gmxapi
  {
  
@@ -277,6 +279,12 @@ public:
       */
      const MpiContextManager mpi_;
  
+    /*! \brief Owning handle to the results of the hardware detection.
+     *
+     * The hardware is detected across the whole environment described
+     * by \c mpi_ */
+    std::unique_ptr<gmx_hw_info_t> hardwareInformation_;
+
  private:
      /*!
       * \brief Basic constructor.
diff --git a/api/legacy/include/gromacs/fileio/filetypes.h b/api/legacy/include/gromacs/fileio/filetypes.h

index b3bad3c7838ea7bf09a7bc4a56ccfaa97b6ff813..aefcdab357318aaeac438dc0197724bce7590c58 100644 (file)
--- a/api/legacy/include/gromacs/fileio/filetypes.h
+++ b/api/legacy/include/gromacs/fileio/filetypes.h
@@ -84,6 +84,7 @@ enum GromacsFileType
      efCUB,
      efXPM,
      efRND,
+    efCSV,
      efNR
  };
  
diff --git a/api/nblib/CMakeLists.txt b/api/nblib/CMakeLists.txt

index d41c856c204ae262c6d902b27c250808904a8114..4c75ae58ad31abbdbdd180f75549b341a1309a1b 100644 (file)
--- a/api/nblib/CMakeLists.txt
+++ b/api/nblib/CMakeLists.txt
@@ -88,7 +88,19 @@ add_custom_target(check-nblib
          COMMENT "Running nblib tests"
          USES_TERMINAL VERBATIM)
  
-add_library(nblib SHARED "")
+set(NBLIB_MAJOR 0)
+set(NBLIB_MINOR 1)
+set(NBLIB_RELEASE ${NBLIB_MAJOR}.${NBLIB_MINOR}.0)
+
+add_library(nblib)
+set_target_properties(nblib PROPERTIES
+        VERSION_MAJOR ${NBLIB_MAJOR}
+        VERSION_MINOR ${NBLIB_MINOR}
+        SOVERSION ${NBLIB_MAJOR}
+        RELEASE ${NBLIB_RELEASE}
+        VERSION ${NBLIB_RELEASE}
+        LINKER_LANGUAGE CXX
+        OUTPUT_NAME "nblib")
  
  target_sources(nblib
          PRIVATE
@@ -107,12 +119,6 @@ target_sources(nblib
  
  gmx_target_compile_options(nblib)
  
-set_target_properties(nblib
-        PROPERTIES
-        LINKER_LANGUAGE CXX
-        OUTPUT_NAME "nblib"
-        )
-
  target_link_libraries(nblib PRIVATE libgromacs)
  target_include_directories(nblib PRIVATE ${PROJECT_SOURCE_DIR}/api)
  include_directories(BEFORE ${CMAKE_SOURCE_DIR}/api)
@@ -139,12 +145,14 @@ if(GMX_INSTALL_NBLIB_API)
              kerneloptions.h
              nblib.h
              particletype.h
+            ppmap.h
              simulationstate.h
              topology.h
              topologyhelpers.h
              DESTINATION include/nblib)
  endif()
  
+add_subdirectory(listed_forces)
  add_subdirectory(samples)
  add_subdirectory(util)
  
diff --git a/api/nblib/forcecalculator.cpp b/api/nblib/forcecalculator.cpp

index 61dc95af31d8506933d52ddf40498d329a496ac5..5287d4af7de6b22610e7a5b1e343617b8dd88a71 100644 (file)
--- a/api/nblib/forcecalculator.cpp
+++ b/api/nblib/forcecalculator.cpp
@@ -40,6 +40,7 @@
   * \author Prashanth Kanduri <kanduri@cscs.ch>
   * \author Sebastian Keller <keller@cscs.ch>
   */
+#include "nblib/exception.h"
  #include "nblib/forcecalculator.h"
  #include "nblib/gmxcalculator.h"
  #include "nblib/gmxsetup.h"
@@ -56,7 +57,12 @@ ForceCalculator::ForceCalculator(const SimulationState& system, const NBKernelOp
  
  void ForceCalculator::compute(gmx::ArrayRef<const Vec3> coordinates, gmx::ArrayRef<Vec3> forces)
  {
-    return gmxForceCalculator_->compute(coordinates, forces);
+    if (coordinates.size() != forces.size())
+    {
+        throw InputException("Coordinates array and force buffer size mismatch");
+    }
+
+    gmxForceCalculator_->compute(coordinates, forces);
  }
  
  void ForceCalculator::updatePairList(gmx::ArrayRef<const int> particleInfoAllVdW,
diff --git a/api/nblib/forcecalculator.h b/api/nblib/forcecalculator.h

index 7083da541817db7f6a6c1b300814fb71da474d7a..ca7173aa2c3677a916bf9a1586c6efa1a0a93f2f 100644 (file)
--- a/api/nblib/forcecalculator.h
+++ b/api/nblib/forcecalculator.h
@@ -66,7 +66,7 @@ class GmxForceCalculator;
   * costly to create this object since much of the SimulationState and NBKernelOptions has to be
   * passed to the gromacs backend. However, once constructed, compute can be called repeatedly only
   * paying the cost of the actual nonbonded force calculation. Repeated calls to compute on the same
- * coordinated will always return the same forces (within precision), so the user must update the
+ * coordinates will always return the same forces (within precision), so the user must update the
   * positions using the forces generated here to advance a simulation. If the coordinates move
   * sufficiently far from their positions at construction time, the efficiency of the calculation
   * will suffer. To alleviate this, the user can call updatePairList.
diff --git a/api/nblib/gmxcalculator.cpp b/api/nblib/gmxcalculator.cpp

index 30e2594e4005ea3464f862603b433a2935268818..d49b1d1cb56cb692381338695dfbcdaa24e1ea22 100644 (file)
--- a/api/nblib/gmxcalculator.cpp
+++ b/api/nblib/gmxcalculator.cpp
@@ -73,9 +73,6 @@ void GmxForceCalculator::compute(gmx::ArrayRef<const gmx::RVec> coordinateInput,
      // update the coordinates in the backend
      nbv_->convertCoordinates(gmx::AtomLocality::Local, false, coordinateInput);
  
-    // set forces to zero
-    std::fill(forceOutput.begin(), forceOutput.end(), gmx::RVec{ 0, 0, 0 });
-
      nbv_->dispatchNonbondedKernel(gmx::InteractionLocality::Local, *interactionConst_, *stepWork_,
                                    enbvClearFYes, *forcerec_, enerd_.get(), nrnb_.get());
  
diff --git a/api/nblib/gmxcalculator.h b/api/nblib/gmxcalculator.h

index 610983ccf72acda9861c0416ed36c31834f0e938..b5cd04382d3d3eb78a86febbe5fbee9a0acd1879 100644 (file)
--- a/api/nblib/gmxcalculator.h
+++ b/api/nblib/gmxcalculator.h
@@ -71,6 +71,18 @@ class NbvSetupUtil;
  class SimulationState;
  struct NBKernelOptions;
  
+/*! \brief GROMACS non-bonded force calculation backend
+ *
+ * This class hides the various GROMACS data structures and their interplay
+ * from the NBLIB user. The class is a private member of the ForceCalculator and
+ * is not intended for the public interface.
+ *
+ * Handles the task of storing the simulation problem description using the internal
+ * representation used within GROMACS. It currently supports short range non-bonded
+ * interactions (PP) on a single node CPU.
+ *
+ */
+
  class GmxForceCalculator final
  {
  public:
@@ -87,6 +99,7 @@ public:
                              const Box&                     box);
  
  private:
+    //! Friend to allow setting up private members in this class
      friend class NbvSetupUtil;
  
      //! Non-Bonded Verlet object for force calculation
diff --git a/api/nblib/gmxsetup.cpp b/api/nblib/gmxsetup.cpp

index 88299e552f247c2ab400875c924cb17a0f47085f..8501ff062a6ce9c22bf7cf02679a88fd60bc443c 100644 (file)
--- a/api/nblib/gmxsetup.cpp
+++ b/api/nblib/gmxsetup.cpp
@@ -196,7 +196,8 @@ void NbvSetupUtil::setupNbnxmInstance(const size_t numParticleTypes, const NBKer
  
      // Put everything together
      auto nbv = std::make_unique<nonbonded_verlet_t>(std::move(pairlistSets), std::move(pairSearch),
-                                                    std::move(atomData), kernelSetup, nullptr, nullptr);
+                                                    std::move(atomData), kernelSetup, nullptr,
+                                                    nullWallcycle);
  
      // Needs to be called with the number of unique ParticleTypes
      nbnxn_atomdata_init(gmx::MDLogger(), nbv->nbat.get(), kernelSetup.kernelType, combinationRule,
diff --git a/api/nblib/gmxsetup.h b/api/nblib/gmxsetup.h

index 628172ca99fe5a1cac30dc4b55620b0338a30693..1e5d81b25881dc4ba35f15e3bd75c2b3240d93d2 100644 (file)
--- a/api/nblib/gmxsetup.h
+++ b/api/nblib/gmxsetup.h
@@ -59,6 +59,17 @@ struct KernelSetup;
  namespace nblib
  {
  
+/*! \brief Sets up the GROMACS data structures for the non-bonded force calculator
+ *
+ * This data structure initializes the GmxForceCalculator object which internally
+ * contains various objects needed to perform non-bonded force calculations using
+ * the internal representation for the problem as required for GROMACS.
+ *
+ * The public functions of this class basically translate the problem description
+ * specified by the user in NBLIB. This ultimately returns the GmxForceCalculator
+ * object which is used by the ForceCalculator object in the user-facing library.
+ *
+ */
  class NbvSetupUtil final
  {
  public:
@@ -99,6 +110,7 @@ public:
      //! Sets up t_forcerec object on the GmxForceCalculator
      void setupForceRec(const matrix& box);
  
+    //! Returns a unique pointer to a GmxForceCalculator object
      std::unique_ptr<GmxForceCalculator> getGmxForceCalculator()
      {
          return std::move(gmxForceCalculator_);
@@ -115,6 +127,16 @@ private:
      std::unique_ptr<GmxForceCalculator> gmxForceCalculator_;
  };
  
+/*! \brief Calls the setup utilities needed to initialize a GmxForceCalculator object
+ *
+ * The GmxSetupDirector encapsulates the multi-stage setup of the GmxForceCalculator which
+ * is done using the public functions of the NbvSetupUtil. This separation ensures that the
+ * NbvSetupUtil object is temporary in scope. The function definition makes it easy for the
+ * developers to follow the sequence of calls and the dataflow involved in setting up
+ * the non-bonded force calculation backend. This is the only function needed to be called
+ * from the ForceCalculator during construction.
+ *
+ */
  class GmxSetupDirector
  {
  public:
diff --git a/api/nblib/integrator.cpp b/api/nblib/integrator.cpp

index 970eea0c6a9a8acda040e23e64da5c80e6ca7eec..a80338d5ffe939b093b6c983d786dec750a5b7e6 100644 (file)
--- a/api/nblib/integrator.cpp
+++ b/api/nblib/integrator.cpp
@@ -49,7 +49,7 @@
  
  namespace nblib
  {
-// NOLINTNEXTLINE(performance-unnecessary-value-param)
+
  LeapFrog::LeapFrog(const Topology& topology, const Box& box) : box_(box)
  {
      inverseMasses_.resize(topology.numParticles());
diff --git a/api/nblib/listed_forces/CMakeLists.txt b/api/nblib/listed_forces/CMakeLists.txt

new file mode 100644 (file)

index 0000000..c4e8f33
--- /dev/null
+++ b/api/nblib/listed_forces/CMakeLists.txt
@@ -0,0 +1,51 @@
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2020, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+#
+# \author Victor Holanda <victor.holanda@cscs.ch>
+# \author Joe Jordan <ejjordan@kth.se>
+# \author Prashanth Kanduri <kanduri@cscs.ch>
+# \author Sebastian Keller <keller@cscs.ch>
+#
+
+if(GMX_INSTALL_NBLIB_API)
+    install(FILES
+            bondtypes.h
+            calculator.h
+            definitions.h
+            DESTINATION include/nblib)
+endif()
+
+if(BUILD_TESTING)
+    add_subdirectory(tests)
+endif()
diff --git a/api/nblib/listed_forces/bondtypes.h b/api/nblib/listed_forces/bondtypes.h

new file mode 100644 (file)

index 0000000..676ee94
--- /dev/null
+++ b/api/nblib/listed_forces/bondtypes.h
@@ -0,0 +1,375 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \inpublicapi \file
+ * \brief
+ * Implements nblib supported bondtypes
+ *
+ * We choose to forward comparison operations to the
+ * corresponding std::tuple comparison operations.
+ * In order to do that without temporary copies,
+ * we employ std::tie, which requires lvalues as input.
+ * For this reason, bond type parameter getters are implemented
+ * with a const lvalue reference return.
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ */
+#ifndef NBLIB_LISTEDFORCES_BONDTYPES_H
+#define NBLIB_LISTEDFORCES_BONDTYPES_H
+
+#include <array>
+
+#include "nblib/particletype.h"
+#include "nblib/ppmap.h"
+#include "nblib/util/user.h"
+
+namespace nblib
+{
+using Name          = std::string;
+using ForceConstant = real;
+using EquilDistance = real;
+using Exponent      = real;
+
+using Degrees = StrongType<real, struct DegreeParameter>;
+using Radians = StrongType<real, struct RadianParameter>;
+
+/*! \brief Basic template for interactions with 2 parameters named forceConstant and equilDistance
+ *
+ * \tparam Phantom unused template parameter for type distinction
+ *
+ * Distinct bond types can be generated from this template with using declarations
+ * and declared, but undefined structs. For example:
+ * using HarmonicBondType = TwoParameterInteraction<struct HarmonicBondTypeParameter>;
+ * Note that HarmonicBondTypeParameter does not have to be defined.
+ */
+template<class Phantom>
+class TwoParameterInteraction
+{
+public:
+    TwoParameterInteraction() = default;
+    TwoParameterInteraction(ForceConstant f, EquilDistance d) : forceConstant_(f), equilDistance_(d)
+    {
+    }
+
+    [[nodiscard]] const ForceConstant& forceConstant() const { return forceConstant_; }
+    [[nodiscard]] const EquilDistance& equilDistance() const { return equilDistance_; }
+
+private:
+    ForceConstant forceConstant_;
+    EquilDistance equilDistance_;
+};
+
+template<class Phantom>
+inline bool operator<(const TwoParameterInteraction<Phantom>& a, const TwoParameterInteraction<Phantom>& b)
+{
+    return std::tie(a.forceConstant(), a.equilDistance())
+           < std::tie(b.forceConstant(), b.equilDistance());
+}
+
+template<class Phantom>
+inline bool operator==(const TwoParameterInteraction<Phantom>& a, const TwoParameterInteraction<Phantom>& b)
+{
+    return std::tie(a.forceConstant(), a.equilDistance())
+           == std::tie(b.forceConstant(), b.equilDistance());
+}
+
+/*! \brief harmonic bond type
+ *
+ *  It represents the interaction of the form
+ *  V(r; forceConstant, equilDistance) = 0.5 * forceConstant * (r - equilDistance)^2
+ */
+using HarmonicBondType = TwoParameterInteraction<struct HarmonicBondTypeParameter>;
+
+
+/*! \brief GROMOS bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, equilDistance) = 0.25 * forceConstant * (r^2 - equilDistance^2)^2
+ */
+using G96BondType = TwoParameterInteraction<struct G96BondTypeParameter>;
+
+
+/*! \brief FENE bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, equilDistance) = - 0.5 * forceConstant * equilDistance^2 * log( 1 - (r / equilDistance)^2)
+ */
+using FENEBondType = TwoParameterInteraction<struct FENEBondTypeParameter>;
+
+
+/*! \brief Half-attractive quartic bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, equilDistance) = 0.5 * forceConstant * (r - equilDistance)^4
+ */
+using HalfAttractiveQuarticBondType =
+        TwoParameterInteraction<struct HalfAttractiveQuarticBondTypeParameter>;
+
+
+/*! \brief Cubic bond type
+ *
+ * It represents the interaction of the form
+ * V(r; quadraticForceConstant, cubicForceConstant, equilDistance) = quadraticForceConstant * (r - equilDistance)^2
+ * + quadraticForceConstant * cubicForceConstant * (r - equilDistance)^3
+ */
+struct CubicBondType
+{
+    CubicBondType() = default;
+    CubicBondType(ForceConstant fq, ForceConstant fc, EquilDistance d) :
+        quadraticForceConstant_(fq),
+        cubicForceConstant_(fc),
+        equilDistance_(d)
+    {
+    }
+
+    [[nodiscard]] const ForceConstant& quadraticForceConstant() const
+    {
+        return quadraticForceConstant_;
+    }
+    [[nodiscard]] const ForceConstant& cubicForceConstant() const { return cubicForceConstant_; }
+    [[nodiscard]] const EquilDistance& equilDistance() const { return equilDistance_; }
+
+private:
+    ForceConstant quadraticForceConstant_;
+    ForceConstant cubicForceConstant_;
+    EquilDistance equilDistance_;
+};
+
+inline bool operator<(const CubicBondType& a, const CubicBondType& b)
+{
+    return std::tie(a.quadraticForceConstant(), a.cubicForceConstant(), a.equilDistance())
+           < std::tie(b.quadraticForceConstant(), b.cubicForceConstant(), b.equilDistance());
+}
+
+inline bool operator==(const CubicBondType& a, const CubicBondType& b)
+{
+    return std::tie(a.quadraticForceConstant(), a.cubicForceConstant(), a.equilDistance())
+           == std::tie(b.quadraticForceConstant(), b.cubicForceConstant(), b.equilDistance());
+}
+
+/*! \brief Morse bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, exponent, equilDistance) = forceConstant * (1 - exp(-exponent * (r - equilDistance)))^2
+ */
+class MorseBondType
+{
+public:
+    MorseBondType() = default;
+    MorseBondType(ForceConstant f, Exponent e, EquilDistance d) :
+        forceConstant_(f),
+        exponent_(e),
+        equilDistance_(d)
+    {
+    }
+
+    [[nodiscard]] const ForceConstant& forceConstant() const { return forceConstant_; }
+    [[nodiscard]] const Exponent&      exponent() const { return exponent_; }
+    [[nodiscard]] const EquilDistance& equilDistance() const { return equilDistance_; }
+
+private:
+    ForceConstant forceConstant_;
+    Exponent      exponent_;
+    EquilDistance equilDistance_;
+};
+
+inline bool operator<(const MorseBondType& a, const MorseBondType& b)
+{
+    return std::tie(a.forceConstant(), a.exponent(), a.equilDistance())
+           < std::tie(b.forceConstant(), b.exponent(), b.equilDistance());
+}
+
+inline bool operator==(const MorseBondType& a, const MorseBondType& b)
+{
+    return std::tie(a.forceConstant(), a.exponent(), a.equilDistance())
+           == std::tie(b.forceConstant(), b.exponent(), b.equilDistance());
+}
+
+
+/*! \brief default angle type
+ *
+ * Note: the angle is always stored as radians internally
+ */
+struct DefaultAngle : public TwoParameterInteraction<struct DefaultAngleParameter>
+{
+    DefaultAngle() = default;
+    //! \brief construct from angle given in radians
+    DefaultAngle(Radians angle, ForceConstant f) :
+        TwoParameterInteraction<struct DefaultAngleParameter>{ f, angle }
+    {
+    }
+
+    //! \brief construct from angle given in degrees
+    DefaultAngle(Degrees angle, ForceConstant f) :
+        TwoParameterInteraction<struct DefaultAngleParameter>{ f, angle * DEG2RAD }
+    {
+    }
+};
+
+/*! \brief Proper Dihedral Implementation
+ */
+class ProperDihedral
+{
+public:
+    using Multiplicity = int;
+
+    ProperDihedral() = default;
+    ProperDihedral(Radians phi, ForceConstant f, Multiplicity m) :
+        phi_(phi),
+        forceConstant_(f),
+        multiplicity_(m)
+    {
+    }
+    ProperDihedral(Degrees phi, ForceConstant f, Multiplicity m) :
+        phi_(phi * DEG2RAD),
+        forceConstant_(f),
+        multiplicity_(m)
+    {
+    }
+
+    [[nodiscard]] const EquilDistance& equilDistance() const { return phi_; }
+    [[nodiscard]] const ForceConstant& forceConstant() const { return forceConstant_; }
+    [[nodiscard]] const Multiplicity&  multiplicity() const { return multiplicity_; }
+
+private:
+    EquilDistance phi_;
+    ForceConstant forceConstant_;
+    Multiplicity  multiplicity_;
+};
+
+inline bool operator<(const ProperDihedral& a, const ProperDihedral& b)
+{
+    return std::tie(a.equilDistance(), a.forceConstant(), a.multiplicity())
+           < std::tie(b.equilDistance(), b.forceConstant(), b.multiplicity());
+}
+
+inline bool operator==(const ProperDihedral& a, const ProperDihedral& b)
+{
+    return std::tie(a.equilDistance(), a.forceConstant(), a.multiplicity())
+           == std::tie(b.equilDistance(), b.forceConstant(), b.multiplicity());
+}
+
+
+/*! \brief Improper Dihedral Implementation
+ */
+struct ImproperDihedral : public TwoParameterInteraction<struct ImproperDihdedralParameter>
+{
+    ImproperDihedral() = default;
+    ImproperDihedral(Radians phi, ForceConstant f) :
+        TwoParameterInteraction<struct ImproperDihdedralParameter>{ f, phi }
+    {
+    }
+    ImproperDihedral(Degrees phi, ForceConstant f) :
+        TwoParameterInteraction<struct ImproperDihdedralParameter>{ f, phi * DEG2RAD }
+    {
+    }
+};
+
+/*! \brief Ryckaert-Bellemans Dihedral Implementation
+ */
+class RyckaertBellemanDihedral
+{
+public:
+    RyckaertBellemanDihedral() = default;
+    RyckaertBellemanDihedral(real p1, real p2, real p3, real p4, real p5, real p6) :
+        parameters_{ p1, p2, p3, p4, p5, p6 }
+    {
+    }
+
+    const real& operator[](std::size_t i) const { return parameters_[i]; }
+
+    [[nodiscard]] const std::array<real, 6>& parameters() const { return parameters_; }
+
+    [[nodiscard]] std::size_t size() const { return parameters_.size(); }
+
+private:
+    std::array<real, 6> parameters_;
+};
+
+inline bool operator<(const RyckaertBellemanDihedral& a, const RyckaertBellemanDihedral& b)
+{
+    return a.parameters() < b.parameters();
+}
+
+inline bool operator==(const RyckaertBellemanDihedral& a, const RyckaertBellemanDihedral& b)
+{
+    return a.parameters() == b.parameters();
+}
+
+
+/*! \brief Type for 5-center interaction (C-MAP)
+ *
+ *  Note: no kernels currently implemented
+ */
+class Default5Center
+{
+public:
+    Default5Center() = default;
+    Default5Center(Radians phi, Radians psi, ForceConstant fphi, ForceConstant fpsi) :
+        phi_(phi),
+        psi_(psi),
+        fphi_(fphi),
+        fpsi_(fpsi)
+    {
+    }
+
+    [[nodiscard]] const Radians&       phi() const { return phi_; }
+    [[nodiscard]] const Radians&       psi() const { return psi_; }
+    [[nodiscard]] const ForceConstant& fphi() const { return fphi_; }
+    [[nodiscard]] const ForceConstant& fpsi() const { return fpsi_; }
+
+private:
+    Radians       phi_, psi_;
+    ForceConstant fphi_, fpsi_;
+};
+
+inline bool operator<(const Default5Center& a, const Default5Center& b)
+{
+    return std::tie(a.phi(), a.psi(), a.fphi(), a.fpsi())
+           < std::tie(b.phi(), b.psi(), b.fphi(), b.fpsi());
+}
+
+inline bool operator==(const Default5Center& a, const Default5Center& b)
+{
+    return std::tie(a.phi(), a.psi(), a.fphi(), a.fpsi())
+           == std::tie(b.phi(), b.psi(), b.fphi(), b.fpsi());
+}
+
+
+} // namespace nblib
+#endif // NBLIB_LISTEDFORCES_BONDTYPES_H
diff --git a/api/nblib/listed_forces/calculator.h b/api/nblib/listed_forces/calculator.h

new file mode 100644 (file)

index 0000000..dc44055
--- /dev/null
+++ b/api/nblib/listed_forces/calculator.h
@@ -0,0 +1,134 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \inpublicapi \file
+ * \brief
+ * Implements a force calculator based on GROMACS data structures.
+ *
+ * Intended for internal use inside the ForceCalculator.
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ */
+
+#ifndef NBLIB_LISTEDFORCES_CALCULATOR_H
+#define NBLIB_LISTEDFORCES_CALCULATOR_H
+
+#include <memory>
+#include <unordered_map>
+
+#include "nblib/listed_forces/definitions.h"
+
+namespace gmx
+{
+template<typename T>
+class ArrayRef;
+} // namespace gmx
+
+namespace nblib
+{
+class Box;
+class PbcHolder;
+template<class T>
+class ForceBuffer;
+
+/*! \internal \brief object to calculate listed forces
+ *
+ */
+class ListedForceCalculator
+{
+public:
+    using EnergyType = std::array<real, std::tuple_size<ListedInteractionData>::value>;
+
+    ListedForceCalculator(const ListedInteractionData& interactions,
+                          size_t                       bufferSize,
+                          int                          numThreads,
+                          const Box&                   box);
+
+    /*! \brief Dispatch the listed force kernels and reduce the forces
+     *
+     * This function adds the computed listed forces to all values in the passed in forces buffer,
+     * so it can be regarded as an output only param. In case this is being used in a simulation
+     * that uses the same force buffer for both non-bonded and listed forces, this call should be
+     * made only after the compute() call from the non-bonded ForceCalculator
+     *
+     * This function also stores the forces and energies from listed interactions in the internal
+     * buffer of the ListedForceCalculator object
+     *
+     * \param[in] coordinates to be used for the force calculation
+     * \param[out] forces buffer to store the output forces
+     */
+    void compute(gmx::ArrayRef<const Vec3> coordinates, gmx::ArrayRef<Vec3> forces, bool usePbc = false);
+
+    //! Alternative overload with the energies in an output buffer
+    void compute(gmx::ArrayRef<const Vec3> coordinates,
+                 gmx::ArrayRef<Vec3>       forces,
+                 EnergyType&               energies,
+                 bool                      usePbc = false);
+
+    /*! \brief We need to declare the destructor here to move the (still default) implementation
+     *  to the .cpp file. Omitting this declaration would mean an inline destructor
+     *  which can't compile because the unique_ptr dtor needs ~ForceBuffer, which is not available
+     * here because it's incomplete.
+     */
+    ~ListedForceCalculator();
+
+private:
+    int numThreads;
+
+    //! the main buffer to hold the final listed forces
+    std::vector<gmx::RVec> masterForceBuffer_;
+
+    //! holds the array of energies computed
+    EnergyType energyBuffer_;
+
+    //! holds the listed interactions split into groups for multithreading
+    std::vector<ListedInteractionData> threadedInteractions_;
+
+    //! reduction force buffers
+    std::vector<std::unique_ptr<ForceBuffer<gmx::RVec>>> threadedForceBuffers_;
+
+    //! PBC objects
+    std::unique_ptr<PbcHolder> pbcHolder_;
+
+    //! compute listed forces and energies, overwrites the internal buffers
+    void computeForcesAndEnergies(gmx::ArrayRef<const Vec3> x, bool usePbc = false);
+};
+
+} // namespace nblib
+
+#endif // NBLIB_LISTEDFORCES_CALCULATOR_H
diff --git a/api/nblib/listed_forces/definitions.h b/api/nblib/listed_forces/definitions.h

new file mode 100644 (file)

index 0000000..5240837
--- /dev/null
+++ b/api/nblib/listed_forces/definitions.h
@@ -0,0 +1,181 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \inpublicapi \file
+ * \brief
+ * Definitions for supported nblib listed interaction data, such as bonds, angles, dihedrals, etc
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * A note on the preprocessor (PP) usage in this file:
+ *
+ * The PP macros defined here are used exclusively to generate
+ * template instantiation declarations of the form "extern template function(X)"
+ * in header files and "template function(X)" in .cpp files.
+ * These declarations do not affect the program logic in any way and neither are they
+ * required to read and understand the behavior of the code as they do not
+ * result in any executable instructions.
+ * In fact, it would even be technically possible to omit these PP generated
+ * declarations in the header files and replace them with an unused static function
+ * in the .cpp file that calls the template function in question
+ * (e.g. Molecule::addInteraction) once with each type from the variadic template
+ * TypeLists declared in this file. This would be enough to create the required instantiations.
+ * It would, however, create more work for the compiler which then has to instantiate the
+ * templates in the header in each translation unit where the header is included.
+ * Doing this results in a compiler warning.
+ *
+ */
+#ifndef NBLIB_LISTEDFORCES_DEFINITIONS_H
+#define NBLIB_LISTEDFORCES_DEFINITIONS_H
+
+#include "nblib/util/user.h"
+#include "bondtypes.h"
+
+namespace nblib
+{
+
+//***********************************************************************************
+
+/*! \brief These macros define what interaction types are supported in
+ *  -Molecule
+ *  -Topology
+ *  -ListedForceCalculator
+ *
+ *  To enable force calculation for your new interaction type that you've added to bondtypes.h,
+ *  list your new type here under the appropriate category and make sure that you've added
+ *  a kernel in kernels.hpp
+ */
+
+#define SUPPORTED_TWO_CENTER_TYPES \
+    HarmonicBondType, G96BondType, CubicBondType, FENEBondType, HalfAttractiveQuarticBondType
+
+#define SUPPORTED_THREE_CENTER_TYPES DefaultAngle
+
+#define SUPPORTED_FOUR_CENTER_TYPES ProperDihedral, ImproperDihedral, RyckaertBellemanDihedral
+
+#define SUPPORTED_FIVE_CENTER_TYPES Default5Center
+
+//***********************************************************************************
+
+#define SUPPORTED_LISTED_TYPES                                                             \
+    SUPPORTED_TWO_CENTER_TYPES, SUPPORTED_THREE_CENTER_TYPES, SUPPORTED_FOUR_CENTER_TYPES, \
+            SUPPORTED_FIVE_CENTER_TYPES
+
+#define NBLIB_ALWAYS_INLINE __attribute((always_inline))
+
+//! \brief encodes the number of integers needed to represent 2-center interactions (bonds, pairs)
+using TwoCenterInteractionIndex = std::array<int, 3>;
+//! \brief encodes the number of integers needed to represent 3-center interactions (angles)
+using ThreeCenterInteractionIndex = std::array<int, 4>;
+//! \brief encodes the number of integers needed to represent 4-center interactions (dihedrals)
+using FourCenterInteractionIndex = std::array<int, 5>;
+//! \brief encodes the number of integers needed to represent 5-center interactions (CMAP)
+using FiveCenterInteractionIndex = std::array<int, 6>;
+
+//! \brief data type for pairwise interactions, e.g. bonds
+template<class TwoCenterType>
+struct TwoCenterData
+{
+    using type = TwoCenterType;
+
+    // tuple format: <particleID i, particleID j, TwoCenterInstanceIndex>
+    std::vector<TwoCenterInteractionIndex> indices;
+    // vector of unique TwoCenterType instances
+    std::vector<TwoCenterType> parameters;
+};
+
+//! \brief data type for three-center interactions, e.g. angles
+template<class ThreeCenterType>
+struct ThreeCenterData
+{
+    using type = ThreeCenterType;
+
+    // tuple format: <particleID i, particleID j, particleID k, ThreeCenterInstanceIndex>
+    std::vector<ThreeCenterInteractionIndex> indices;
+    // vector of unique ThreeCenterType instances
+    std::vector<ThreeCenterType> parameters;
+};
+
+//! \brief data type for four-center interactions, e.g. dihedrals
+template<class FourCenterType>
+struct FourCenterData
+{
+    using type = FourCenterType;
+
+    // tuple format: <particleID i, particleID j, particleID k, particleID l, FourCenterInstanceIndex>
+    std::vector<FourCenterInteractionIndex> indices;
+    // vector of unique FourCenterType instances
+    std::vector<FourCenterType> parameters;
+};
+
+//! \brief data type for five-center interactions, e.g. CMAP
+template<class FiveCenterType>
+struct FiveCenterData
+{
+    using type = FiveCenterType;
+
+    // tuple format: <particleID i, particleID j, particleID k, particleID l, particleID m, FiveCenterInstanceIndex>
+    std::vector<FiveCenterInteractionIndex> indices;
+    // vector of unique FiveCenterType instances
+    std::vector<FiveCenterType> parameters;
+};
+
+
+using SupportedTwoCenterTypes = TypeList<SUPPORTED_TWO_CENTER_TYPES>;
+// std::tuple<TwoCenterData<TwoCenterType1>, ...>
+using TwoCenterInteractionData = Reduce<std::tuple, Map<TwoCenterData, SupportedTwoCenterTypes>>;
+
+using SupportedThreeCenterTypes = TypeList<SUPPORTED_THREE_CENTER_TYPES>;
+// std::tuple<ThreeCenterData<ThreeCenterType1>, ...>
+using ThreeCenterInteractionData = Reduce<std::tuple, Map<ThreeCenterData, SupportedThreeCenterTypes>>;
+
+using SupportedFourCenterTypes = TypeList<SUPPORTED_FOUR_CENTER_TYPES>;
+// std::tuple<FourCenterData<FourCenterType1>, ...>
+using FourCenterInteractionData = Reduce<std::tuple, Map<FourCenterData, SupportedFourCenterTypes>>;
+
+using SupportedFiveCenterTypes = TypeList<SUPPORTED_FIVE_CENTER_TYPES>;
+// std::tuple<FiveCenterData<FiveCenterType1>, ...>
+using FiveCenterInteractionData = Reduce<std::tuple, Map<FiveCenterData, SupportedFiveCenterTypes>>;
+
+//! This is the complete type that holds all listed interaction data
+using ListedInteractionData = decltype(std::tuple_cat(TwoCenterInteractionData{},
+                                                      ThreeCenterInteractionData{},
+                                                      FourCenterInteractionData{},
+                                                      FiveCenterInteractionData{}));
+} // namespace nblib
+#endif // NBLIB_LISTEDFORCES_DEFINITIONS_H
diff --git a/api/nblib/listed_forces/tests/CMakeLists.txt b/api/nblib/listed_forces/tests/CMakeLists.txt

new file mode 100644 (file)

index 0000000..2f0bc8e
--- /dev/null
+++ b/api/nblib/listed_forces/tests/CMakeLists.txt
@@ -0,0 +1,55 @@
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2020, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+#
+# \author Victor Holanda <victor.holanda@cscs.ch>
+# \author Joe Jordan <ejjordan@kth.se>
+# \author Prashanth Kanduri <kanduri@cscs.ch>
+# \author Sebastian Keller <keller@cscs.ch>
+#
+
+# Build the gtest executable for the nblib listed-forces tests, register it
+# as an integration test and make the check-nblib target depend on it.
+
+set(testname "NbLibListedForcesTests")
+set(exename "nblib-listed-forces-test")
+
+gmx_add_gtest_executable(
+        ${exename}
+        CPP_SOURCE_FILES
+        # files with code for tests
+        bondtypes.cpp
+)
+target_link_libraries(${exename} PRIVATE nblib_test_infrastructure nblib)
+gmx_register_gtest_test(${testname} ${exename} INTEGRATION_TEST)
+add_dependencies(check-nblib ${exename})
diff --git a/api/nblib/listed_forces/tests/bondtypes.cpp b/api/nblib/listed_forces/tests/bondtypes.cpp

new file mode 100644 (file)

index 0000000..15863da
--- /dev/null
+++ b/api/nblib/listed_forces/tests/bondtypes.cpp
@@ -0,0 +1,140 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * This implements tests for the equality and less-than operators of nblib bond types
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ */
+#include "nblib/listed_forces/bondtypes.h"
+#include "nblib/util/internal.h"
+
+#include "testutils/testasserts.h"
+
+namespace nblib
+{
+
+namespace test_detail
+{
+
+template<class B>
+void testTwoParameterBondEquality(const B& deduceType)
+{
+    ignore_unused(deduceType);
+    B a(1, 2);
+    B b(1, 2);
+    EXPECT_TRUE(a == b);
+
+    B c(1, 3);
+    EXPECT_FALSE(a == c);
+}
+
+template<class B>
+void testThreeParameterBondEquality(const B& deduceType)
+{
+    ignore_unused(deduceType);
+    B a(1, 2, 3);
+    B b(1, 2, 3);
+    EXPECT_TRUE(a == b);
+
+    B c(2, 3, 4);
+    EXPECT_FALSE(a == c);
+}
+
+template<class B>
+void testTwoParameterBondLessThan(const B& deduceType)
+{
+    ignore_unused(deduceType);
+    B a(1, 2);
+    B b(1, 3);
+    EXPECT_TRUE(a < b);
+    EXPECT_FALSE(b < a);
+
+    B c(1, 2);
+    B d(1, 2);
+    EXPECT_FALSE(c < d);
+
+    B e(2, 1);
+    B f(3, 1);
+    EXPECT_TRUE(e < f);
+    EXPECT_FALSE(f < e);
+}
+
+template<class B>
+void testThreeParameterBondLessThan(const B& deduceType)
+{
+    ignore_unused(deduceType);
+    B a(1, 2, 1);
+    B b(1, 3, 1);
+    EXPECT_TRUE(a < b);
+    EXPECT_FALSE(b < a);
+
+    B c(1, 2, 3);
+    B d(1, 2, 3);
+    EXPECT_FALSE(c < d);
+
+    B e(4, 1, 3);
+    B f(5, 1, 2);
+    EXPECT_TRUE(e < f);
+    EXPECT_FALSE(f < e);
+}
+
+} // namespace test_detail
+
+TEST(NBlibTest, BondTypesOperatorEqualWorks)
+{
+    auto bondList3 = std::make_tuple(HarmonicBondType(), G96BondType(), FENEBondType(),
+                                     HalfAttractiveQuarticBondType());
+    for_each_tuple([](const auto& b) { test_detail::testTwoParameterBondEquality(b); }, bondList3);
+
+    auto bondList4 = std::make_tuple(CubicBondType(), MorseBondType());
+    for_each_tuple([](const auto& b) { test_detail::testThreeParameterBondEquality(b); }, bondList4);
+}
+
+TEST(NBlibTest, BondTypesLessThanWorks)
+{
+    auto bondList3 = std::make_tuple(HarmonicBondType(), G96BondType(), FENEBondType(),
+                                     HalfAttractiveQuarticBondType());
+    for_each_tuple([](const auto& b) { test_detail::testTwoParameterBondLessThan(b); }, bondList3);
+
+    auto bondList4 = std::make_tuple(CubicBondType(), MorseBondType());
+    for_each_tuple([](const auto& b) { test_detail::testThreeParameterBondLessThan(b); }, bondList4);
+}
+
+
+} // namespace nblib
diff --git a/api/nblib/listed_forces/traits.h b/api/nblib/listed_forces/traits.h

new file mode 100644 (file)

index 0000000..82cfd4a
--- /dev/null
+++ b/api/nblib/listed_forces/traits.h
@@ -0,0 +1,246 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * The traits defined here for supported nblib listed interaction data types
+ * are used to control the dataflow in dataflow.hpp
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ */
+#ifndef NBLIB_LISTEDFORCES_TRAITS_H
+#define NBLIB_LISTEDFORCES_TRAITS_H
+
+#include <numeric>
+
+#include "nblib/util/internal.h"
+#include "bondtypes.h"
+#include "definitions.h"
+
+namespace nblib
+{
+
+namespace detail
+{
+
+template<class InteractionType, class = void>
+struct CoordinateIndex_
+{
+};
+
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedTwoCenterTypes>{}>>
+{
+    typedef std::array<int, 2> type;
+};
+
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedThreeCenterTypes>{}>>
+{
+    typedef std::array<int, 3> type;
+};
+
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFourCenterTypes>{}>>
+{
+    typedef std::array<int, 4> type;
+};
+
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFiveCenterTypes>{}>>
+{
+    typedef std::array<int, 5> type;
+};
+
+} // namespace detail
+
+/*! \brief traits class to determine the coordinate index type for InteractionType
+ *  \internal
+ *
+ * \tparam InteractionType
+ */
+template<class InteractionType>
+using CoordinateIndex = typename detail::CoordinateIndex_<InteractionType>::type;
+
+
+namespace detail
+{
+
+template<class InteractionType, class = void>
+struct InteractionIndex_
+{
+};
+
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedTwoCenterTypes>{}>>
+{
+    typedef TwoCenterInteractionIndex type;
+};
+
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedThreeCenterTypes>{}>>
+{
+    typedef ThreeCenterInteractionIndex type;
+};
+
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFourCenterTypes>{}>>
+{
+    typedef FourCenterInteractionIndex type;
+};
+
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFiveCenterTypes>{}>>
+{
+    typedef FiveCenterInteractionIndex type;
+};
+
+} // namespace detail
+
+/*! \brief traits class to determine the InteractionIndex type for InteractionType
+ *  \internal
+ *
+ * \tparam InteractionType
+ */
+template<class InteractionType>
+using InteractionIndex = typename detail::InteractionIndex_<InteractionType>::type;
+
+
+template<class I, class = void>
+struct HasTwoCenterAggregate : std::false_type
+{
+};
+
+template<class I>
+struct HasTwoCenterAggregate<I, std::void_t<typename I::TwoCenterAggregateType>> : std::true_type
+{
+};
+
+template<class I, class = void>
+struct HasThreeCenterAggregate : std::false_type
+{
+};
+
+template<class I>
+struct HasThreeCenterAggregate<I, std::void_t<typename I::ThreeCenterAggregateType>> : std::true_type
+{
+};
+
+//! \internal \brief determines the energy storage location of the carrier part for InteractionTypes without aggregates
+template<class InteractionType, class = void>
+struct CarrierIndex :
+    std::integral_constant<size_t, FindIndex<InteractionType, ListedInteractionData>{}>
+{
+};
+
+//! \internal \brief determines the energy storage location of the carrier part for InteractionTypes with aggregates
+template<class InteractionType>
+struct CarrierIndex<InteractionType, std::void_t<typename InteractionType::CarrierType>> :
+    std::integral_constant<size_t, FindIndex<typename InteractionType::CarrierType, ListedInteractionData>{}>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 2-C aggregate part for InteractionTypes without aggregates
+template<class InteractionType, class = void>
+struct TwoCenterAggregateIndex : std::integral_constant<size_t, 0>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 2-C aggregate part for InteractionTypes with 2-C aggregates
+template<class InteractionType>
+struct TwoCenterAggregateIndex<InteractionType, std::void_t<typename InteractionType::TwoCenterAggregateType>> :
+    std::integral_constant<size_t, FindIndex<typename InteractionType::TwoCenterAggregateType, ListedInteractionData>{}>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 3-C aggregate part for InteractionTypes without aggregates
+template<class InteractionType, class = void>
+struct ThreeCenterAggregateIndex : std::integral_constant<size_t, 0>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 3-C aggregate part for InteractionTypes with 3-C aggregates
+template<class InteractionType>
+struct ThreeCenterAggregateIndex<InteractionType, std::void_t<typename InteractionType::ThreeCenterAggregateType>> :
+    std::integral_constant<size_t, FindIndex<typename InteractionType::ThreeCenterAggregateType, ListedInteractionData>{}>
+{
+};
+
+/*! \brief return type to hold the energies of the different overloads of "dispatchInteraction"
+ * \internal
+ *
+ * \tparam T value type of the stored energy terms
+ */
+template<class T>
+class KernelEnergy
+{
+public:
+    KernelEnergy() : energies_{ 0, 0, 0, 0 } {}
+
+    T&       carrier() { return energies_[0]; }
+    const T& carrier() const { return energies_[0]; }
+
+    T&       twoCenterAggregate() { return energies_[1]; }
+    const T& twoCenterAggregate() const { return energies_[1]; }
+
+    T&       threeCenterAggregate() { return energies_[2]; }
+    const T& threeCenterAggregate() const { return energies_[2]; }
+
+    T&       freeEnergyDerivative() { return energies_[3]; }
+    const T& freeEnergyDerivative() const { return energies_[3]; }
+
+    KernelEnergy& operator+=(const KernelEnergy& other)
+    {
+        for (size_t i = 0; i < energies_.size(); ++i)
+        {
+            energies_[i] += other.energies_[i];
+        }
+        return *this;
+    }
+
+    operator T() const { return std::accumulate(begin(energies_), end(energies_), T{}); }
+
+private:
+    std::array<T, 4> energies_;
+};
+
+template<class BasicVector>
+using BasicVectorValueType_t = std::remove_all_extents_t<typename BasicVector::RawArray>;
+
+} // namespace nblib
+#endif // NBLIB_LISTEDFORCES_TRAITS_H
diff --git a/api/nblib/molecules.cpp b/api/nblib/molecules.cpp

index b8f70ee797e1fdb11b901efca36fa04aa453e112..c2491e7b5541afc1a391d4ed1bd924c9e8d1f634 100644 (file)
--- a/api/nblib/molecules.cpp
+++ b/api/nblib/molecules.cpp
@@ -131,8 +131,8 @@ void Molecule::addExclusion(const int particleIndex, const int particleIndexToEx
      }
  }
  
-void Molecule::addExclusion(std::tuple<std::string, std::string> particle,
-                            std::tuple<std::string, std::string> particleToExclude)
+void Molecule::addExclusion(std::tuple<ParticleName, ResidueName> particle,
+                            std::tuple<ParticleName, ResidueName> particleToExclude)
  {
      // duplication for the swapped pair happens in getExclusions()
      exclusionsByName_.emplace_back(std::make_tuple(std::get<0>(particle), std::get<1>(particle),
@@ -140,9 +140,10 @@ void Molecule::addExclusion(std::tuple<std::string, std::string> particle,
                                                     std::get<1>(particleToExclude)));
  }
  
-void Molecule::addExclusion(const std::string& particleName, const std::string& particleNameToExclude)
+void Molecule::addExclusion(const ParticleName& particleName, const ParticleName& particleNameToExclude)
  {
-    addExclusion(std::make_tuple(particleName, name_), std::make_tuple(particleNameToExclude, name_));
+    addExclusion(std::make_tuple(particleName, ResidueName(name_)),
+                 std::make_tuple(particleNameToExclude, ResidueName(name_)));
  }
  
  const ParticleType& Molecule::at(const std::string& particleTypeName) const
diff --git a/api/nblib/molecules.h b/api/nblib/molecules.h

index afcae8ccbd0cbc66b411aeb28854ac5eaf724593..33b5dbe0bd76101297326baa38fe63a516d96293 100644 (file)
--- a/api/nblib/molecules.h
+++ b/api/nblib/molecules.h
@@ -53,10 +53,13 @@
  #include <unordered_map>
  #include <vector>
  
+#include "nblib/listed_forces/definitions.h"
  #include "nblib/particletype.h"
  
  namespace nblib
  {
+class TopologyBuilder;
+
  //! Named type for unique identifier for a particle in a molecule
  using ParticleName = StrongType<std::string, struct ParticleNameParameter>;
  
@@ -79,6 +82,59 @@ struct ParticleData
  
  class Molecule final
  {
+    //! \brief string based listed interaction data type used during construction
+    template<class TwoCenterType>
+    struct TwoCenterData
+    {
+        using type = TwoCenterType;
+
+        std::vector<TwoCenterType> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName>> interactions_;
+    };
+
+    template<class ThreeCenterType>
+    struct ThreeCenterData
+    {
+        using type = ThreeCenterType;
+
+        std::vector<ThreeCenterType> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName>> interactions_;
+    };
+
+    template<class FourCenter>
+    struct FourCenterDataHolder
+    {
+        using type = FourCenter;
+
+        std::vector<FourCenter> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName>> interactions_;
+    };
+
+    template<class FiveCenter>
+    struct FiveCenterDataHolder
+    {
+        using type = FiveCenter;
+
+        std::vector<FiveCenter> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName>>
+                interactions_;
+    };
+
+    // TwoCenterContainerTypes is TypeList<TwoCenterData<HarmonicBondType>, ...>
+    using TwoCenterContainerTypes = Map<TwoCenterData, SupportedTwoCenterTypes>;
+
+    using ThreeCenterContainerTypes = Map<ThreeCenterData, SupportedThreeCenterTypes>;
+
+    using FourCenterContainerTypes = Map<FourCenterDataHolder, SupportedFourCenterTypes>;
+
+    using FiveCenterContainerTypes = Map<FiveCenterDataHolder, SupportedFiveCenterTypes>;
+
+    // InteractionTuple is std::tuple<TwoCenterData<HarmonicBondType>, ...>
+    using InteractionTuple = decltype(std::tuple_cat(Reduce<std::tuple, TwoCenterContainerTypes>{},
+                                                     Reduce<std::tuple, ThreeCenterContainerTypes>{},
+                                                     Reduce<std::tuple, FourCenterContainerTypes>{},
+                                                     Reduce<std::tuple, FiveCenterContainerTypes>{}));
+
  public:
      explicit Molecule(MoleculeName moleculeName);
  
@@ -103,11 +159,45 @@ public:
      void addExclusion(int particleIndex, int particleIndexToExclude);
  
      //! Specify an exclusion with particle and residue names that have been added to molecule
-    void addExclusion(std::tuple<std::string, std::string> particle,
-                      std::tuple<std::string, std::string> particleToExclude);
+    void addExclusion(std::tuple<ParticleName, ResidueName> particle,
+                      std::tuple<ParticleName, ResidueName> particleToExclude);
  
      //! Specify an exclusion with particle names that have been added to molecule
-    void addExclusion(const std::string& particleName, const std::string& particleNameToExclude);
+    void addExclusion(const ParticleName& particleName, const ParticleName& particleNameToExclude);
+
+    // Add various types of interactions to the molecule
+    // Note: adding an interaction type not listed in SUPPORTED_TWO_CENTER_TYPES results in a compilation error
+
+    //! For 2-particle interactions such as harmonic bonds
+    template<class Interaction>
+    void addInteraction(const ParticleName& particleNameI,
+                        const ResidueName&  residueNameI,
+                        const ParticleName& particleNameJ,
+                        const ResidueName&  residueNameJ,
+                        const Interaction&  interaction);
+
+    //! Add 2-particle interactions with the default residue name
+    template<class Interaction>
+    void addInteraction(const ParticleName& particleNameI,
+                        const ParticleName& particleNameJ,
+                        const Interaction&  interaction);
+
+    //! For 3-particle interactions such as angles
+    template<class Interaction>
+    void addInteraction(const ParticleName& particleNameI,
+                        const ResidueName&  residueNameI,
+                        const ParticleName& particleNameJ,
+                        const ResidueName&  residueNameJ,
+                        const ParticleName& particleNameK,
+                        const ResidueName&  residueNameK,
+                        const Interaction&  interaction);
+
+    //! Add 3-particle interactions with the default residue name
+    template<class Interaction>
+    void addInteraction(const ParticleName& particleNameI,
+                        const ParticleName& particleNameJ,
+                        const ParticleName& particleNameK,
+                        const Interaction&  interaction);
  
      //! The number of molecules
      int numParticlesInMolecule() const;
@@ -119,6 +209,9 @@ public:
      //! returns a sorted vector containing no duplicates of particles to exclude by indices
      std::vector<std::tuple<int, int>> getExclusions() const;
  
+    //! Return all interactions stored in Molecule
+    const InteractionTuple& interactionData() const;
+
      //! Return name of ith particle
      ParticleName particleName(int i) const;
  
@@ -150,7 +243,40 @@ private:
      //! we cannot efficiently compute indices during the build-phase
      //! so we delay the conversion until TopologyBuilder requests it
      std::vector<std::tuple<std::string, std::string, std::string, std::string>> exclusionsByName_;
+
+    //! collection of data for all types of interactions
+    InteractionTuple interactionData_;
  };
  
+//! \cond DO_NOT_DOCUMENT
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x)                                      \
+    extern template void Molecule::addInteraction(                              \
+            const ParticleName& particleNameI, const ResidueName& residueNameI, \
+            const ParticleName& particleNameJ, const ResidueName& residueNameJ, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_TWO_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x)         \
+    extern template void Molecule::addInteraction( \
+            const ParticleName& particleNameI, const ParticleName& particleNameJ, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_TWO_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x)                                      \
+    extern template void Molecule::addInteraction(                              \
+            const ParticleName& particleNameI, const ResidueName& residueNameI, \
+            const ParticleName& particleNameJ, const ResidueName& residueNameJ, \
+            const ParticleName& particleNameK, const ResidueName& residueNameK, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_THREE_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x)                                        \
+    extern template void Molecule::addInteraction(                                \
+            const ParticleName& particleNameI, const ParticleName& particleNameJ, \
+            const ParticleName& particleNameK, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_THREE_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+//! \endcond
+
  } // namespace nblib
  #endif // NBLIB_MOLECULES_H
diff --git a/api/nblib/nblib.h b/api/nblib/nblib.h

index 55c510a577be4546e57d1408ba2dc633d6ae06d4..cb732eafedb6ff5073376a06065e8155633238fe 100644 (file)
--- a/api/nblib/nblib.h
+++ b/api/nblib/nblib.h
@@ -51,8 +51,12 @@
  #include "nblib/integrator.h"
  #include "nblib/interactions.h"
  #include "nblib/kerneloptions.h"
+#include "nblib/listed_forces/bondtypes.h"
+#include "nblib/listed_forces/calculator.h"
+#include "nblib/listed_forces/definitions.h"
  #include "nblib/molecules.h"
  #include "nblib/particletype.h"
+#include "nblib/ppmap.h"
  #include "nblib/simulationstate.h"
  #include "nblib/topology.h"
  #include "nblib/topologyhelpers.h"
diff --git a/api/nblib/ppmap.h b/api/nblib/ppmap.h

new file mode 100644 (file)

index 0000000..3f1b392
--- /dev/null
+++ b/api/nblib/ppmap.h
@@ -0,0 +1,159 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*
+ * Copyright (C) 2012 William Swanson
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors or
+ * their institutions shall not be used in advertising or otherwise to
+ * promote the sale, use or other dealings in this Software without
+ * prior written authorization from the authors.
+ */
+
+/*! \inpublicapi \file
+ *  \brief
+ *  Provides MAP and MAP_LIST to apply a macro to a variadic argument list
+ *
+ *  The MAP and MAP_LIST macros implement calling a supplied macro with
+ *  all of the subsequent arguments. For example:
+ *  MAP(macro, x, y, z) expands to macro(x) macro(y) macro(z)  while
+ *  MAP_LIST(macro, x, y, z) expands to macro(x), macro(y), macro(z)
+ *
+ *  Due to the limitations of the preprocessor, it is unfortunately not
+ *  possible to implement this functionality in a more straight-forward way.
+ *  Since this use-case is not too uncommon, Boost for example implements
+ *  BOOST_PP_SEQ_FOR_EACH which provides equivalent functionality implemented
+ *  with the same technique, but is more comprehensive in scope,
+ *  beyond what's required here.
+ *
+ *  A discussion of how and why this macro works can be found here:
+ *  https://stackoverflow.com/questions/27765387/distributing-an-argument-in-a-variadic-macro
+ *  and the original repository of this implementation is this one:
+ *  https://github.com/swansontec/map-macro
+ *  It also contains some useful explanations of how it works.
+ */
+
+#ifndef NBLIB_PPMAP_H
+#define NBLIB_PPMAP_H
+
+#define EVAL0(...) __VA_ARGS__
+#define EVAL1(...) EVAL0(EVAL0(EVAL0(__VA_ARGS__)))
+#define EVAL2(...) EVAL1(EVAL1(EVAL1(__VA_ARGS__)))
+#define EVAL3(...) EVAL2(EVAL2(EVAL2(__VA_ARGS__)))
+#define EVAL4(...) EVAL3(EVAL3(EVAL3(__VA_ARGS__)))
+#define EVAL(...) EVAL4(EVAL4(EVAL4(__VA_ARGS__)))
+
+#define MAP_END(...)
+#define MAP_OUT
+#define MAP_COMMA ,
+
+#define MAP_GET_END2() 0, MAP_END
+#define MAP_GET_END1(...) MAP_GET_END2
+#define MAP_GET_END(...) MAP_GET_END1
+#define MAP_NEXT0(test, next, ...) next MAP_OUT
+#define MAP_NEXT1(test, next) MAP_NEXT0(test, next, 0)
+#define MAP_NEXT(test, next) MAP_NEXT1(MAP_GET_END test, next)
+
+#define MAP0(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP1)(f, peek, __VA_ARGS__)
+#define MAP1(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP0)(f, peek, __VA_ARGS__)
+
+#define MAP_LIST_NEXT1(test, next) MAP_NEXT0(test, MAP_COMMA next, 0)
+#define MAP_LIST_NEXT(test, next) MAP_LIST_NEXT1(MAP_GET_END test, next)
+
+#define MAP_LIST0(f, x, peek, ...) f(x) MAP_LIST_NEXT(peek, MAP_LIST1)(f, peek, __VA_ARGS__)
+#define MAP_LIST1(f, x, peek, ...) f(x) MAP_LIST_NEXT(peek, MAP_LIST0)(f, peek, __VA_ARGS__)
+
+/**
+ * Applies the function macro `f` to each of the remaining parameters.
+ */
+#define MAP(f, ...) EVAL(MAP1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+/**
+ * Applies the function macro `f` to each of the remaining parameters and
+ * inserts commas between the results.
+ */
+#define MAP_LIST(f, ...) EVAL(MAP_LIST1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+
+/** The PP_NARG macro returns the number of arguments that have been
+ *  passed to it.
+ */
+#define PP_NARG(...) PP_NARG_(__VA_ARGS__, PP_RSEQ_N())
+#define PP_NARG_(...) PP_ARG_N(__VA_ARGS__)
+#define PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, \
+                 _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34,  \
+                 _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50,  \
+                 _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, N, ...)         \
+    N
+#define PP_RSEQ_N()                                                                             \
+    63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, \
+            40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \
+            19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/** MAP_ENUMERATE macro:
+ * MAP_ENUMERATE(action, args...), for at most 5 args (FE_5 is the largest helper defined below):
+ * like MAP, calls action with each argument and also forwards its zero-based index, spelled `N - k`
+ */
+#define FE_0(WHAT)
+#define FE_1(WHAT, N, X) WHAT(X, N - 1) // NOLINT bugprone-macro-parentheses
+#define FE_2(WHAT, N, X, ...) WHAT(X, N - 2) FE_1(WHAT, N, __VA_ARGS__)
+#define FE_3(WHAT, N, X, ...) WHAT(X, N - 3) FE_2(WHAT, N, __VA_ARGS__)
+#define FE_4(WHAT, N, X, ...) WHAT(X, N - 4) FE_3(WHAT, N, __VA_ARGS__)
+#define FE_5(WHAT, N, X, ...) WHAT(X, N - 5) FE_4(WHAT, N, __VA_ARGS__)
+
+#define GET_MACRO(_0, _1, _2, _3, _4, _5, NAME, ...) NAME
+#define MAP_ENUMERATE(action, ...)                                   \
+    GET_MACRO(_0, __VA_ARGS__, FE_5, FE_4, FE_3, FE_2, FE_1, FE_0, ) \
+    (action, PP_NARG(__VA_ARGS__), __VA_ARGS__)
+
+#endif // NBLIB_PPMAP_H
diff --git a/api/nblib/simulationstate.cpp b/api/nblib/simulationstate.cpp

index 9e38433644f24e6cb697e0f861748517945b0ce3..c326b828239cc4ed368808326730960ccbc03eee 100644 (file)
--- a/api/nblib/simulationstate.cpp
+++ b/api/nblib/simulationstate.cpp
@@ -70,12 +70,29 @@ SimulationState::Impl::Impl(const std::vector<Vec3>& coordinates,
      box_(box),
      topology_(std::move(topology))
  {
-    if (!checkNumericValues(coordinates))
+    auto numParticles = topology_.numParticles();
+
+    if (int(coordinates.size()) != numParticles)
+    {
+        throw InputException("Coordinates array size mismatch");
+    }
+
+    if (int(velocities.size()) != numParticles)
+    {
+        throw InputException("Velocities array size mismatch");
+    }
+
+    if (int(forces.size()) != numParticles)
+    {
+        throw InputException("Force buffer array size mismatch");
+    }
+
+    if (!isRealValued(coordinates))
      {
          throw InputException("Input coordinates has at least one NaN");
      }
      coordinates_ = coordinates;
-    if (!checkNumericValues(velocities))
+    if (!isRealValued(velocities))
      {
          throw InputException("Input velocities has at least one NaN");
      }
diff --git a/api/nblib/tests/molecules.cpp b/api/nblib/tests/molecules.cpp

index 52e5248cbcfd19ec6e8b15eaca75784da3599805..b906e097b56afbf577fcbfc6a6ed21ffc0d5f231 100644 (file)
--- a/api/nblib/tests/molecules.cpp
+++ b/api/nblib/tests/molecules.cpp
@@ -142,8 +142,8 @@ TEST(NBlibTest, CanConstructExclusionListFromNamesAndIndicesMixed)
      Molecule             water = waterMolecule.waterMoleculeWithoutExclusions();
  
      //! Add the exclusions
-    water.addExclusion("H1", "Oxygen");
-    water.addExclusion("H2", "Oxygen");
+    water.addExclusion(ParticleName("H1"), ParticleName("Oxygen"));
+    water.addExclusion(ParticleName("H2"), ParticleName("Oxygen"));
      water.addExclusion(1, 2);
  
      std::vector<std::tuple<int, int>> exclusions = water.getExclusions();
diff --git a/api/nblib/tests/nbkernelsystem.cpp b/api/nblib/tests/nbkernelsystem.cpp

index cf05e0ed6da28a28a7c2e77483a7c31558c0e62e..816a545dfb3067ecc7c9db643aed609818571c02 100644 (file)
--- a/api/nblib/tests/nbkernelsystem.cpp
+++ b/api/nblib/tests/nbkernelsystem.cpp
@@ -156,9 +156,13 @@ TEST(NBlibTest, UpdateChangesForces)
      gmx::ArrayRef<Vec3> forces(simState.forces());
      forceCalculator.compute(simState.coordinates(), simState.forces());
  
+    // copy computed forces to another array
      std::vector<Vec3> forces_1(forces.size());
      std::copy(forces.begin(), forces.end(), begin(forces_1));
  
+    // zero original force buffer
+    zeroCartesianArray(forces);
+
      // check if forces change without update step
      forceCalculator.compute(simState.coordinates(), forces);
  
@@ -174,8 +178,11 @@ TEST(NBlibTest, UpdateChangesForces)
      // update
      integrator.integrate(1.0, simState.coordinates(), simState.velocities(), simState.forces());
  
+    // zero original force buffer
+    zeroCartesianArray(forces);
+
      // step 2
-    forceCalculator.compute(simState.coordinates(), simState.forces());
+    forceCalculator.compute(simState.coordinates(), forces);
      std::vector<Vec3> forces_2(forces.size());
      std::copy(forces.begin(), forces.end(), begin(forces_2));
  
diff --git a/api/nblib/tests/testsystems.cpp b/api/nblib/tests/testsystems.cpp

index 70e86dd8135681f4be2d75a07a4d126906abb309..41045b08c42c45632bc06e365f7404b7b8ff45a9 100644 (file)
--- a/api/nblib/tests/testsystems.cpp
+++ b/api/nblib/tests/testsystems.cpp
@@ -133,9 +133,9 @@ Molecule WaterMoleculeBuilder::waterMoleculeWithoutExclusions()
  
  void WaterMoleculeBuilder::addExclusionsFromNames()
  {
-    water_.addExclusion("H1", "Oxygen");
-    water_.addExclusion("H2", "Oxygen");
-    water_.addExclusion("H1", "H2");
+    water_.addExclusion(ParticleName("H1"), ParticleName("Oxygen"));
+    water_.addExclusion(ParticleName("H2"), ParticleName("Oxygen"));
+    water_.addExclusion(ParticleName("H1"), ParticleName("H2"));
  }
  
  MethanolMoleculeBuilder::MethanolMoleculeBuilder() : methanol_(MoleculeName("MeOH"))
@@ -148,9 +148,9 @@ MethanolMoleculeBuilder::MethanolMoleculeBuilder() : methanol_(MoleculeName("MeO
      methanol_.addParticle(ParticleName("H3"), Charges.at("HMet"), library.type("H"));
  
      // Add the exclusions
-    methanol_.addExclusion("Me1", "O2");
-    methanol_.addExclusion("Me1", "H3");
-    methanol_.addExclusion("H3", "O2");
+    methanol_.addExclusion(ParticleName("Me1"), ParticleName("O2"));
+    methanol_.addExclusion(ParticleName("Me1"), ParticleName("H3"));
+    methanol_.addExclusion(ParticleName("H3"), ParticleName("O2"));
  }
  
  Molecule MethanolMoleculeBuilder::methanolMolecule()
diff --git a/api/nblib/topology.cpp b/api/nblib/topology.cpp

index 4e4810765c55e2968b9abef596a9bbb2561f56d3..4e2e1e4d8c895a23dfe40d887b70b7ee4f74d7cc 100644 (file)
--- a/api/nblib/topology.cpp
+++ b/api/nblib/topology.cpp
@@ -71,9 +71,10 @@ gmx::ListOfLists<int> TopologyBuilder::createExclusionsListOfLists() const
          size_t          numMols    = std::get<1>(molNumberTuple);
          const auto&     exclusions = molecule.getExclusions();
  
-        assert((!exclusions.empty()
-                && std::string("No exclusions found in the " + molecule.name().value() + " molecule.")
-                           .c_str()));
+        // Note this is a programming error as all particles should exclude at least themselves and empty topologies are not allowed.
+        const std::string message =
+                "No exclusions found in the " + molecule.name().value() + " molecule.";
+        assert((!exclusions.empty() && message.c_str()));
  
          std::vector<gmx::ExclusionBlock> exclusionBlockPerMolecule =
                  detail::toGmxExclusionBlock(exclusions);
@@ -129,6 +130,11 @@ std::vector<T> TopologyBuilder::extractParticleTypeQuantity(Extractor&& extracto
  
  Topology TopologyBuilder::buildTopology()
  {
+    assert((!(numParticles_ < 0) && "It should not be possible to have negative particles"));
+    if (numParticles_ == 0)
+    {
+        throw InputException("You cannot build a topology with no particles");
+    }
      topology_.numParticles_ = numParticles_;
  
      topology_.exclusions_ = createExclusionsListOfLists();
diff --git a/api/nblib/topology.h b/api/nblib/topology.h

index 380daf79ac57e144475491779cfb36a48fcae729..93fb9faf3ea7b8162ce99597e99ebc23bea2f54f 100644 (file)
--- a/api/nblib/topology.h
+++ b/api/nblib/topology.h
@@ -48,6 +48,7 @@
  #include <vector>
  
  #include "nblib/interactions.h"
+#include "nblib/listed_forces/definitions.h"
  #include "nblib/molecules.h"
  #include "nblib/topologyhelpers.h"
  
@@ -94,6 +95,9 @@ public:
      //! Returns a map of non-bonded force parameters indexed by ParticleType names
      NonBondedInteractionMap getNonBondedInteractionMap() const;
  
+    //! Returns the interaction data
+    ListedInteractionData getInteractionData() const;
+
      //! Returns the combination rule used to generate the NonBondedInteractionMap
      CombinationRule getCombinationRule() const;
  
@@ -116,6 +120,8 @@ private:
      detail::ParticleSequencer particleSequencer_;
      //! Map that should hold all nonbonded interactions for all particle types
      NonBondedInteractionMap nonBondedInteractionMap_;
+    //! data about bonds for all supported types
+    ListedInteractionData interactionData_;
      //! Combination Rule used to generate the nonbonded interactions
      CombinationRule combinationRule_;
  };
@@ -164,6 +170,9 @@ private:
      //! Builds a GROMACS-compliant performant exclusions list aggregating exclusions from all molecules
      gmx::ListOfLists<int> createExclusionsListOfLists() const;
  
+    //! Gather interaction data from molecules
+    ListedInteractionData createInteractionData(const detail::ParticleSequencer&);
+
      //! Helper function to extract quantities like mass, charge, etc from the system
      template<typename T, class Extractor>
      std::vector<T> extractParticleTypeQuantity(Extractor&& extractor);
diff --git a/api/nblib/topologyhelpers.cpp b/api/nblib/topologyhelpers.cpp

index 68b109a7636ff819f6fcf57432ae608ca0d0465a..d2cdd63d66728f742a77b1ed5ef0601de7bd1052 100644 (file)
--- a/api/nblib/topologyhelpers.cpp
+++ b/api/nblib/topologyhelpers.cpp
@@ -64,9 +64,12 @@ std::vector<gmx::ExclusionBlock> toGmxExclusionBlock(const std::vector<std::tupl
          return std::get<0>(tup1) < std::get<0>(tup2);
      };
  
+    // Note this is a programming error as all particles should exclude at least themselves and empty topologies are not allowed.
+    // Note that this is also checked in the parent function with a more informative error message.
+    assert((!tupleList.empty() && "No exclusions found.\n"));
+
      // initialize pair of iterators delimiting the range of exclusions for
      // the first particle in the list
-    assert((!tupleList.empty() && "tupleList must not be empty\n"));
      auto range = std::equal_range(std::begin(tupleList), std::end(tupleList), tupleList[0], firstLowerThan);
      auto it1 = range.first;
      auto it2 = range.second;
diff --git a/api/nblib/topologyhelpers.h b/api/nblib/topologyhelpers.h

index e67f64619a5d0ad912f56e2288bec0d8547d07ef..7e69b5fd673943238df77a16bc96ce10061a9220 100644 (file)
--- a/api/nblib/topologyhelpers.h
+++ b/api/nblib/topologyhelpers.h
@@ -50,6 +50,7 @@
  #include <vector>
  
  #include "gromacs/utility/listoflists.h"
+#include "nblib/listed_forces/traits.h"
  #include "nblib/molecules.h"
  
  namespace gmx
@@ -69,6 +70,42 @@ std::vector<gmx::ExclusionBlock> toGmxExclusionBlock(const std::vector<std::tupl
  //! Add offset to all indices in inBlock
  std::vector<gmx::ExclusionBlock> offsetGmxBlock(std::vector<gmx::ExclusionBlock> inBlock, int offset);
  
+/*!
+ * \brief
+ * Extract all interactions of type I from a vector of molecules. The second element of each input
+ * tuple specifies multiples of the molecule given as the first tuple element. Let (S, U) denote the
+ * returned tuple. Then J[i] = U[S[i]] for all i in [0, S.size()) is the full sequence of
+ * interactions of type I as they occur in the input
+ *
+ */
+template<class I>
+std::tuple<std::vector<size_t>, std::vector<I>>
+collectInteractions(const std::vector<std::tuple<Molecule, int>>&);
+
+#define COLLECT_BONDS_EXTERN_TEMPLATE(x)                                                 \
+    extern template std::tuple<std::vector<size_t>, std::vector<x>> collectInteractions( \
+            const std::vector<std::tuple<Molecule, int>>&);
+MAP(COLLECT_BONDS_EXTERN_TEMPLATE, SUPPORTED_TWO_CENTER_TYPES)
+#undef COLLECT_BONDS_EXTERN_TEMPLATE
+
+/*!
+ * \brief
+ * Return a list of unique BondType instances U and an index list S of size collectedBonds.size()
+ * such that the BondType instance at collectedBonds[i] is equal to U[S[i]].
+ * Returns std::tuple(S, U)
+ *
+ */
+template<class I>
+std::tuple<std::vector<size_t>, std::vector<I>> eliminateDuplicateInteractions(const std::vector<I>& collectedBonds);
+
+/// \cond DO_NOT_DOCUMENT
+#define ELIMINATE_DUPLICATE_EXTERN_TEMPLATE(x)                                                      \
+    extern template std::tuple<std::vector<size_t>, std::vector<x>> eliminateDuplicateInteractions( \
+            const std::vector<x>& collectedBonds);
+MAP(ELIMINATE_DUPLICATE_EXTERN_TEMPLATE, SUPPORTED_LISTED_TYPES)
+#undef ELIMINATE_DUPLICATE_EXTERN_TEMPLATE
+/// \endcond
+
  //! Helper class for Topology to keep track of particle IDs
  class ParticleSequencer
  {
@@ -88,6 +125,19 @@ private:
      DataType data_;
  };
  
+//! Returns the CoordinateIndex<B> list built from the molecules and the ParticleSequencer (presumably one entry per interaction of type B; confirm against the definition)
+template<class B>
+std::vector<CoordinateIndex<B>> sequenceIDs(const std::vector<std::tuple<Molecule, int>>&,
+                                            const detail::ParticleSequencer&);
+
+/// \cond DO_NOT_DOCUMENT
+#define SEQUENCE_PAIR_ID_EXTERN_TEMPLATE(x)                         \
+    extern template std::vector<CoordinateIndex<x>> sequenceIDs<x>( \
+            const std::vector<std::tuple<Molecule, int>>&, const detail::ParticleSequencer&);
+MAP(SEQUENCE_PAIR_ID_EXTERN_TEMPLATE, SUPPORTED_LISTED_TYPES)
+#undef SEQUENCE_PAIR_ID_EXTERN_TEMPLATE
+/// \endcond
+
  } // namespace detail
  
  } // namespace nblib
diff --git a/api/nblib/util/internal.h b/api/nblib/util/internal.h

index 63a86586879c5bd65955c33aedb216d8ce9fc7ca..32b041a3e0ba1d6cc59bdb04bddb514b8a48b22c 100644 (file)
--- a/api/nblib/util/internal.h
+++ b/api/nblib/util/internal.h
@@ -54,8 +54,6 @@
  #include <type_traits>
  #include <vector>
  
-#include "nblib/basicdefinitions.h"
-#include "nblib/vector.h"
  
  namespace nblib
  {
@@ -69,6 +67,9 @@ std::string next_token(std::string& s, const std::string& delimiter);
  template<auto...>
  using void_value_t = void;
  
+template<class... Tuples>
+using tuple_cat_t = decltype(std::tuple_cat(Tuples{}...));
+
  template<class T, class = void>
  struct HasValueMember : std::false_type
  {
@@ -94,8 +95,11 @@ struct AccessTypeMemberIfPresent<T, typename std::void_t<typename T::type>>
  template<class T>
  using AccessTypeMemberIfPresent_t = typename AccessTypeMemberIfPresent<T>::type;
  
-//! this trait evaluates to std::true_type if T is the same as Tuple[N]
-//! OR if T is the same as the type member of Tuple[N]
+/*! \brief Comparison meta function that compares T to Tuple[N]
+ *
+ * This trait evaluates to std::true_type if T is the same as Tuple[N]
+ * OR if T is the same as the type member of Tuple[N]
+ */
  template<int N, typename T, typename Tuple>
  struct MatchTypeOrTypeMember :
      std::disjunction<std::is_same<T, std::tuple_element_t<N, Tuple>>,
@@ -103,48 +107,74 @@ struct MatchTypeOrTypeMember :
  {
  };
  
-//! recursion to check the next field N+1
-template<int N, class T, class Tuple, template<int, class, class> class Comparison, bool Match = false>
-struct MatchField_ :
-    std::integral_constant<size_t, MatchField_<N + 1, T, Tuple, Comparison, Comparison<N + 1, T, Tuple>{}>{}>
+//! \brief Recursion to check the next field N+1
+template<int N, class T, class Tuple, template<int, class, class> class Comparison, class Match = void>
+struct MatchField_ : std::integral_constant<size_t, MatchField_<N + 1, T, Tuple, Comparison>{}>
  {
  };
  
-//! recursion stop when Comparison<N, T, Tuple>::value is true
+//! \brief recursion stop when Comparison<N, T, Tuple>::value is true
  template<int N, class T, class Tuple, template<int, class, class> class Comparison>
-struct MatchField_<N, T, Tuple, Comparison, true> : std::integral_constant<size_t, N>
+struct MatchField_<N, T, Tuple, Comparison, std::enable_if_t<Comparison<N, T, Tuple>{}>> :
+    std::integral_constant<size_t, N>
  {
  };
  
  } // namespace detail
  
-/*! \brief The value member of this struct evaluates to the integral constant N for which
- *  the value member of Comparison<N, T, Tuple> is true
- *  and generates a compiler error if there is no such N
+
+/*! \brief Meta function to return the first index in Tuple whose type matches T
+ *
+ *  If there is more than one match, the first occurrence will be returned.
+ *  If there is no such type, the size of Tuple will be returned.
+ *  Note that the default comparison operation supplied here also matches if the type member Tuple[N]::type matches T
   */
-template<class T, class Tuple, template<int, class, class> class Comparison>
-struct MatchField : detail::MatchField_<0, T, Tuple, Comparison, Comparison<0, T, Tuple>{}>
+template<typename T, class TL, template<int, class, class> class Comparison = detail::MatchTypeOrTypeMember>
+struct FindIndex
  {
  };
  
-/*! \brief Function to return the index in Tuple whose type matches T
- *  - If there are more than one, the first occurrence will be returned
- *  - If there is no such type, a compiler error from accessing a tuple out of range is generated
- *  Note that the default comparison operation supplied here also matches if the type member of Tuple[N] matches T
+/*! \brief Specialization to only enable this trait if TL has template parameters
+ *
+ * \tparam T          a type to look for in the template parameters of TL
+ * \tparam TL         a template template parameter, e.g. std::tuple or nblib::TypeList
+ * \tparam Ts         template parameters of TL
+ * \tparam Comparison comparison operation
+ *
+ *  Note that \a T is added to \a TL as a sentinel to terminate the recursion
+ *  and prevent an out of bounds tuple access compiler error.
   */
-template<typename T, typename Tuple, template<int, class, class> class Comparison = detail::MatchTypeOrTypeMember>
-struct FindIndex : std::integral_constant<size_t, MatchField<T, Tuple, Comparison>{}>
+template<typename T, template<class...> class TL, class... Ts, template<int, class, class> class Comparison>
+struct FindIndex<T, TL<Ts...>, Comparison> : detail::MatchField_<0, T, std::tuple<Ts..., T>, Comparison>
  {
  };
  
-//! Function to return the element in Tuple whose type matches T
-//! Note: if there are more than one, the first occurrence will be returned
+/*! \brief Function to return the element in Tuple whose type matches T
+ *
+ * If there is more than one match, the first occurrence will be returned.
+ * If there is no such type, a compiler error is generated due to accessing
+ * the tuple out of bounds.
+ */
  template<typename T, typename Tuple>
  decltype(auto) pickType(Tuple& tup)
  {
-    return std::get<FindIndex<T, Tuple>{}>(tup);
+    return std::get<FindIndex<T, std::decay_t<Tuple>>{}>(tup);
  }
  
+//! \brief template meta function to determine whether T is contained in TL
+template<class T, class TL>
+struct Contains
+{
+};
+
+// NOTE: the odd formatting below appears to be a clang-format bug; the intended layout is:
+// struct Contains<T, TL<Ts...>> : std::bool_constant<FindIndex<T, TL<Ts...>>{} < sizeof...(Ts)>
+template<class T, template<class...> class TL, class... Ts>
+        struct Contains<T, TL<Ts...>> : std::bool_constant < FindIndex<T, TL<Ts...>>{}<sizeof...(Ts)>
+{
+};
+
+
  //! Utility to call function with each element in tuple_
  template<class F, class... Ts>
  void for_each_tuple(F&& func, std::tuple<Ts...>& tuple_)
diff --git a/api/nblib/util/tests/CMakeLists.txt b/api/nblib/util/tests/CMakeLists.txt

index 4169b078f86c2bfac83cded716f6989b7140bd42..2cc78abdb9a0352529a57e0e092ab5c1c86bb339 100644 (file)
--- a/api/nblib/util/tests/CMakeLists.txt
+++ b/api/nblib/util/tests/CMakeLists.txt
@@ -44,6 +44,7 @@ set(exename "nblib-util-test")
  gmx_add_gtest_executable(
          ${exename}
          CPP_SOURCE_FILES
+        internal.cpp
          user.cpp
  )
  target_link_libraries(${exename} PRIVATE nblib_test_infrastructure nblib)
diff --git a/api/nblib/util/tests/internal.cpp b/api/nblib/util/tests/internal.cpp

new file mode 100644 (file)

index 0000000..c8257ed
--- /dev/null
+++ b/api/nblib/util/tests/internal.cpp
@@ -0,0 +1,149 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * This implements basic nblib utility tests
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ */
+
+#include "nblib/tests/testhelpers.h"
+#include "nblib/util/internal.h"
+#include "nblib/util/user.h"
+
+namespace nblib
+{
+
+TEST(NblibInternalUtils, FindIndexTuple1)
+{
+    using TupleType = std::tuple<float>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, outOfRange);
+}
+
+TEST(NblibInternalUtils, FindIndexTuple2)
+{
+    using TupleType = std::tuple<float, int>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+    constexpr int intIndex   = FindIndex<int, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, intIndex);
+    EXPECT_EQ(2, outOfRange);
+}
+
+TEST(NblibInternalUtils, FindIndexTypeList1)
+{
+    using ListType = TypeList<float>;
+
+    constexpr int floatIndex = FindIndex<float, ListType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, ListType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, outOfRange);
+}
+
+TEST(NblibInternalUtils, FindIndexTypeList2)
+{
+    using ListType = TypeList<float, int>;
+
+    constexpr int floatIndex = FindIndex<float, ListType>{};
+    constexpr int intIndex   = FindIndex<int, ListType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, ListType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, intIndex);
+    EXPECT_EQ(2, outOfRange);
+}
+
+
+TEST(NblibInternalUtils, Contains)
+{
+    using ListType = TypeList<float, int>;
+
+    constexpr bool hasFloat = Contains<float, ListType>{};
+    constexpr bool hasInt   = Contains<int, ListType>{};
+    constexpr bool hasUint  = Contains<unsigned, ListType>{};
+
+    EXPECT_TRUE(hasFloat);
+    EXPECT_TRUE(hasInt);
+    EXPECT_FALSE(hasUint);
+}
+
+TEST(NblibInternalUtils, FindIndexTupleRepeated)
+{
+    using TupleType = std::tuple<float, float, int>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+
+    constexpr int intIndex = FindIndex<int, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(2, intIndex);
+    EXPECT_EQ(3, outOfRange);
+}
+
+TEST(NblibInternalUtils, FindIndexTypeListRepeated)
+{
+    using TupleType = TypeList<float, float, int>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+
+    constexpr int intIndex = FindIndex<int, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(2, intIndex);
+    EXPECT_EQ(3, outOfRange);
+}
+
+
+} // namespace nblib
diff --git a/api/nblib/util/tests/user.cpp b/api/nblib/util/tests/user.cpp

index 0fccd6ec655f3296b393ae941ba82167c05075a4..b44f76f08da087c847b211c7eda84a13d80b7d89 100644 (file)
--- a/api/nblib/util/tests/user.cpp
+++ b/api/nblib/util/tests/user.cpp
@@ -56,13 +56,13 @@ namespace test
  namespace
  {
  
-TEST(NBlibTest, checkNumericValues)
+TEST(NBlibTest, isRealValued)
  {
      std::vector<Vec3> vec;
      vec.emplace_back(1., 1., 1.);
      vec.emplace_back(2., 2., 2.);
  
-    bool ret = checkNumericValues(vec);
+    bool ret = isRealValued(vec);
      EXPECT_EQ(ret, true);
  }
  
@@ -74,7 +74,7 @@ TEST(NBlibTest, checkNumericValuesHasNan)
  
      vec.emplace_back(NAN, NAN, NAN);
  
-    bool ret = checkNumericValues(vec);
+    bool ret = isRealValued(vec);
      EXPECT_EQ(ret, false);
  }
  
@@ -86,7 +86,7 @@ TEST(NBlibTest, checkNumericValuesHasInf)
  
      vec.emplace_back(INFINITY, INFINITY, INFINITY);
  
-    bool ret = checkNumericValues(vec);
+    bool ret = isRealValued(vec);
      EXPECT_EQ(ret, false);
  }
  
@@ -114,7 +114,7 @@ TEST(NBlibTest, generateVelocityCheckNumbers)
      constexpr int     N = 10;
      std::vector<real> masses(N, 1.0);
      auto              out = generateVelocity(300.0, 1, masses);
-    bool              ret = checkNumericValues(out);
+    bool              ret = isRealValued(out);
      EXPECT_EQ(ret, true);
  }
  
diff --git a/api/nblib/util/user.cpp b/api/nblib/util/user.cpp

index 5090441b6857efea97bf9a526209f8251343b0d2..0019143f78494a1300a53bea1294f4e4f366c0b2 100644 (file)
--- a/api/nblib/util/user.cpp
+++ b/api/nblib/util/user.cpp
@@ -47,6 +47,7 @@
  #include "nblib/util/user.h"
  #include "gromacs/random/tabulatednormaldistribution.h"
  #include "gromacs/random/threefry.h"
+#include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/fatalerror.h"
  
  namespace nblib
@@ -139,7 +140,7 @@ std::vector<Vec3> generateVelocity(real tempi, unsigned int seed, std::vector<re
  }
  
  //! Check within the container of gmx::RVecs for a NaN or inf
-bool checkNumericValues(const std::vector<Vec3>& values)
+bool isRealValued(gmx::ArrayRef<const Vec3> values)
  {
      for (auto val : values)
      {
@@ -154,4 +155,9 @@ bool checkNumericValues(const std::vector<Vec3>& values)
      return true;
  }
  
+void zeroCartesianArray(gmx::ArrayRef<Vec3> cartesianArray)
+{
+    std::fill(cartesianArray.begin(), cartesianArray.end(), Vec3{ 0, 0, 0 });
+}
+
  } // namespace nblib
diff --git a/api/nblib/util/user.h b/api/nblib/util/user.h

index 9d19189c4e8d2a7d55baf71a791196a8467c693a..6d782fb6bd99c62af25dc9c71f56ccd846f7e77a 100644 (file)
--- a/api/nblib/util/user.h
+++ b/api/nblib/util/user.h
@@ -57,6 +57,12 @@
  #include "nblib/basicdefinitions.h"
  #include "nblib/vector.h"
  
+namespace gmx
+{
+template<typename T>
+class ArrayRef;
+} // namespace gmx
+
  namespace nblib
  {
  
@@ -65,7 +71,10 @@ namespace nblib
  std::vector<Vec3> generateVelocity(real Temperature, unsigned int seed, std::vector<real> const& masses);
  
  //! Check within the container of gmx::RVecs for a NaN or inf
-bool checkNumericValues(const std::vector<Vec3>& values);
+bool isRealValued(gmx::ArrayRef<const Vec3> values);
+
+//! Zero a cartesian buffer
+void zeroCartesianArray(gmx::ArrayRef<Vec3> cartesianArray);
  
  //! Used to ignore unused arguments of a lambda functions
  inline void ignore_unused() {}
diff --git a/cmake/gmxManageNvccConfig.cmake b/cmake/gmxManageNvccConfig.cmake

index e0ce2f34815bd28313b0614be805e48c1535e5e0..12e54cb40efca94399964c7cff008d92dca0dd38 100644 (file)
--- a/cmake/gmxManageNvccConfig.cmake
+++ b/cmake/gmxManageNvccConfig.cmake
@@ -152,14 +152,22 @@ endif()
  # FindCUDA.cmake is unaware of the mechanism used by cmake to embed
  # the compiler flag for the required C++ standard in the generated
  # build files, so we have to pass it ourselves
-if (CUDA_VERSION VERSION_LESS 10.2)
-    # CUDA doesn't formally support C++17 until version 10.2, so for
+if (CUDA_VERSION VERSION_LESS 11.0)
+    # CUDA doesn't formally support C++17 until version 11.0, so for
      # now host-side code that compiles with CUDA is restricted to
      # C++14. This needs to be expressed formally for older CUDA
      # version.
      list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX14_STANDARD_COMPILE_OPTION}")
  else()
-    list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX17_STANDARD_COMPILE_OPTION}")
+    # gcc-7 pre-dated C++17, so uses the -std=c++1z compiler flag for it,
+    # which modern nvcc does not recognize. So we work around that by
+    # compiling in C++14 mode. Clang doesn't have this problem because nvcc
+    # only supports versions of clang that already understood -std=c++17
+    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8)
+        list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX14_STANDARD_COMPILE_OPTION}")
+    else()
+        list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX17_STANDARD_COMPILE_OPTION}")
+    endif()
  endif()
  
  # assemble the CUDA flags
diff --git a/docs/dev-manual/containers.rst b/docs/dev-manual/containers.rst

index 82b3c9faa1dbc48ed0bcf33dbe3485106c57c993..20cf4d9c25650811dabc0c5bc256c9a87c382d9d 100644 (file)
--- a/docs/dev-manual/containers.rst
+++ b/docs/dev-manual/containers.rst
@@ -11,18 +11,19 @@ under :file:`admin/containers/`
  Images are (re)built manually by |Gromacs| project staff and pushed to
  repositories at https://hub.docker.com/u/gromacs
  
+Refer to :file:`buildall.sh` in the ``master`` branch for the set of images
+currently being built.
+
  Utilities
  =========
  
+:file:`utility.py`
+------------------
+
  .. automodule:: utility
      :members:
  
-HPC container maker
--------------------
-
-We use the `NVidia HPC Container Maker <https://github.com/NVIDIA/hpc-container-maker>`__
-package for scripted Dockerfile generation.
-See :file:`admin/containers/scripted_gmx_docker_builds.py`.
+:file:`scripted_gmx_docker_builds.py`
+-------------------------------------
  
-.. todo:: :issue:`3272` Insert tool documentation.
-    E.g. ``.. automodule:: scripted_gmx_docker_builds``
+.. automodule:: scripted_gmx_docker_builds
diff --git a/docs/dev-manual/gitlab.rst b/docs/dev-manual/gitlab.rst

index a7d5254f184e2ba244725b426af65c7c7075e663..3c5dda31de7c3e62440304fa019da8f8c61eac44 100644 (file)
--- a/docs/dev-manual/gitlab.rst
+++ b/docs/dev-manual/gitlab.rst
@@ -1,5 +1,5 @@
-GitLab
-======
+GitLab CI Pipeline Execution
+============================
  
  The repository contains DockerFiles and GitLab Runner configuration
  files to support automated testing and documentation builds.
@@ -18,12 +18,11 @@ This documentation is incomplete, pending resolution of :issue:`3275`.
  
  ..  todo:: Expand this documentation to resolve :issue:`3275`
  
-Pipeline execution
-------------------
-
  .. todo:: Discuss the distinct characteristics of |Gromacs| CI pipelines relevant to job configuration.
+          (:issue:`3472` and :issue:`3617`)
  
-.. todo:: Comment on the number of pipelines that can be or which are likely to be running at the same time.
+.. todo:: Comment on the number of pipelines that can be or which are likely to be running at the same time.
+          (:issue:`3472` and :issue:`3617`)
  
  .. note::
  
@@ -35,7 +34,7 @@ Pipeline execution
      sufficient testing before acceptance.
  
  Configuration files
-~~~~~~~~~~~~~~~~~~~
+-------------------
  
  At the root of the repository, :file:`.gitlab-ci.yml` defines the stages and
  some default parameters, then includes files from :file:`admin/gitlab-ci/` to
@@ -47,7 +46,7 @@ Such jobs are not directly eligible to run, but may be used as templates
  via the `*extends* job property <https://docs.gitlab.com/ee/ci/yaml/#extends>`_.
  
  Job parameters
-~~~~~~~~~~~~~~
+--------------
  
  Refer to https://docs.gitlab.com/ee/ci/yaml for complete documentation on
  GitLab CI job parameters, but note the following GROMACS-specific conventions.
@@ -69,9 +68,9 @@ GitLab CI job parameters, but note the following GROMACS-specific conventions.
          to `cache:key <https://docs.gitlab.com/ee/ci/yaml/#cachekey>`__
  
      image
-        Part of the tool chain configuration. Instead of setting *image*
-        directly, *extend* a *.use_<toolchain>* template from
-        :file:`admin/gitlab-ci/global.gitlab-ci.yml`
+        See :doc:`/dev-manual/containers` for more about the Docker images used for the
+        CI pipelines. If a job depends on artifacts from previous jobs, be sure
+        to use the same (or a compatible) image as the dependency!
  
      rules
      only
@@ -110,7 +109,7 @@ GitLab CI job parameters, but note the following GROMACS-specific conventions.
          for details of the merging behavior. Refer to :ref:`variables` for local usage.
  
  Schedules and triggers
-~~~~~~~~~~~~~~~~~~~~~~
+----------------------
  
  Pipeline `schedules <https://gitlab.com/help/ci/pipelines/schedules>`__ are
  configured through the GitLab web interface.
@@ -131,7 +130,7 @@ or one of the *release* branches. Those jobs can be triggered manually using the
  through the Gitlab web interface.
  
  Global templates
-~~~~~~~~~~~~~~~~
+----------------
  
  In addition to the templates in the main job definition files,
  common "mix-in" functionality and behavioral templates are defined in
@@ -149,7 +148,7 @@ by a meaningful descriptor and documented within
  :file:`admin/gitlab-ci/global.gitlab-ci.yml`
  
  Job names
-~~~~~~~~~
+---------
  
  Job names should
  
@@ -170,7 +169,7 @@ basic job name from qualifiers or details. Also consider
  .. _variables:
  
  Updating regression tests
-~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------
  
  Changes in |Gromacs| that require changes in regression-tests are notoriously hard,
  because a merge request that tests against the non-updated version of the
@@ -181,11 +180,11 @@ merge request pipelines to fail.
  The solution is a new regression-test branch or commit, uploaded to gitlab.
  Then set that regression test branch with REGRESSIONTESTBRANCH or
  the specific commit with REGRESSIONTESTCOMMIT when
-running the specific pipeline that requires the regressiontest-update. 
+running the specific pipeline that requires the regressiontest-update.
  See below on how to set variables for specific pipelines.
  
  Variables
-~~~~~~~~~
+---------
  
  The GitLab CI framework, GitLab Runner, plugins, and our own scripts set and
  use several `variables <https://docs.gitlab.com/ee/ci/variables/README.html>`__.
@@ -215,6 +214,16 @@ Other important variable keys are as follows.
          Integer version number provided by toolchain mix-in for convenience and
          internal use.
  
+    CMAKE
+        ``gromacs/ci-...`` Docker images built after October 2020 have several
+        versions of CMake installed. The most recent version of CMake in the
+        container will appear first in ``PATH``. To allow individual jobs to
+        use specific versions of CMake, please write the job *script* sections
+        using ``$CMAKE`` instead of ``cmake`` and begin the *script* section with
+        a line such as ``- CMAKE=${CMAKE:-$(which cmake)}``. Specify a CMake
+        version by setting the *CMAKE* variable to the full executable path for
+        the CMake version you would like to use. See also :doc:`containers`.
+
      CMAKE_COMPILER_SCRIPT
          CMake command line options for a tool chain. A definition is provided by
          the mix-in toolchain definitions (e.g. ``.use-gcc8``) to be appended to
@@ -238,12 +247,12 @@ Other important variable keys are as follows.
          pipeline execution time.
  
      REGRESSIONTESTBRANCH
-        Use this branch of the regressiontests rather than master to allow for 
+        Use this branch of the regressiontests rather than master to allow for
          merge requests that require updated regression tests with valid CI tests.
  
      REGRESSIONTESTCOMMIT
-        Use this commit to the regressiontests rather than the head on master to 
-        allow for merge requests that require updated regression tests with 
+        Use this commit to the regressiontests rather than the head on master to
+        allow for merge requests that require updated regression tests with
          valid CI tests.
  
      POST_MERGE_ACCEPTANCE
@@ -257,7 +266,7 @@ Other important variable keys are as follows.
      ``BUILD_DIR``, ``INSTALL_DIR``, ``CACHE_FALLBACK_KEY``, ...
  
  Setting variables
-~~~~~~~~~~~~~~~~~
+-----------------
  
  Variables for individual pipelines are set in the gitlab interface under
  ``CI/CD``; ``Pipelines``. Then choose in the top right corner ``Run Pipeline``.
diff --git a/docs/install-guide/index.rst b/docs/install-guide/index.rst

index c40015459307c910ba2beb340287468ebecffd2b..9a5b136ce26f281cc397b5fc31003c093ec52393 100644 (file)
--- a/docs/install-guide/index.rst
+++ b/docs/install-guide/index.rst
@@ -237,7 +237,7 @@ library. LAM-MPI_ might work, but since it has
  been deprecated for years, it is not supported.
  
  For example, depending on your actual MPI library, use ``cmake
--DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DGMX_MPI=on``.
+-DMPI_C_COMPILER=mpicc -DGMX_MPI=on``.
  
  
  CMake
diff --git a/docs/nblib/listed-data-format.rst b/docs/nblib/listed-data-format.rst

new file mode 100644 (file)

index 0000000..275d64b
--- /dev/null
+++ b/docs/nblib/listed-data-format.rst
@@ -0,0 +1,312 @@
+Design goals and motivation for the data format of bonded forces in NB-LIB
+--------------------------------------------------------------------------
+
+
+The current format for listed forces in GROMACS looks like this:
+
+.. code:: cpp
+
+   struct InteractionDefinitions
+   {
+       std::vector<t_iparams> iparams;
+       std::array<std::vector<int>, F_NRE> il;
+   };
+
+The format covers all interaction types, i.e. \ ``t_iparams`` is a union
+type which can hold the parameters of any type.
+The other member called ``il`` contains the
+indices for each interaction type, where ``F_NRE`` is the number of
+interaction types that GROMACS supports. More precisely, each
+member of ``il``, a ``std::vector<int>``, is a flattened list of all
+interactions for a given interaction type. The vector contains ``N+1`` integer indices
+for each interaction, where ``N`` is the number of particles that are
+involved in the interaction. An additional index is needed to retrieve
+the correct parameters in ``iparams``, hence the total number of indices sums up
+to ``N+1`` per interaction.
+
+The big advantage of storing all types in a union data type is (was),
+that it allows looping over all types with a simple for-loop.
+In pre C++11 and perhaps even pre C++14 times, looping over different
+types was a big hassle and the union data type approach likely was the
+only practicable solution. One downside of this approach, however, is
+that with just a single (union) type, one can't leverage the compiler's
+type system, most importantly static branching, for example with overload resolution.
+As a consequence, only dynamic branching with ``if`` statements remains.
+
+Consider, for instance, the implementation of the top-level
+``calc_listed(const InteractionDefinitions& idef, ...)`` in GROMACS, which in its essence,
+looks like this:
+
+.. code:: cpp
+
+   void calc_listed(const InteractionDefinitions& idef, ...)
+   {
+       // manage timing and multi-threading 
+
+       for (int ftype = 0; ftype < F_NRE; ++ftype)
+       {
+           // branch out and descend stack for 2 intermediate functions based on
+           // the type of interaction that ftype corresponds to
+           // then call a function from a pointer table
+
+           bondFunction* bonded = bondedInteractionFunctions[ftype]; 
+
+           // compute all forces for ftype
+           bonded(idef.iparams, idef.il[ftype], ...);
+       }
+
+       // reduce thread output
+   }
+
+GROMACS supports a lot of different listed interaction types, such as different
+types of bonds, angles and proper and improper dihedrals. These different types
+require different handling, and finally the right force kernel has to be chosen
+from a table of function pointers.
+The handling code required to correctly branch out to all the different cases
+results in quite a deep call stack, a lot of branching logic and ends up accounting
+for a fair part of the overall complexity, which should ideally just consist of
+the type-specific force calculation implementations.
+
+
+A type-aware approach to listed forces
+--------------------------------------
+
+NB-LIB aims to reduce the overall code complexity with a type-aware data format
+where each interaction type is implemented as a separate (C++)-type.
+The format for a given interaction type looks like this:
+
+.. code:: cpp
+
+   template <class Interaction>
+   struct InteractionData
+   {
+       std::vector<Index<Interaction>> indices;
+       std::vector<Interaction>        parameters;
+   };
+
+For each type of interaction, we store the interaction indices plus the
+interaction parameters. While the (C++)-types are different, the actual data stored is
+exactly the same: ``N+1`` integer indices per ``N``-center interaction plus the unique parameters.
+An example for ``Interaction`` would be ``HarmonicBond``, the public part of which looks like this:
+
+.. code:: cpp
+
+   class HarmonicBond
+   {
+   public:
+       // return lvalue ref for use with std::tie
+       // in order to leverage std::tuple comparison ops
+       const real& forceConstant() const;
+       const real& equilDistance() const;
+   };
+
+The ``Index`` traits class deduces to ``std::array<int, 3>``, because
+for each harmonic bond, we need two ``int``\ s for the coordinate
+indices and a third ``int`` to look up the bond parameters in the
+``parameters`` vector. For angles and dihedrals, the ``Index`` trait
+would add an additional one or two ``int``\ s to hold the additional
+coordinate indices.
+
+Finally, we gather all types of interactions in a
+``std::tuple``, such that the complete definition for listed forces
+in NB-LIB looks like this:
+
+.. code:: cpp
+
+   using ListedInteractions = std::tuple<InteractionData<HarmonicBond>, ..., InteractionData<HarmonicAngle>, ...>;
+
+One important property of ``ListedInteractions`` is that it stores exactly the same information as ``InteractionDefinitions``
+and therefore conversion in either direction is easy to implement.
+
+
+The NB-LIB listed forces pipeline
+---------------------------------
+
+Given the listed interaction data provided in the format described above,
+the steps required to calculate the corresponding forces
+are, in brief: 
+
+  * Loop over all interaction types
+  * Loop over all interactions for given type
+  * Call interaction type kernel, store forces and return energy
+
+
+This procedure is identical to the current implementation in GROMACS.
+In actual code, the first step looks like this:
+
+.. code:: cpp
+
+   template<class Buffer, class Pbc>
+   auto reduceListedForces(const ListedInteractions& interactions,
+                           const std::vector<gmx::RVec>& x,
+                           Buffer* forces,
+                           const Pbc& pbc)
+   {
+       std::array<real, std::tuple_size<ListedInteractions>::value> energies;
+
+       // lambda function, will be applied to each type
+       auto computeForceType = [forces, &x, &energies, &pbc](const auto& ielem) {
+           real energy = computeForces(ielem.indices, ielem.parameters, x, forces, pbc);
+           energies[FindIndex<std::decay_t<decltype(ielem)>, ListedInteractions>{}] = energy;
+       };
+
+       // apply the lambda to all bond types
+       for_each_tuple(computeForceType, interactions);
+
+       return energies;
+   }
+
+With the help of a generic lambda and C++17’s ``std::apply`` in the
+one-liner ``for_each_tuple``, we can generate the loop over the
+different types in the tuple quite effortlessly. While
+``reduceListedForces`` implements a loop over the interaction types, the
+next layer, ``computeForces`` implements a loop over all interactions of
+a given type:
+
+.. code:: cpp
+
+   template <class Index, class InteractionType, class Buffer, class Pbc>
+   real computeForces(const std::vector<Index>& indices,
+                      const std::vector<InteractionType>& iParams,
+                      const std::vector<gmx::RVec>& x,
+                      Buffer* forces,
+                      const Pbc& pbc)
+   {
+       real Epot = 0.0;
+
+       for (const auto& index : indices)
+       {
+           Epot += dispatchInteraction(index, iParams, x, forces, pbc);
+       }
+
+       return Epot;
+   }
+
+Compared to the union data type approach where this loop has been manually
+implemented for all interaction types, in NB-LIB, only a single implementation
+is required.
+
+We’re now down to the level of individual bonds, angles and dihedrals.
+At this point, the next steps depend on the actual type of the
+interaction. But instead of dispatching each harmonic bond, cubic bond,
+harmonic angle and so on to their separate paths just yet, we just
+differentiate based on the number of interaction centers for now.
+Through overload resolution, the appropriate version of
+``dispatchInteraction`` gets called now, such as this one for the case
+of 2-center interactions:
+
+.. code:: cpp
+
+   template <class Buffer, class TwoCenterType, class Pbc>
+   std::enable_if_t<IsTwoCenter<TwoCenterType>::value, real>
+   dispatchInteraction(const InteractionIndex<TwoCenterType>& index,
+                       const std::vector<TwoCenterType>& bondInstances,
+                       const std::vector<gmx::RVec>& x,
+                       Buffer* forces,
+                       const Pbc& pbc)
+   {
+       int i = std::get<0>(index);
+       int j = std::get<1>(index);
+       const gmx::RVec& x1 = x[i];
+       const gmx::RVec& x2 = x[j];
+       const TwoCenterType& bond = bondInstances[std::get<2>(index)];
+
+       gmx::RVec dx;
+       // calculate x1 - x2 modulo pbc
+       pbc.dxAiuc(x1, x2, dx);
+       real dr2 = dot(dx, dx);
+       real dr  = std::sqrt(dr2);
+
+       auto [force, energy] = bondKernel(dr, bond);
+
+       // avoid division by 0
+       if (dr2 != 0.0)
+       {
+           force /= dr;
+           detail::spreadTwoCenterForces(force, dx, &(*forces)[i], &(*forces)[j]);
+       }
+
+       return energy;
+   }
+
+We can again observe that common parts among different 2-center interaction types
+are reused. The common parts are 
+
+ * coordinate retrieval
+ * computation of the scalar distance
+ * spreading of the scalar part of the force to the two centers
+
+The only remaining thing to do now is to call the actual
+kernel to compute the force. Since ``bond`` has a distinct type, we can
+again use overload resolution:
+
+.. code:: cpp
+
+   template <class T>
+   auto bondKernel(T dr, const HarmonicBond& bond)
+   {
+       return harmonicScalarForce(bond.forceConstant(), bond.equilDistance(), dr);
+   }
+
+and call the actual kernel, which in its simplest form for a harmonic
+bond looks like this:
+
+.. code:: cpp
+
+   template <class T>
+   std::tuple<T, T> harmonicScalarForce(T k, T x0, T x)
+   {
+       real dx  = x - x0;
+       real dx2 = dx * dx;
+
+       real force = -k * dx;
+       real epot = 0.5 * k * dx2;
+
+       return std::make_tuple(force, epot);
+
+       /* That was 6 flops */
+   }
+
+That’s it! The approach outlined here manages to reuse (between different types)
+a significant part of the code that feeds input data to force kernels.
+Notably, not a single ``if(ftype)`` is required to implement the control flow.
+The remaining parts for a feature complete implementation are
+overloads of ``dispatchInteraction`` for the 3- to 5-center interactions and
+the type-aware wrappers for all the different kernels implemented in
+GROMACS. They have been omitted for brevity.
+
+A note on **multithreading**: multithreading is handled above the top-level
+``reduceListedForces`` described here. For parallel execution, the
+input ``ListedInteractions`` tuple is split into ``nThreads`` parts and a
+``Buffer`` object is set up for each thread. ``reduceListedForces`` is then
+called once by each thread with the assigned fraction of ``ListedInteractions``
+and the ``Buffer`` as argument.
+The lifetime of the ``ListedInteractions`` splits is coupled to the domain decomposition.
+
+Summary
+-------
+
+NB-LIB listed forces employs a (C++)-type aware data format that
+is otherwise equivalent to its counterpart in GROMACS.
+The type-aware data format is then used to simplify the "routing" layer that
+connects data input to the appropriate kernels. Thanks to static branching and polymorphism,
+increased code reuse and simplified branching logic could be achieved.
+**The force kernels themselves do not need to be changed and NB-LIB refers to
+GROMACS for their implementation.**
+
+
+Outlook
+-------
+
+The data flow management for listed forces described here allows further
+improvements to be implemented:
+
+* Aggregate interaction types: fuse interactions of different types into
+  aggregated types. For example, a dihedral interaction and the bonds and angles
+  that are present among the same four particle indices can be combined into a single
+  aggregated interaction. This allows reusing the particle coordinates loaded from memory
+  for multiple types and also combines the store operations for the forces.
+  Type aggregates also likely simplify an efficient GPU implementation of listed forces.
+
+* Separation of a topology containing both parameter sets for a system state A and B into two
+  separate topologies for the A and B system states.
diff --git a/docs/nblib/listed-dev.rst b/docs/nblib/listed-dev.rst

new file mode 100644 (file)

index 0000000..ab8f18c
--- /dev/null
+++ b/docs/nblib/listed-dev.rst
@@ -0,0 +1,98 @@
+Adding New Listed-Interaction Types in NB-LIB
+=============================================
+
+NB-LIB currently has code paths for listed interactions that occur between two, three, four and five different particles.
+To extend NB-LIB to support more types of particle interactions, modify the following three files.
+
+Two center interactions must use the distance between the centers as an input to the force kernel.
+Three center interactions take the form ``(particleI, particleJ, particleK)``.
+In this case, the middle particle, ``particleJ``, is taken as the center around which the angle is computed.
+This angle must be an input to a three center force kernel.
+Likewise for four center interactions, the dihedral angle phi must be an input to the force kernel.
+Accepting these constraints, it is possible to add a new kernel by modifying the following three files.
+
+1) bondtypes.h_
+2) definitions.h_
+3) kernels.hpp_
+
+.. _bondtypes.h:
+
+1) bondtypes.h
+---------------
+
+This file contains one C++ type to store the parameters for each interaction type.
+New interaction types are added here as separate C++ types.
+The interface of these types is completely unrestricted.
+The only requirements are equality and less than comparison, and that the interface be
+compatible with the corresponding (user-added) kernel.
+
+.. _definitions.h:
+
+2) definitions.h
+------------------------
+
+This file begins with pre-processor macro lists that classify concrete interaction types into two, three, four and five center types.
+To add a new type, the user must add the interaction type parameter struct name to the macro of the correct center number.
+In this case, ``NewBondType`` is an example of a two center interaction.
+As such it would get added to the ``SUPPORTED_TWO_CENTER_TYPES`` macro.
+Assuming that the only other two center interaction is called ``DefaultBond``, the result would look like the following snippet.
+
+.. code:: cpp
+
+    #define SUPPORTED_TWO_CENTER_TYPES DefaultBond, NewBondType
+
+.. _kernels.hpp:
+
+Adding ``NewBondType`` to this macro ensures that the NBLIB ``molecule``
+class ``addInteraction`` function supports adding the new bond type
+and includes it in the listed interaction data that the ``topology`` class
+provides. The ``SUPPORTED_TWO_CENTER_TYPES`` macro is immediately converted into a
+C++ type list that is implemented as a variadic template. The type list
+is then used to define all the dependent data structures. Apart from creating
+the type list, the only place where the macro is needed is explicit template instantiation.
+
+Note that, as of C++17, there's no alternative to preprocessor macros for adding
+the required template instantiations controlled through the macros described here.
+(Other than manually adding the template instantiations, which would require the instantiation list
+of several templates to be updated each time a new interaction type is added. Compared to the preprocessor
+based solution where just a single macro has to be extended, this would clearly be an inferior solution.)
+In NBLIB, the design decision we took was that we did not want to expose a templated
+interface in a user header, and it is for this reason that we explicitly need
+to instantiate the interface with all the supported listed interaction types defined
+in this macro.
+
+3) kernels.hpp
+---------------------
+
+In this file the actual force kernels for each interaction type are implemented.
+Each kernel call is templated to allow various precisions and is
+accessed through an overload ``bondKernel`` that extracts the relevant
+parameters from a ``const NewBondType&`` argument.
+The kernel return type is always an ``std::tuple`` of the force and the potential.
+
+.. code:: cpp
+
+   /*! \brief kernel to calculate the new bond type force
+    *
+    * \param k     Force constant
+    * \param x0    Equilibrium distance
+    * \param scale The scaling factor
+    * \param x     Input bond length
+    *
+    * \return tuple<force, potential energy>
+    */
+   template <class T>
+   std::tuple<T, T> newBondForce(T k, T x0, T scale, T x)
+   {
+       T exponent = std::exp( (x - x0) / scale);
+       T epot = k * exponent;
+       T force =  epot / scale;
+       return std::make_tuple(force, epot);
+   }
+
+  template <class T>
+  inline std::tuple<T, T> bondKernel(T dr, const NewBondType& bond)
+  {
+      return newBondForce(bond.forceConstant(), bond.equilDistance(), bond.scaleFactor(), dr);
+  }
+
diff --git a/docs/reference-manual/special/awh.rst b/docs/reference-manual/special/awh.rst

index faa9f4197900011b55c3b95d91407b26efdaea0c..f5b2fbf225608561a66d6e6aecbf94d2b4a20315 100644 (file)
--- a/docs/reference-manual/special/awh.rst
+++ b/docs/reference-manual/special/awh.rst
@@ -602,10 +602,16 @@ free energy scales as :math:`\varepsilon^2 \sim 1/(ND)`
  estimate used by AWH to initialize :math:`N` in terms of more meaningful
  quantities
  
-.. math:: \frac{1}{N_0} = \frac{1}{N_0(\varepsilon_0, D)} \sim D\varepsilon_0^2.
+.. math:: \frac{1}{N_0} = \frac{1}{N_0(\varepsilon_0, D)} = \frac{1}{\Delta
+         t_\mathrm{sample}} \max_d \frac{L_d^2}{2D_d} \varepsilon_0^2
            :label: eqawhn0
  
-Essentially, this tells us that a slower system (small :math:`D`)
+where :math:`L_d` is the length of the interval and :math:`D_d` is
+the diffusion constant along dimension :math:`d` of the AWH bias.
+For one dimension, :math:`L^2/2D` is the average time to diffuse
+over a distance of :math:`L`. We then take the maximum crossing
+time over all dimensions involved in the bias.
+Essentially, this formula tells us that a slower system (small :math:`D`)
  requires more samples (larger :math:`N^0`) to attain the same level of
  accuracy (:math:`\varepsilon_0`) at a given sampling rate. Conversely,
  for a system of given diffusion, how to choose the initial biasing rate
@@ -621,9 +627,10 @@ run a short trial simulation and after the first covering check the
  maximum free energy difference of the PMF estimate. If this is much
  larger than the expected magnitude of the free energy barriers that
  should be crossed, then the system is probably being pulled too hard and
-:math:`D` should be decreased. :math:`\varepsilon_0` on the other hand,
-would only be tweaked when starting an AWH simulation using a fairly
-accurate guess of the PMF as input.
+:math:`D` should be decreased. An accurate estimate of the diffusion
+can be obtained from an AWH simulation with the :ref:`gmx awh` tool.
+:math:`\varepsilon_0` on the other hand, should be a rough estimate
+of the initial error.
  
  Tips for efficient sampling
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/release-notes/2021/major/bugs-fixed.rst b/docs/release-notes/2021/major/bugs-fixed.rst

index 5d1551ae540096dcfb8b20aace3f20c7cdcf177d..cd2f2c78b33bd140697889ff11b18d8a7e18c99c 100644 (file)
--- a/docs/release-notes/2021/major/bugs-fixed.rst
+++ b/docs/release-notes/2021/major/bugs-fixed.rst
@@ -24,3 +24,16 @@ and writing pdb files. This affected naming of
  H atoms in particular.
  
  :issue:`3469`
+
+Corrected AWH initial histogram size
+""""""""""""""""""""""""""""""""""""
+
+The initial histogram size for AWH biases depended (weakly) on the force
+constant. This dependence has been removed, which increases the histogram
+size by about a factor of 3. In practice this has only a minor effect
+on the time to solution. For multiple dimensions, the histogram size was
+underestimated, in particular with a combination of slower and faster
+dimensions. The, now simplified, formula for the initial histogram size is
+given in the reference manual.
+
+:issue:`3751`
diff --git a/docs/release-notes/2021/major/performance.rst b/docs/release-notes/2021/major/performance.rst

index b94c016d48585e286b367c96babdcd8ea25fb55b..85b08653c852b526852e9827ca3248e437134c25 100644 (file)
--- a/docs/release-notes/2021/major/performance.rst
+++ b/docs/release-notes/2021/major/performance.rst
@@ -32,3 +32,10 @@ CPU SIMD accelerated implementation of harmonic bonds
  SIMD acceleration for bonds slightly improves performance for systems
  with H-bonds only constrained or no constraints. This gives a significant
  improvement with multiple time stepping.
+
+Allow offloading GPU update and constraints without direct GPU communication
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+Allow domain-decomposition and separate PME rank parallel runs to offload update and
+constraints to a GPU with CUDA without requiring the (experimental) direct GPU
+communication features to be also enabled.
diff --git a/docs/release-notes/2021/major/portability.rst b/docs/release-notes/2021/major/portability.rst

index 75e2449a7973556eb4f891baad8860df4cb23310..ce89ed3325503cdf7d3c18351b5687429d681be1 100644 (file)
--- a/docs/release-notes/2021/major/portability.rst
+++ b/docs/release-notes/2021/major/portability.rst
@@ -43,6 +43,9 @@ Windows
  |Gromacs| now builds correctly on Windows with MSVC even when the path
  to the source or build directory has a space in it.
  
+Builds with MSVC 2019 correctly detect the proper static linking setup
+during CMake configuration.
+
  RDTSCP usage and reporting
  """"""""""""""""""""""""""
  
diff --git a/docs/user-guide/mdp-options.rst b/docs/user-guide/mdp-options.rst

index 251172ae96120975235720a2b3c4af5c990f71bc..e547ec841812bea5bfaae306c5df118672ca36cf 100644 (file)
--- a/docs/user-guide/mdp-options.rst
+++ b/docs/user-guide/mdp-options.rst
@@ -241,7 +241,8 @@ Run control
  
        Use a multiple timing-stepping integrator to evaluate some forces, as specified
        by :mdp:`mts-level2-forces` every :mdp:`mts-level2-factor` integration
-      steps. All other forces are evaluated at every step.
+      steps. All other forces are evaluated at every step. MTS is currently
+      only supported with :mdp-value:`integrator=md`.
  
  .. mdp:: mts-levels
  
diff --git a/scripts/GMXRC.bash.cmakein b/scripts/GMXRC.bash.cmakein

index a6fdbe7865ca857a03ca6e7551f1e73b401cce33..c5081de324b9d433b0b230a939e658e0d5a270b0 100644 (file)
--- a/scripts/GMXRC.bash.cmakein
+++ b/scripts/GMXRC.bash.cmakein
@@ -9,41 +9,41 @@ test -n "${ZSH_VERSION+set}" && setopt shwordsplit
  old_IFS="$IFS"
  IFS=":"
  
-# First remove gromacs part of ld_library_path
-tmppath=""
-for i in $@LD_LIBRARY_PATH@; do
-  if test "$i" != "$GMXLDLIB"; then
-    tmppath="${tmppath}${tmppath:+:}${i}"
-  fi
-done
-@LD_LIBRARY_PATH@=$tmppath
-
-# remove gromacs part of PKG_CONFIG_PATH
-tmppath=""
-for i in $PKG_CONFIG_PATH; do
-  if test "$i" != "$GMXLDLIB/pkgconfig"; then
-    tmppath="${tmppath}${tmppath:+:}${i}"
-  fi
-done
-PKG_CONFIG_PATH=$tmppath
-
-# remove gromacs part of path
-tmppath=""
-for i in $PATH; do
-  if test "$i" != "$GMXBIN"; then
-    tmppath="${tmppath}${tmppath:+:}${i}"
+replace_in_path() {
+  # Parse PATH-like variable $1, and return a copy of it with any instances of $3 removed and $2 added to the beginning.
+  # If $3 is empty, do not remove anything.
+  local tmppath oldpath to_remove to_add old_shell_opts
+  oldpath="$1"
+  to_add="$2"
+  to_remove="$3"
+  if test -z "${oldpath}"; then
+    echo "${to_add}"
+  else
+    if test "${oldpath}" = ":"; then
+      echo "${to_add}:"
+    else
+      tmppath="${to_add}"
+      old_shell_opts="$-"
+      set -o noglob
+      set -- ${oldpath}"" # Will put tokens to $@, including empty ones
+      # If noglob ("f") was not enabled before, turn it back off
+      if test -n "${old_shell_opts##*f*}"; then
+        set +o noglob
+      fi
+      for i in "$@"; do
+        if test \( -z "${to_remove}" \) -o \( "$i" != "${to_remove}" \); then
+          tmppath="${tmppath}:${i}"
+        fi
+      done
+      echo "${tmppath}"
+    fi
    fi
-done
-PATH=$tmppath
+}
  
-# and remove the gmx part of manpath
-tmppath=""
-for i in $MANPATH; do
-  if test "$i" != "$GMXMAN"; then
-    tmppath="${tmppath}${tmppath:+:}${i}"
-  fi
-done
-MANPATH=$tmppath
+# Keep current values to remove later
+OLD_GMXLDLIB="$GMXLDLIB"
+OLD_GMXBIN="$GMXBIN"
+OLD_GMXMAN="$GMXMAN"
  
  ##########################################################
  # This is the real configuration part. We save the Gromacs
@@ -58,11 +58,10 @@ GMXDATA=${GMXPREFIX}/@GMX_INSTALL_GMXDATADIR@
  GMXTOOLCHAINDIR=${GMXPREFIX}/@GMX_INSTALL_CMAKEDIR@
  GROMACS_DIR=${GMXPREFIX}
  
-@LD_LIBRARY_PATH@=${GMXLDLIB}${@LD_LIBRARY_PATH@:+:}${@LD_LIBRARY_PATH@}
-PKG_CONFIG_PATH=${GMXLDLIB}/pkgconfig${PKG_CONFIG_PATH:+:}${PKG_CONFIG_PATH}
-PATH=${GMXBIN}${PATH:+:}${PATH}
-#debian/ubuntu needs a : at the end
-MANPATH=${GMXMAN}:${MANPATH}
+@LD_LIBRARY_PATH@=$(replace_in_path "${@LD_LIBRARY_PATH@}" "${GMXLDLIB}" "${OLD_GMXLDLIB}")
+PKG_CONFIG_PATH=$(replace_in_path "${PKG_CONFIG_PATH}" "${GMXLDLIB}/pkgconfig" "${OLD_GMXLDLIB}/pkgconfig")
+PATH=$(replace_in_path "${PATH}" "${GMXBIN}" "${OLD_GMXBIN}")
+MANPATH=$(replace_in_path "${MANPATH}" "${GMXMAN}" "${OLD_GMXMAN}")
  
  # export should be separate, so /bin/sh understands it
  export GMXBIN GMXLDLIB GMXMAN GMXDATA @LD_LIBRARY_PATH@ PATH MANPATH
diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt

index e99890ac99807bb710f7794a6d31478ce3c811fd..962c2b558c30404d85257b5b0a15303cff15e900 100644 (file)
--- a/src/gromacs/CMakeLists.txt
+++ b/src/gromacs/CMakeLists.txt
@@ -40,7 +40,7 @@ if (GMX_CLANG_CUDA)
  endif()
  
  set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES)
-set_property(GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
+set_property(GLOBAL PROPERTY CUDA_SOURCES)
  set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
  set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE)
  
@@ -169,19 +169,8 @@ list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
  
  # Mark some shared GPU implementation files to compile with CUDA if needed
  if (GMX_GPU_CUDA)
-    get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
-    set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-endif()
-
-# set up CUDA compilation with clang
-if (GMX_CLANG_CUDA)
-    foreach (_file ${LIBGROMACS_SOURCES})
-        get_filename_component(_ext ${_file} EXT)
-        get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT)
-        if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format)
-            gmx_compile_cuda_file_with_clang(${_file})
-        endif()
-    endforeach()
+    get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES)
+    set_source_files_properties(${CUDA_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
  endif()
  
  if (GMX_GPU_CUDA)
@@ -288,10 +277,21 @@ if (GMX_GPU_CUDA AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
      # must suppress them.
      GMX_TEST_CXXFLAG(CXXFLAGS_NO_ZERO_AS_NULL_POINTER_CONSTANT "-Wno-zero-as-null-pointer-constant" NVCC_CLANG_SUPPRESSIONS_CXXFLAGS)
  
-    get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES)
      foreach(_compile_flag ${NVCC_CLANG_SUPPRESSIONS_CXXFLAGS})
-        set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS ${_compile_flag})
+        set(GMX_CUDA_CLANG_FLAGS "${GMX_CUDA_CLANG_FLAGS} ${_compile_flag}")
      endforeach()
+    if (GMX_CLANG_CUDA)
+        foreach (_file ${LIBGROMACS_SOURCES})
+            get_filename_component(_ext ${_file} EXT)
+            get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT)
+            if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format)
+                gmx_compile_cuda_file_with_clang(${_file})
+            endif()
+        endforeach()
+    else()
+        get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES)
+        set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS ${GMX_CUDA_CLANG_FLAGS})
+    endif()
  endif()
  
  # Only add the -fsycl flag to sources that really need it
diff --git a/src/gromacs/applied_forces/awh/biasparams.cpp b/src/gromacs/applied_forces/awh/biasparams.cpp

index 96f1b5e67cdbb72420e3d347d5c7c5e0082e0fe4..182713007747a0f5f87954939993af84fac69d02 100644 (file)
--- a/src/gromacs/applied_forces/awh/biasparams.cpp
+++ b/src/gromacs/applied_forces/awh/biasparams.cpp
@@ -165,123 +165,34 @@ int64_t calcCheckCoveringInterval(const AwhParams&              awhParams,
      return numStepsCheck;
  }
  
-/*! \brief
- * Returns an approximation of the geometry factor used for initializing the AWH update size.
- *
- * The geometry factor is defined as the following sum of Gaussians:
- * sum_{k!=0} exp(-0.5*(k*pi*x)^2)/(pi*k)^2,
- * where k is a xArray.size()-dimensional integer vector with k_i in {0,1,..}.
- *
- * \param[in] xArray  Array to evaluate.
- * \returns the geometry factor.
- */
-double gaussianGeometryFactor(gmx::ArrayRef<const double> xArray)
-{
-    /* For convenience we give the geometry factor function a name: zeta(x) */
-    constexpr size_t                    tableSize  = 5;
-    std::array<const double, tableSize> xTabulated = { { 1e-5, 1e-4, 1e-3, 1e-2, 1e-1 } };
-    std::array<const double, tableSize> zetaTable1d = { { 0.166536811948, 0.16653116886, 0.166250075882,
-                                                          0.162701098306, 0.129272430287 } };
-    std::array<const double, tableSize> zetaTable2d = { { 2.31985974274, 1.86307292523, 1.38159772648,
-                                                          0.897554759158, 0.405578211115 } };
-
-    gmx::ArrayRef<const double> zetaTable;
-
-    if (xArray.size() == 1)
-    {
-        zetaTable = zetaTable1d;
-    }
-    else if (xArray.size() == 2)
-    { // NOLINT bugprone-branch-clone
-        zetaTable = zetaTable2d;
-    }
-    else
-    {
-        /* TODO... but this is anyway a rough estimate and > 2 dimensions is not so popular.
-         * Remove the above NOLINT when addressing this */
-        zetaTable = zetaTable2d;
-    }
-
-    /* TODO. Really zeta is a function of an ndim-dimensional vector x and we shoudl have a ndim-dimensional lookup-table.
-       Here we take the geometric average of the components of x which is ok if the x-components are not very different. */
-    double xScalar = 1;
-    for (const double& x : xArray)
-    {
-        xScalar *= x;
-    }
-
-    GMX_ASSERT(!xArray.empty(), "We should have a non-empty input array");
-    xScalar = std::pow(xScalar, 1.0 / xArray.size());
-
-    /* Look up zeta(x) */
-    size_t xIndex = 0;
-    while ((xIndex < xTabulated.size()) && (xScalar > xTabulated[xIndex]))
-    {
-        xIndex++;
-    }
-
-    double zEstimate;
-    if (xIndex == xTabulated.size())
-    {
-        /* Take last value */
-        zEstimate = zetaTable[xTabulated.size() - 1];
-    }
-    else if (xIndex == 0)
-    {
-        zEstimate = zetaTable[xIndex];
-    }
-    else
-    {
-        /* Interpolate */
-        double x0 = xTabulated[xIndex - 1];
-        double x1 = xTabulated[xIndex];
-        double w  = (xScalar - x0) / (x1 - x0);
-        zEstimate = w * zetaTable[xIndex - 1] + (1 - w) * zetaTable[xIndex];
-    }
-
-    return zEstimate;
-}
-
  /*! \brief
   * Estimate a reasonable initial reference weight histogram size.
   *
- * \param[in] dimParams         Parameters for the dimensions of the coordinate.
   * \param[in] awhBiasParams     Bias parameters.
   * \param[in] gridAxis          The BiasGrid axes.
   * \param[in] beta              1/(k_B T).
   * \param[in] samplingTimestep  Sampling frequency of probability weights.
   * \returns estimate of initial histogram size.
   */
-double getInitialHistogramSizeEstimate(const std::vector<DimParams>& dimParams,
-                                       const AwhBiasParams&          awhBiasParams,
-                                       const std::vector<GridAxis>&  gridAxis,
-                                       double                        beta,
-                                       double                        samplingTimestep)
+double getInitialHistogramSizeEstimate(const AwhBiasParams&         awhBiasParams,
+                                       const std::vector<GridAxis>& gridAxis,
+                                       double                       beta,
+                                       double                       samplingTimestep)
  {
      /* Get diffusion factor */
-    double              crossingTime = 0.;
+    double              maxCrossingTime = 0.;
      std::vector<double> x;
      for (size_t d = 0; d < gridAxis.size(); d++)
      {
-        double axisLength = gridAxis[d].isFepLambdaAxis() ? 1.0 : gridAxis[d].length();
-        if (axisLength > 0)
-        {
-            crossingTime += awhBiasParams.dimParams[d].diffusion / (axisLength * axisLength);
-            /* The sigma of the Gaussian distribution in the umbrella */
-            double sigma = 1.;
-            if (dimParams[d].isPullDimension())
-            {
-                GMX_RELEASE_ASSERT(dimParams[d].pullDimParams().betak != 0,
-                                   "beta*k cannot be zero");
-                sigma /= std::sqrt(dimParams[d].pullDimParams().betak);
-            }
-            x.push_back(sigma / axisLength);
-        }
+        GMX_RELEASE_ASSERT(awhBiasParams.dimParams[d].diffusion > 0, "We need positive diffusion");
+        // With diffusion it takes on average T = L^2/2D time to cross length L
+        double axisLength   = gridAxis[d].isFepLambdaAxis() ? 1.0 : gridAxis[d].length();
+        double crossingTime = (axisLength * axisLength) / (2 * awhBiasParams.dimParams[d].diffusion);
+        maxCrossingTime     = std::max(maxCrossingTime, crossingTime);
      }
-    GMX_RELEASE_ASSERT(crossingTime > 0, "We need at least one dimension with non-zero length");
+    GMX_RELEASE_ASSERT(maxCrossingTime > 0, "We need at least one dimension with non-zero length");
      double errorInitialInKT = beta * awhBiasParams.errorInitial;
-    double histogramSize    = gaussianGeometryFactor(x)
-                           / (crossingTime * gmx::square(errorInitialInKT) * samplingTimestep);
+    double histogramSize    = maxCrossingTime / (gmx::square(errorInitialInKT) * samplingTimestep);
  
      return histogramSize;
  }
@@ -332,11 +243,8 @@ BiasParams::BiasParams(const AwhParams&              awhParams,
      updateWeight(numSamplesUpdateFreeEnergy_ * numSharedUpdate),
      localWeightScaling(eTarget == eawhtargetLOCALBOLTZMANN ? temperatureScaleFactor : 1),
      initialErrorInKT(beta * awhBiasParams.errorInitial),
-    initialHistogramSize(getInitialHistogramSizeEstimate(dimParams,
-                                                         awhBiasParams,
-                                                         gridAxis,
-                                                         beta,
-                                                         numStepsSampleCoord_ * mdTimeStep)),
+    initialHistogramSize(
+            getInitialHistogramSizeEstimate(awhBiasParams, gridAxis, beta, numStepsSampleCoord_ * mdTimeStep)),
      convolveForce(awhParams.ePotential == eawhpotentialCONVOLVED),
      biasIndex(biasIndex),
      disableUpdateSkips_(disableUpdateSkips == DisableUpdateSkips::yes)
diff --git a/src/gromacs/applied_forces/awh/biasstate.cpp b/src/gromacs/applied_forces/awh/biasstate.cpp

index c248699916bce5910114656c06fad4e8407cdb31..3713c0bbce8164e8a69054aff8edaad38126a617 100644 (file)
--- a/src/gromacs/applied_forces/awh/biasstate.cpp
+++ b/src/gromacs/applied_forces/awh/biasstate.cpp
@@ -1003,13 +1003,16 @@ bool BiasState::isSamplingRegionCovered(const BiasParams&             params,
      {
          if (grid.axis(d).isFepLambdaAxis())
          {
-            /* TODO: Verify that a threshold of 1.0 is OK. With a very high sample weight 1.0 can be
-             * reached quickly even in regions with low probability. Should the sample weight be
-             * taken into account here? */
+            /* Do not modify the weight threshold based on a FEP lambda axis. The spread
+             * of the sampling weights does not depend on a Gaussian distribution (like
+             * below). */
              weightThreshold *= 1.0;
          }
          else
          {
+            /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
+             * approximately (given that the spacing can be modified if the dimension is periodic)
+             * proportional to sqrt(1/(2*pi)). */
              weightThreshold *= grid.axis(d).spacing()
                                 * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
          }
diff --git a/src/gromacs/applied_forces/awh/tests/bias.cpp b/src/gromacs/applied_forces/awh/tests/bias.cpp

index c7b0cedadd6e0d7c277ced3e7f044566be455095..a0f8aae73531ea71382916a301088e4511016785 100644 (file)
--- a/src/gromacs/applied_forces/awh/tests/bias.cpp
+++ b/src/gromacs/applied_forces/awh/tests/bias.cpp
@@ -94,8 +94,9 @@ static AwhTestParameters getAwhTestParameters(int eawhgrowth, int eawhpotential)
  
      AwhDimParams& awhDimParams = params.awhDimParams;
  
-    awhDimParams.period         = 0;
-    awhDimParams.diffusion      = 0.1;
+    awhDimParams.period = 0;
+    // Correction for removal of GaussianGeometryFactor/2 in histogram size
+    awhDimParams.diffusion      = 0.1 / (0.144129616073222 * 2);
      awhDimParams.origin         = 0.5;
      awhDimParams.end            = 1.5;
      awhDimParams.coordValueInit = awhDimParams.origin;
diff --git a/src/gromacs/applied_forces/awh/tests/bias_fep_lambda_state.cpp b/src/gromacs/applied_forces/awh/tests/bias_fep_lambda_state.cpp

index e716966043e99895de242eaa04ef479e3ca387ef..a68095e0100cbfe68f3b6b5e22f9ff74eee0095c 100644 (file)
--- a/src/gromacs/applied_forces/awh/tests/bias_fep_lambda_state.cpp
+++ b/src/gromacs/applied_forces/awh/tests/bias_fep_lambda_state.cpp
@@ -96,8 +96,9 @@ static AwhFepLambdaStateTestParameters getAwhFepLambdaTestParameters(int eawhgro
  
      AwhDimParams& awhDimParams = params.awhDimParams;
  
-    awhDimParams.period         = 0;
-    awhDimParams.diffusion      = 1e-4;
+    awhDimParams.period = 0;
+    // Correction for removal of GaussianGeometryFactor/2 in histogram size
+    awhDimParams.diffusion      = 1e-4 / (0.12927243028700 * 2);
      awhDimParams.origin         = 0;
      awhDimParams.end            = numLambdaStates - 1;
      awhDimParams.coordValueInit = awhDimParams.origin;
diff --git a/src/gromacs/compat/mp11.h b/src/gromacs/compat/mp11.h

new file mode 100644 (file)

index 0000000..439084e
--- /dev/null
+++ b/src/gromacs/compat/mp11.h
@@ -0,0 +1,109 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Provides ported functions/classes from boost::mp11
+ *
+ * Adapted from the Boost Library 1.67
+ *
+ * \author Roland Schulz <roland.schulz@intel.com>
+ * \ingroup module_compat
+ * \inlibraryapi
+ */
+#ifndef GMX_COMPAT_MP11_H
+#define GMX_COMPAT_MP11_H
+
+#include <utility>
+
+#include "gromacs/utility/exceptions.h"
+
+namespace gmx
+{
+namespace compat
+{
+
+/** \internal \brief Simplified analogue of boost::mp11::mp_with_index, compatible only with C++17 and up.
+ *
+ * \c mp_with_index<N>(i, f) calls \p f with \c mp_size_t<i>() and returns the result.
+ * \p i must be less than \p N.
+ *
+ * Example usage:
+ * \code
+    constexpr int foo_max = 3;
+    template<int i, typename = std::enable_if_t<(i < foo_max)>>
+    bool constexpr foo();
+
+    bool bar(int i)
+    {
+        return mp_with_index<foo_max>(i, [](auto i) {
+            return foo<i>();
+        });
+    }
+ * \endcode
+ */
+template<std::size_t N, class F, typename std::enable_if<(N <= 1)>::type* = nullptr>
+static auto mp_with_index(std::size_t i, F&& f)
+{
+    // Last step of recursion. Must have one active "return" for proper type deduction.
+    if (i == N - 1)
+    {
+        return std::forward<F>(f)(std::integral_constant<std::size_t, N - 1>());
+    }
+    else
+    {
+        const std::string errorMessage =
+                "Invalid arguments of mp_with_index (i=" + std::to_string(i) + ")";
+        GMX_THROW(InternalError(errorMessage));
+    }
+}
+
+template<std::size_t N, class F, typename std::enable_if<(N > 1)>::type* = nullptr>
+static auto mp_with_index(std::size_t i, F&& f)
+{
+    if (i == N - 1)
+    {
+        return std::forward<F>(f)(std::integral_constant<std::size_t, N - 1>());
+    }
+    else
+    {
+        return mp_with_index<N - 1>(i, std::forward<F>(f));
+    }
+}
+
+
+} // namespace compat
+} // namespace gmx
+
+#endif
diff --git a/src/gromacs/compat/pointers.h b/src/gromacs/compat/pointers.h

index f10760581fec26b052fe82d55306c35bfe8083a5..37689a919033a1927d7491a0cea59d6254578a9b 100644 (file)
--- a/src/gromacs/compat/pointers.h
+++ b/src/gromacs/compat/pointers.h
@@ -89,24 +89,24 @@ template<class T>
  class not_null
  {
  public:
-    static_assert(std::is_assignable_v<T&, std::nullptr_t>, "T cannot be assigned nullptr.");
+    static_assert(std::is_assignable<T&, std::nullptr_t>::value, "T cannot be assigned nullptr.");
  
      //! Move constructor. Asserts in debug mode if \c is nullptr.
-    template<typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+    template<typename U, typename = std::enable_if_t<std::is_convertible<U, T>::value>>
      constexpr explicit not_null(U&& u) : ptr_(std::forward<U>(u))
      {
          Expects(ptr_ != nullptr);
      }
  
      //! Simple constructor. Asserts in debug mode if \c u is nullptr.
-    template<typename = std::enable_if_t<!std::is_same_v<std::nullptr_t, T>>>
+    template<typename = std::enable_if_t<!std::is_same<std::nullptr_t, T>::value>>
      constexpr explicit not_null(T u) : ptr_(u)
      {
          Expects(ptr_ != nullptr);
      }
  
      //! Copy constructor.
-    template<typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+    template<typename U, typename = std::enable_if_t<std::is_convertible<U, T>::value>>
      constexpr not_null(const not_null<U>& other) : not_null(other.get())
      {
      }
diff --git a/src/gromacs/compat/tests/CMakeLists.txt b/src/gromacs/compat/tests/CMakeLists.txt

index 67f5ad9a7f5f1f8db94dc8a195c1ac5a95a3344e..7a82a48e58b2a90cca0c4f8cafedbf0cc0c480e8 100644 (file)
--- a/src/gromacs/compat/tests/CMakeLists.txt
+++ b/src/gromacs/compat/tests/CMakeLists.txt
@@ -36,6 +36,7 @@
  if (GMX_BUILD_UNITTESTS)
      gmx_add_unit_test(CompatibilityHelpersTests compat-test
          CPP_SOURCE_FILES
+            mp11.cpp
              pointers.cpp
              )
      # Maintainer note: The files here may be borrowed from other projects, and
diff --git a/src/gromacs/compat/tests/mp11.cpp b/src/gromacs/compat/tests/mp11.cpp

new file mode 100644 (file)

index 0000000..221e546
--- /dev/null
+++ b/src/gromacs/compat/tests/mp11.cpp
@@ -0,0 +1,113 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#include "gmxpre.h"
+
+#include "gromacs/compat/mp11.h"
+
+#include <gtest/gtest.h>
+
+// Defining some dummy functions to use later
+
+namespace gmx
+{
+namespace compat
+{
+namespace test
+{
+//! Enumeration used as a template argument in the dispatch tests below.
+enum class Options
+{
+    Op0,  //!< Underlying value 0
+    Op1,  //!< Underlying value 1
+    Op2,  //!< Underlying value 2
+    Count //!< Number of real options (underlying value 3)
+};
+
+//! Returns the compile-time integer \c i plus one.
+template<int i>
+static int testIncrement()
+{
+    const int successor = i + 1;
+    return successor;
+}
+
+//! Returns the logical negation of the compile-time boolean \c i.
+template<bool i>
+static bool testNot()
+{
+    const bool negated = !i;
+    return negated;
+}
+
+//! Returns 2 * i + j + k, with the two enum template arguments converted to int.
+template<Options i, Options j>
+static int testEnumTwoIPlusJPlusK(int k)
+{
+    const int first  = int(i);
+    const int second = int(j);
+    return 2 * first + second + k;
+}
+
+// Run-time indices 0 and 3 must reach the testIncrement instantiation with the same value.
+TEST(TemplateMPTest, MpWithIndexInt)
+{
+    static constexpr int maxArgValue = 4;
+    const auto           increment   = [](auto index) { return testIncrement<index>(); };
+
+    const int incrementOfZero = mp_with_index<maxArgValue>(0, increment);
+    EXPECT_EQ(incrementOfZero, 1);
+    const int incrementOfThree = mp_with_index<maxArgValue>(3, increment);
+    EXPECT_EQ(incrementOfThree, 4);
+}
+
+// An index equal to the template bound violates the precondition and is expected
+// to raise gmx::InternalError.
+TEST(TemplateMPTest, MpWithIndexIntBad)
+{
+    static constexpr int maxArgValue     = 4;
+    const int            outOfRangeIndex = maxArgValue;
+    // Function requirement: index < maxArgValue
+    // The lambda parameter has its own name so that it does not shadow the local above.
+    EXPECT_THROW(mp_with_index<maxArgValue>(outOfRangeIndex,
+                                            [](auto index) { return testIncrement<index>(); }),
+                 gmx::InternalError);
+}
+
+// Booleans converted to size_t (0 or 1) must select the matching testNot instantiation.
+TEST(TemplateMPTest, MpWithIndexBool)
+{
+    const auto negate = [](auto flag) { return testNot<flag>(); };
+
+    const bool negatedTrue = mp_with_index<2>(size_t(true), negate);
+    EXPECT_FALSE(negatedTrue);
+    const bool negatedFalse = mp_with_index<2>(size_t(false), negate);
+    EXPECT_TRUE(negatedFalse);
+}
+
+// Dispatches on an enum value: the run-time index Options::Op2 becomes the second
+// template argument, giving 2 * Op1 + Op2 + 5 = 9.
+TEST(TemplateMPTest, MpWithIndexEnum)
+{
+    constexpr size_t numOptions = static_cast<size_t>(Options::Count);
+    int              offset     = 5;
+    const auto       evaluate   = [=](auto index) {
+        return testEnumTwoIPlusJPlusK<Options::Op1, static_cast<Options>(size_t(index))>(offset);
+    };
+
+    const int result = mp_with_index<numOptions>(static_cast<size_t>(Options::Op2), evaluate);
+    EXPECT_EQ(result, 9);
+}
+
+} // namespace test
+} // namespace compat
+} // namespace gmx
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cu b/src/gromacs/domdec/gpuhaloexchange_impl.cu

index 9efebf69046f39b924781df34245549857c83cd1..e7045d8b2a6fccf05247fc8b46c16d53f4051e72 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cu
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cu
@@ -464,7 +464,6 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
                              int                  pulse,
                              gmx_wallcycle*       wcycle) :
      dd_(dd),
-    dimIndex_(dimIndex),
      sendRankX_(dd->neighbor[dimIndex][1]),
      recvRankX_(dd->neighbor[dimIndex][0]),
      sendRankF_(dd->neighbor[dimIndex][0]),
@@ -475,6 +474,7 @@ GpuHaloExchange::Impl::Impl(gmx_domdec_t*        dd,
      deviceContext_(deviceContext),
      localStream_(localStream),
      nonLocalStream_(nonLocalStream),
+    dimIndex_(dimIndex),
      pulse_(pulse),
      wcycle_(wcycle)
  {
diff --git a/src/gromacs/domdec/gpuhaloexchange_impl.cuh b/src/gromacs/domdec/gpuhaloexchange_impl.cuh

index 761938a0133c3c5f5645d17e2121d818356ca9a7..5dd619a343fe5827e93c0a9103dd2d47588734f1 100644 (file)
--- a/src/gromacs/domdec/gpuhaloexchange_impl.cuh
+++ b/src/gromacs/domdec/gpuhaloexchange_impl.cuh
@@ -204,8 +204,6 @@ private:
      int dimIndex_ = 0;
      //! The pulse corresponding to this halo exchange instance
      int pulse_ = 0;
-    //! Number of zones. Always 1 for 1-D case.
-    const int nzone_ = 1;
      //! The wallclock counter
      gmx_wallcycle* wcycle_ = nullptr;
      //! The atom offset for receive (x) or send (f) for dimension index and pulse corresponding to this halo exchange instance
diff --git a/src/gromacs/domdec/tests/CMakeLists.txt b/src/gromacs/domdec/tests/CMakeLists.txt

index 3fb225255565d60d0a0d0f3c157cc613096df9ba..20e771f13b87778d7623c750188c90a348b4a29b 100644 (file)
--- a/src/gromacs/domdec/tests/CMakeLists.txt
+++ b/src/gromacs/domdec/tests/CMakeLists.txt
@@ -38,7 +38,7 @@ gmx_add_unit_test(DomDecTests domdec-test
          localatomsetmanager.cpp
          )
  
-gmx_add_mpi_unit_test(DomDecMpiTests domdec-mpi-test 4
-    CPP_SOURCE_FILES
+gmx_add_mpi_unit_test(DomDecMpiTests domdec-mpi-test 4 HARDWARE_DETECTION
+    GPU_CPP_SOURCE_FILES
          haloexchange_mpi.cpp
          )
diff --git a/src/gromacs/domdec/tests/haloexchange_mpi.cpp b/src/gromacs/domdec/tests/haloexchange_mpi.cpp

index 3237c12612f127c4bd85d95cdbde653846caf95a..224d54e934a8880e1ed56104b42c827d0afd0106 100644 (file)
--- a/src/gromacs/domdec/tests/haloexchange_mpi.cpp
+++ b/src/gromacs/domdec/tests/haloexchange_mpi.cpp
@@ -35,15 +35,14 @@
  /*! \internal \file
   * \brief Tests for the halo exchange
   *
- *  The test sets up a 2D rank topology and performs a coordinate halo
- *  exchange (using the pre-existing CPU codepath), with 2 pulses in
- *  the first dimension and 1 pulse in the second. Each pulse involves
- *  a few non-contiguous indices. The sending rank, atom number and
- *  spatial 3D index are encoded in the x values, to allow correctness
- *  checking following the halo exchange.
+ *  The test sets up the rank topology and performs a coordinate halo
+ *  exchange (for both CPU and GPU codepaths) for several 1D and 2D
+ *  pulse configurations. Each pulse involves a few non-contiguous
+ *  indices. The sending rank, atom number and spatial 3D index are
+ *  encoded in the x values, to allow correctness checking following
+ *  the halo exchange.
   *
- * \todo Add more test variations
- * \todo Port to GPU codepath
+ * \todo Add 3D case
   *
   * \author Alan Gray <alang@nvidia.com>
   * \ingroup module_domdec
@@ -51,6 +50,8 @@
  
  #include "gmxpre.h"
  
+#include "config.h"
+
  #include <array>
  
  #include <gtest/gtest.h>
@@ -58,12 +59,21 @@
  #include "gromacs/domdec/atomdistribution.h"
  #include "gromacs/domdec/domdec_internal.h"
  #include "gromacs/domdec/gpuhaloexchange.h"
+#if GMX_GPU_CUDA
+#    include "gromacs/gpu_utils/device_stream.h"
+#    include "gromacs/gpu_utils/devicebuffer.h"
+#    include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#endif
+#include "gromacs/gpu_utils/hostallocator.h"
  #include "gromacs/mdtypes/inputrec.h"
  
  #include "testutils/mpitest.h"
+#include "testutils/test_hardware_environment.h"
  
  namespace gmx
  {
+namespace test
+{
  namespace
  {
  
@@ -100,6 +110,77 @@ void initHaloData(RVec* x, const int numHomeAtoms, const int numAtomsTotal)
      }
  }
  
+/*! \brief Perform GPU halo exchange, including required setup and data transfers
+ *
+ * The body is only compiled in when both GMX_GPU_CUDA and GMX_THREAD_MPI are set;
+ * otherwise the function does nothing.
+ *
+ * \param [in]     dd             Domain decomposition object
+ * \param [in]     box            Box matrix
+ * \param [in,out] h_x            Atom coordinate data array on host; copied to the device
+ *                                before the exchange and overwritten with the device
+ *                                contents afterwards
+ * \param [in]     numAtomsTotal  Total number of atoms, including halo
+ */
+void gpuHalo(gmx_domdec_t* dd, matrix box, RVec* h_x, int numAtomsTotal)
+{
+#if (GMX_GPU_CUDA && GMX_THREAD_MPI)
+    // Set up GPU hardware environment and assign this MPI rank to a device
+    int rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    const auto& testDeviceList = getTestHardwareEnvironment()->getTestDeviceList();
+    const int   numDevices     = testDeviceList.size();
+    // An empty device list would make the modulo below a division by zero, so
+    // report a test failure and return instead.
+    ASSERT_GT(numDevices, 0) << "GPU halo exchange requires at least one test device";
+    // Ranks are mapped round-robin onto the available devices
+    const auto& testDevice    = testDeviceList[rank % numDevices];
+    const auto& deviceContext = testDevice->deviceContext();
+    setActiveDevice(testDevice->deviceInfo());
+    DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
+
+    // Set up GPU buffer and copy input data from host
+    DeviceBuffer<RVec> d_x;
+    int                d_x_size       = -1;
+    int                d_x_size_alloc = -1;
+    reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
+
+    copyToDeviceBuffer(&d_x, h_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+
+    GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
+    coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
+
+    // Perform GPU halo exchange, one exchange object per (dimension index, pulse) pair
+    for (int d = 0; d < dd->ndim; d++)
+    {
+        for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
+        {
+            GpuHaloExchange gpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, deviceStream,
+                                            deviceStream, pulse, nullptr);
+            gpuHaloExchange.reinitHalo(d_x, nullptr);
+            gpuHaloExchange.communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
+        }
+    }
+
+    // Wait for everything enqueued in the stream before reading the results back
+    GpuEventSynchronizer haloCompletedEvent;
+    haloCompletedEvent.markEvent(deviceStream);
+    haloCompletedEvent.waitForEvent();
+
+    // Copy results back to host
+    copyFromDeviceBuffer(h_x, &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+
+    freeDeviceBuffer(d_x);
+#else
+    GMX_UNUSED_VALUE(dd);
+    GMX_UNUSED_VALUE(box);
+    GMX_UNUSED_VALUE(h_x);
+    GMX_UNUSED_VALUE(numAtomsTotal);
+#endif
+}
+
+/*! \brief Define 1D rank topology with 4 MPI tasks
+ *
+ * The four ranks form a ring: entry 0 of the first dimension is the next rank
+ * (wrapping 3 -> 0) and entry 1 is the previous rank (wrapping 0 -> 3).
+ *
+ * \param [in,out] dd  Domain decomposition object whose neighbor table is filled in
+ */
+void define1dRankTopology(gmx_domdec_t* dd)
+{
+    int rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    const int nextRank     = (rank + 1) % 4;
+    const int previousRank = (rank == 0) ? 3 : rank - 1;
+
+    dd->neighbor[0][0] = nextRank;
+    dd->neighbor[0][1] = previousRank;
+}
+
  /*! \brief Define 2D rank topology with 4 MPI tasks
   *
   *    -----
@@ -144,12 +225,142 @@ void define2dRankTopology(gmx_domdec_t* dd)
      }
  }
  
+/*! \brief Define a 1D halo with 1 pulse
+ *
+ * \param [in,out] dd      Domain decomposition object; its dimension count and the
+ *                         communication data of dimension index 0 are overwritten
+ * \param [out]    indvec  Vector of index vectors; cleared and refilled with one
+ *                         entry for the single pulse
+ */
+void define1dHaloWith1Pulse(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+    int rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    std::vector<int> indexvec;
+    gmx_domdec_ind_t ind;
+
+    dd->ndim     = 1;
+    int nzone    = 1;
+    int dimIndex = 0;
+
+    // Set up indices involved in halo
+    indexvec.clear();
+    indvec->clear();
+
+    dd->comm->cd[dimIndex].receiveInPlace = true;
+    dd->dim[dimIndex]                     = 0;
+    dd->ci[dimIndex]                      = rank;
+
+    // First pulse involves (arbitrary) indices 1 and 3
+    indexvec.push_back(1);
+    indexvec.push_back(3);
+
+    // Two atoms are sent and two received, matching the two indices above
+    ind.index            = indexvec;
+    ind.nsend[nzone + 1] = 2;
+    ind.nrecv[nzone + 1] = 2;
+    indvec->push_back(ind);
+
+    // The domain decomposition object stores its own copy of the pulse list
+    dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 1D halo with 2 pulses
+ *
+ * \param [in,out] dd      Domain decomposition object; its dimension count and the
+ *                         communication data of dimension index 0 are overwritten
+ * \param [out]    indvec  Vector of index vectors; cleared and refilled with one
+ *                         entry per pulse
+ */
+void define1dHaloWith2Pulses(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+    int rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    std::vector<int> indexvec;
+    gmx_domdec_ind_t ind;
+
+    dd->ndim     = 1;
+    int nzone    = 1;
+    int dimIndex = 0;
+
+    // Set up indices involved in halo
+    indexvec.clear();
+    indvec->clear();
+
+    dd->comm->cd[dimIndex].receiveInPlace = true;
+    dd->dim[dimIndex]                     = 0;
+    dd->ci[dimIndex]                      = rank;
+
+    // First pulse involves (arbitrary) indices 1 and 3
+    indexvec.push_back(1);
+    indexvec.push_back(3);
+
+    // Two atoms are sent and two received, matching the two indices above
+    ind.index            = indexvec;
+    ind.nsend[nzone + 1] = 2;
+    ind.nrecv[nzone + 1] = 2;
+    indvec->push_back(ind);
+
+    // Add another pulse with (arbitrary) indices 4,5,7
+    indexvec.clear();
+
+    indexvec.push_back(4);
+    indexvec.push_back(5);
+    indexvec.push_back(7);
+
+    // The same ind object is reused; its index list and counts are overwritten
+    // before it is appended as the second pulse
+    ind.index            = indexvec;
+    ind.nsend[nzone + 1] = 3;
+    ind.nrecv[nzone + 1] = 3;
+    indvec->push_back(ind);
+
+    // The domain decomposition object stores its own copy of the pulse list
+    dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 2D halo with 1 pulse in each dimension
+ *
+ * \param [in,out] dd      Domain decomposition object; its dimension count and the
+ *                         communication data of dimension indices 0 and 1 are overwritten
+ * \param [out]    indvec  Vector of index vectors; it is cleared at the start of every
+ *                         dimension, so on return it holds only the last dimension's pulse
+ */
+void define2dHaloWith1PulseInEachDim(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+    int rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    std::vector<int> indexvec;
+    gmx_domdec_ind_t ind;
+
+    dd->ndim  = 2;
+    int nzone = 1;
+    for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
+    {
+
+        // Set up indices involved in halo
+        indexvec.clear();
+        indvec->clear();
+
+        // NOTE(review): both dimension indices are assigned dimension 0 and the MPI
+        // rank as cell index - confirm this is intended for the second dimension
+        dd->comm->cd[dimIndex].receiveInPlace = true;
+        dd->dim[dimIndex]                     = 0;
+        dd->ci[dimIndex]                      = rank;
+
+        // Single pulse involving (arbitrary) indices 1 and 3
+        indexvec.push_back(1);
+        indexvec.push_back(3);
+
+        // NOTE(review): ind is reused across iterations, so counts written for an
+        // earlier nzone remain set in later copies - presumably harmless, verify
+        ind.index            = indexvec;
+        ind.nsend[nzone + 1] = 2;
+        ind.nrecv[nzone + 1] = 2;
+        indvec->push_back(ind);
+
+        dd->comm->cd[dimIndex].ind = *indvec;
+
+        // Doubles nzone for the next dimension index
+        nzone += nzone;
+    }
+}
+
  /*! \brief Define a 2D halo with 2 pulses in the first dimension
   *
   * \param [in] dd      Domain decomposition object
   * \param [in] indvec  Vector of index vectors
   */
-void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t> indvec)
+void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
  {
  
      int rank;
@@ -165,7 +376,7 @@ void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_
  
          // Set up indices involved in halo
          indexvec.clear();
-        indvec.clear();
+        indvec->clear();
  
          dd->comm->cd[dimIndex].receiveInPlace = true;
          dd->dim[dimIndex]                     = 0;
@@ -178,14 +389,12 @@ void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_
          ind.index            = indexvec;
          ind.nsend[nzone + 1] = 2;
          ind.nrecv[nzone + 1] = 2;
-        indvec.push_back(ind);
+        indvec->push_back(ind);
  
          if (dimIndex == 0) // Add another pulse with (arbitrary) indices 4,5,7
          {
              indexvec.clear();
  
-            dd->comm->cd[dimIndex].ind = indvec;
-
              indexvec.push_back(4);
              indexvec.push_back(5);
              indexvec.push_back(7);
@@ -193,15 +402,73 @@ void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_
              ind.index            = indexvec;
              ind.nsend[nzone + 1] = 3;
              ind.nrecv[nzone + 1] = 3;
-            indvec.push_back(ind);
+            indvec->push_back(ind);
          }
  
-        dd->comm->cd[dimIndex].ind = indvec;
+        dd->comm->cd[dimIndex].ind = *indvec;
  
          nzone += nzone;
      }
  }
  
+/*! \brief Check results for above-defined 1D halo with 1 pulse
+ *
+ * The two received atoms are expected directly after the home atoms. Mismatches
+ * are reported as non-fatal GoogleTest failures.
+ *
+ * \param [in] x             Atom coordinate data array
+ * \param [in] dd            Domain decomposition object
+ * \param [in] numHomeAtoms  Number of home atoms
+ */
+void checkResults1dHaloWith1Pulse(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+    // Check results are expected from values encoded in x data
+    for (int j = 0; j < DIM; j++)
+    {
+        // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+        EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+        EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+    }
+}
+
+/*! \brief Check results for above-defined 1D halo with 2 pulses
+ *
+ * The five received atoms are expected directly after the home atoms, in pulse
+ * order. Mismatches are reported as non-fatal GoogleTest failures.
+ *
+ * \param [in] x             Atom coordinate data array
+ * \param [in] dd            Domain decomposition object
+ * \param [in] numHomeAtoms  Number of home atoms
+ */
+void checkResults1dHaloWith2Pulses(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+    // Check results are expected from values encoded in x data
+    for (int j = 0; j < DIM; j++)
+    {
+        // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+        EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+        EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+        // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
+        EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
+        EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
+        EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
+    }
+}
+
+/*! \brief Check results for above-defined 2D halo with 1 pulse in each dimension
+ *
+ * The four received atoms are expected directly after the home atoms: first the
+ * two from the first dimension, then the two from the second. Mismatches are
+ * reported as non-fatal GoogleTest failures.
+ *
+ * \param [in] x             Atom coordinate data array
+ * \param [in] dd            Domain decomposition object
+ * \param [in] numHomeAtoms  Number of home atoms
+ */
+void checkResults2dHaloWith1PulseInEachDim(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+    // Check results are expected from values encoded in x data
+    for (int j = 0; j < DIM; j++)
+    {
+        // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+        EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+        EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+        // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
+        EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[1][0], 1, j));
+        EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[1][0], 3, j));
+    }
+}
+
  /*! \brief Check results for above-defined 2D halo with 2 pulses in the first dimension
   *
   * \param [in] x             Atom coordinate data array
@@ -226,17 +493,173 @@ void checkResults2dHaloWith2PulsesInDim1(const RVec* x, const gmx_domdec_t* dd,
      }
  }
  
+// Exchanges coordinates for the 1D single-pulse halo on 4 ranks with dd_move_x and,
+// when built with CUDA and thread-MPI, repeats the exchange through gpuHalo.
+TEST(HaloExchangeTest, Coordinates1dHaloWith1Pulse)
+{
+    GMX_MPI_TEST(4);
+
+    // Set up atom data
+    const int        numHomeAtoms  = 10;
+    const int        numHaloAtoms  = 2;
+    const int        numAtomsTotal = numHomeAtoms + numHaloAtoms;
+    HostVector<RVec> h_x;
+    changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+    h_x.resize(numAtomsTotal);
+
+    initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+    // Set up dd
+    t_inputrec   ir;
+    gmx_domdec_t dd(ir);
+    dd.mpi_comm_all = MPI_COMM_WORLD;
+    gmx_domdec_comm_t comm;
+    dd.comm                      = &comm;
+    dd.unitCellInfo.haveScrewPBC = false;
+
+    DDAtomRanges atomRanges;
+    atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+    dd.comm->atomRanges = atomRanges;
+
+    define1dRankTopology(&dd);
+
+    std::vector<gmx_domdec_ind_t> indvec;
+    define1dHaloWith1Pulse(&dd, &indvec);
+
+    // Perform halo exchange
+    matrix box = { { 0., 0., 0. } };
+    dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+    // Check results
+    checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+
+    if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+    {
+        // Re-initialize input
+        initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+        // Perform GPU halo exchange
+        gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+
+        // Check results
+        checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+    }
+}
+
+// Exchanges coordinates for the 1D two-pulse halo on 4 ranks with dd_move_x and,
+// when built with CUDA and thread-MPI, repeats the exchange through gpuHalo.
+TEST(HaloExchangeTest, Coordinates1dHaloWith2Pulses)
+{
+    GMX_MPI_TEST(4);
+
+    // Set up atom data
+    const int        numHomeAtoms  = 10;
+    const int        numHaloAtoms  = 5;
+    const int        numAtomsTotal = numHomeAtoms + numHaloAtoms;
+    HostVector<RVec> h_x;
+    changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+    h_x.resize(numAtomsTotal);
+
+    initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+    // Set up dd
+    t_inputrec   ir;
+    gmx_domdec_t dd(ir);
+    dd.mpi_comm_all = MPI_COMM_WORLD;
+    gmx_domdec_comm_t comm;
+    dd.comm                      = &comm;
+    dd.unitCellInfo.haveScrewPBC = false;
+
+    DDAtomRanges atomRanges;
+    atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+    dd.comm->atomRanges = atomRanges;
+
+    define1dRankTopology(&dd);
+
+    std::vector<gmx_domdec_ind_t> indvec;
+    define1dHaloWith2Pulses(&dd, &indvec);
+
+    // Perform halo exchange
+    matrix box = { { 0., 0., 0. } };
+    dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+    // Check results
+    checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+
+    if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+    {
+        // Re-initialize input
+        initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+        // Perform GPU halo exchange
+        gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+
+        // Check results
+        checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+    }
+}
+
+
+// Exchanges coordinates for the 2D halo with one pulse per dimension on 4 ranks with
+// dd_move_x and, when built with CUDA and thread-MPI, repeats it through gpuHalo.
+TEST(HaloExchangeTest, Coordinates2dHaloWith1PulseInEachDim)
+{
+    GMX_MPI_TEST(4);
+
+    // Set up atom data
+    const int        numHomeAtoms  = 10;
+    const int        numHaloAtoms  = 4;
+    const int        numAtomsTotal = numHomeAtoms + numHaloAtoms;
+    HostVector<RVec> h_x;
+    changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+    h_x.resize(numAtomsTotal);
+
+    initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+    // Set up dd
+    t_inputrec   ir;
+    gmx_domdec_t dd(ir);
+    dd.mpi_comm_all = MPI_COMM_WORLD;
+    gmx_domdec_comm_t comm;
+    dd.comm                      = &comm;
+    dd.unitCellInfo.haveScrewPBC = false;
+
+    DDAtomRanges atomRanges;
+    atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+    dd.comm->atomRanges = atomRanges;
+
+    define2dRankTopology(&dd);
+
+    std::vector<gmx_domdec_ind_t> indvec;
+    define2dHaloWith1PulseInEachDim(&dd, &indvec);
+
+    // Perform halo exchange
+    matrix box = { { 0., 0., 0. } };
+    dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+    // Check results
+    checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+
+    if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+    {
+        // Re-initialize input
+        initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+        // Perform GPU halo exchange
+        gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+
+        // Check results
+        checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+    }
+}
  
  TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
  {
      GMX_MPI_TEST(4);
  
      // Set up atom data
-    const int numHomeAtoms  = 10;
-    const int numHaloAtoms  = 7;
-    const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
-    RVec      x[numAtomsTotal];
-    initHaloData(x, numHomeAtoms, numAtomsTotal);
+    const int        numHomeAtoms  = 10;
+    const int        numHaloAtoms  = 7;
+    const int        numAtomsTotal = numHomeAtoms + numHaloAtoms;
+    HostVector<RVec> h_x;
+    changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+    h_x.resize(numAtomsTotal);
+
+    initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
  
      // Set up dd
      t_inputrec   ir;
@@ -253,15 +676,27 @@ TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
      define2dRankTopology(&dd);
  
      std::vector<gmx_domdec_ind_t> indvec;
-    define2dHaloWith2PulsesInDim1(&dd, indvec);
+    define2dHaloWith2PulsesInDim1(&dd, &indvec);
  
      // Perform halo exchange
      matrix box = { { 0., 0., 0. } };
-    dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(x), nullptr);
+    dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+    // Check results
+    checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+
+#if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+    // Re-initialize input
+    initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+    // Perform GPU halo exchange
+    gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
  
      // Check results
-    checkResults2dHaloWith2PulsesInDim1(x, &dd, numHomeAtoms);
+    checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+#endif
  }
  
  } // namespace
+} // namespace test
  } // namespace gmx
diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h

index 0408b87f77be914e4029659fca3845ace8fd446e..29411cad0d864694c17c239677887797f93b9f36 100644 (file)
--- a/src/gromacs/ewald/pme.h
+++ b/src/gromacs/ewald/pme.h
@@ -344,7 +344,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_prepare_computation(gmx_pme_t*     GPU_FUNC_ARGU
  GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t*            GPU_FUNC_ARGUMENT(pme),
                                                GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
                                                gmx_wallcycle*        GPU_FUNC_ARGUMENT(wcycle),
-                                              const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+                                              real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
  
  /*! \brief
   * Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
@@ -367,7 +367,7 @@ pme_gpu_launch_complex_transforms(gmx_pme_t*               GPU_FUNC_ARGUMENT(pme
   */
  GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
                                                gmx_wallcycle*   GPU_FUNC_ARGUMENT(wcycle),
-                                              const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+                                              real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
  
  /*! \brief
   * Attempts to complete PME GPU tasks.
@@ -394,7 +394,7 @@ GPU_FUNC_QUALIFIER bool pme_gpu_try_finish_task(gmx_pme_t*               GPU_FUN
                                                  gmx_wallcycle*           GPU_FUNC_ARGUMENT(wcycle),
                                                  gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
                                                  gmx_enerdata_t*       GPU_FUNC_ARGUMENT(enerd),
-                                                const real            GPU_FUNC_ARGUMENT(lambdaQ),
+                                                real                  GPU_FUNC_ARGUMENT(lambdaQ),
                                                  GpuTaskCompletion GPU_FUNC_ARGUMENT(completionKind))
          GPU_FUNC_TERM_WITH_RETURN(false);
  
@@ -414,7 +414,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_wait_and_reduce(gmx_pme_t*               GPU_FUN
                                                  gmx_wallcycle*           GPU_FUNC_ARGUMENT(wcycle),
                                                  gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
                                                  gmx_enerdata_t*       GPU_FUNC_ARGUMENT(enerd),
-                                                const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+                                                real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
  
  /*! \brief
   * The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
diff --git a/src/gromacs/ewald/pme_gather.clh b/src/gromacs/ewald/pme_gather.clh

index 8c7106acf9138fce11e7e6d51d45a2263026424e..98cac0f2824c4ba445485ba3f167e2f93544dea4 100644 (file)
--- a/src/gromacs/ewald/pme_gather.clh
+++ b/src/gromacs/ewald/pme_gather.clh
@@ -49,8 +49,8 @@
   *  \author Aleksei Iupinov <a.yupinov@gmail.com>
   */
  
-#include "pme_gpu_types.h"
  #include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
  
  #ifndef COMPILE_GATHER_HELPERS_ONCE
  #    define COMPILE_GATHER_HELPERS_ONCE
@@ -442,9 +442,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe
      if (numGrids == 2)
      {
          barrier(CLK_LOCAL_MEM_FENCE);
-        fx          = 0.0f;
-        fy          = 0.0f;
-        fz          = 0.0f;
+        fx          = 0.0F;
+        fy          = 0.0F;
+        fz          = 0.0F;
          chargeCheck = pme_gpu_check_atom_charge(gm_coefficientsB[atomIndexGlobal]);
          if (chargeCheck)
          {
@@ -473,8 +473,8 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_gather_kernel)(const struct PmeOpenCLKe
  #pragma unroll
              for (int i = 0; i < numIter; i++)
              {
-                const int outputIndexLocal  = i * iterThreads + threadLocalId;
-                const int outputIndexGlobal = get_group_id(XX) * blockForcesSize + outputIndexLocal;
+                const int outputIndexLocal = i * iterThreads + threadLocalId;
+                const int outputIndexGlobal = (int)get_group_id(XX) * blockForcesSize + outputIndexLocal;
                  const float outputForceComponent = sm_forces[outputIndexLocal];
                  gm_forces[outputIndexGlobal] += outputForceComponent;
              }
diff --git a/src/gromacs/ewald/pme_gpu.cpp b/src/gromacs/ewald/pme_gpu.cpp

index 19215fa90fff848956a4cd0e8c562f43a54bf65b..fcae02ab6917b1d93fb6e5ea53f354dc2b0d77be 100644 (file)
--- a/src/gromacs/ewald/pme_gpu.cpp
+++ b/src/gromacs/ewald/pme_gpu.cpp
@@ -344,7 +344,7 @@ bool pme_gpu_try_finish_task(gmx_pme_t*               pme,
      // time needed for that checking, but do not yet record that the
      // gather has occured.
      bool           needToSynchronize      = true;
-    constexpr bool c_streamQuerySupported = bool(GMX_GPU_CUDA);
+    constexpr bool c_streamQuerySupported = GMX_GPU_CUDA;
  
      // TODO: implement c_streamQuerySupported with an additional GpuEventSynchronizer per stream (#2521)
      if ((completionKind == GpuTaskCompletion::Check) && c_streamQuerySupported)
diff --git a/src/gromacs/ewald/pme_gpu_3dfft.h b/src/gromacs/ewald/pme_gpu_3dfft.h

index c334d013e3abdc513750fadcba3b3e30ff422bcb..a39f751babc7192d87083459105329c4b5eb970e 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_3dfft.h
+++ b/src/gromacs/ewald/pme_gpu_3dfft.h
@@ -75,7 +75,7 @@ public:
       * \param[in] pmeGpu                  The PME GPU structure.
       * \param[in] gridIndex               The index of the grid on which to perform the calculations.
       */
-    GpuParallel3dFft(const PmeGpu* pmeGpu, const int gridIndex);
+    GpuParallel3dFft(const PmeGpu* pmeGpu, int gridIndex);
      /*! \brief Destroys the FFT plans. */
      ~GpuParallel3dFft();
      /*! \brief Performs the FFT transform in given direction
diff --git a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh

index 05649b600ad6914930b2f258144fe4e0a4f3e226..fc319eb1c23a78e53f18505bc8f1c9a41a5f9a37 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh
+++ b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh
@@ -124,7 +124,7 @@ template<typename T>
  __device__ inline void assertIsFinite(T arg);
  
  template<>
-__device__ inline void assertIsFinite(float3 arg)
+__device__ inline void assertIsFinite(float3 gmx_unused arg)
  {
      assert(isfinite(float(arg.x)));
      assert(isfinite(float(arg.y)));
@@ -132,7 +132,7 @@ __device__ inline void assertIsFinite(float3 arg)
  }
  
  template<typename T>
-__device__ inline void assertIsFinite(T arg)
+__device__ inline void assertIsFinite(T gmx_unused arg)
  {
      assert(isfinite(float(arg)));
  }
diff --git a/src/gromacs/ewald/pme_gpu_internal.h b/src/gromacs/ewald/pme_gpu_internal.h

index 1220b139845a87ef2808adf767aed48260da527e..632557d13e737163981578361de03da4a2238f72 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_internal.h
+++ b/src/gromacs/ewald/pme_gpu_internal.h
@@ -354,7 +354,7 @@ GPU_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu*         GPU_FUNC_ARGUMENT(p
                                         float**               GPU_FUNC_ARGUMENT(h_grids),
                                         bool                  GPU_FUNC_ARGUMENT(computeSplines),
                                         bool                  GPU_FUNC_ARGUMENT(spreadCharges),
-                                       const real GPU_FUNC_ARGUMENT(lambda)) GPU_FUNC_TERM;
+                                       real GPU_FUNC_ARGUMENT(lambda)) GPU_FUNC_TERM;
  
  /*! \libinternal \brief
   * 3D FFT R2C/C2R routine.
diff --git a/src/gromacs/ewald/pme_solve.clh b/src/gromacs/ewald/pme_solve.clh

index 0c21512b6fbb28a3640af598c31f8e8c545a4ced..ae96f584d531891b3d565e438e2781c93f8e3c44 100644 (file)
--- a/src/gromacs/ewald/pme_solve.clh
+++ b/src/gromacs/ewald/pme_solve.clh
@@ -70,9 +70,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
                                                         __global float2* __restrict__ gm_grid)
  {
      /* This kernel supports 2 different grid dimension orderings: YZX and XYZ */
-    int majorDim;
-    int middleDim;
-    int minorDim;
+    int majorDim  = 0;
+    int middleDim = 0;
+    int minorDim  = 0;
      if (gridOrdering == YZX)
      {
          majorDim  = YY;
@@ -163,9 +163,9 @@ __kernel void CUSTOMIZED_KERNEL_NAME(pme_solve_kernel)(const struct PmeOpenCLKer
          /* We should skip the k-space point (0,0,0) */
          const bool notZeroPoint = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0);
  
-        float mX;
-        float mY;
-        float mZ;
+        float mX = 0.0F;
+        float mY = 0.0F;
+        float mZ = 0.0F;
          if (gridOrdering == YZX)
          {
              mX = mMinor;
diff --git a/src/gromacs/ewald/pme_spread.clh b/src/gromacs/ewald/pme_spread.clh

index 3342120d51d46a67528ffebe718ed22e80f763bf..9e887a9e6efb7fe14eab0007065baf83b7f46ac4 100644 (file)
--- a/src/gromacs/ewald/pme_spread.clh
+++ b/src/gromacs/ewald/pme_spread.clh
@@ -53,8 +53,8 @@
  
  #include "gromacs/gpu_utils/vectype_ops.clh"
  
-#include "pme_gpu_types.h"
  #include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
  
  /*
   * This define affects the spline calculation behaviour in the kernel.
@@ -180,11 +180,10 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
          /* Indices interpolation */
          if (orderIndex == 0)
          {
-            int          tableIndex;
-            int          tInt;
-            float        n;
-            float        t;
-            const float3 x = vload3(atomIndexLocal, sm_coordinates);
+            int          tableIndex = 0;
+            float        n          = 0.0F;
+            float        t          = 0.0F;
+            const float3 x          = vload3(atomIndexLocal, sm_coordinates);
  
              /* Accessing fields in fshOffset/nXYZ/recipbox/... with dimIndex offset
               * puts them into local memory(!) instead of accessing the constant memory directly.
@@ -225,7 +224,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
  
              /* Fractional coordinates along box vectors, adding a positive shift to ensure t is positive for triclinic boxes */
              t                                 = (t + shift) * n;
-            tInt                              = (int)t;
+            const int tInt                    = (int)t;
              sm_fractCoords[sharedMemoryIndex] = t - (float)tInt;
              tableIndex += tInt;
              assert(tInt >= 0);
@@ -241,7 +240,6 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
          const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]);
          if (chargeCheck)
          {
-            float div;
              int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1
  
              const float dr = sm_fractCoords[sharedMemoryIndex];
@@ -255,7 +253,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
  #    pragma unroll order
              for (int k = 3; k < order; k++)
              {
-                div                     = 1.0F / ((float)k - 1.0F);
+                const float div         = 1.0F / ((float)k - 1.0F);
                  *SPLINE_DATA_PTR(k - 1) = div * dr * SPLINE_DATA(k - 2);
  #    pragma unroll
                  for (int l = 1; l < (k - 1); l++)
@@ -287,7 +285,7 @@ gmx_opencl_inline void calculate_splines(const struct PmeOpenCLKernelParams kern
                  gm_dtheta[thetaGlobalIndex] = dtheta;
              }
  
-            div                         = 1.0F / (order - 1.0F);
+            const float div             = 1.0F / (order - 1.0F);
              *SPLINE_DATA_PTR(order - 1) = div * dr * SPLINE_DATA(order - 2);
  #    pragma unroll
              for (int k = 1; k < (order - 1); k++)
diff --git a/src/gromacs/fileio/filetypes.cpp b/src/gromacs/fileio/filetypes.cpp

index 9cc48d08a4240658858666e2ccdfa99e401fa2ee..ac6c5820be15783e7832fe3ae9898818542d1f18 100644 (file)
--- a/src/gromacs/fileio/filetypes.cpp
+++ b/src/gromacs/fileio/filetypes.cpp
@@ -138,7 +138,8 @@ static const t_deffile deffile[efNR] = {
      { eftASC, ".edi", "sam", nullptr, "ED sampling input" },
      { eftASC, ".cub", "pot", nullptr, "Gaussian cube file" },
      { eftASC, ".xpm", "root", nullptr, "X PixMap compatible matrix file" },
-    { eftASC, "", "rundir", nullptr, "Run directory" }
+    { eftASC, "", "rundir", nullptr, "Run directory" },
+    { eftASC, ".csv", "bench", nullptr, "CSV data file" }
  };
  
  const char* ftp2ext(int ftp)
diff --git a/src/gromacs/gmxpreprocess/readpull.cpp b/src/gromacs/gmxpreprocess/readpull.cpp

index 3ea9de940767dda272154e3867c9a9e4af57595c..0a5b1afea5b1d79603aa46c61cff74761da4e6dd 100644 (file)
--- a/src/gromacs/gmxpreprocess/readpull.cpp
+++ b/src/gromacs/gmxpreprocess/readpull.cpp
@@ -456,6 +456,7 @@ void process_pull_groups(gmx::ArrayRef<t_pull_group>      pullGroups,
                        gmx::ssize(pullGroup.ind));
          }
  
+        pullGroup.pbcatom_input = pullGroup.pbcatom;
          if (pullGroup.ind.size() == 1)
          {
              /* No pbc is required for this group */
diff --git a/src/gromacs/gpu_utils/device_stream.cu b/src/gromacs/gpu_utils/device_stream.cu

index 0e07b00b2743661558aeb806d24e48ca2d58d17e..cc1f8798622bc30a284cdef90f3967cef8eae88e 100644 (file)
--- a/src/gromacs/gpu_utils/device_stream.cu
+++ b/src/gromacs/gpu_utils/device_stream.cu
@@ -117,4 +117,4 @@ void DeviceStream::synchronize() const
                         gmx::formatString("cudaStreamSynchronize failed  (CUDA error %d: %s).", stat,
                                           cudaGetErrorString(stat))
                                 .c_str());
-}
-\ No newline at end of file
+}
diff --git a/src/gromacs/gpu_utils/devicebuffer_ocl.h b/src/gromacs/gpu_utils/devicebuffer_ocl.h

index b8e047a87defc8156778493199f23d71cfa805cd..81501a2b0862d9c65bc09af357d403574670498e 100644 (file)
--- a/src/gromacs/gpu_utils/devicebuffer_ocl.h
+++ b/src/gromacs/gpu_utils/devicebuffer_ocl.h
@@ -321,7 +321,7 @@ void initParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer,
   * \param[in,out] deviceBuffer  Device buffer to store data in.
   */
  template<typename ValueType>
-void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, DeviceTexture& /* deviceTexture*/)
+void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, const DeviceTexture& /* deviceTexture*/)
  {
      freeDeviceBuffer(deviceBuffer);
  }
diff --git a/src/gromacs/gpu_utils/tests/CMakeLists.txt b/src/gromacs/gpu_utils/tests/CMakeLists.txt

index 6e4a700930ed6a1ec225b06a7920a54c1a74a6d6..604337d901bd295010220fb28bc92d1611a09fc1 100644 (file)
--- a/src/gromacs/gpu_utils/tests/CMakeLists.txt
+++ b/src/gromacs/gpu_utils/tests/CMakeLists.txt
@@ -46,10 +46,10 @@ gmx_add_unit_test(GpuUtilsUnitTests gpu_utils-test HARDWARE_DETECTION
          device_stream_manager.cpp
          hostallocator.cpp
          pinnedmemorychecker.cpp
-        typecasts.cpp
          
      GPU_CPP_SOURCE_FILES
          device_buffer.cpp
+        typecasts.cpp
  
      CUDA_CU_SOURCE_FILES
          devicetransfers.cu
diff --git a/src/gromacs/hardware/CMakeLists.txt b/src/gromacs/hardware/CMakeLists.txt

index 8161c495dd2d8b24104fcdd9f380f4cd86e18452..668e6ab1867766ba4c3e9694cec5d09e89686ead 100644 (file)
--- a/src/gromacs/hardware/CMakeLists.txt
+++ b/src/gromacs/hardware/CMakeLists.txt
@@ -37,6 +37,7 @@ gmx_add_libgromacs_sources(
      detecthardware.cpp
      device_management_common.cpp
      hardwaretopology.cpp
+    prepare_detection.cpp
      printhardware.cpp
      identifyavx512fmaunits.cpp
      )
diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp

index 3bcdac4835f2d52b5db9855e7aaab9109b213f40..54e2eefb3e7ff4d7d55e1079800d4d776114acf1 100644 (file)
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -41,13 +41,10 @@
  
  #include <algorithm>
  #include <array>
-#include <chrono>
  #include <memory>
  #include <string>
-#include <thread>
  #include <vector>
  
-#include "gromacs/compat/pointers.h"
  #include "gromacs/hardware/cpuinfo.h"
  #include "gromacs/hardware/device_management.h"
  #include "gromacs/hardware/hardwaretopology.h"
@@ -62,11 +59,11 @@
  #include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/inmemoryserializer.h"
  #include "gromacs/utility/logger.h"
-#include "gromacs/utility/mutex.h"
  #include "gromacs/utility/physicalnodecommunicator.h"
  
  #include "architecture.h"
  #include "device_information.h"
+#include "prepare_detection.h"
  
  #ifdef HAVE_UNISTD_H
  #    include <unistd.h> // sysconf()
@@ -94,26 +91,38 @@ namespace gmx
  #    define _SC_NPROCESSORS_CONF _SC_NPROC_CONF
  #endif
  
-/*! \brief Information about the hardware of all nodes (common to all threads in this process).
+/*! \brief The result of device detection
   *
- * This information is constructed only when required, but thereafter
- * its lifetime is that of the whole process, potentially across
- * multiple successive simulation parts. It's wise to ensure that only
- * one thread can create the information, but thereafter they can all
- * read it without e.g. needing a std::shared_ptr to ensure its
- * lifetime exceeds that of the thread. */
-static std::unique_ptr<gmx_hw_info_t> g_hardwareInfo;
-//! A mutex to protect the hwinfo structure
-static Mutex g_hardwareInfoMutex;
-
-//! Detect GPUs, if that makes sense to attempt.
-static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
-                            const PhysicalNodeCommunicator&  physicalNodeComm,
-                            compat::not_null<gmx_hw_info_t*> hardwareInfo)
+ * Note that non-functional device detection still produces
+ * a detection result, i.e. of no devices. This might not be
+ * what the user wanted, so it makes sense to log later when
+ * that is possible. */
+struct DeviceDetectionResult
  {
+    //! The device information detected
+    std::vector<std::unique_ptr<DeviceInformation>> deviceInfoList_;
+    //! Container of possible warnings to issue when that is possible
+    std::vector<std::string> deviceDetectionWarnings_;
+};
+
+/*! \brief Detect GPUs when that makes sense to attempt.
+ *
+ * \param[in]  physicalNodeComm  The communicator across this physical node
+ * \return The result of the detection, perhaps including diagnostic messages
+ *         to issue later.
+ *
+ * \todo Coordinating the efficient detection of devices across
+ * multiple ranks per node should be separated from the lower-level
+ * hardware detection. See
+ * https://gitlab.com/gromacs/gromacs/-/issues/3650.
+ */
+static DeviceDetectionResult detectAllDeviceInformation(const PhysicalNodeCommunicator& physicalNodeComm)
+{
+    DeviceDetectionResult deviceDetectionResult;
+
      if (!isDeviceDetectionEnabled())
      {
-        return;
+        return deviceDetectionResult;
      }
  
      std::string errorMessage;
@@ -122,15 +131,22 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
  #if GMX_LIB_MPI
      isMasterRankOfPhysicalNode = (physicalNodeComm.rank_ == 0);
  #else
-    // We choose to run the detection only once with thread-MPI and
-    // use a mutex to enforce it.
+    // Without an MPI library, this process is trivially the only one
+    // on the physical node. This code runs before e.g. thread-MPI
+    // ranks are spawned, so detection is race-free by construction.
+    // Read-only access is enforced by providing those ranks with a
+    // handle to a const object, so usage is also free of races.
      GMX_UNUSED_VALUE(physicalNodeComm);
-    isMasterRankOfPhysicalNode = true;
+    isMasterRankOfPhysicalNode        = true;
  #endif
  
-    /* The OpenCL support requires us to run detection on all ranks.
+    /* The SYCL and OpenCL support requires us to run detection on all
+     * ranks.
+     *
       * With CUDA we don't need to, and prefer to detect on one rank
-     * and send the information to the other ranks over MPI. */
+     * and send the information to the other ranks over MPI. This
+     * avoids creating a start-up bottleneck with each MPI rank on a
+     * node making the same GPU API calls. */
      constexpr bool allRanksMustDetectGpus = (GMX_GPU_OPENCL != 0 || GMX_GPU_SYCL != 0);
      bool           gpusCanBeDetected      = false;
      if (isMasterRankOfPhysicalNode || allRanksMustDetectGpus)
@@ -139,19 +155,14 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
          gpusCanBeDetected = isDeviceDetectionFunctional(&errorMessage);
          if (!gpusCanBeDetected)
          {
-            GMX_LOG(mdlog.warning)
-                    .asParagraph()
-                    .appendTextFormatted(
-                            "NOTE: Detection of GPUs failed. The API reported:\n"
-                            "      %s\n"
-                            "      GROMACS cannot run tasks on a GPU.",
-                            errorMessage.c_str());
+            deviceDetectionResult.deviceDetectionWarnings_.emplace_back(
+                    "Detection of GPUs failed. The API reported:\n" + errorMessage);
          }
      }
  
      if (gpusCanBeDetected)
      {
-        hardwareInfo->deviceInfoList = findDevices();
+        deviceDetectionResult.deviceInfoList_ = findDevices();
          // No need to tell the user anything at this point, they get a
          // hardware report later.
      }
@@ -166,7 +177,7 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
          if (isMasterRankOfPhysicalNode)
          {
              gmx::InMemorySerializer writer;
-            serializeDeviceInformations(hardwareInfo->deviceInfoList, &writer);
+            serializeDeviceInformations(deviceDetectionResult.deviceInfoList_, &writer);
              buffer       = writer.finishAndGetBuffer();
              sizeOfBuffer = buffer.size();
          }
@@ -180,17 +191,18 @@ static void gmx_detect_gpus(const gmx::MDLogger&             mdlog,
              if (!isMasterRankOfPhysicalNode)
              {
                  gmx::InMemoryDeserializer reader(buffer, false);
-                hardwareInfo->deviceInfoList = deserializeDeviceInformations(&reader);
+                deviceDetectionResult.deviceInfoList_ = deserializeDeviceInformations(&reader);
              }
          }
      }
  #endif
+    return deviceDetectionResult;
  }
  
  //! Reduce the locally collected \p hardwareInfo over MPI ranks
-static void gmx_collect_hardware_mpi(const gmx::CpuInfo&              cpuInfo,
-                                     const PhysicalNodeCommunicator&  physicalNodeComm,
-                                     compat::not_null<gmx_hw_info_t*> hardwareInfo)
+static void gmx_collect_hardware_mpi(const gmx::CpuInfo&             cpuInfo,
+                                     const PhysicalNodeCommunicator& physicalNodeComm,
+                                     gmx_hw_info_t*                  hardwareInfo)
  {
      const int ncore = hardwareInfo->hardwareTopology->numberOfCores();
      /* Zen1 is assumed for:
@@ -281,7 +293,6 @@ static void gmx_collect_hardware_mpi(const gmx::CpuInfo&              cpuInfo,
      hardwareInfo->bIdenticalGPUs      = (maxMinReduced[4] == -maxMinReduced[9]);
      hardwareInfo->haveAmdZen1Cpu      = (maxMinReduced[10] > 0);
  #else
-    /* All ranks use the same pointer, protected by a mutex in the caller */
      hardwareInfo->nphysicalnode       = 1;
      hardwareInfo->ncore_tot           = ncore;
      hardwareInfo->ncore_min           = ncore;
@@ -300,97 +311,8 @@ static void gmx_collect_hardware_mpi(const gmx::CpuInfo&              cpuInfo,
  #endif
  }
  
-/*! \brief Utility that does dummy computing for max 2 seconds to spin up cores
- *
- *  This routine will check the number of cores configured and online
- *  (using sysconf), and the spins doing dummy compute operations for up to
- *  2 seconds, or until all cores have come online. This can be used prior to
- *  hardware detection for platforms that take unused processors offline.
- *
- *  This routine will not throw exceptions. In principle it should be
- *  declared noexcept, but at least icc 19.1 and 21-beta08 with the
- *  libstdc++-7.5 has difficulty implementing a std::vector of
- *  std::thread started with this function when declared noexcept. It
- *  is not clear whether the problem is the compiler or the standard
- *  library. Fortunately, this function is not performance sensitive,
- *  and only runs on platforms other than x86 and POWER (ie ARM),
- *  so the possible overhead introduced by omitting noexcept is not
- *  important.
- */
-static void spinUpCore()
-{
-#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
-    float dummy           = 0.1;
-    int   countConfigured = sysconf(_SC_NPROCESSORS_CONF);    // noexcept
-    auto  start           = std::chrono::steady_clock::now(); // noexcept
-
-    while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured
-           && std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
-    {
-        for (int i = 1; i < 10000; i++)
-        {
-            dummy /= i;
-        }
-    }
-
-    if (dummy < 0)
-    {
-        printf("This cannot happen, but prevents loop from being optimized away.");
-    }
-#endif
-}
-
-/*! \brief Prepare the system before hardware topology detection
- *
- * This routine should perform any actions we want to put the system in a state
- * where we want it to be before detecting the hardware topology. For most
- * processors there is nothing to do, but some architectures (in particular ARM)
- * have support for taking configured cores offline, which will make them disappear
- * from the online processor count.
- *
- * This routine checks if there is a mismatch between the number of cores
- * configured and online, and in that case we issue a small workload that
- * attempts to wake sleeping cores before doing the actual detection.
- *
- * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only
- * been disabled in the kernel (rather than bios). Since those cores will never
- * come online automatically, we currently skip this test for x86 & PowerPC to
- * avoid wasting 2 seconds. We also skip the test if there is no thread support.
- *
- * \note Cores will sleep relatively quickly again, so it's important to issue
- *       the real detection code directly after this routine.
- */
-static void hardwareTopologyPrepareDetection()
-{
-#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) \
-        && (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
-
-    // Modify this conditional when/if x86 or PowerPC starts to sleep some cores
-    if (c_architecture != Architecture::X86 && c_architecture != Architecture::PowerPC)
-    {
-        int                      countConfigured = sysconf(_SC_NPROCESSORS_CONF);
-        std::vector<std::thread> workThreads(countConfigured);
-
-        for (auto& t : workThreads)
-        {
-            t = std::thread(spinUpCore);
-        }
-
-        for (auto& t : workThreads)
-        {
-            t.join();
-        }
-    }
-#endif
-}
-
-/*! \brief Sanity check hardware topology and print some notes to log
- *
- *  \param mdlog            Logger.
- *  \param hardwareTopology Reference to hardwareTopology object.
- */
-static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
-                                                 const gmx::HardwareTopology gmx_unused& hardwareTopology)
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
+                                          const gmx::HardwareTopology gmx_unused& hardwareTopology)
  {
  #if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF)
      if (hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
@@ -430,24 +352,15 @@ static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused&
                              "performance.");
          }
      }
+#else
+    GMX_UNUSED_VALUE(mdlog);
+    GMX_UNUSED_VALUE(hardwareTopology);
  #endif
  }
  
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNodeCommunicator& physicalNodeComm)
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm)
  {
-    // By construction, only one thread ever runs hardware detection,
-    // but we may as well prevent issues arising if that would change.
-    // Taking the lock early ensures that exactly one thread can
-    // attempt to construct g_hardwareInfo.
-    lock_guard<Mutex> lock(g_hardwareInfoMutex);
-
-    // If we already have the information, just return a handle to it.
-    if (g_hardwareInfo != nullptr)
-    {
-        return g_hardwareInfo.get();
-    }
-
-    // Make the new hardwareInfo in a temporary.
+    // Ensure all cores have spun up, where applicable.
      hardwareTopologyPrepareDetection();
  
      // TODO: We should also do CPU hardware detection only once on each
@@ -456,24 +369,29 @@ gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNod
              std::make_unique<CpuInfo>(CpuInfo::detect()),
              std::make_unique<HardwareTopology>(HardwareTopology::detect()));
  
-    // If we detected the topology on this system, double-check that it makes sense
-    if (hardwareInfo->hardwareTopology->isThisSystem())
-    {
-        hardwareTopologyDoubleCheckDetection(mdlog, *hardwareInfo->hardwareTopology);
-    }
-
      // TODO: Get rid of this altogether.
      hardwareInfo->nthreads_hw_avail = hardwareInfo->hardwareTopology->machine().logicalProcessorCount;
  
      // Detect GPUs
-    gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo));
-    gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
+    // Open a nested scope so no temporary variables can
+    // be mis-used later.
+    {
+        DeviceDetectionResult deviceDetectionResult = detectAllDeviceInformation(physicalNodeComm);
+        hardwareInfo->deviceInfoList.swap(deviceDetectionResult.deviceInfoList_);
+        std::swap(hardwareInfo->hardwareDetectionWarnings_, deviceDetectionResult.deviceDetectionWarnings_);
+    }
  
-    // Now that the temporary is fully constructed, swap it to become
-    // the real thing.
-    g_hardwareInfo.swap(hardwareInfo);
+    gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, hardwareInfo.get());
+
+    return hardwareInfo;
+}
  
-    return g_hardwareInfo.get();
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation)
+{
+    for (const std::string& warningString : hardwareInformation.hardwareDetectionWarnings_)
+    {
+        GMX_LOG(mdlog.warning).asParagraph().appendText(warningString);
+    }
  }
  
  } // namespace gmx
diff --git a/src/gromacs/hardware/detecthardware.h b/src/gromacs/hardware/detecthardware.h

index efacba0028a37402a646fe69cbf9fe2b587b3c3d..84286c9bd9798356fe645fd96c7233864484ddef 100644 (file)
--- a/src/gromacs/hardware/detecthardware.h
+++ b/src/gromacs/hardware/detecthardware.h
@@ -36,30 +36,45 @@
  #ifndef GMX_HARDWARE_DETECTHARDWARE_H
  #define GMX_HARDWARE_DETECTHARDWARE_H
  
+#include <memory>
+
  struct gmx_hw_info_t;
  
  namespace gmx
  {
+class HardwareTopology;
  class MDLogger;
  class PhysicalNodeCommunicator;
  
-/*! \brief Run detection, consistency checks, and make consistent
+/*! \brief Run detection and make correct and consistent
   * hardware information available on all ranks.
   *
- * This routine constructs the global hwinfo structure and returns a pointer to
- * it. It will run a preamble before executing cpu and hardware checks, and
- * then run consistency checks afterwards. The results will also be made
- * available on all nodes.
- *
   * May do communication on MPI_COMM_WORLD when compiled with real MPI.
   *
- * All processes in a physical node need to coordinate calling this
- * routine. With thread-MPI only the first call leads to detection
- * work, and any subsequent call receives the same handle. With real
- * MPI, communication is needed to coordinate the results. In all
- * cases, any thread within a process may use the returned handle. */
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger&            mdlog,
-                                   const PhysicalNodeCommunicator& physicalNodeComm);
+ * This routine is designed to be called once on each process.  In a
+ * thread-MPI configuration, it may only be called before the threads
+ * are spawned. With real MPI, communication is needed to coordinate
+ * the results. In all cases, any thread within a process may use the
+ * returned handle.
+ *
+ * \todo Replace the use of MPI_COMM_WORLD e.g. by using a libraryCommWorld
+ * argument. See https://gitlab.com/gromacs/gromacs/-/issues/3650
+ */
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm);
+
+/*! \brief Sanity check hardware topology and print some notes to log
+ *
+ *  \param mdlog            Logger.
+ *  \param hardwareTopology Reference to hardwareTopology object.
+ */
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger&         mdlog,
+                                          const gmx::HardwareTopology& hardwareTopology);
+
+/*! \brief Issue warnings to mdlog that were decided during detection
+ *
+ * \param[in] mdlog                Logger
+ * \param[in] hardwareInformation  The hardware information holding the warnings to log */
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation);
  
  } // namespace gmx
  
diff --git a/src/gromacs/hardware/hw_info.h b/src/gromacs/hardware/hw_info.h

index b7b796f8b6da8ee93d13841d03fdbe92a3dfa6ac..4d51d955adef28c88cd822041b06d4bd1e1183e1 100644 (file)
--- a/src/gromacs/hardware/hw_info.h
+++ b/src/gromacs/hardware/hw_info.h
@@ -92,6 +92,9 @@ struct gmx_hw_info_t
  
      gmx_bool bIdenticalGPUs; /* TRUE if all ranks have the same type(s) and order of GPUs */
      bool     haveAmdZen1Cpu; /* TRUE when at least one CPU in any of the nodes is AMD Zen of the first generation */
+
+    //! Container of warning strings to log later when that is possible.
+    std::vector<std::string> hardwareDetectionWarnings_;
  };
  
  
diff --git a/src/gromacs/hardware/prepare_detection.cpp b/src/gromacs/hardware/prepare_detection.cpp

new file mode 100644 (file)

index 0000000..885afdc
--- /dev/null
+++ b/src/gromacs/hardware/prepare_detection.cpp
@@ -0,0 +1,134 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Defines routine for activating potentially deactivated cores
+ * so they can be detected.
+ *
+ * The use of std::thread makes for brittle interaction with std
+ * library headers. Its caller also handles GPU detection and
+ * allocation of device-specific data structures. This is more
+ * manageable when separated into two distinct translation units.
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include "prepare_detection.h"
+
+#include "config.h"
+
+#include <cstdio>
+
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include "architecture.h"
+
+#ifdef HAVE_UNISTD_H
+#    include <unistd.h> // sysconf()
+#endif
+
+namespace gmx
+{
+
+/*! \brief Utility that does dummy computing for max 2 seconds to spin up cores
+ *
+ *  This routine will check the number of cores configured and online
+ *  (using sysconf), and then spins doing dummy compute operations for up to
+ *  2 seconds, or until all cores have come online. This can be used prior to
+ *  hardware detection for platforms that take unused processors offline.
+ *
+ *  This routine will not throw exceptions. In principle it should be
+ *  declared noexcept, but at least icc 19.1 and 21-beta08 with the
+ *  libstdc++-7.5 have difficulty implementing a std::vector of
+ *  std::thread started with this function when declared noexcept. It
+ *  is not clear whether the problem is the compiler or the standard
+ *  library. Fortunately, this function is not performance sensitive,
+ *  and only runs on platforms other than x86 and POWER (i.e. ARM),
+ *  so the possible overhead introduced by omitting noexcept is not
+ *  important.
+ */
+static void spinUpCore()
+{
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
+    float dummy           = 0.1;
+    int   countConfigured = sysconf(_SC_NPROCESSORS_CONF);    // noexcept
+    auto  start           = std::chrono::steady_clock::now(); // noexcept
+    // Busy-wait until all configured processors are online or 2 seconds have passed.
+    while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured
+           && std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
+    {
+        for (int i = 1; i < 10000; i++)
+        {
+            dummy /= i;
+        }
+    }
+    // dummy starts positive and is only divided by positive values, so it is never negative.
+    if (dummy < 0)
+    {
+        printf("This cannot happen, but prevents loop from being optimized away.");
+    }
+#endif
+}
+
+void hardwareTopologyPrepareDetection()
+{
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) \
+        && (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
+
+    // Modify this conditional when/if x86 or PowerPC starts to sleep some cores
+    if (c_architecture != Architecture::X86 && c_architecture != Architecture::PowerPC)
+    {
+        int                      countConfigured = sysconf(_SC_NPROCESSORS_CONF);
+        std::vector<std::thread> workThreads(countConfigured);
+
+        for (auto& t : workThreads)
+        {
+            t = std::thread(spinUpCore);
+        }
+
+        for (auto& t : workThreads)
+        {
+            t.join();
+        }
+    }
+#endif
+}
+
+} // namespace gmx
diff --git a/src/gromacs/hardware/prepare_detection.h b/src/gromacs/hardware/prepare_detection.h

new file mode 100644 (file)

index 0000000..38c94b0
--- /dev/null
+++ b/src/gromacs/hardware/prepare_detection.h
@@ -0,0 +1,74 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal
+ * \file
+ * \brief Declares routine for activating potentially deactivated
+ * cores so they can be detected.
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_hardware
+ */
+#ifndef GMX_HARDWARE_PREPAREDETECTION_H
+#define GMX_HARDWARE_PREPAREDETECTION_H
+
+namespace gmx
+{
+
+/*! \brief Prepare the system before hardware topology detection
+ *
+ * This routine should perform any actions we want to put the system in a state
+ * where we want it to be before detecting the hardware topology. For most
+ * processors there is nothing to do, but some architectures (in particular ARM)
+ * have support for taking configured cores offline, which will make them disappear
+ * from the online processor count.
+ *
+ * This routine checks if there is a mismatch between the number of cores
+ * configured and online, and in that case we issue a small workload that
+ * attempts to wake sleeping cores before doing the actual detection.
+ *
+ * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only
+ * been disabled in the kernel (rather than bios). Since those cores will never
+ * come online automatically, we currently skip this test for x86 & PowerPC to
+ * avoid wasting 2 seconds. We also skip the test if there is no thread support.
+ *
+ * \note Cores will sleep relatively quickly again, so it's important to issue
+ *       the real detection code directly after this routine.
+ */
+void hardwareTopologyPrepareDetection();
+
+} // namespace gmx
+
+#endif
diff --git a/src/gromacs/listed_forces/listed_forces.cpp b/src/gromacs/listed_forces/listed_forces.cpp

index d68b21b93f76c719d9401045c389742b93fe03da..e64de500d01abf8d5bee92674bad37a69b64191a 100644 (file)
--- a/src/gromacs/listed_forces/listed_forces.cpp
+++ b/src/gromacs/listed_forces/listed_forces.cpp
@@ -158,6 +158,17 @@ void ListedForces::setup(const InteractionDefinitions& domainIdef, const int num
          selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
  
          idefSelection_.ilsort = domainIdef.ilsort;
+
+        if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
+        {
+            idefSelection_.iparams_posres   = domainIdef.iparams_posres;
+            idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
+        }
+        else
+        {
+            idefSelection_.iparams_posres.clear();
+            idefSelection_.iparams_fbposres.clear();
+        }
      }
  
      setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
diff --git a/src/gromacs/mdlib/leapfrog_gpu.h b/src/gromacs/mdlib/leapfrog_gpu.h

index 7554e6e793dacd482f391ac23728aeabe9e78e99..8bcf5e37a5585f402f7b945c1a7575e5702b1d64 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu.h
+++ b/src/gromacs/mdlib/leapfrog_gpu.h
@@ -86,7 +86,6 @@ enum class VelocityScalingType
  {
      None,     //!< Do not apply velocity scaling (not a PR-coupling run or step)
      Diagonal, //!< Apply velocity scaling using a diagonal matrix
-    Full      //!< Apply velocity scaling using a full matrix
  };
  
  class LeapFrogGpu
diff --git a/src/gromacs/mdlib/lincs.cpp b/src/gromacs/mdlib/lincs.cpp

index 10d8acf46b93f0bb8fbedcf02dfbb1677bef4268..2f8119da4ad3d786a6e3e3944ba678f4516e8961 100644 (file)
--- a/src/gromacs/mdlib/lincs.cpp
+++ b/src/gromacs/mdlib/lincs.cpp
@@ -2132,6 +2132,9 @@ void set_lincs(const InteractionDefinitions& idef,
      }
  
      set_lincs_matrix(li, invmass, lambda);
+
+    li->rmsdData[0] = 0.0;
+    li->rmsdData[1] = 0.0;
  }
  
  //! Issues a warning when LINCS constraints cannot be satisfied.
diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp

index e788c6b66b2d2d5237d0f005f4aad8795402a016..18606f120fb1f1320d85542e64a27dabcef9f8f7 100644 (file)
--- a/src/gromacs/mdlib/sim_util.cpp
+++ b/src/gromacs/mdlib/sim_util.cpp
@@ -1213,6 +1213,30 @@ void do_force(FILE*                               fplog,
                                                         AtomLocality::Local, simulationWork, stepWork)
                                               : nullptr;
  
+    // Copy coordinate from the GPU if update is on the GPU and there
+    // are forces to be computed on the CPU, or for the computation of
+    // virial, or if host-side data will be transferred from this task
+    // to a remote task for halo exchange or PME-PP communication. At
+    // search steps the current coordinates are already on the host,
+    // hence copy is not needed.
+    const bool haveHostPmePpComms =
+            !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication;
+
+    GMX_ASSERT(simulationWork.useGpuHaloExchange
+                       == ((cr->dd != nullptr) && (!cr->dd->gpuHaloExchange[0].empty())),
+               "The GPU halo exchange is active, but it has not been constructed.");
+    const bool haveHostHaloExchangeComms =
+            havePPDomainDecomposition(cr) && !simulationWork.useGpuHaloExchange;
+
+    bool gmx_used_in_debug haveCopiedXFromGpu = false;
+    if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch
+        && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial
+            || haveHostPmePpComms || haveHostHaloExchangeComms))
+    {
+        stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local);
+        haveCopiedXFromGpu = true;
+    }
+
      // If coordinates are to be sent to PME task from CPU memory, perform that send here.
      // Otherwise the send will occur after H2D coordinate transfer.
      if (GMX_MPI && !thisRankHasDuty(cr, DUTY_PME) && !pmeSendCoordinatesFromGpu && stepWork.computeSlowForces)
@@ -1220,11 +1244,7 @@ void do_force(FILE*                               fplog,
          /* Send particle coordinates to the pme nodes */
          if (!stepWork.doNeighborSearch && simulationWork.useGpuUpdate)
          {
-            GMX_RELEASE_ASSERT(false,
-                               "GPU update and separate PME ranks are only supported with GPU "
-                               "direct communication!");
-            // TODO: when this code-path becomes supported add:
-            // stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+            stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
          }
  
          gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL],
@@ -1260,31 +1280,6 @@ void do_force(FILE*                               fplog,
          }
      }
  
-    // Copy coordinate from the GPU if update is on the GPU and there
-    // are forces to be computed on the CPU, or for the computation of
-    // virial, or if host-side data will be transferred from this task
-    // to a remote task for halo exchange or PME-PP communication. At
-    // search steps the current coordinates are already on the host,
-    // hence copy is not needed.
-    const bool haveHostPmePpComms =
-            !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication;
-
-    GMX_ASSERT(simulationWork.useGpuHaloExchange
-                       == ((cr->dd != nullptr) && (!cr->dd->gpuHaloExchange[0].empty())),
-               "The GPU halo exchange is active, but it has not been constructed.");
-    const bool haveHostHaloExchangeComms =
-            havePPDomainDecomposition(cr) && !simulationWork.useGpuHaloExchange;
-
-    bool gmx_used_in_debug haveCopiedXFromGpu = false;
-    if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch
-        && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial
-            || haveHostPmePpComms || haveHostHaloExchangeComms))
-    {
-        GMX_ASSERT(stateGpu != nullptr, "stateGpu should not be null");
-        stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local);
-        haveCopiedXFromGpu = true;
-    }
-
      // If coordinates are to be sent to PME task from GPU memory, perform that send here.
      // Otherwise the send will occur before the H2D coordinate transfer.
      if (!thisRankHasDuty(cr, DUTY_PME) && pmeSendCoordinatesFromGpu)
@@ -1495,10 +1490,12 @@ void do_force(FILE*                               fplog,
              }
              else
              {
-                // Note: GPU update + DD without direct communication is not supported,
-                // a waitCoordinatesReadyOnHost() should be issued if it will be.
-                GMX_ASSERT(!simulationWork.useGpuUpdate,
-                           "GPU update is not supported with CPU halo exchange");
+                if (simulationWork.useGpuUpdate)
+                {
+                    GMX_ASSERT(haveCopiedXFromGpu,
+                               "a wait should only be triggered if copy has been scheduled");
+                    stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+                }
                  dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle);
              }
  
@@ -1978,10 +1975,10 @@ void do_force(FILE*                               fplog,
          wallcycle_stop(wcycle, ewcFORCE);
      }
  
-    // If on GPU PME-PP comms or GPU update path, receive forces from PME before GPU buffer ops
+    // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
      // TODO refactor this and unify with below default-path call to the same function
      if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && stepWork.computeSlowForces
-        && (simulationWork.useGpuPmePpCommunication || simulationWork.useGpuUpdate))
+        && simulationWork.useGpuPmePpCommunication)
      {
          /* In case of node-splitting, the PP nodes receive the long-range
           * forces, virial and energy from the PME nodes here.
@@ -2039,7 +2036,8 @@ void do_force(FILE*                               fplog,
              //       copy call done in sim_utils(...) for the output.
              // NOTE: If there are virtual sites, the forces are modified on host after this D2H copy. Hence,
              //       they should not be copied in do_md(...) for the output.
-            if (!simulationWork.useGpuUpdate || vsite)
+            if (!simulationWork.useGpuUpdate
+                || (simulationWork.useGpuUpdate && DOMAINDECOMP(cr) && haveHostPmePpComms) || vsite)
              {
                  stateGpu->copyForcesFromGpu(forceWithShift, AtomLocality::Local);
                  stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
@@ -2076,7 +2074,7 @@ void do_force(FILE*                               fplog,
  
      // TODO refactor this and unify with above GPU PME-PP / GPU update path call to the same function
      if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication
-        && !simulationWork.useGpuUpdate && stepWork.computeSlowForces)
+        && stepWork.computeSlowForces)
      {
          /* In case of node-splitting, the PP nodes receive the long-range
           * forces, virial and energy from the PME nodes here.
diff --git a/src/gromacs/mdlib/update_constrain_gpu.h b/src/gromacs/mdlib/update_constrain_gpu.h

index a30cbe825d305c92ccf9c211014bda5032b52dcc..581851f9a271cd93929b0f62ce8e145a2e92f98d 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu.h
+++ b/src/gromacs/mdlib/update_constrain_gpu.h
@@ -46,6 +46,7 @@
  
  #include "gromacs/gpu_utils/devicebuffer_datatype.h"
  #include "gromacs/mdtypes/group.h"
+#include "gromacs/timing/wallcycle.h"
  #include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/classhelpers.h"
  
@@ -69,10 +70,10 @@ public:
      /*! \brief Create Update-Constrain object.
       *
       * The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain
-     * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that modify
-     * coordinates. The event is maintained outside this class and also passed to all (if any) consumers
-     * of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr because the
-     * markEvent(...) method is called unconditionally.
+     * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
+     * modify coordinates. The event is maintained outside this class and also passed to all (if
+     * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr
+     * because the markEvent(...) method is called unconditionally.
       *
       * \param[in] ir                Input record data: LINCS takes number of iterations and order of
       *                              projection from it.
@@ -80,13 +81,16 @@ public:
       *                              and target O-H and H-H distances from this object.
       * \param[in] deviceContext     GPU device context.
       * \param[in] deviceStream      GPU stream to use.
-     * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that update is done on the GPU.
+     * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that update is done
+     *                              on the GPU.
+     * \param[in] wcycle            The wallclock counter
       */
      UpdateConstrainGpu(const t_inputrec&     ir,
                         const gmx_mtop_t&     mtop,
                         const DeviceContext&  deviceContext,
                         const DeviceStream&   deviceStream,
-                       GpuEventSynchronizer* xUpdatedOnDevice);
+                       GpuEventSynchronizer* xUpdatedOnDevice,
+                       gmx_wallcycle*        wcycle);
  
      ~UpdateConstrainGpu();
  
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp

index 5c4afd2acd0757b48298ff9f3b8220ab71250319..dc2b0421c3d8ce1ce1e1e0e959bdb6a7dc3abd67 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.cpp
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cpp
@@ -60,7 +60,8 @@ UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& /* ir   */,
                                         const gmx_mtop_t& /* mtop */,
                                         const DeviceContext& /* deviceContext */,
                                         const DeviceStream& /* deviceStream */,
-                                       GpuEventSynchronizer* /* xUpdatedOnDevice */) :
+                                       GpuEventSynchronizer* /* xUpdatedOnDevice */,
+                                       gmx_wallcycle* /*wcycle*/) :
      impl_(nullptr)
  {
      GMX_ASSERT(!impl_,
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cu b/src/gromacs/mdlib/update_constrain_gpu_impl.cu

index b9dd8632db293e8a7ad3e60e72ab0d1601ced86c..825890ce82617273dd07e7d048fa05ee55d7b3e8 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.cu
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cu
@@ -67,6 +67,7 @@
  #include "gromacs/mdlib/settle_gpu.cuh"
  #include "gromacs/mdlib/update_constrain_gpu.h"
  #include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/timing/wallcycle.h"
  
  namespace gmx
  {
@@ -116,6 +117,9 @@ void UpdateConstrainGpu::Impl::integrate(GpuEventSynchronizer*             fRead
                                           const float                       dtPressureCouple,
                                           const matrix                      prVelocityScalingMatrix)
  {
+    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
      // Clearing virial matrix
      // TODO There is no point in having separate virial matrix for constraints
      clear_mat(virial);
@@ -145,11 +149,17 @@ void UpdateConstrainGpu::Impl::integrate(GpuEventSynchronizer*             fRead
  
      coordinatesReady_->markEvent(deviceStream_);
  
+    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+
      return;
  }
  
  void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
  {
+    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
      ScalingMatrix mu;
      mu.xx = scalingMatrix[XX][XX];
      mu.yy = scalingMatrix[YY][YY];
@@ -165,10 +175,16 @@ void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
      // TODO: Although this only happens on the pressure coupling steps, this synchronization
      //       can affect the performance if nstpcouple is small.
      deviceStream_.synchronize();
+
+    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
  }
  
  void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix)
  {
+    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
      ScalingMatrix mu;
      mu.xx = scalingMatrix[XX][XX];
      mu.yy = scalingMatrix[YY][YY];
@@ -184,16 +200,21 @@ void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix)
      // TODO: Although this only happens on the pressure coupling steps, this synchronization
      //       can affect the performance if nstpcouple is small.
      deviceStream_.synchronize();
+
+    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
  }
  
  UpdateConstrainGpu::Impl::Impl(const t_inputrec&     ir,
                                 const gmx_mtop_t&     mtop,
                                 const DeviceContext&  deviceContext,
                                 const DeviceStream&   deviceStream,
-                               GpuEventSynchronizer* xUpdatedOnDevice) :
+                               GpuEventSynchronizer* xUpdatedOnDevice,
+                               gmx_wallcycle*        wcycle) :
      deviceContext_(deviceContext),
      deviceStream_(deviceStream),
-    coordinatesReady_(xUpdatedOnDevice)
+    coordinatesReady_(xUpdatedOnDevice),
+    wcycle_(wcycle)
  {
      GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr.");
  
@@ -217,6 +238,10 @@ void UpdateConstrainGpu::Impl::set(DeviceBuffer<RVec>            d_x,
                                     const t_mdatoms&              md,
                                     const int                     numTempScaleValues)
  {
+    // TODO wallcycle
+    wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+    wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
      GMX_ASSERT(d_x != nullptr, "Coordinates device buffer should not be null.");
      GMX_ASSERT(d_v != nullptr, "Velocities device buffer should not be null.");
      GMX_ASSERT(d_f != nullptr, "Forces device buffer should not be null.");
@@ -239,10 +264,14 @@ void UpdateConstrainGpu::Impl::set(DeviceBuffer<RVec>            d_x,
  
      coordinateScalingKernelLaunchConfig_.gridSize[0] =
              (numAtoms_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
+
+    wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+    wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
  }
  
  void UpdateConstrainGpu::Impl::setPbc(const PbcType pbcType, const matrix box)
  {
+    // TODO wallcycle
      setPbcAiuc(numPbcDimensions(pbcType), box, &pbcAiuc_);
  }
  
@@ -255,8 +284,9 @@ UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec&     ir,
                                         const gmx_mtop_t&     mtop,
                                         const DeviceContext&  deviceContext,
                                         const DeviceStream&   deviceStream,
-                                       GpuEventSynchronizer* xUpdatedOnDevice) :
-    impl_(new Impl(ir, mtop, deviceContext, deviceStream, xUpdatedOnDevice))
+                                       GpuEventSynchronizer* xUpdatedOnDevice,
+                                       gmx_wallcycle*        wcycle) :
+    impl_(new Impl(ir, mtop, deviceContext, deviceStream, xUpdatedOnDevice, wcycle))
  {
  }
  
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.h b/src/gromacs/mdlib/update_constrain_gpu_impl.h

index 9ee067791318d5d70ee2c34c3029711ad06220ad..7453a98105712aca5264b16aa737789d8f53e213 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.h
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.h
@@ -66,10 +66,10 @@ public:
      /*! \brief Create Update-Constrain object.
       *
       * The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain
-     * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that modify
-     * coordinates. The event is maintained outside this class and also passed to all (if any) consumers
-     * of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr because the
-     * markEvent(...) method is called unconditionally.
+     * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
+     * modify coordinates. The event is maintained outside this class and also passed to all (if
+     * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr
+     * because the markEvent(...) method is called unconditionally.
       *
       * \param[in] ir                Input record data: LINCS takes number of iterations and order of
       *                              projection from it.
@@ -77,13 +77,16 @@ public:
       *                              and target O-H and H-H distances from this object.
       * \param[in] deviceContext     GPU device context.
       * \param[in] deviceStream      GPU stream to use.
-     * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that update is done on the GPU.
+     * \param[in] xUpdatedOnDevice  The event synchronizer to use to mark that
+     *                              update is done on the GPU.
+     * \param[in] wcycle            The wallclock counter
       */
      Impl(const t_inputrec&     ir,
           const gmx_mtop_t&     mtop,
           const DeviceContext&  deviceContext,
           const DeviceStream&   deviceStream,
-         GpuEventSynchronizer* xUpdatedOnDevice);
+         GpuEventSynchronizer* xUpdatedOnDevice,
+         gmx_wallcycle*        wcycle);
  
      ~Impl();
  
@@ -220,6 +223,8 @@ private:
  
      //! An pointer to the event to indicate when the update of coordinates is complete
      GpuEventSynchronizer* coordinatesReady_;
+    //! The wallclock counter
+    gmx_wallcycle* wcycle_ = nullptr;
  };
  
  } // namespace gmx
diff --git a/src/gromacs/mdrun/legacymdrunoptions.h b/src/gromacs/mdrun/legacymdrunoptions.h

index 796e4794903989e848b5fe774b156cf1ef90494a..474f6f0396258f227def1cc012ec38bdeddca284 100644 (file)
--- a/src/gromacs/mdrun/legacymdrunoptions.h
+++ b/src/gromacs/mdrun/legacymdrunoptions.h
@@ -3,7 +3,7 @@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
+ * Copyright (c) 2011-2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -68,13 +68,19 @@ namespace gmx
   * support for the CLI and API without duplicating code. It should be
   * eliminated following the TODOs below.
   *
+ * \warning Instances provide lifetime scope for members that do not have
+ *  effective lifetime management or which are frequently accessed unsafely.
+ *  The caller is responsible for keeping a LegacyMdrunOptions object alive
+ *  for as long as any consumers, direct or transitive.
+ *
   * \todo Modules in mdrun should acquire proper option handling so
- * that all of these declarations and defaults are local to the
- * modules.
+ *       that all of these declarations and defaults are local to the
+ *       modules.
   *
   * \todo Contextual aspects, such as working directory
- * and environment variable handling are more properly
- * the role of SimulationContext, and should be moved there */
+ *       and environment variable handling are more properly
+ *       the role of SimulationContext, and should be moved there.
+ */
  class LegacyMdrunOptions
  {
  public:
diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp

index f010e6a10a86a450fe4bf0a66a7cf05fcb752126..f5c0fd393e527b48ecbb59402f34748c0afcf9a7 100644 (file)
--- a/src/gromacs/mdrun/md.cpp
+++ b/src/gromacs/mdrun/md.cpp
@@ -415,7 +415,7 @@ void gmx::LegacySimulator::do_md()
          integrator = std::make_unique<UpdateConstrainGpu>(
                  *ir, *top_global, fr->deviceStreamManager->context(),
                  fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
-                stateGpu->xUpdatedOnDevice());
+                stateGpu->xUpdatedOnDevice(), wcycle);
  
          integrator->setPbc(PbcType::Xyz, state->box);
      }
@@ -1255,6 +1255,8 @@ void gmx::LegacySimulator::do_md()
  
          if (useGpuForUpdate)
          {
+            wallcycle_stop(wcycle, ewcUPDATE);
+
              if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
              {
                  integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
@@ -1265,9 +1267,16 @@ void gmx::LegacySimulator::do_md()
                  stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
              }
  
-            // If the buffer ops were not offloaded this step, the forces are on the host and have to be copied
-            if (!runScheduleWork->stepWork.useGpuFBufferOps)
+            if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
+                && !thisRankHasDuty(cr, DUTY_PME))
+            {
+                // The PME forces were received on the host, so they have to be copied
+                stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
+            }
+            else if (!runScheduleWork->stepWork.useGpuFBufferOps)
              {
+                // The buffer ops were not offloaded this step, so the forces are on the
+                // host and have to be copied
                  stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
              }
  
diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp

index 4e27012f621aa242889bfc95532f482da32bc511..e78faae8d511116296804c5e9a3276e829970363 100644 (file)
--- a/src/gromacs/mdrun/runner.cpp
+++ b/src/gromacs/mdrun/runner.cpp
@@ -77,6 +77,7 @@
  #include "gromacs/hardware/cpuinfo.h"
  #include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/device_management.h"
+#include "gromacs/hardware/hardwaretopology.h"
  #include "gromacs/hardware/printhardware.h"
  #include "gromacs/imd/imd.h"
  #include "gromacs/listed_forces/disre.h"
@@ -335,6 +336,7 @@ Mdrunner Mdrunner::cloneOnSpawnedThread() const
      newRunner.hw_opt    = hw_opt;
      newRunner.filenames = filenames;
  
+    newRunner.hwinfo_         = hwinfo_;
      newRunner.oenv            = oenv;
      newRunner.mdrunOptions    = mdrunOptions;
      newRunner.domdecOptions   = domdecOptions;
@@ -724,7 +726,6 @@ int Mdrunner::mdrunner()
      gmx_wallcycle_t           wcycle;
      gmx_walltime_accounting_t walltime_accounting = nullptr;
      MembedHolder              membedHolder(filenames.size(), filenames.data());
-    gmx_hw_info_t*            hwinfo = nullptr;
  
      /* CAUTION: threads may be started later on in this function, so
         cr doesn't reflect the final parallel state right now */
@@ -763,20 +764,9 @@ int Mdrunner::mdrunner()
      gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
      gmx::MDLogger    mdlog(logOwner.logger());
  
-    // TODO The thread-MPI master rank makes a working
-    // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
-    // after the threads have been launched. This works because no use
-    // is made of that communicator until after the execution paths
-    // have rejoined. But it is likely that we can improve the way
-    // this is expressed, e.g. by expressly running detection only the
-    // master rank for thread-MPI, rather than relying on the mutex
-    // and reference count.
-    PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
-    hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
-
-    gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
+    gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo_);
  
-    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
+    std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo_->deviceInfoList, hw_opt.gpuIdsAvailable);
      const int        numDevicesToUse = gmx::ssize(gpuIdsToUse);
  
      // Print citation requests after all software/hardware printing
@@ -825,7 +815,7 @@ int Mdrunner::mdrunner()
                      gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
                      hw_opt.nthreads_tmpi);
              useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
-                    useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
+                    useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo_,
                      *inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
          }
          GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
@@ -836,15 +826,13 @@ int Mdrunner::mdrunner()
           * prevent any possible subsequent checks from working
           * correctly. */
          hw_opt.nthreads_tmpi =
-                get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
+                get_nthreads_mpi(hwinfo_, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
                                   inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
  
          // Now start the threads for thread MPI.
          spawnThreads(hw_opt.nthreads_tmpi);
          // The spawned threads enter mdrunner() and execution of
          // master and spawned threads joins at the end of this block.
-        physicalNodeComm =
-                PhysicalNodeCommunicator(libraryWorldCommunicator, gmx_physicalnode_id_hash());
      }
  
      GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
@@ -853,6 +841,14 @@ int Mdrunner::mdrunner()
      t_commrec*    cr       = crHandle.get();
      GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
  
+    PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
+
+    // If we detected the topology on this system, double-check that it makes sense
+    if (hwinfo_->hardwareTopology->isThisSystem())
+    {
+        hardwareTopologyDoubleCheckDetection(mdlog, *hwinfo_->hardwareTopology);
+    }
+
      if (PAR(cr))
      {
          /* now broadcast everything to the non-master nodes/threads: */
@@ -883,7 +879,7 @@ int Mdrunner::mdrunner()
      bool useGpuForPme       = false;
      bool useGpuForBonded    = false;
      bool useGpuForUpdate    = false;
-    bool gpusWereDetected   = hwinfo->ngpu_compatible_tot > 0;
+    bool gpusWereDetected   = hwinfo_->ngpu_compatible_tot > 0;
      try
      {
          // It's possible that there are different numbers of GPUs on
@@ -895,14 +891,11 @@ int Mdrunner::mdrunner()
                  nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
                  gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
          useGpuForPme = decideWhetherToUseGpusForPme(
-                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec,
+                useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo_, *inputrec,
                  cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected);
-        auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr)
-                                  && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
-        useGpuForBonded = decideWhetherToUseGpusForBonded(
-                useGpuForNonbonded, useGpuForPme, bondedTarget, canUseGpuForBonded,
-                EVDW_PME(inputrec->vdwtype), EEL_PME_EWALD(inputrec->coulombtype),
-                domdecOptions.numPmeRanks, gpusWereDetected);
+        useGpuForBonded = decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
+                                                          bondedTarget, *inputrec, mtop,
+                                                          domdecOptions.numPmeRanks, gpusWereDetected);
      }
      GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
  
@@ -1167,7 +1160,7 @@ int Mdrunner::mdrunner()
       */
      prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box,
                            useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
-                          *hwinfo->cpuInfo);
+                          *hwinfo_->cpuInfo);
  
      // This builder is necessary while we have multi-part construction
      // of DD. Before DD is constructed, we use the existence of
@@ -1197,7 +1190,7 @@ int Mdrunner::mdrunner()
  
      // Produce the task assignment for this rank - done after DD is constructed
      GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
-            gpuIdsToUse, userGpuTaskAssignment, *hwinfo, simulationCommunicator, physicalNodeComm,
+            gpuIdsToUse, userGpuTaskAssignment, *hwinfo_, simulationCommunicator, physicalNodeComm,
              nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
              useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
              // TODO cr->duty & DUTY_PME should imply that a PME
@@ -1334,12 +1327,12 @@ int Mdrunner::mdrunner()
      // that existing affinity setting was from OpenMP or something
      // else, so we run this code both before and after we initialize
      // the OpenMP support.
-    gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+    gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, FALSE);
      /* Check and update the number of OpenMP threads requested */
-    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
+    checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_,
                                              pmeRunMode, mtop, *inputrec);
  
-    gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, physicalNodeComm.size_,
+    gmx_omp_nthreads_init(mdlog, cr, hwinfo_->nthreads_hw_avail, physicalNodeComm.size_,
                            hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
  
      // Enable FP exception detection, but not in
@@ -1360,7 +1353,7 @@ int Mdrunner::mdrunner()
      }
  
      /* Now that we know the setup is consistent, check for efficiency */
-    check_resource_division_efficiency(hwinfo, gpuTaskAssignments.thisRankHasAnyGpuTask(),
+    check_resource_division_efficiency(hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(),
                                         mdrunOptions.ntompOptionIsSet, cr, mdlog);
  
      /* getting number of PP/PME threads on this MPI / tMPI rank.
@@ -1369,7 +1362,7 @@ int Mdrunner::mdrunner()
       */
      const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
                                                                    : gmx_omp_nthreads_get(emntPME);
-    checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo->hardwareTopology,
+    checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology,
                                    physicalNodeComm, mdlog);
  
      // Enable Peer access between GPUs where available
@@ -1388,14 +1381,14 @@ int Mdrunner::mdrunner()
           * - which indicates that probably the OpenMP library has changed it
           * since we first checked).
           */
-        gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
+        gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, TRUE);
  
          int numThreadsOnThisNode, intraNodeThreadOffset;
          analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
                                   &intraNodeThreadOffset);
  
          /* Set the CPU affinity */
-        gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, numThreadsOnThisRank,
+        gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo_->hardwareTopology, numThreadsOnThisRank,
                                  numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
      }
  
@@ -1464,7 +1457,7 @@ int Mdrunner::mdrunner()
                      deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
          }
  
-        fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo,
+        fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo_,
                                          runScheduleWork.simulationWork.useGpuNonbonded,
                                          deviceStreamManager.get(), &mtop, box, wcycle);
          // TODO: Move the logic below to a GPU bonded builder
@@ -1800,7 +1793,7 @@ int Mdrunner::mdrunner()
      sfree(disresdata);
      sfree(oriresdata);
  
-    if (!hwinfo->deviceInfoList.empty())
+    if (!hwinfo_->deviceInfoList.empty())
      {
          /* stop the GPU profiler (only CUDA) */
          stopGpuProfiler();
@@ -1902,6 +1895,8 @@ public:
                                                  real                forceWarningThreshold,
                                                  StartingBehavior    startingBehavior);
  
+    void addHardwareDetectionResult(const gmx_hw_info_t* hwinfo);
+
      void addDomdec(const DomdecOptions& options);
  
      void addInput(SimulationInputHandle inputHolder);
@@ -1967,6 +1962,9 @@ private:
      //! The modules that comprise the functionality of mdrun.
      std::unique_ptr<MDModules> mdModules_;
  
+    //! Detected hardware.
+    const gmx_hw_info_t* hwinfo_ = nullptr;
+
      //! \brief Parallelism information.
      gmx_hw_opt_t hardwareOptions_;
  
@@ -2067,6 +2065,16 @@ Mdrunner Mdrunner::BuilderImplementation::build()
      // nullptr is a valid value for the multisim handle
      newRunner.ms = multiSimulation_;
  
+    if (hwinfo_)
+    {
+        newRunner.hwinfo_ = hwinfo_;
+    }
+    else
+    {
+        GMX_THROW(gmx::APIError(
+                "MdrunnerBuilder::addHardwareDetectionResult() is required before build()"));
+    }
+
      if (inputHolder_)
      {
          newRunner.inputHolder_ = std::move(inputHolder_);
@@ -2145,6 +2153,11 @@ Mdrunner Mdrunner::BuilderImplementation::build()
      return newRunner;
  }
  
+void Mdrunner::BuilderImplementation::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
+{
+    hwinfo_ = hwinfo;
+}
+
  void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
  {
      nbpu_opt_ = nbpu_opt;
@@ -2204,6 +2217,12 @@ MdrunnerBuilder::MdrunnerBuilder(std::unique_ptr<MDModules>           mdModules,
  
  MdrunnerBuilder::~MdrunnerBuilder() = default;
  
+MdrunnerBuilder& MdrunnerBuilder::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
+{
+    impl_->addHardwareDetectionResult(hwinfo);
+    return *this;
+}
+
  MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions&    options,
                                                        real                   forceWarningThreshold,
                                                        const StartingBehavior startingBehavior)
diff --git a/src/gromacs/mdrun/runner.h b/src/gromacs/mdrun/runner.h

index 2dd38531bb86ce11037c759501ff5be5dab30a5d..703e632b34cf4bf18e1f35d9f6d8f85e0709945d 100644 (file)
--- a/src/gromacs/mdrun/runner.h
+++ b/src/gromacs/mdrun/runner.h
@@ -306,6 +306,9 @@ private:
      //! The modules that comprise mdrun.
      std::unique_ptr<MDModules> mdModules_;
  
+    //! Non-owning handle to the results of the hardware detection.
+    const gmx_hw_info_t* hwinfo_ = nullptr;
+
      /*!
       * \brief Holds simulation input specification provided by client, if any.
       *
@@ -402,6 +405,17 @@ public:
       */
      Mdrunner build();
  
+    /*!
+     * \brief Supply the result of hardware detection to the gmx::Mdrunner
+     *
+     * \param hwinfo  Non-owning not-null handle to result of hardware detection.
+     *
+     * \todo It would be better to express this as either a not-null const pointer or
+     * a const reference, but neither of those is consistent with incremental
+     * building of an object. This motivates future work to be able to make a deep copy
+     * of the detection result. See https://gitlab.com/gromacs/gromacs/-/issues/3650 */
+    MdrunnerBuilder& addHardwareDetectionResult(const gmx_hw_info_t* hwinfo);
+
      /*!
       * \brief Set up non-bonded short-range force calculations.
       *
diff --git a/src/gromacs/mdtypes/pull_params.h b/src/gromacs/mdtypes/pull_params.h

index fbd35e98e799d1a0b0ec789d072274e6b7356f80..c0709ca353edb9ac738bcf1681d44e57c2fe039d 100644 (file)
--- a/src/gromacs/mdtypes/pull_params.h
+++ b/src/gromacs/mdtypes/pull_params.h
@@ -74,39 +74,73 @@ static const int c_pullCoordNgroupMax = 6;
  /*! \brief Struct that defines a pull coordinate */
  struct t_pull_coord
  {
-    int                                   eType; /**< The pull type: umbrella, constraint, ... */
-    std::string                           externalPotentialProvider; /**< Name of the module providing the external potential, only used with eType==epullEXTERNAL */
-    int                                   eGeom;  /**< The pull geometry */
-    int                                   ngroup; /**< The number of groups, depends on eGeom */
-    std::array<int, c_pullCoordNgroupMax> group; /**< The pull groups: indices into the group arrays in pull_t and pull_params_t, ngroup indices are used */
-    gmx::IVec                             dim;   /**< Used to select components for constraint */
-    gmx::RVec                             origin; /**< The origin for the absolute reference */
-    gmx::RVec                             vec;    /**< The pull vector, direction or position */
-    bool                                  bStart; /**< Set init based on the initial structure */
-    real                                  init; /**< Initial reference displacement (nm) or (deg) */
-    real                                  rate; /**< Rate of motion (nm/ps) or (deg/ps) */
-    real                                  k; /**< Force constant (kJ/(mol nm^2) or kJ/(mol rad^2) for umbrella pull type, or kJ/(mol nm) or kJ/(mol rad) for constant force pull type */
-    real                                  kB; /**< Force constant for state B */
+    //! The pull type: umbrella, constraint, ...
+    int eType = 0;
+    //! Name of the module providing the external potential, only used with eType==epullEXTERNAL
+    std::string externalPotentialProvider;
+    //! The pull geometry
+    int eGeom = 0;
+    //! The number of groups, depends on eGeom
+    int ngroup = 0;
+    /*! \brief The pull groups:
+     *
+     *  indices into the group arrays in pull_t and pull_params_t,
+     *   ngroup indices are used
+     */
+    std::array<int, c_pullCoordNgroupMax> group;
+    //! Used to select components for constraint
+    gmx::IVec dim = { 0, 0, 0 };
+    //! The origin for the absolute reference
+    gmx::RVec origin = { 0, 0, 0 };
+    //! The pull vector, direction or position
+    gmx::RVec vec = { 0, 0, 0 };
+    //! Set init based on the initial structure
+    bool bStart = false;
+    //! Initial reference displacement (nm) or (deg)
+    real init = 0.0;
+    //! Rate of motion (nm/ps) or (deg/ps)
+    real rate = 0.0;
+    /*! \brief Force constant
+     *
+     * For umbrella pull type this is kJ/(mol nm^2) or kJ/(mol rad^2).
+     * For constant force pull type it is kJ/(mol nm) or kJ/(mol rad).
+     */
+    real k = 0.0;
+    //! Force constant for state B
+    real kB = 0.0;
  };
  
  /*! \brief Struct containing all pull parameters */
  struct pull_params_t
  {
-    int  ngroup;         /**< Number of pull groups */
-    int  ncoord;         /**< Number of pull coordinates */
-    real cylinder_r;     /**< Radius of cylinder for dynamic COM (nm) */
-    real constr_tol;     /**< Absolute tolerance for constraints in (nm) */
-    bool bPrintCOM;      /**< Print coordinates of COM for each coord */
-    bool bPrintRefValue; /**< Print the reference value for each coord */
-    bool bPrintComp;     /**< Print cartesian components for each coord with geometry=distance */
-    bool bSetPbcRefToPrevStepCOM; /**< Use the COM of each group from the previous step as reference */
-    int  nstxout;                 /**< Output interval for pull x */
-    int  nstfout;                 /**< Output interval for pull f */
-    bool bXOutAverage;            /**< Write the average coordinate during the output interval */
-    bool bFOutAverage;            /**< Write the average force during the output interval */
-
-    std::vector<t_pull_group> group; /**< groups to pull/restrain/etc/ */
-    std::vector<t_pull_coord> coord; /**< the pull coordinates */
+    //! Number of pull groups
+    int ngroup = 0;
+    //! Number of pull coordinates
+    int ncoord = 0;
+    //! Radius of cylinder for dynamic COM (nm)
+    real cylinder_r = 0.0;
+    //! Absolute tolerance for constraints in (nm)
+    real constr_tol = 0.0;
+    //! Print coordinates of COM for each coord
+    bool bPrintCOM = false;
+    //! Print the reference value for each coord
+    bool bPrintRefValue = false;
+    //! Print cartesian components for each coord with geometry=distance
+    bool bPrintComp = false;
+    //! Use the COM of each group from the previous step as reference
+    bool bSetPbcRefToPrevStepCOM = false;
+    //! Output interval for pull x
+    int nstxout = 0;
+    //! Output interval for pull f
+    int nstfout = 0;
+    //! Write the average coordinate during the output interval
+    bool bXOutAverage = false;
+    //! Write the average force during the output interval
+    bool bFOutAverage = false;
+    //! groups to pull/restrain/etc/
+    std::vector<t_pull_group> group;
+    //! the pull coordinates
+    std::vector<t_pull_coord> coord;
  };
  
  /*! \endcond */
diff --git a/src/gromacs/nbnxm/benchmark/bench_setup.cpp b/src/gromacs/nbnxm/benchmark/bench_setup.cpp

index ee4427e9b255438108ffeec00ac835afecd1d395..9fffb257ea21f4f4db102c79b1b4d7e1d225fba8 100644 (file)
--- a/src/gromacs/nbnxm/benchmark/bench_setup.cpp
+++ b/src/gromacs/nbnxm/benchmark/bench_setup.cpp
@@ -100,6 +100,11 @@ static std::optional<std::string> checkKernelSetup(const KernelBenchOptions& opt
          return "the requested SIMD kernel was not set up at configuration time";
      }
  
+    if (options.reportTime && (0 > gmx_cycles_calibrate(1.0)))
+    {
+        return "the -time option is not supported on this system";
+    }
+
      return {};
  }
  
@@ -304,6 +309,28 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem& system,
                  options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
                  options.useHalfLJOptimization ? "half" : "all",
                  combruleNames[options.ljCombinationRule].c_str(), kernelNames[options.nbnxmSimd].c_str());
+        if (!options.outputFile.empty())
+        {
+            fprintf(system.csv,
+                    "\"%d\",\"%zu\",\"%g\",\"%d\",\"%d\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%"
+                    "s\",",
+#if GMX_SIMD
+                    (options.nbnxmSimd != BenchMarkKernels::SimdNo) ? GMX_SIMD_REAL_WIDTH : 0,
+#else
+                    0,
+#endif
+                    system.coordinates.size(), options.pairlistCutoff, options.numThreads,
+                    options.numIterations, options.computeVirialAndEnergy ? "yes" : "no",
+                    (options.coulombType != BenchMarkCoulomb::ReactionField)
+                            ? ((options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr)
+                                       ? "table"
+                                       : "analytical")
+                            : "",
+                    options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
+                    options.useHalfLJOptimization ? "half" : "all",
+                    combruleNames[options.ljCombinationRule].c_str(),
+                    kernelNames[options.nbnxmSimd].c_str());
+        }
      }
  
      // Run pre-iteration to avoid cache misses
@@ -326,18 +353,50 @@ static void setupAndRunInstance(const gmx::BenchmarkSystem& system,
      cycles = gmx_cycles_read() - cycles;
      if (!doWarmup)
      {
-        const double dCycles = static_cast<double>(cycles);
-        if (options.cyclesPerPair)
+        if (options.reportTime)
          {
-            fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", cycles * 1e-6,
-                    dCycles / options.numIterations * 1e-6, dCycles / (options.numIterations * numPairs),
-                    dCycles / (options.numIterations * numUsefulPairs));
+            const double uSec = static_cast<double>(cycles) * gmx_cycles_calibrate(1.0) * 1.e6;
+            if (options.cyclesPerPair)
+            {
+                fprintf(stdout, "%13.2f %13.3f %10.3f %10.3f\n", uSec, uSec / options.numIterations,
+                        uSec / (options.numIterations * numPairs),
+                        uSec / (options.numIterations * numUsefulPairs));
+                if (!options.outputFile.empty())
+                {
+                    fprintf(system.csv, "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n", uSec,
+                            uSec / options.numIterations, uSec / (options.numIterations * numPairs),
+                            uSec / (options.numIterations * numUsefulPairs));
+                }
+            }
+            else
+            {
+                fprintf(stdout, "%13.2f %13.3f %10.3f %10.3f\n", uSec, uSec / options.numIterations,
+                        options.numIterations * numPairs / uSec,
+                        options.numIterations * numUsefulPairs / uSec);
+                if (!options.outputFile.empty())
+                {
+                    fprintf(system.csv, "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n", uSec,
+                            uSec / options.numIterations, options.numIterations * numPairs / uSec,
+                            options.numIterations * numUsefulPairs / uSec);
+                }
+            }
          }
          else
          {
-            fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", dCycles * 1e-6,
-                    dCycles / options.numIterations * 1e-6, options.numIterations * numPairs / dCycles,
-                    options.numIterations * numUsefulPairs / dCycles);
+            const double dCycles = static_cast<double>(cycles);
+            if (options.cyclesPerPair)
+            {
+                fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", cycles * 1e-6,
+                        dCycles / options.numIterations * 1e-6,
+                        dCycles / (options.numIterations * numPairs),
+                        dCycles / (options.numIterations * numUsefulPairs));
+            }
+            else
+            {
+                fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", dCycles * 1e-6,
+                        dCycles / options.numIterations * 1e-6, options.numIterations * numPairs / dCycles,
+                        options.numIterations * numUsefulPairs / dCycles);
+            }
          }
      }
  }
@@ -348,7 +407,7 @@ void bench(const int sizeFactor, const KernelBenchOptions& options)
      gmx_omp_nthreads_set(emntPairsearch, options.numThreads);
      gmx_omp_nthreads_set(emntNonbonded, options.numThreads);
  
-    const gmx::BenchmarkSystem system(sizeFactor);
+    const gmx::BenchmarkSystem system(sizeFactor, options.outputFile);
  
      real minBoxSize = norm(system.box[XX]);
      for (int dim = YY; dim < DIM; dim++)
@@ -413,14 +472,46 @@ void bench(const int sizeFactor, const KernelBenchOptions& options)
          setupAndRunInstance(system, optionsList[0], true);
      }
  
-    fprintf(stdout, "Coulomb LJ   comb. SIMD    Mcycles  Mcycles/it.   %s\n",
-            options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
-    fprintf(stdout, "                                                total    useful\n");
+    if (options.reportTime)
+    {
+        fprintf(stdout, "Coulomb LJ   comb. SIMD       usec         usec/it.        %s\n",
+                options.cyclesPerPair ? "usec/pair" : "pairs/usec");
+        if (!options.outputFile.empty())
+        {
+            fprintf(system.csv,
+                    "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
+                    "energy\",\"Ewald excl. "
+                    "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"usec\",\"usec/it\",\"total "
+                    "pairs/usec\",\"useful pairs/usec\"\n");
+        }
+        fprintf(stdout,
+                "                                                        total      useful\n");
+    }
+    else
+    {
+        fprintf(stdout, "Coulomb LJ   comb. SIMD    Mcycles  Mcycles/it.   %s\n",
+                options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
+        if (!options.outputFile.empty())
+        {
+            fprintf(system.csv,
+                    "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
+                    "energy\",\"Ewald excl. "
+                    "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"Mcycles\",\"Mcycles/"
+                    "it\",\"total "
+                    "total cycles/pair\",\"total cycles per useful pair\"\n");
+        }
+        fprintf(stdout, "                                                total    useful\n");
+    }
  
      for (const auto& optionsInstance : optionsList)
      {
          setupAndRunInstance(system, optionsInstance, false);
      }
+
+    if (!options.outputFile.empty())
+    {
+        fclose(system.csv);
+    }
  }
  
  } // namespace Nbnxm
diff --git a/src/gromacs/nbnxm/benchmark/bench_setup.h b/src/gromacs/nbnxm/benchmark/bench_setup.h

index 2e33352376579ff74a89a900b12477b2b7d907c2..d7b0f296fb089388254fdd54a50792e878d0ed11 100644 (file)
--- a/src/gromacs/nbnxm/benchmark/bench_setup.h
+++ b/src/gromacs/nbnxm/benchmark/bench_setup.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -46,6 +46,8 @@
  #ifndef GMX_NBNXN_BENCH_SETUP_H
  #define GMX_NBNXN_BENCH_SETUP_H
  
+#include <string>
+
  #include "gromacs/utility/real.h"
  
  namespace Nbnxm
@@ -113,6 +115,10 @@ struct KernelBenchOptions
      int numWarmupIterations = 0;
      //! Print cycles/pair instead of pairs/cycle
      bool cyclesPerPair = false;
+    //! Report in microseconds instead of cycles
+    bool reportTime = false;
+    //! Also report into a csv file
+    std::string outputFile;
  };
  
  /*! \brief
diff --git a/src/gromacs/nbnxm/benchmark/bench_system.cpp b/src/gromacs/nbnxm/benchmark/bench_system.cpp

index 553f4cafd26df838b92570d1e4187bfe4bc5bc98..de4d738969b6e6daeebc94a9805a857c0682fb3f 100644 (file)
--- a/src/gromacs/nbnxm/benchmark/bench_system.cpp
+++ b/src/gromacs/nbnxm/benchmark/bench_system.cpp
@@ -150,7 +150,7 @@ static void generateCoordinates(int multiplicationFactor, std::vector<gmx::RVec>
      }
  }
  
-BenchmarkSystem::BenchmarkSystem(const int multiplicationFactor)
+BenchmarkSystem::BenchmarkSystem(const int multiplicationFactor, const std::string& outputFile)
  {
      numAtomTypes = 2;
      nonbondedParameters.resize(numAtomTypes * numAtomTypes * 2, 0);
@@ -199,6 +199,10 @@ BenchmarkSystem::BenchmarkSystem(const int multiplicationFactor)
      forceRec.nbfp  = nonbondedParameters;
      snew(forceRec.shift_vec, SHIFTS);
      calc_shifts(box, forceRec.shift_vec);
+    if (!outputFile.empty())
+    {
+        csv = fopen(outputFile.c_str(), "w+");
+    }
  }
  
  } // namespace gmx
diff --git a/src/gromacs/nbnxm/benchmark/bench_system.h b/src/gromacs/nbnxm/benchmark/bench_system.h

index adcc85d4ffacc443b36b48dc8040e09ffa2fb1aa..acf02326699b1ba27e85e788e62dd2b58f7b6c10 100644 (file)
--- a/src/gromacs/nbnxm/benchmark/bench_system.h
+++ b/src/gromacs/nbnxm/benchmark/bench_system.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -44,6 +44,7 @@
  #ifndef GMX_NBNXN_BENCH_SYSTEM_H
  #define GMX_NBNXN_BENCH_SYSTEM_H
  
+#include <string>
  #include <vector>
  
  #include "gromacs/math/vectypes.h"
@@ -64,8 +65,9 @@ struct BenchmarkSystem
       * with 3000 atoms total.
       *
       * \param[in] multiplicationFactor  Should be a power of 2, is checked
+     * \param[in] outputFile            The name of the csv file to write benchmark results
       */
-    BenchmarkSystem(int multiplicationFactor);
+    BenchmarkSystem(int multiplicationFactor, const std::string& outputFile);
  
      //! Number of different atom types in test system.
      int numAtomTypes;
@@ -87,6 +89,8 @@ struct BenchmarkSystem
      matrix box;
      //! Forcerec with only the entries used in the benchmark set
      t_forcerec forceRec;
+    //! CSV output file handle, nullptr when no output file name was given to the constructor
+    FILE* csv = nullptr;
  };
  
  } // namespace gmx
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh

index db3fb4a939ddcc5ae1704241fe9028bc61ef06d6..9a9ffc6c1ce8ca46bd4f726eb4fab006e1c2d2cd 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh
+++ b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh
@@ -117,53 +117,3 @@ static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
          }
      }
  }
-
-/*! \brief CUDA kernel to sum up the force components
- *
- * \tparam        accumulateForce  If the initial forces in \p gm_fTotal should be saved.
- * \tparam        addPmeForce      Whether the PME force should be added to the total.
- *
- * \param[in]     gm_fNB     Non-bonded forces in nbnxm format.
- * \param[in]     gm_fPme    PME forces.
- * \param[in,out] gm_fTotal  Force buffer to be reduced into.
- * \param[in]     cell       Cell index mapping.
- * \param[in]     atomStart  Start atom index.
- * \param[in]     numAtoms   Number of atoms.
- */
-template<bool accumulateForce, bool addPmeForce>
-static __global__ void nbnxn_gpu_add_nbat_f_to_f_kernel(const float3* __restrict__ gm_fNB,
-                                                        const float3* __restrict__ gm_fPme,
-                                                        float3* gm_fTotal,
-                                                        const int* __restrict__ gm_cell,
-                                                        const int atomStart,
-                                                        const int numAtoms)
-{
-
-    /* map particle-level parallelism to 1D CUDA thread and block index */
-    const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
-
-    /* perform addition for each particle*/
-    if (threadIndex < numAtoms)
-    {
-
-        const int i        = gm_cell[atomStart + threadIndex];
-        float3*   gm_fDest = &gm_fTotal[atomStart + threadIndex];
-        float3    temp;
-
-        if (accumulateForce)
-        {
-            temp = *gm_fDest;
-            temp += gm_fNB[i];
-        }
-        else
-        {
-            temp = gm_fNB[i];
-        }
-        if (addPmeForce)
-        {
-            temp += gm_fPme[atomStart + threadIndex];
-        }
-        *gm_fDest = temp;
-    }
-    return;
-}
diff --git a/src/gromacs/nbnxm/opencl/CMakeLists.txt b/src/gromacs/nbnxm/opencl/CMakeLists.txt

index 69d86e96ba2bcbaa229bef10f3b5acd7e369af94..dab11de27837bc842a8dafe168f84117ef0b6ea6 100644 (file)
--- a/src/gromacs/nbnxm/opencl/CMakeLists.txt
+++ b/src/gromacs/nbnxm/opencl/CMakeLists.txt
@@ -79,7 +79,7 @@ foreach(ELEC_DEF IN LISTS ELEC_DEFS)
                  -Xclang -finclude-default-header  -D_${VENDOR}_SOURCE_
                  -DGMX_OCL_FASTGEN ${ELEC_DEF} ${VDW_DEF}
                  -Dc_nbnxnGpuClusterSize=${CLUSTER_SIZE}
-                -Dc_nbnxnMinDistanceSquared=3.82e-07F
+                -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=3.82e-07
                  -Dc_nbnxnGpuNumClusterPerSupercluster=8
                  -Dc_nbnxnGpuJgroupSize=4
                  -DIATYPE_SHMEM
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp

index 359683337a41829e07d97f34e095fbfe4d590024..48e8ed03ad37e1189abc522330af67ef03932779 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp
@@ -899,7 +899,7 @@ void gpu_launch_cpyback(NbnxmGpu*                nb,
      /* DtoH f */
      GMX_ASSERT(sizeof(*nbatom->out[0].f.data()) == sizeof(float),
                 "The host force buffer should be in single precision to match device data size.");
-    copyFromDeviceBuffer(&nbatom->out[0].f.data()[adat_begin * DIM], &adat->f, adat_begin * DIM,
+    copyFromDeviceBuffer(&nbatom->out[0].f[adat_begin * DIM], &adat->f, adat_begin * DIM,
                           adat_len * DIM, deviceStream, GpuApiCallBehavior::Async,
                           bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
  
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp

index 19b861db0ce02ce59408146b07e406cdd20ad2b5..29989c8095b5bbf0a0117b39902e223c45739db8 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp
@@ -622,15 +622,19 @@ void gpu_free(NbnxmGpu* nb)
      }
  
      /* Free kernels */
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
      int kernel_count = sizeof(nb->kernel_ener_noprune_ptr) / sizeof(nb->kernel_ener_noprune_ptr[0][0]);
      free_kernels(nb->kernel_ener_noprune_ptr[0], kernel_count);
  
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
      kernel_count = sizeof(nb->kernel_ener_prune_ptr) / sizeof(nb->kernel_ener_prune_ptr[0][0]);
      free_kernels(nb->kernel_ener_prune_ptr[0], kernel_count);
  
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
      kernel_count = sizeof(nb->kernel_noener_noprune_ptr) / sizeof(nb->kernel_noener_noprune_ptr[0][0]);
      free_kernels(nb->kernel_noener_noprune_ptr[0], kernel_count);
  
+    // NOLINTNEXTLINE(bugprone-sizeof-expression)
      kernel_count = sizeof(nb->kernel_noener_prune_ptr) / sizeof(nb->kernel_noener_prune_ptr[0][0]);
      free_kernels(nb->kernel_noener_prune_ptr[0], kernel_count);
  
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp

index 40eb905184bd1c5ae2405639f5a288f76b818b78..18c583937b6e50caf78c994d0ea3c59461b0b4b3 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp
@@ -187,10 +187,13 @@ void nbnxn_gpu_compile_kernels(NbnxmGpu* nb)
          /* Here we pass macros and static const/constexpr int variables defined
           * in include files outside the opencl as macros, to avoid
           * including those files in the plain-C JIT compilation that happens
-         * at runtime. */
+         * at runtime.
+         * Note that we need to re-add the suffix to the floating point literals
+         * passed to the kernel to avoid type ambiguity.
+         */
          extraDefines += gmx::formatString(
                  " -Dc_nbnxnGpuClusterSize=%d"
-                " -Dc_nbnxnMinDistanceSquared=%g"
+                " -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=%g"
                  " -Dc_nbnxnGpuNumClusterPerSupercluster=%d"
                  " -Dc_nbnxnGpuJgroupSize=%d"
                  "%s",
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh

index b238843a91efe632b73285eca77573db6689c97e..11516fd4f1caf4214b93f3b5a288d20332e5e1a2 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh
@@ -254,10 +254,12 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
  #endif
      barrier(CLK_LOCAL_MEM_FENCE);
  
-    float3 fci_buf[c_nbnxnGpuNumClusterPerSupercluster]; /* i force buffer */
+    fvec fci_buf[c_nbnxnGpuNumClusterPerSupercluster]; /* i force buffer */
      for (int ci_offset = 0; ci_offset < c_nbnxnGpuNumClusterPerSupercluster; ci_offset++)
      {
-        fci_buf[ci_offset] = (float3)(0.0F);
+        fci_buf[ci_offset][0] = 0.0F;
+        fci_buf[ci_offset][1] = 0.0F;
+        fci_buf[ci_offset][2] = 0.0F;
      }
  
  #ifdef LJ_EWALD
@@ -415,19 +417,18 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
                                  const float c12    = ljcp_i.y * ljcp_j.y;
  #    else
                                  /* LJ 2^(1/6)*sigma and 12*epsilon */
-                                const float sigma   = ljcp_i.x + ljcp_j.x;
-                                const float epsilon = ljcp_i.y * ljcp_j.y;
+                                const float  sigma   = ljcp_i.x + ljcp_j.x;
+                                const float  epsilon = ljcp_i.y * ljcp_j.y;
  #        if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
-                                float       c6, c12;
-                                convert_sigma_epsilon_to_c6_c12(sigma, epsilon, &c6, &c12);
+                                const float2 c6c12 = convert_sigma_epsilon_to_c6_c12(sigma, epsilon);
+                                const float  c6    = c6c12.x;
+                                const float  c12   = c6c12.y;
  #        endif
  #    endif /* LJ_COMB_GEOM */
  #endif     /* LJ_COMB */
  
                                  // Ensure distance do not become so small that r^-12 overflows.
-                                // Cast to float to ensure the correct built-in max() function
-                                // is called.
-                                r2 = max(r2, (float)c_nbnxnMinDistanceSquared);
+                                r2 = max(r2, c_nbnxnMinDistanceSquared);
  
                                  const float inv_r  = rsqrt(r2);
                                  const float inv_r2 = inv_r * inv_r;
@@ -555,7 +556,9 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
                                  fcj_buf -= f_ij;
  
                                  /* accumulate i forces in registers */
-                                fci_buf[i] += f_ij;
+                                fci_buf[i][0] += f_ij.x;
+                                fci_buf[i][1] += f_ij.y;
+                                fci_buf[i][2] += f_ij.z;
                              }
                          }
  
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh

index 16dd0c4962dbf0b853403b9ec955b6029b1d4e16..dfafdcbf28d2ca2adf115661b94a47870a3f1078 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh
@@ -166,7 +166,7 @@ nbnxn_kernel_prune_rolling_opencl
      /* loop over the j clusters = seen by any of the atoms in the current super-cluster */
      for (int j4 = cij4_start + tidxz; j4 < cij4_end; j4 += NTHREAD_Z)
      {
-        unsigned int imaskFull, imaskCheck, imaskNew;
+        unsigned int imaskFull = 0, imaskCheck = 0, imaskNew = 0;
  
          if (haveFreshList)
          {
diff --git a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh

index 7d70a5908abf11b4595df527e7560e7e1fdb38a9..46c08c2d715020cb014767e2d35c37e37686cf89 100644 (file)
--- a/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
+++ b/src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh
@@ -122,6 +122,11 @@
  #        define gmx_unused
  #    endif
  
+/*! \brief Single precision floating point short vector type (as rvec in the CPU codebase).
+ *   Currently only used to avoid float3 register arrays.
+ */
+typedef float fvec[3];
+
  // Data structures shared between OpenCL device code and OpenCL host code
  // TODO: review, improve
  // Replaced real by float for now, to avoid including any other header
@@ -234,6 +239,9 @@ typedef struct
  /*! i-cluster interaction mask for a super-cluster with all c_nbnxnGpuNumClusterPerSupercluster bits set */
  __constant unsigned supercl_interaction_mask = ((1U << c_nbnxnGpuNumClusterPerSupercluster) - 1U);
  
+/*! Minimum single precision threshold for r^2 to avoid r^-12 overflow. */
+__constant float c_nbnxnMinDistanceSquared = NBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT;
+
  gmx_opencl_inline void preloadCj4Generic(__local int*        sm_cjPreload,
                                           const __global int* gm_cj,
                                           int                 tidxi,
@@ -327,14 +335,14 @@ loadCj(CjType cjs, const __global int gmx_unused* gm_cj, int jm, int gmx_unused
  }
  
  /*! Convert LJ sigma,epsilon parameters to C6,C12. */
-gmx_opencl_inline void convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon, float* c6, float* c12)
-{
-    float sigma2, sigma6;
-
-    sigma2 = sigma * sigma;
-    sigma6 = sigma2 * sigma2 * sigma2;
-    *c6    = epsilon * sigma6;
-    *c12   = *c6 * sigma6;
+gmx_opencl_inline float2 convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon)
+{
+    const float  sigma2 = sigma * sigma;
+    const float  sigma6 = sigma2 * sigma2 * sigma2;
+    const float  c6     = epsilon * sigma6;
+    const float2 c6c12  = (float2)(c6,           /* c6 */
+                                  c6 * sigma6); /* c12 */
+    return c6c12;
  }
  
  
@@ -346,17 +354,15 @@ gmx_opencl_inline void calculate_force_switch_F(const cl_nbparam_params_t* nbpar
                                                  float                      r2,
                                                  float*                     F_invr)
  {
-    float r, r_switch;
-
      /* force switch constants */
-    float disp_shift_V2 = nbparam->dispersion_shift.c2;
-    float disp_shift_V3 = nbparam->dispersion_shift.c3;
-    float repu_shift_V2 = nbparam->repulsion_shift.c2;
-    float repu_shift_V3 = nbparam->repulsion_shift.c3;
+    const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+    const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+    const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+    const float repu_shift_V3 = nbparam->repulsion_shift.c3;
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
  
      *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                 + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
@@ -371,22 +377,20 @@ gmx_opencl_inline void calculate_force_switch_F_E(const cl_nbparam_params_t* nbp
                                                    float*                     F_invr,
                                                    float*                     E_lj)
  {
-    float r, r_switch;
-
      /* force switch constants */
-    float disp_shift_V2 = nbparam->dispersion_shift.c2;
-    float disp_shift_V3 = nbparam->dispersion_shift.c3;
-    float repu_shift_V2 = nbparam->repulsion_shift.c2;
-    float repu_shift_V3 = nbparam->repulsion_shift.c3;
+    const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+    const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+    const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+    const float repu_shift_V3 = nbparam->repulsion_shift.c3;
  
-    float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
-    float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
-    float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
-    float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
+    const float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
+    const float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
+    const float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
+    const float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
  
      *F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
                 + c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
@@ -401,25 +405,24 @@ gmx_opencl_inline void calculate_potential_switch_F(const cl_nbparam_params_t* n
                                                      float*                     F_invr,
                                                      const float*               E_lj)
  {
-    float r, r_switch;
-    float sw, dsw;
-
      /* potential switch constants */
-    float switch_V3 = nbparam->vdw_switch.c3;
-    float switch_V4 = nbparam->vdw_switch.c4;
-    float switch_V5 = nbparam->vdw_switch.c5;
-    float switch_F2 = nbparam->vdw_switch.c3;
-    float switch_F3 = nbparam->vdw_switch.c4;
-    float switch_F4 = nbparam->vdw_switch.c5;
+    const float switch_V3 = nbparam->vdw_switch.c3;
+    const float switch_V4 = nbparam->vdw_switch.c4;
+    const float switch_V5 = nbparam->vdw_switch.c5;
+    const float switch_F2 = nbparam->vdw_switch.c3;
+    const float switch_F3 = nbparam->vdw_switch.c4;
+    const float switch_F4 = nbparam->vdw_switch.c5;
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
+    const float r        = r2 * inv_r;
+    const float r_switch = r - nbparam->rvdw_switch;
  
      /* Unlike in the F+E kernel, conditional is faster here */
      if (r_switch > 0.0F)
      {
-        sw  = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
-        dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+        const float sw = 1.0F
+                         + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch
+                                   * r_switch * r_switch;
+        const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
  
          *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
      }
@@ -432,24 +435,22 @@ gmx_opencl_inline void calculate_potential_switch_F_E(const cl_nbparam_params_t*
                                                        float*                     F_invr,
                                                        float*                     E_lj)
  {
-    float r, r_switch;
-    float sw, dsw;
-
      /* potential switch constants */
-    float switch_V3 = nbparam->vdw_switch.c3;
-    float switch_V4 = nbparam->vdw_switch.c4;
-    float switch_V5 = nbparam->vdw_switch.c5;
-    float switch_F2 = nbparam->vdw_switch.c3;
-    float switch_F3 = nbparam->vdw_switch.c4;
-    float switch_F4 = nbparam->vdw_switch.c5;
+    const float switch_V3 = nbparam->vdw_switch.c3;
+    const float switch_V4 = nbparam->vdw_switch.c4;
+    const float switch_V5 = nbparam->vdw_switch.c5;
+    const float switch_F2 = nbparam->vdw_switch.c3;
+    const float switch_F3 = nbparam->vdw_switch.c4;
+    const float switch_F4 = nbparam->vdw_switch.c5;
  
-    r        = r2 * inv_r;
-    r_switch = r - nbparam->rvdw_switch;
-    r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+    const float r        = r2 * inv_r;
+    float       r_switch = r - nbparam->rvdw_switch;
+    r_switch             = r_switch >= 0.0F ? r_switch : 0.0F;
  
      /* Unlike in the F-only kernel, masking is faster here */
-    sw  = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
-    dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+    const float sw =
+            1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
+    const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
  
      *F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
      *E_lj *= sw;
@@ -467,15 +468,13 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F(__constant const float* nb
                                                        float                   lje_coeff6_6,
                                                        float*                  F_invr)
  {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly;
-
-    c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+    const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
  
      /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
@@ -496,21 +495,19 @@ gmx_opencl_inline void calculate_lj_ewald_comb_geom_F_E(__constant const float*
                                                          float*                     F_invr,
                                                          float*                     E_lj)
  {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask;
-
-    c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+    const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
  
      /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
  
      /* Shift should be applied only to real LJ pairs */
-    sh_mask = nbparam->sh_lj_ewald * int_bit;
+    const float sh_mask = nbparam->sh_lj_ewald * int_bit;
      *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
  }
  
@@ -532,32 +529,28 @@ gmx_opencl_inline void calculate_lj_ewald_comb_LB_F_E(__constant const float*
                                                        float*                     F_invr,
                                                        float*                     E_lj)
  {
-    float c6grid, inv_r6_nm, cr2, expmcr2, poly;
-    float sigma, sigma2, epsilon;
-
      /* sigma and epsilon are scaled to give 6*C6 */
-    sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
+    const float sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
  
-    epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
+    const float epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
  
-    sigma2 = sigma * sigma;
-    c6grid = epsilon * sigma2 * sigma2 * sigma2;
+    const float sigma2 = sigma * sigma;
+    const float c6grid = epsilon * sigma2 * sigma2 * sigma2;
  
      /* Recalculate inv_r6 without exclusion mask */
-    inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
-    cr2       = lje_coeff2 * r2;
-    expmcr2   = exp(-cr2);
-    poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
+    const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+    const float cr2       = lje_coeff2 * r2;
+    const float expmcr2   = exp(-cr2);
+    const float poly      = 1.0F + cr2 + HALF_F * cr2 * cr2;
  
      /* Subtract the grid force from the total LJ force */
      *F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
  
      if (with_E_lj)
      {
-        float sh_mask;
  
          /* Shift should be applied only to real LJ pairs */
-        sh_mask = nbparam->sh_lj_ewald * int_bit;
+        const float sh_mask = nbparam->sh_lj_ewald * int_bit;
          *E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
      }
  }
@@ -594,24 +587,21 @@ gmx_opencl_inline float pmecorrF(float z2)
      const float FD1 = 0.50736591960530292870F;
      const float FD0 = 1.0F;
  
-    float z4;
-    float polyFN0, polyFN1, polyFD0, polyFD1;
-
-    z4 = z2 * z2;
+    const float z4 = z2 * z2;
  
-    polyFD0 = FD4 * z4 + FD2;
-    polyFD1 = FD3 * z4 + FD1;
-    polyFD0 = polyFD0 * z4 + FD0;
-    polyFD0 = polyFD1 * z2 + polyFD0;
+    float polyFD0 = FD4 * z4 + FD2;
+    float polyFD1 = FD3 * z4 + FD1;
+    polyFD0       = polyFD0 * z4 + FD0;
+    polyFD0       = polyFD1 * z2 + polyFD0;
  
      polyFD0 = 1.0F / polyFD0;
  
-    polyFN0 = FN6 * z4 + FN4;
-    polyFN1 = FN5 * z4 + FN3;
-    polyFN0 = polyFN0 * z4 + FN2;
-    polyFN1 = polyFN1 * z4 + FN1;
-    polyFN0 = polyFN0 * z4 + FN0;
-    polyFN0 = polyFN1 * z2 + polyFN0;
+    float polyFN0 = FN6 * z4 + FN4;
+    float polyFN1 = FN5 * z4 + FN3;
+    polyFN0       = polyFN0 * z4 + FN2;
+    polyFN1       = polyFN1 * z4 + FN1;
+    polyFN0       = polyFN0 * z4 + FN0;
+    polyFN0       = polyFN1 * z2 + polyFN0;
  
      return polyFN0 * polyFD0;
  }
@@ -684,7 +674,7 @@ gmx_opencl_inline void reduce_force_j(__local float gmx_unused* f_buf,
  }
  
  #    if REDUCE_SHUFFLE
-gmx_opencl_inline void reduce_force_i_and_shift_shfl(float3*         fci_buf,
+gmx_opencl_inline void reduce_force_i_and_shift_shfl(__private fvec  fci_buf[],
                                                       __global float* fout,
                                                       bool            bCalcFshift,
                                                       int             tidxi,
@@ -699,7 +689,7 @@ gmx_opencl_inline void reduce_force_i_and_shift_shfl(float3*         fci_buf,
      for (int ci_offset = 0; ci_offset < c_nbnxnGpuNumClusterPerSupercluster; ci_offset++)
      {
          int    aidx = (sci * c_nbnxnGpuNumClusterPerSupercluster + ci_offset) * CL_SIZE + tidxi;
-        float3 fin  = fci_buf[ci_offset];
+        float3 fin  = (float3)(fci_buf[ci_offset][0], fci_buf[ci_offset][1], fci_buf[ci_offset][2]);
          fin.x += intel_sub_group_shuffle_down(fin.x, fin.x, CL_SIZE);
          fin.y += intel_sub_group_shuffle_up(fin.y, fin.y, CL_SIZE);
          fin.z += intel_sub_group_shuffle_down(fin.z, fin.z, CL_SIZE);
@@ -742,14 +732,14 @@ gmx_opencl_inline void reduce_force_i_and_shift_shfl(float3*         fci_buf,
   *  array sizes.
   */
  gmx_opencl_inline void reduce_force_i_and_shift_pow2(volatile __local float* f_buf,
-                                                     float3*                 fci_buf,
-                                                     __global float*         fout,
-                                                     bool                    bCalcFshift,
-                                                     int                     tidxi,
-                                                     int                     tidxj,
-                                                     int                     sci,
-                                                     int                     shift,
-                                                     __global float*         fshift)
+                                                     __private fvec  fci_buf[],
+                                                     __global float* fout,
+                                                     bool            bCalcFshift,
+                                                     int             tidxi,
+                                                     int             tidxj,
+                                                     int             sci,
+                                                     int             shift,
+                                                     __global float* fshift)
  {
      float fshift_buf = 0;
      for (int ci_offset = 0; ci_offset < c_nbnxnGpuNumClusterPerSupercluster; ci_offset++)
@@ -757,9 +747,9 @@ gmx_opencl_inline void reduce_force_i_and_shift_pow2(volatile __local float* f_b
          int aidx = (sci * c_nbnxnGpuNumClusterPerSupercluster + ci_offset) * CL_SIZE + tidxi;
          int tidx = tidxi + tidxj * CL_SIZE;
          /* store i forces in shmem */
-        f_buf[tidx]                   = fci_buf[ci_offset].x;
-        f_buf[FBUF_STRIDE + tidx]     = fci_buf[ci_offset].y;
-        f_buf[2 * FBUF_STRIDE + tidx] = fci_buf[ci_offset].z;
+        f_buf[tidx]                   = fci_buf[ci_offset][0];
+        f_buf[FBUF_STRIDE + tidx]     = fci_buf[ci_offset][1];
+        f_buf[2 * FBUF_STRIDE + tidx] = fci_buf[ci_offset][2];
          barrier(CLK_LOCAL_MEM_FENCE);
  
          /* Reduce the initial CL_SIZE values for each i atom to half
@@ -821,14 +811,14 @@ gmx_opencl_inline void reduce_force_i_and_shift_pow2(volatile __local float* f_b
  /*! Final i-force reduction
   */
  gmx_opencl_inline void reduce_force_i_and_shift(__local float gmx_unused* f_buf,
-                                                float3*                   fci_buf,
-                                                __global float*           f,
-                                                bool                      bCalcFshift,
-                                                int                       tidxi,
-                                                int                       tidxj,
-                                                int                       sci,
-                                                int                       shift,
-                                                __global float*           fshift)
+                                                __private fvec  fci_buf[],
+                                                __global float* f,
+                                                bool            bCalcFshift,
+                                                int             tidxi,
+                                                int             tidxj,
+                                                int             sci,
+                                                int             shift,
+                                                __global float* fshift)
  {
  #    if REDUCE_SHUFFLE
      reduce_force_i_and_shift_shfl(fci_buf, f, bCalcFshift, tidxi, tidxj, sci, shift, fshift);
@@ -868,12 +858,10 @@ gmx_opencl_inline void reduce_energy_pow2(volatile __local float*  buf,
                                            volatile __global float* e_el,
                                            int                      tidx)
  {
-    int j;
-
      int i = WARP_SIZE / 2;
  
      /* Can't just use i as loop variable because than nvcc refuses to unroll. */
-    for (j = WARP_SIZE_LOG2 - 1; j > 0; j--)
+    for (int j = WARP_SIZE_LOG2 - 1; j > 0; j--)
      {
          if (tidx < i)
          {
diff --git a/src/gromacs/nbnxm/pairlist.cpp b/src/gromacs/nbnxm/pairlist.cpp

index aa29c61cdee6f7057a5014fbdaf4139fc09ce19c..3f9a9f21c563926dfcfcd5df2ad18b0a43584da5 100644 (file)
--- a/src/gromacs/nbnxm/pairlist.cpp
+++ b/src/gromacs/nbnxm/pairlist.cpp
@@ -2873,7 +2873,7 @@ static float boundingbox_only_distance2(const Grid::Dimensions& iGridDims,
  #if !GMX_DOUBLE
      return rbb2;
  #else
-    return (float)((1 + GMX_FLOAT_EPS) * rbb2);
+    return static_cast<float>((1 + GMX_FLOAT_EPS) * rbb2);
  #endif
  }
  
diff --git a/src/gromacs/options/filenameoption.cpp b/src/gromacs/options/filenameoption.cpp

index acbb852be33e58d95ac6794f68ba6b2c1422acde..9df3722459ea383fae4194513bd7b66b089c2182 100644 (file)
--- a/src/gromacs/options/filenameoption.cpp
+++ b/src/gromacs/options/filenameoption.cpp
@@ -83,7 +83,8 @@ struct FileTypeMapping
  const FileTypeMapping c_fileTypeMapping[] = { { eftTopology, efTPS },   { eftRunInput, efTPR },
                                                { eftTrajectory, efTRX }, { eftEnergy, efEDR },
                                                { eftPDB, efPDB },        { eftIndex, efNDX },
-                                              { eftPlot, efXVG },       { eftGenericData, efDAT } };
+                                              { eftPlot, efXVG },       { eftGenericData, efDAT },
+                                              { eftCsv, efCSV } };
  
  /********************************************************************
   * FileTypeHandler
diff --git a/src/gromacs/options/optionfiletype.h b/src/gromacs/options/optionfiletype.h

index 99357478a1218ac6107253516c8b6770bae278e5..cc8e4d5dc4a37d464cf6f1bb7dddd96cd500ec4f 100644 (file)
--- a/src/gromacs/options/optionfiletype.h
+++ b/src/gromacs/options/optionfiletype.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2010,2011,2012,2015,2019, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2015,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -62,6 +62,7 @@ enum OptionFileType
      eftIndex,
      eftPlot,
      eftGenericData,
+    eftCsv,
      eftOptionFileType_NR
  };
  
diff --git a/src/gromacs/options/tests/filenameoption.cpp b/src/gromacs/options/tests/filenameoption.cpp

index c8a2afd86a4aef659ab3b64f762169cc85ac90f7..4196277d4c5103b9957cb032e35ed15e99ba756a 100644 (file)
--- a/src/gromacs/options/tests/filenameoption.cpp
+++ b/src/gromacs/options/tests/filenameoption.cpp
@@ -207,4 +207,38 @@ TEST(FileNameOptionTest, GivesErrorOnInvalidFileSuffix)
      EXPECT_TRUE(value.empty());
  }
  
+TEST(FileNameOptionTest, HandlesRequiredCsvValueWithoutExtension)
+{
+    gmx::Options options;
+    std::string  value;
+    ASSERT_NO_THROW_GMX(options.addOption(
+            FileNameOption("f").store(&value).required().filetype(gmx::eftCsv).outputFile().defaultBasename("testfile")));
+    EXPECT_EQ("testfile.csv", value);
+
+    gmx::OptionsAssigner assigner(&options);
+    EXPECT_NO_THROW_GMX(assigner.start());
+    EXPECT_NO_THROW_GMX(assigner.finish());
+    EXPECT_NO_THROW_GMX(options.finish());
+
+    EXPECT_EQ("testfile.csv", value);
+}
+
+TEST(FileNameOptionTest, HandlesRequiredCsvOptionWithoutValue)
+{
+    gmx::Options options;
+    std::string  value;
+    ASSERT_NO_THROW_GMX(options.addOption(
+            FileNameOption("f").store(&value).required().filetype(gmx::eftCsv).outputFile().defaultBasename("testfile")));
+    EXPECT_EQ("testfile.csv", value);
+
+    gmx::OptionsAssigner assigner(&options);
+    EXPECT_NO_THROW_GMX(assigner.start());
+    EXPECT_NO_THROW_GMX(assigner.startOption("f"));
+    EXPECT_NO_THROW_GMX(assigner.finishOption());
+    EXPECT_NO_THROW_GMX(assigner.finish());
+    EXPECT_NO_THROW_GMX(options.finish());
+
+    EXPECT_EQ("testfile.csv", value);
+}
+
  } // namespace
diff --git a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_double.h b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_double.h

index 15b05d7848d469188e70492a1546bbb9c605f90f..dac65b097816f658f7b1b4b4ccca6b47c8215172 100644 (file)
--- a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_double.h
+++ b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_double.h
@@ -60,7 +60,7 @@ namespace
  // shifting. Currently up to 8 is accelerated. Could be accelerated for any
  // number with a constexpr log2 function.
  template<int n>
-SimdDInt32 fastMultiply(SimdDInt32 x)
+static inline SimdDInt32 fastMultiply(SimdDInt32 x)
  {
      if (n == 2)
      {
diff --git a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h

index 99daabee6fcc265daebe4d2c7fa4b3a7850b1695..8c3f9621a1f123ec28bf99a5a872deb34f620223 100644 (file)
--- a/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h
+++ b/src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h
@@ -60,7 +60,7 @@ namespace
  // shifting. Currently up to 8 is accelerated. Could be accelerated for any
  // number with a constexpr log2 function.
  template<int n>
-SimdFInt32 fastMultiply(SimdFInt32 x)
+static inline SimdFInt32 fastMultiply(SimdFInt32 x)
  {
      if (n == 2)
      {
diff --git a/src/gromacs/taskassignment/decidegpuusage.cpp b/src/gromacs/taskassignment/decidegpuusage.cpp

index eb88b85d87d8be21e11694a5df8abff2995b6c48..e22b51a29c5a4205b2a2693de140aa6235295134 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.cpp
+++ b/src/gromacs/taskassignment/decidegpuusage.cpp
@@ -57,6 +57,7 @@
  #include "gromacs/hardware/detecthardware.h"
  #include "gromacs/hardware/hardwaretopology.h"
  #include "gromacs/hardware/hw_info.h"
+#include "gromacs/listed_forces/gpubonded.h"
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdlib/update_constrain_gpu.h"
  #include "gromacs/mdtypes/commrec.h"
@@ -458,27 +459,36 @@ PmeRunMode determinePmeRunMode(const bool useGpuForPme, const TaskTarget& pmeFft
      }
  }
  
-bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
-                                     const bool       useGpuForPme,
-                                     const TaskTarget bondedTarget,
-                                     const bool       canUseGpuForBonded,
-                                     const bool       usingLJPme,
-                                     const bool       usingElecPmeOrEwald,
-                                     const int        numPmeRanksPerSimulation,
-                                     const bool       gpusWereDetected)
+bool decideWhetherToUseGpusForBonded(bool              useGpuForNonbonded,
+                                     bool              useGpuForPme,
+                                     TaskTarget        bondedTarget,
+                                     const t_inputrec& inputrec,
+                                     const gmx_mtop_t& mtop,
+                                     int               numPmeRanksPerSimulation,
+                                     bool              gpusWereDetected)
  {
      if (bondedTarget == TaskTarget::Cpu)
      {
          return false;
      }
  
-    if (!canUseGpuForBonded)
+    std::string errorMessage;
+
+    if (!buildSupportsGpuBondeds(&errorMessage))
      {
          if (bondedTarget == TaskTarget::Gpu)
          {
-            GMX_THROW(InconsistentInputError(
-                    "Bonded interactions on the GPU were required, but not supported for these "
-                    "simulation settings. Change your settings, or do not require using GPUs."));
+            GMX_THROW(InconsistentInputError(errorMessage.c_str()));
+        }
+
+        return false;
+    }
+
+    if (!inputSupportsGpuBondeds(inputrec, mtop, &errorMessage))
+    {
+        if (bondedTarget == TaskTarget::Gpu)
+        {
+            GMX_THROW(InconsistentInputError(errorMessage.c_str()));
          }
  
          return false;
@@ -514,7 +524,8 @@ bool decideWhetherToUseGpusForBonded(const bool       useGpuForNonbonded,
      // Note that here we assume that the auto setting of PME ranks will not
      // choose seperate PME ranks when nonBonded are assigned to the GPU.
      bool usingOurCpuForPmeOrEwald =
-            (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
+            (EVDW_PME(inputrec.vdwtype)
+             || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
  
      return gpusWereDetected && usingOurCpuForPmeOrEwald;
  }
@@ -550,26 +561,19 @@ bool decideWhetherToUseGpuForUpdate(const bool                     isDomainDecom
  
      if (isDomainDecomposition)
      {
-        if (!devFlags.enableGpuHaloExchange)
+        if (hasAnyConstraints && !useUpdateGroups)
          {
-            errorMessage += "Domain decomposition without GPU halo exchange is not supported.\n ";
+            errorMessage +=
+                    "Domain decomposition is only supported with constraints when update "
+                    "groups "
+                    "are used. This means constraining all bonds is not supported, except for "
+                    "small molecules, and box sizes close to half the pair-list cutoff are not "
+                    "supported.\n ";
          }
-        else
-        {
-            if (hasAnyConstraints && !useUpdateGroups)
-            {
-                errorMessage +=
-                        "Domain decomposition is only supported with constraints when update "
-                        "groups "
-                        "are used. This means constraining all bonds is not supported, except for "
-                        "small molecules, and box sizes close to half the pair-list cutoff are not "
-                        "supported.\n ";
-            }
  
-            if (pmeUsesCpu)
-            {
-                errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
-            }
+        if (pmeUsesCpu)
+        {
+            errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
          }
      }
  
@@ -579,11 +583,6 @@ bool decideWhetherToUseGpuForUpdate(const bool                     isDomainDecom
          {
              errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
          }
-
-        if (!devFlags.enableGpuPmePPComm)
-        {
-            errorMessage += "With separate PME rank(s), PME must use direct communication.\n";
-        }
      }
  
      if (inputrec.useMts)
diff --git a/src/gromacs/taskassignment/decidegpuusage.h b/src/gromacs/taskassignment/decidegpuusage.h

index 7dd6ae9b3008fd451dae97f4615f9e33085cb6d7..b5fd83907fae8e834cfb18bd137a20ef8ea5b210 100644 (file)
--- a/src/gromacs/taskassignment/decidegpuusage.h
+++ b/src/gromacs/taskassignment/decidegpuusage.h
@@ -247,9 +247,8 @@ PmeRunMode determinePmeRunMode(bool useGpuForPme, const TaskTarget& pmeFftTarget
   * \param[in]  useGpuForNonbonded        Whether GPUs will be used for nonbonded interactions.
   * \param[in]  useGpuForPme              Whether GPUs will be used for PME interactions.
   * \param[in]  bondedTarget              The user's choice for mdrun -bonded for where to assign tasks.
- * \param[in]  canUseGpuForBonded        Whether the bonded interactions can run on a GPU
- * \param[in]  usingLJPme                Whether Vdw interactions use LJ-PME.
- * \param[in]  usingElecPmeOrEwald       Whether a PME or Ewald type method is used for electrostatics.
+ * \param[in]  inputrec                  The user input.
+ * \param[in]  mtop                      The global topology.
   * \param[in]  numPmeRanksPerSimulation  The number of PME ranks in each simulation, can be -1 for auto.
   * \param[in]  gpusWereDetected          Whether compatible GPUs were detected on any node.
   *
@@ -257,14 +256,13 @@ PmeRunMode determinePmeRunMode(bool useGpuForPme, const TaskTarget& pmeFftTarget
   *
   * \throws     std::bad_alloc          If out of memory
   *             InconsistentInputError  If the user requirements are inconsistent. */
-bool decideWhetherToUseGpusForBonded(bool       useGpuForNonbonded,
-                                     bool       useGpuForPme,
-                                     TaskTarget bondedTarget,
-                                     bool       canUseGpuForBonded,
-                                     bool       usingLJPme,
-                                     bool       usingElecPmeOrEwald,
-                                     int        numPmeRanksPerSimulation,
-                                     bool       gpusWereDetected);
+bool decideWhetherToUseGpusForBonded(bool              useGpuForNonbonded,
+                                     bool              useGpuForPme,
+                                     TaskTarget        bondedTarget,
+                                     const t_inputrec& inputrec,
+                                     const gmx_mtop_t& mtop,
+                                     int               numPmeRanksPerSimulation,
+                                     bool              gpusWereDetected);
  
  /*! \brief Decide whether to use GPU for update.
   *
diff --git a/src/gromacs/timing/cyclecounter.cpp b/src/gromacs/timing/cyclecounter.cpp

index 6c1a0d43669e1265d9d51efcfc250567fc54df9b..30637ade0c622e6a7a2b40d3e90d5568cff457c9 100644 (file)
--- a/src/gromacs/timing/cyclecounter.cpp
+++ b/src/gromacs/timing/cyclecounter.cpp
@@ -65,7 +65,52 @@
   */
  double gmx_cycles_calibrate(double sampletime)
  {
-#ifdef _MSC_VER
+    /* On ARM and recent-generation x86-64, we can use the more accurate cycle counters
+     * that allow better timing for things that depend on it (e.g. load balancing, profiling).
+     */
+#if ((defined __aarch64__) \
+     && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
+    /* 64-bit ARM cycle counters with GCC inline assembly */
+    unsigned long cycles;
+    __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(cycles));
+    /* Only first 32 bits are significant */
+    cycles &= 0xFFFFFFFF;
+    return 1. / cycles;
+    GMX_UNUSED_VALUE(sampletime);
+#else
+#    if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
+         && defined(__x86_64__) && !defined(_CRAYC))
+    long gmx_unused tmp;
+    int             cpuid1;
+    int gmx_unused cpuid2;
+    const int      l0  = 0x0;
+    const int      l16 = 0x16;
+    gmx_cycles_t   cycles;
+
+    /* cpuid clobbers ebx but it must be restored for -fPIC so save
+     * then restore ebx */
+    __asm__ volatile(
+            "xchg %%rbx, %2\n"
+            "cpuid\n"
+            "xchg %%rbx, %2\n"
+            : "=a"(cpuid1), "=d"(cpuid2), "=r"(tmp)
+            : "a"(l0)
+            : "ecx", "ebx");
+    if (cpuid1 >= 0x16)
+    {
+        /* This CPU is recent enough so the timer frequency can be directly queried */
+        __asm__ volatile(
+                "xchg %%rbx, %2\n"
+                "cpuid\n"
+                "xchg %%rbx, %2\n"
+                : "=a"(cpuid1), "=d"(cpuid2), "=r"(tmp)
+                : "a"(l16)
+                : "ecx", "ebx");
+        cycles = static_cast<gmx_cycles_t>(cpuid1) * static_cast<gmx_cycles_t>(1000000);
+        return 1. / cycles;
+    }
+#    endif
+#    ifdef _MSC_VER
  
      /* Windows does not have gettimeofday, but it provides a special
       * routine that returns the cycle counter frequency.
@@ -77,7 +122,7 @@ double gmx_cycles_calibrate(double sampletime)
      return 1.0 / static_cast<double>(i.QuadPart);
      /* end of MS Windows implementation */
  
-#elif HAVE_GETTIMEOFDAY
+#    elif HAVE_GETTIMEOFDAY
  
      /*  generic implementation with gettimeofday() */
      struct timeval t1, t2;
@@ -90,7 +135,7 @@ double gmx_cycles_calibrate(double sampletime)
          return -1;
      }
  
-#    if (defined(__alpha__) || defined(__alpha))
+#        if (defined(__alpha__) || defined(__alpha))
      /* Alpha cannot count to more than 4e9, but I don't expect
       * that the architecture will go over 2GHz before it dies, so
       * up to 2.0 seconds of sampling should be safe.
@@ -99,7 +144,7 @@ double gmx_cycles_calibrate(double sampletime)
      {
          sampletime = 2.0;
      }
-#    endif
+#        endif
  
      /* Start a timing loop. We want this to be largely independent
       * of machine speed, so we need to start with a very small number
@@ -138,9 +183,10 @@ double gmx_cycles_calibrate(double sampletime)
      /* Return seconds per cycle */
      return timediff / cyclediff;
  
-#else
+#    else
      /* No timing function available */
      return -1;
      GMX_UNUSED_VALUE(sampletime);
+#    endif
  #endif
  }
diff --git a/src/gromacs/timing/wallcycle.cpp b/src/gromacs/timing/wallcycle.cpp

index e69df666c1d9f40c340856b8774fbd5b07c3a852..f1b428bf05ccaffc7691d4a36204cef54f5c919c 100644 (file)
--- a/src/gromacs/timing/wallcycle.cpp
+++ b/src/gromacs/timing/wallcycle.cpp
@@ -180,6 +180,7 @@ static const char* wcsn[ewcsNR] = {
      "Launch GPU NB F buffer ops.",
      "Launch GPU Comm. coord.",
      "Launch GPU Comm. force.",
+    "Launch GPU update",
      "Test subcounter",
  };
  
diff --git a/src/gromacs/timing/wallcycle.h b/src/gromacs/timing/wallcycle.h

index 0bc53f0fe69f9db152776eaf2230b9c4b36001ea..2cbc7fd3cb192ba6fe18ea7f4b3ba1afde2ffe7d 100644 (file)
--- a/src/gromacs/timing/wallcycle.h
+++ b/src/gromacs/timing/wallcycle.h
@@ -132,6 +132,7 @@ enum
      ewcsLAUNCH_GPU_NB_F_BUF_OPS,
      ewcsLAUNCH_GPU_MOVEX,
      ewcsLAUNCH_GPU_MOVEF,
+    ewcsLAUNCH_GPU_UPDATE_CONSTRAIN,
      ewcsTEST,
      ewcsNR
  };
diff --git a/src/gromacs/utility/template_mp.h b/src/gromacs/utility/template_mp.h

new file mode 100644 (file)

index 0000000..14b4f53
--- /dev/null
+++ b/src/gromacs/utility/template_mp.h
@@ -0,0 +1,103 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares utilities for template metaprogramming
+ *
+ * \author Roland Schulz <roland.schulz@intel.com>
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEMPLATE_MP_H
+#define GMX_UTILITY_TEMPLATE_MP_H
+
+#include <cassert>
+#include <cstddef>
+
+#include <utility>
+
+#include "gromacs/compat/mp11.h"
+
+namespace gmx
+{
+
+template<class Function>
+auto dispatchTemplatedFunction(Function&& f)
+{
+    return std::forward<Function>(f)();
+}
+
+/** \internal \brief Helper function to select appropriate template based on runtime values.
+ *
+ * Can only use enums for template parameters.
+ * These enums must have a member \c Count indicating the total number of valid values.
+ *
+ * Example usage:
+ * \code
+    enum class Options {
+        Op1 = 0,
+        Op2 = 1,
+        Count = 2
+    };
+
+    template<Options p1, Options p2>
+    bool foo(int i);
+
+    bool bar(Options p1, Options p2, int i) {
+        return dispatchTemplatedFunction(
+            [=](auto p1, auto p2) {
+                return foo<p1, p2>(i);
+            },
+            p1, p2);
+    }
+ * \endcode
+ */
+template<class Function, class Enum, class... Enums>
+auto dispatchTemplatedFunction(Function&& f, Enum e, Enums... es)
+{
+    return dispatchTemplatedFunction(
+            [&](auto... es_) {
+                return compat::mp_with_index<size_t(Enum::Count)>(size_t(e), [&](auto e_) {
+                    return std::forward<Function>(f)(
+                            std::integral_constant<Enum, static_cast<Enum>(size_t(e_))>(), es_...);
+                });
+            },
+            es...);
+}
+
+} // namespace gmx
+
+#endif // GMX_UTILITY_TEMPLATE_MP_H
diff --git a/src/gromacs/utility/tests/CMakeLists.txt b/src/gromacs/utility/tests/CMakeLists.txt

index e3f60426fbf36c19b9cd503c52e36aa83758ff32..8bbbc308f2b4e8959d50dffc9d3335befc1274d1 100644 (file)
--- a/src/gromacs/utility/tests/CMakeLists.txt
+++ b/src/gromacs/utility/tests/CMakeLists.txt
@@ -54,6 +54,7 @@ gmx_add_unit_test(UtilityUnitTests utility-test
          range.cpp
          strconvert.cpp
          stringutil.cpp
+        template_mp.cpp
          textreader.cpp
          textwriter.cpp
          typetraits.cpp
diff --git a/src/gromacs/utility/tests/template_mp.cpp b/src/gromacs/utility/tests/template_mp.cpp

new file mode 100644 (file)

index 0000000..dba1058
--- /dev/null
+++ b/src/gromacs/utility/tests/template_mp.cpp
@@ -0,0 +1,70 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#include "gmxpre.h"
+
+#include "gromacs/utility/template_mp.h"
+
+#include <gtest/gtest.h>
+
+namespace gmx
+{
+namespace
+{
+
+enum class Options
+{
+    Op0   = 0,
+    Op1   = 1,
+    Op2   = 2,
+    Count = 3
+};
+
+template<Options i, Options j>
+static int testEnumTwoIPlusJPlusK(int k)
+{
+    return 2 * int(i) + int(j) + k;
+}
+
+TEST(TemplateMPTest, DispatchTemplatedFunction)
+{
+    int five           = 5;
+    int two1plus2plus5 = dispatchTemplatedFunction(
+            [=](auto p1, auto p2) { return testEnumTwoIPlusJPlusK<p1, p2>(five); }, Options::Op1,
+            Options::Op2);
+    EXPECT_EQ(two1plus2plus5, 9);
+}
+
+} // anonymous namespace
+} // namespace gmx
diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt

index 29da4ea35f3ae885aa435570e9f38cc895632c5a..5797b1de820b9d43dcc56fe9c1bd9d7208793d8c 100644 (file)
--- a/src/programs/CMakeLists.txt
+++ b/src/programs/CMakeLists.txt
@@ -79,6 +79,7 @@ else()
      add_library(gmx_objlib OBJECT ${GMX_MAIN_SOURCES})
      target_link_libraries(gmx_objlib PRIVATE legacy_api)
      target_include_directories(gmx_objlib SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
+    target_include_directories(gmx_objlib SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
      add_executable(gmx
          $<TARGET_OBJECTS:gmx_objlib>
          $<TARGET_OBJECTS:mdrun_objlib>
diff --git a/src/programs/mdrun/mdrun.cpp b/src/programs/mdrun/mdrun.cpp

index f85fe269318ba8729cf720ad880d7eae035ff187..77a25f1fdfab7963a8594359b643f35731d6af8d 100644 (file)
--- a/src/programs/mdrun/mdrun.cpp
+++ b/src/programs/mdrun/mdrun.cpp
@@ -59,6 +59,7 @@
  #include "gromacs/commandline/pargs.h"
  #include "gromacs/domdec/options.h"
  #include "gromacs/fileio/gmxfio.h"
+#include "gromacs/hardware/detecthardware.h"
  #include "gromacs/mdrun/legacymdrunoptions.h"
  #include "gromacs/mdrun/runner.h"
  #include "gromacs/mdrun/simulationcontext.h"
@@ -66,15 +67,25 @@
  #include "gromacs/mdrunutility/logging.h"
  #include "gromacs/mdrunutility/multisim.h"
  #include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/basenetwork.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
  
  #include "mdrun_main.h"
  
  namespace gmx
  {
  
-//! Implements C-style main function for mdrun
  int gmx_mdrun(int argc, char* argv[])
+{
+    // Set up the communicator, where possible (see docs for
+    // SimulationContext).
+    MPI_Comm                 communicator = GMX_LIB_MPI ? MPI_COMM_WORLD : MPI_COMM_NULL;
+    PhysicalNodeCommunicator physicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
+    std::unique_ptr<gmx_hw_info_t> hwinfo = gmx_detect_hardware(physicalNodeCommunicator);
+    return gmx_mdrun(communicator, *hwinfo, argc, argv);
+}
+
+int gmx_mdrun(MPI_Comm communicator, const gmx_hw_info_t& hwinfo, int argc, char* argv[])
  {
      auto mdModules = std::make_unique<MDModules>();
  
@@ -215,9 +226,6 @@ int gmx_mdrun(int argc, char* argv[])
      ArrayRef<const std::string> multiSimDirectoryNames =
              opt2fnsIfOptionSet("-multidir", ssize(options.filenames), options.filenames.data());
  
-    // Set up the communicator, where possible (see docs for
-    // SimulationContext).
-    MPI_Comm communicator = GMX_LIB_MPI ? MPI_COMM_WORLD : MPI_COMM_NULL;
      // The SimulationContext is necessary with gmxapi so that
      // resources owned by the client code can have suitable
      // lifetime. The gmx wrapper binary uses the same infrastructure,
@@ -245,6 +253,7 @@ int gmx_mdrun(int argc, char* argv[])
       */
      auto builder = MdrunnerBuilder(std::move(mdModules),
                                     compat::not_null<SimulationContext*>(&simulationContext));
+    builder.addHardwareDetectionResult(&hwinfo);
      builder.addSimulationMethod(options.mdrunOptions, options.pforce, startingBehavior);
      builder.addDomainDecomposition(options.domdecOptions);
      // \todo pass by value
diff --git a/src/programs/mdrun/mdrun_main.h b/src/programs/mdrun/mdrun_main.h

index 8a6e7ecae92bb9cb0c402fce594e417df7a7a8fa..5985428de35615246d283163b12e13d23c142655 100644 (file)
--- a/src/programs/mdrun/mdrun_main.h
+++ b/src/programs/mdrun/mdrun_main.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2013,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2018,2019,2020, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -32,14 +32,52 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
+/*! \internal \file
+ *
+ * \brief This file declares C-style entrypoints for mdrun
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ *
+ * \ingroup module_mdrun
+ */
  #ifndef GMX_PROGRAMS_MDRUN_MDRUN_H
  #define GMX_PROGRAMS_MDRUN_MDRUN_H
  
+#include "gromacs/utility/gmxmpi.h"
+
+struct gmx_hw_info_t;
+
  namespace gmx
  {
  
+/*! \brief Implements C-style main function for mdrun
+ *
+ * This implementation detects hardware itself, as suits
+ * the gmx wrapper binary.
+ *
+ * \param[in]  argc          Number of C-style command-line arguments
+ * \param[in]  argv          C-style command-line argument strings
+ */
  int gmx_mdrun(int argc, char* argv[]);
  
+/*! \brief Implements C-style main function for mdrun
+ *
+ * This implementation facilitates reuse of infrastructure. This
+ * includes the information about the hardware detected across the
+ * given \c communicator. That suits e.g. efficient implementation of
+ * test fixtures.
+ *
+ * \param[in]  communicator  The communicator to use for the simulation
+ * \param[in]  hwinfo        Describes the hardware detected on the physical nodes of the communicator
+ * \param[in]  argc          Number of C-style command-line arguments
+ * \param[in]  argv          C-style command-line argument strings
+ *
+ * \todo Progress on https://gitlab.com/gromacs/gromacs/-/issues/3774
+ * will remove the need for test binaries to call gmx_mdrun in a way
+ * that is different from the command-line and gmxapi.
+ */
+int gmx_mdrun(MPI_Comm communicator, const gmx_hw_info_t& hwinfo, int argc, char* argv[]);
+
  } // namespace gmx
  
  #endif
diff --git a/src/programs/mdrun/nonbonded_bench.cpp b/src/programs/mdrun/nonbonded_bench.cpp

index 9d4cd52eeb04860b614f4c1b2dab611706c90ca3..bd693790b3499337cb7cb0aa44d89a3dd638daef 100644 (file)
--- a/src/programs/mdrun/nonbonded_bench.cpp
+++ b/src/programs/mdrun/nonbonded_bench.cpp
@@ -208,6 +208,14 @@ void NonbondedBenchmark::initOptions(IOptionsContainer* options, ICommandLineOpt
      options->addOption(BooleanOption("cycles")
                                 .store(&benchmarkOptions_.cyclesPerPair)
                                 .description("Report cycles/pair instead of pairs/cycle"));
+    options->addOption(
+            BooleanOption("time").store(&benchmarkOptions_.reportTime).description("Report micro-seconds instead of cycles"));
+    options->addOption(FileNameOption("o")
+                               .filetype(eftCsv)
+                               .outputFile()
+                               .store(&benchmarkOptions_.outputFile)
+                               .defaultBasename("nonbonded-benchmark")
+                               .description("Also output results in csv format"));
  }
  
  void NonbondedBenchmark::optionsFinished()
diff --git a/src/programs/mdrun/tests/moduletest.cpp b/src/programs/mdrun/tests/moduletest.cpp

index a551798aacfe34a07f2f577420d255890c45929f..9cb67be37eb6557dd2f11eba60dfe178ae68b291 100644 (file)
--- a/src/programs/mdrun/tests/moduletest.cpp
+++ b/src/programs/mdrun/tests/moduletest.cpp
@@ -48,15 +48,19 @@
  
  #include <cstdio>
  
+#include <utility>
+
  #include "gromacs/gmxana/gmx_ana.h"
  #include "gromacs/gmxpreprocess/grompp.h"
  #include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/hw_info.h"
  #include "gromacs/options/basicoptions.h"
  #include "gromacs/options/ioptionscontainer.h"
  #include "gromacs/tools/convert_tpr.h"
  #include "gromacs/utility/basedefinitions.h"
  #include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
  #include "gromacs/utility/textwriter.h"
  #include "programs/mdrun/mdrun_main.h"
  
@@ -108,6 +112,14 @@ SimulationRunner::SimulationRunner(TestFileManager* fileManager) :
  {
  #if GMX_LIB_MPI
      GMX_RELEASE_ASSERT(gmx_mpi_initialized(), "MPI system not initialized for mdrun tests");
+
+    // It would be better to also detect this in a thread-MPI build,
+    // but there is no way to do that currently, and it is also not a
+    // problem for such a build. Any code based on such an invalid
+    // test fixture will be found in CI testing, however.
+    GMX_RELEASE_ASSERT(MdrunTestFixtureBase::communicator_ != MPI_COMM_NULL,
+                       "SimulationRunner may only be used from a test fixture that inherits from "
+                       "MdrunTestFixtureBase");
  #endif
  }
  
@@ -223,7 +235,7 @@ int SimulationRunner::callGrompp(const CommandLine& callerRef)
      // Make sure rank zero has written the .tpr file before other
      // ranks try to read it. Thread-MPI and serial do this just fine
      // on their own.
-    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(MdrunTestFixtureBase::communicator_);
  #endif
      return returnValue;
  }
@@ -304,7 +316,8 @@ int SimulationRunner::callMdrun(const CommandLine& callerRef)
      caller.addOption("-ntomp", g_numOpenMPThreads);
  #endif
  
-    return gmx_mdrun(caller.argc(), caller.argv());
+    return gmx_mdrun(MdrunTestFixtureBase::communicator_, *MdrunTestFixtureBase::hwinfo_,
+                     caller.argc(), caller.argv());
  }
  
  int SimulationRunner::callMdrun()
@@ -314,6 +327,26 @@ int SimulationRunner::callMdrun()
  
  // ====
  
+// static
+MPI_Comm MdrunTestFixtureBase::communicator_ = MPI_COMM_NULL;
+// static
+std::unique_ptr<gmx_hw_info_t> MdrunTestFixtureBase::hwinfo_;
+
+// static
+void MdrunTestFixtureBase::SetUpTestCase()
+{
+    communicator_ = MPI_COMM_WORLD;
+    auto newHwinfo =
+            gmx_detect_hardware(PhysicalNodeCommunicator{ communicator_, gmx_physicalnode_id_hash() });
+    std::swap(hwinfo_, newHwinfo);
+}
+
+// static
+void MdrunTestFixtureBase::TearDownTestCase()
+{
+    hwinfo_.reset(nullptr);
+}
+
  MdrunTestFixtureBase::MdrunTestFixtureBase()
  {
  #if GMX_LIB_MPI
@@ -331,7 +364,7 @@ MdrunTestFixture::~MdrunTestFixture()
  {
  #if GMX_LIB_MPI
      // fileManager_ should only clean up after all the ranks are done.
-    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Barrier(MdrunTestFixtureBase::communicator_);
  #endif
  }
  
diff --git a/src/programs/mdrun/tests/moduletest.h b/src/programs/mdrun/tests/moduletest.h

index bbf140cee51e981e0b3ce6aee5ac79e3a7235ae5..d29ebe15d30ef32b7efd9f430a36831ebafa5443 100644 (file)
--- a/src/programs/mdrun/tests/moduletest.h
+++ b/src/programs/mdrun/tests/moduletest.h
@@ -51,10 +51,13 @@
  #include <gtest/gtest.h>
  
  #include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/gmxmpi.h"
  
  #include "testutils/cmdlinetest.h"
  #include "testutils/testfilemanager.h"
  
+struct gmx_hw_info_t;
+
  namespace gmx
  {
  namespace test
@@ -79,9 +82,9 @@ enum class SimulationRunnerMdpSource
   * \brief Helper object for running grompp and mdrun in
   * integration tests of mdrun functionality
   *
- * Objects of this class are intended to be owned by
- * IntegrationTestFixture objects, and an IntegrationTestFixture
- * object might own more than one SimulationRunner.
+ * Objects of this class must be owned by objects descended from
+ * MdrunTestFixtureBase, which sets up necessary infrastructure for
+ * it. Such an object may own more than one SimulationRunner.
   *
   * The setup phase creates various temporary files for input and
   * output that are common for mdrun tests, using the file manager
@@ -174,6 +177,7 @@ public:
      std::string mdpInputContents_;
  
  private:
+    //! The file manager used to manage I/O
      TestFileManager& fileManager_;
  
      GMX_DISALLOW_COPY_AND_ASSIGN(SimulationRunner);
@@ -183,31 +187,31 @@ private:
   * \brief Declares test fixture base class for
   * integration tests of mdrun functionality
   *
- * Derived fixture classes (or individual test cases) that might have
- * specific requirements should assert that behaviour, rather than
- * hard-code the requirements. A test that (for example) can't run
- * with more than one thread should report that as a diagnostic, so the
- * person running the test (or designing the test harness) can get
- * feedback on what tests need what conditions without having to read
- * the code of lots of tests.
- *
- * Specifying the execution context (such as numbers of threads and
- * processors) is normally sensible to specify from the test harness
- * (i.e. when CMake/CTest/the user runs a test executable), because
- * only there is information about the hardware available. The default
- * values for such context provided in test fixtures for mdrun should
- * mirror the defaults for mdrun, but currently mdrun.c hard-codes
- * those in a gmx_hw_opt_t.
- *
- * Any method in this class may throw std::bad_alloc if out of memory.
+ * Heavyweight resources are set up here and shared
+ * across all tests in the test case fixture, e.g.
+ * the MPI communicator for the tests and the hardware
+ * detected that is available to it.
   *
   * \ingroup module_mdrun_integration_tests
   */
  class MdrunTestFixtureBase : public ::testing::Test
  {
  public:
+    //! Per-test-case setup for lengthy processes that need to run only once.
+    static void SetUpTestCase();
+    //! Per-test-case tear down
+    static void TearDownTestCase();
+
      MdrunTestFixtureBase();
      ~MdrunTestFixtureBase() override;
+
+    //! Communicator over which the test fixture works
+    static MPI_Comm communicator_;
+    /*! \brief Hardware information object
+     *
+     * Detected within \c communicator_ and available to re-use
+     * over all tests in the test case of this test fixture. */
+    static std::unique_ptr<gmx_hw_info_t> hwinfo_;
  };
  
  /*! \internal
@@ -218,7 +222,7 @@ public:
   *
   * \ingroup module_mdrun_integration_tests
   */
-class MdrunTestFixture : public ::testing::Test
+class MdrunTestFixture : public MdrunTestFixtureBase
  {
  public:
      MdrunTestFixture();
diff --git a/src/programs/mdrun/tests/multisimtest.h b/src/programs/mdrun/tests/multisimtest.h

index 05990d6981af98f42603d882b2bbbbcc704d9fb1..81c975c4ea1a6a6bda4d76b1df9b6fc6c815b680 100644 (file)
--- a/src/programs/mdrun/tests/multisimtest.h
+++ b/src/programs/mdrun/tests/multisimtest.h
@@ -69,7 +69,7 @@ typedef std::unique_ptr<CommandLine> CommandLinePointer;
   *
   * \ingroup module_mdrun_integration_tests
   */
-class MultiSimTest : public ::testing::Test, public ::testing::WithParamInterface<const char*>
+class MultiSimTest : public MdrunTestFixtureBase, public ::testing::WithParamInterface<const char*>
  {
  public:
      MultiSimTest();
diff --git a/src/programs/mdrun/tests/pmetest.cpp b/src/programs/mdrun/tests/pmetest.cpp

index 4aa1219ccade36859d14b72dc98c4831486c2e9e..a4e4aa37b532b3829d53afa1f89edc5929c0eda7 100644 (file)
--- a/src/programs/mdrun/tests/pmetest.cpp
+++ b/src/programs/mdrun/tests/pmetest.cpp
@@ -61,7 +61,6 @@
  #include "gromacs/trajectory/energyframe.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/loggerbuilder.h"
  #include "gromacs/utility/physicalnodecommunicator.h"
  #include "gromacs/utility/stringutil.h"
  
@@ -110,8 +109,8 @@ void PmeTest::runTest(const RunModesList& runModes)
          EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
      }
  
-    auto hardwareInfo_ = gmx_detect_hardware(
-            MDLogger{}, PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()));
+    auto hardwareInfo_ =
+            gmx_detect_hardware(PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()));
  
      for (const auto& mode : runModes)
      {
diff --git a/src/testutils/CMakeLists.txt b/src/testutils/CMakeLists.txt

index 3dd632ef550fa8c4d3ed269e4d0c012b3a129817..2d96c50053ed249cb76ff5b3a1fbe7a7854d5b10 100644 (file)
--- a/src/testutils/CMakeLists.txt
+++ b/src/testutils/CMakeLists.txt
@@ -75,7 +75,10 @@ if (GMX_GPU_CUDA)
      if (NOT GMX_CLANG_CUDA)
          gmx_cuda_add_library(testutils ${TESTUTILS_SOURCES})
      else()
+        set_source_files_properties(test_device.cpp PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+        gmx_compile_cuda_file_with_clang(test_device.cpp)
          add_library(testutils STATIC ${TESTUTILS_SOURCES})
+        target_link_libraries(testutils PRIVATE ${GMX_CUDA_CLANG_LINK_LIBS})
      endif()
      target_link_libraries(testutils PRIVATE ${CUDA_CUFFT_LIBRARIES})
  else()
diff --git a/src/testutils/TestMacros.cmake b/src/testutils/TestMacros.cmake

index 6e7586654a699f11590ab9be7532ca8ce45e7792..70dd60e0364012ebc6ee69948859c61fdf783273 100644 (file)
--- a/src/testutils/TestMacros.cmake
+++ b/src/testutils/TestMacros.cmake
@@ -134,6 +134,7 @@ function (gmx_add_gtest_executable EXENAME)
                      ${ARG_GPU_CPP_SOURCE_FILES})
                  set_source_files_properties(${ARG_GPU_CPP_SOURCE_FILES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
                  gmx_compile_cuda_file_with_clang(${ARG_CUDA_CU_SOURCE_FILES})
+                gmx_compile_cuda_file_with_clang(${ARG_GPU_CPP_SOURCE_FILES})
                  if(ARG_CUDA_CU_SOURCE_FILES OR ARG_GPU_CPP_SOURCE_FILES)
                      target_link_libraries(${EXENAME} PRIVATE ${GMX_EXTRA_LIBRARIES})
                  endif()
diff --git a/src/testutils/test_hardware_environment.cpp b/src/testutils/test_hardware_environment.cpp

index a17aa980075829760013337cffceddee4db08775..267c73cf27b927af5b3c6b0c520cc377784e8b65 100644 (file)
--- a/src/testutils/test_hardware_environment.cpp
+++ b/src/testutils/test_hardware_environment.cpp
@@ -54,7 +54,6 @@
  #include "gromacs/hardware/hw_info.h"
  #include "gromacs/utility/basenetwork.h"
  #include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/loggerbuilder.h"
  #include "gromacs/utility/physicalnodecommunicator.h"
  
  namespace gmx
@@ -89,17 +88,14 @@ void callAddGlobalTestEnvironment()
      getTestHardwareEnvironment();
  }
  
-//! Simple hardware initialization
-static gmx_hw_info_t* hardwareInit()
+TestHardwareEnvironment::TestHardwareEnvironment() :
+    hardwareInfo_(gmx_detect_hardware(PhysicalNodeCommunicator{ MPI_COMM_WORLD, gmx_physicalnode_id_hash() }))
  {
-    PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
-    return gmx_detect_hardware(MDLogger{}, physicalNodeComm);
  }
  
  void TestHardwareEnvironment::SetUp()
  {
      testDeviceList_.clear();
-    hardwareInfo_ = hardwareInit();
      // Constructing contexts for all compatible GPUs - will be empty on non-GPU builds
      for (const DeviceInformation& compatibleDeviceInfo : getCompatibleDevices(hardwareInfo_->deviceInfoList))
      {
@@ -111,6 +107,11 @@ void TestHardwareEnvironment::SetUp()
  void TestHardwareEnvironment::TearDown()
  {
      testDeviceList_.clear();
+    /* In OneAPI 2021.1-beta9 and beta10, there is a bug that causes a
+     * segfault when a sycl::device is destructed too late. So, we
+     * explicitly destroy device handles here by resetting
+     * hardwareInfo_, which does no harm to anything else. */
+    hardwareInfo_.reset(nullptr);
  }
  
  } // namespace test
diff --git a/src/testutils/test_hardware_environment.h b/src/testutils/test_hardware_environment.h

index b012ae90d5c78749c34190ecf6058cfd77599837..dbab83183aab62687c6e875fa3e2bc8e32d1d5ab 100644 (file)
--- a/src/testutils/test_hardware_environment.h
+++ b/src/testutils/test_hardware_environment.h
@@ -46,6 +46,7 @@
   */
  
  #include <map>
+#include <memory>
  #include <vector>
  
  #include <gtest/gtest.h>
@@ -68,11 +69,12 @@ class TestHardwareEnvironment : public ::testing::Environment
  {
  private:
      //! General hardware info
-    gmx_hw_info_t* hardwareInfo_;
+    std::unique_ptr<gmx_hw_info_t> hardwareInfo_;
      //! Storage of hardware contexts
      std::vector<std::unique_ptr<TestDevice>> testDeviceList_;
  
  public:
+    TestHardwareEnvironment();
      //! This is called by GTest framework once to query the hardware
      void SetUp() override;
      //! This is called by GTest framework once release the hardware
@@ -82,9 +84,10 @@ public:
      {
          return testDeviceList_;
      }
+    //! Whether the available hardware has any compatible devices
      bool hasCompatibleDevices() const { return !testDeviceList_.empty(); }
      //! Get available hardware information.
-    const gmx_hw_info_t* hwinfo() const { return hardwareInfo_; }
+    const gmx_hw_info_t* hwinfo() const { return hardwareInfo_.get(); }
  };
  
  //! Get the test environment
author	Paul Bauer <paul.bauer.q@gmail.com>
	Thu, 5 Nov 2020 10:53:20 +0000 (11:53 +0100)
committer	Paul Bauer <paul.bauer.q@gmail.com>
	Thu, 5 Nov 2020 12:12:18 +0000 (13:12 +0100)
.github/workflows/build_cmake.yml		patch \| blob \| history
admin/ci-scripts/build-and-test-py-gmxapi-0.2.sh		patch \| blob \| history
admin/containers/buildall.sh		patch \| blob \| history
admin/containers/scripted_gmx_docker_builds.py		patch \| blob \| history
admin/containers/utility.py		patch \| blob \| history
admin/gitlab-ci/archive.gitlab-ci.yml		patch \| blob \| history
admin/gitlab-ci/documentation.gitlab-ci.yml		patch \| blob \| history
admin/gitlab-ci/gromacs.gitlab-ci.yml		patch \| blob \| history
admin/gitlab-ci/lint.gitlab-ci.yml		patch \| blob \| history
admin/gitlab-ci/python-gmxapi.gitlab-ci.yml		patch \| blob \| history
admin/gitlab-ci/sample_restraint.gitlab-ci.yml		patch \| blob \| history
api/gmxapi/CMakeLists.txt		patch \| blob \| history
api/gmxapi/cpp/context.cpp		patch \| blob \| history
api/gmxapi/cpp/context_impl.h		patch \| blob \| history
api/legacy/include/gromacs/fileio/filetypes.h		patch \| blob \| history
api/nblib/CMakeLists.txt		patch \| blob \| history
api/nblib/forcecalculator.cpp		patch \| blob \| history
api/nblib/forcecalculator.h		patch \| blob \| history
api/nblib/gmxcalculator.cpp		patch \| blob \| history
api/nblib/gmxcalculator.h		patch \| blob \| history
api/nblib/gmxsetup.cpp		patch \| blob \| history
api/nblib/gmxsetup.h		patch \| blob \| history
api/nblib/integrator.cpp		patch \| blob \| history
api/nblib/listed_forces/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
api/nblib/listed_forces/bondtypes.h	[new file with mode: 0644]	patch \| blob
api/nblib/listed_forces/calculator.h	[new file with mode: 0644]	patch \| blob
api/nblib/listed_forces/definitions.h	[new file with mode: 0644]	patch \| blob
api/nblib/listed_forces/tests/CMakeLists.txt	[new file with mode: 0644]	patch \| blob
api/nblib/listed_forces/tests/bondtypes.cpp	[new file with mode: 0644]	patch \| blob
api/nblib/listed_forces/traits.h	[new file with mode: 0644]	patch \| blob
api/nblib/molecules.cpp		patch \| blob \| history
api/nblib/molecules.h		patch \| blob \| history
api/nblib/nblib.h		patch \| blob \| history
api/nblib/ppmap.h	[new file with mode: 0644]	patch \| blob
api/nblib/simulationstate.cpp		patch \| blob \| history
api/nblib/tests/molecules.cpp		patch \| blob \| history
api/nblib/tests/nbkernelsystem.cpp		patch \| blob \| history
api/nblib/tests/testsystems.cpp		patch \| blob \| history
api/nblib/topology.cpp		patch \| blob \| history
api/nblib/topology.h		patch \| blob \| history
api/nblib/topologyhelpers.cpp		patch \| blob \| history
api/nblib/topologyhelpers.h		patch \| blob \| history
api/nblib/util/internal.h		patch \| blob \| history
api/nblib/util/tests/CMakeLists.txt		patch \| blob \| history
api/nblib/util/tests/internal.cpp	[new file with mode: 0644]	patch \| blob
api/nblib/util/tests/user.cpp		patch \| blob \| history
api/nblib/util/user.cpp		patch \| blob \| history
api/nblib/util/user.h		patch \| blob \| history
cmake/gmxManageNvccConfig.cmake		patch \| blob \| history
docs/dev-manual/containers.rst		patch \| blob \| history
docs/dev-manual/gitlab.rst		patch \| blob \| history
docs/install-guide/index.rst		patch \| blob \| history
docs/nblib/listed-data-format.rst	[new file with mode: 0644]	patch \| blob
docs/nblib/listed-dev.rst	[new file with mode: 0644]	patch \| blob
docs/reference-manual/special/awh.rst		patch \| blob \| history
docs/release-notes/2021/major/bugs-fixed.rst		patch \| blob \| history
docs/release-notes/2021/major/performance.rst		patch \| blob \| history
docs/release-notes/2021/major/portability.rst		patch \| blob \| history
docs/user-guide/mdp-options.rst		patch \| blob \| history
scripts/GMXRC.bash.cmakein		patch \| blob \| history
src/gromacs/CMakeLists.txt		patch \| blob \| history
src/gromacs/applied_forces/awh/biasparams.cpp		patch \| blob \| history
src/gromacs/applied_forces/awh/biasstate.cpp		patch \| blob \| history
src/gromacs/applied_forces/awh/tests/bias.cpp		patch \| blob \| history
src/gromacs/applied_forces/awh/tests/bias_fep_lambda_state.cpp		patch \| blob \| history
src/gromacs/compat/mp11.h	[new file with mode: 0644]	patch \| blob
src/gromacs/compat/pointers.h		patch \| blob \| history
src/gromacs/compat/tests/CMakeLists.txt		patch \| blob \| history
src/gromacs/compat/tests/mp11.cpp	[new file with mode: 0644]	patch \| blob
src/gromacs/domdec/gpuhaloexchange_impl.cu		patch \| blob \| history
src/gromacs/domdec/gpuhaloexchange_impl.cuh		patch \| blob \| history
src/gromacs/domdec/tests/CMakeLists.txt		patch \| blob \| history
src/gromacs/domdec/tests/haloexchange_mpi.cpp		patch \| blob \| history
src/gromacs/ewald/pme.h		patch \| blob \| history
src/gromacs/ewald/pme_gather.clh		patch \| blob \| history
src/gromacs/ewald/pme_gpu.cpp		patch \| blob \| history
src/gromacs/ewald/pme_gpu_3dfft.h		patch \| blob \| history
src/gromacs/ewald/pme_gpu_calculate_splines.cuh		patch \| blob \| history
src/gromacs/ewald/pme_gpu_internal.h		patch \| blob \| history
src/gromacs/ewald/pme_solve.clh		patch \| blob \| history
src/gromacs/ewald/pme_spread.clh		patch \| blob \| history
src/gromacs/fileio/filetypes.cpp		patch \| blob \| history
src/gromacs/gmxpreprocess/readpull.cpp		patch \| blob \| history
src/gromacs/gpu_utils/device_stream.cu		patch \| blob \| history
src/gromacs/gpu_utils/devicebuffer_ocl.h		patch \| blob \| history
src/gromacs/gpu_utils/tests/CMakeLists.txt		patch \| blob \| history
src/gromacs/hardware/CMakeLists.txt		patch \| blob \| history
src/gromacs/hardware/detecthardware.cpp		patch \| blob \| history
src/gromacs/hardware/detecthardware.h		patch \| blob \| history
src/gromacs/hardware/hw_info.h		patch \| blob \| history
src/gromacs/hardware/prepare_detection.cpp	[new file with mode: 0644]	patch \| blob
src/gromacs/hardware/prepare_detection.h	[new file with mode: 0644]	patch \| blob
src/gromacs/listed_forces/listed_forces.cpp		patch \| blob \| history
src/gromacs/mdlib/leapfrog_gpu.h		patch \| blob \| history
src/gromacs/mdlib/lincs.cpp		patch \| blob \| history
src/gromacs/mdlib/sim_util.cpp		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu.h		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.cpp		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.cu		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.h		patch \| blob \| history
src/gromacs/mdrun/legacymdrunoptions.h		patch \| blob \| history
src/gromacs/mdrun/md.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.cpp		patch \| blob \| history
src/gromacs/mdrun/runner.h		patch \| blob \| history
src/gromacs/mdtypes/pull_params.h		patch \| blob \| history
src/gromacs/nbnxm/benchmark/bench_setup.cpp		patch \| blob \| history
src/gromacs/nbnxm/benchmark/bench_setup.h		patch \| blob \| history
src/gromacs/nbnxm/benchmark/bench_system.cpp		patch \| blob \| history
src/gromacs/nbnxm/benchmark/bench_system.h		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh		patch \| blob \| history
src/gromacs/nbnxm/opencl/CMakeLists.txt		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_data_mgmt.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_jit_support.cpp		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel.clh		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_pruneonly.clh		patch \| blob \| history
src/gromacs/nbnxm/opencl/nbnxm_ocl_kernel_utils.clh		patch \| blob \| history
src/gromacs/nbnxm/pairlist.cpp		patch \| blob \| history
src/gromacs/options/filenameoption.cpp		patch \| blob \| history
src/gromacs/options/optionfiletype.h		patch \| blob \| history
src/gromacs/options/tests/filenameoption.cpp		patch \| blob \| history
src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_double.h		patch \| blob \| history
src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.cpp		patch \| blob \| history
src/gromacs/taskassignment/decidegpuusage.h		patch \| blob \| history
src/gromacs/timing/cyclecounter.cpp		patch \| blob \| history
src/gromacs/timing/wallcycle.cpp		patch \| blob \| history
src/gromacs/timing/wallcycle.h		patch \| blob \| history
src/gromacs/utility/template_mp.h	[new file with mode: 0644]	patch \| blob
src/gromacs/utility/tests/CMakeLists.txt		patch \| blob \| history
src/gromacs/utility/tests/template_mp.cpp	[new file with mode: 0644]	patch \| blob
src/programs/CMakeLists.txt		patch \| blob \| history
src/programs/mdrun/mdrun.cpp		patch \| blob \| history
src/programs/mdrun/mdrun_main.h		patch \| blob \| history
src/programs/mdrun/nonbonded_bench.cpp		patch \| blob \| history
src/programs/mdrun/tests/moduletest.cpp		patch \| blob \| history
src/programs/mdrun/tests/moduletest.h		patch \| blob \| history
src/programs/mdrun/tests/multisimtest.h		patch \| blob \| history
src/programs/mdrun/tests/pmetest.cpp		patch \| blob \| history
src/testutils/CMakeLists.txt		patch \| blob \| history
src/testutils/TestMacros.cmake		patch \| blob \| history
src/testutils/test_hardware_environment.cpp		patch \| blob \| history
src/testutils/test_hardware_environment.h		patch \| blob \| history