set(path_separator ";")
endif()
set(ENV{PATH} "$ENV{GITHUB_WORKSPACE}${path_separator}$ENV{PATH}")
+ set(openmp_use "ON")
+ if ("${{ runner.os }}" STREQUAL "Windows")
+ set(openmp_use "OFF")
+ endif()
execute_process(
COMMAND cmake
-D GMX_GPU=OFF
-D GMX_SIMD=None
-D GMX_FFT_LIBRARY=FFTPACK
+ -D GMX_OPENMP=${openmp_use}
RESULT_VARIABLE result
)
if (NOT result EQUAL 0)
fi
# Run Python acceptance tests.
-python -m pytest python_packaging/test --junitxml=$PY_ACCEPTANCE_TEST_XML
+python -m pytest python_packaging/test --junitxml=$PY_ACCEPTANCE_TEST_XML --threads=2
# Note: Multiple pytest processes getting --junitxml output file argument
# may cause problems, so we set the option on only one of the launched processes.
# images needed, because the same one can test library,
# thread and no MPI configurations.
-tag="gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --gcc 8 --cuda 11.0 --opencl --clfft --mpi openmpi \
-| docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --gcc 7 --opencl amd --clfft --mpi openmpi --ubuntu 18.04 | docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-llvm-8-tsan-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --llvm 8 --tsan | docker build -t $tag -
-
-tag="gromacs/cmake-3.15.7-llvm-8-cuda-10.0-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --llvm 8 --cuda 10.0 --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.15.7-llvm-8-cuda-11.0-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --llvm 8 --cuda 11.0 --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.15.7-llvm-9-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.15.7 --llvm 9 --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-llvm-9-intelopencl-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --llvm 9 --opencl intel --mpi openmpi | docker build -t $tag -
-
-tag="gromacs/cmake-3.13.0-llvm-9-amdopencl-openmpi-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.13.0 --llvm 9 --opencl amd --mpi openmpi --ubuntu 18.04 | docker build -t $tag -
-
-tag="gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.17.2 --oneapi 2021.1-beta09 | docker build -t $tag -
-
-tag="gromacs/ci-docs-llvm-master"
-tags[${#tags[@]}]=$tag
-python3 $SCRIPT --cmake 3.17.2 --llvm --doxygen | docker build -t $tag -
+args[${#args[@]}]="--gcc 8 --cuda 11.0 --clfft --mpi openmpi"
+args[${#args[@]}]="--gcc 7 --clfft --mpi openmpi --ubuntu 18.04"
+args[${#args[@]}]="--llvm 8 --tsan"
+args[${#args[@]}]="--llvm 8 --cuda 10.0 --clfft --mpi openmpi"
+args[${#args[@]}]="--llvm 8 --cuda 11.0 --clfft --mpi openmpi"
+args[${#args[@]}]="--llvm 9 --clfft --mpi openmpi --ubuntu 18.04"
+args[${#args[@]}]="--oneapi 2021.1-beta09"
+args[${#args[@]}]="--llvm --doxygen"
+
+echo "Building the following images."
+for arg_string in "${args[@]}"; do
+ # shellcheck disable=SC2086
+ python3 -m utility $arg_string
+done
+echo
+
+for arg_string in "${args[@]}"; do
+ # shellcheck disable=SC2086
+ tag=$(python3 -m utility $arg_string)
+ tags[${#tags[@]}]=$tag
+ # shellcheck disable=SC2086
+ python3 $SCRIPT $arg_string | docker build -t $tag -
+done
echo "Run the following to upload the updated images."
echo "docker login"
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-"""
+"""Building block based Dockerfile generation for CI testing images.
+
Generates a set of docker images used for running GROMACS CI on Gitlab.
The images are prepared according to a selection of build configuration targets
that hope to cover a broad enough scope of different possible systems,
Based on the example script provided by the NVidia HPCCM repository.
+Reference:
+ `NVidia HPC Container Maker <https://github.com/NVIDIA/hpc-container-maker>`__
+
Authors:
* Paul Bauer <paul.bauer.q@gmail.com>
* Eric Irrgang <ericirrgang@gmail.com>
$ python3 scripted_gmx_docker_builds.py --format docker > Dockerfile && docker build .
$ python3 scripted_gmx_docker_builds.py | docker build -
+See Also:
+ :file:`buildall.sh`
+
"""
import argparse
'ccache',
'git',
'gnupg',
+ 'gpg-agent',
'libfftw3-dev',
'libhwloc-dev',
'liblapack-dev',
'wget',
'xsltproc']
+_opencl_extra_packages = [
+ 'nvidia-opencl-dev',
+ # The following require apt_ppas=['ppa:intel-opencl/intel-opencl']
+ 'intel-opencl-icd',
+ 'ocl-icd-libopencl1',
+ 'ocl-icd-opencl-dev',
+ 'opencl-headers',
+ # The following require
+ # apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
+ # apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main']
+ 'libelf1',
+ 'rocm-opencl',
+ 'rocm-dev',
+ 'clinfo'
+]
+
# Extra packages needed to build Python installations from source.
_python_extra_packages = ['build-essential',
'ca-certificates',
'texlive-fonts-recommended',
'texlive-fonts-extra']
-# Supported Python versions for maintained branches.
-_python_versions = ['3.6.10', '3.7.7', '3.8.2']
-
# Parse command line arguments
-parser = argparse.ArgumentParser(description='GROMACS CI image creation script', parents=[utility.parser])
+parser = argparse.ArgumentParser(description='GROMACS CI image creation script',
+ parents=[utility.parser])
parser.add_argument('--format', type=str, default='docker',
choices=['docker', 'singularity'],
help='Container specification format (default: docker)')
-parser.add_argument('--venvs', nargs='*', type=str, default=_python_versions,
- help='List of Python versions ("major.minor.patch") for which to install venvs. '
- 'Default: {}'.format(' '.join(_python_versions)))
def base_image_tag(args) -> str:
return []
+def get_opencl_packages(args) -> typing.Iterable[str]:
+ if (args.doxygen is None) and (args.oneapi is None):
+ return _opencl_extra_packages
+ else:
+ return []
+
+
def get_compiler(args, compiler_build_stage: hpccm.Stage = None) -> bb_base:
# Compiler
if args.icc is not None:
return None
-def get_opencl(args):
- # Add OpenCL environment if needed
- if (args.opencl is not None):
- if args.opencl == 'nvidia':
- if (args.cuda is None):
- raise RuntimeError('Need Nvidia environment for Nvidia OpenCL image')
-
- return hpccm.building_blocks.packages(ospackages=['nvidia-opencl-dev'])
-
- elif args.opencl == 'intel':
- # Note, when using oneapi, there is bundled OpenCL support, so this
- # installation is not needed.
- return hpccm.building_blocks.packages(
- apt_ppas=['ppa:intel-opencl/intel-opencl'],
- ospackages=['opencl-headers', 'ocl-icd-libopencl1',
- 'ocl-icd-opencl-dev', 'intel-opencl-icd'])
-
- elif args.opencl == 'amd':
- # libelf1 is a necessary dependency for something in the ROCm stack,
- # which they should set up, but seem to have omitted.
- return hpccm.building_blocks.packages(
- apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
- apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main'],
- ospackages=['ocl-icd-libopencl1', 'ocl-icd-opencl-dev', 'opencl-headers', 'libelf1', 'rocm-opencl', 'rocm-dev', 'clinfo'])
- else:
- return None
-
-
def get_clfft(args):
if (args.clfft is not None):
return hpccm.building_blocks.generic_cmake(
# Building blocks are chunks of container-builder instructions that can be
# copied to any build stage with the addition operator.
building_blocks = collections.OrderedDict()
+ building_blocks['base_packages'] = hpccm.building_blocks.packages(
+ ospackages=_common_packages)
# These are the most expensive and most reusable layers, so we put them first.
building_blocks['compiler'] = get_compiler(args, compiler_build_stage=stages.get('compiler_build'))
building_blocks['mpi'] = get_mpi(args, building_blocks['compiler'])
+ for i, cmake in enumerate(args.cmake):
+ building_blocks['cmake' + str(i)] = hpccm.building_blocks.cmake(
+ eula=True,
+ prefix='/usr/local/cmake-{}'.format(cmake),
+ version=cmake)
# Install additional packages early in the build to optimize Docker build layer cache.
- os_packages = _common_packages + get_llvm_packages(args)
+ os_packages = list(get_llvm_packages(args)) + get_opencl_packages(args)
if args.doxygen is not None:
os_packages += _docs_extra_packages
if args.oneapi is not None:
os_packages += ['lsb-release']
- building_blocks['ospackages'] = hpccm.building_blocks.packages(ospackages=os_packages)
+ building_blocks['extra_packages'] = hpccm.building_blocks.packages(
+ ospackages=os_packages,
+ apt_ppas=['ppa:intel-opencl/intel-opencl'],
+ apt_keys=['http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key'],
+ apt_repositories=['deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main']
+ )
- building_blocks['cmake'] = hpccm.building_blocks.cmake(eula=True, version=args.cmake)
- building_blocks['opencl'] = get_opencl(args)
building_blocks['clfft'] = get_clfft(args)
# Add Python environments to MPI images, only, so we don't have to worry
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
-"""A `utility` module helps manage the matrix of configurations for CI testing and build containers.
+"""A utility module to help manage the matrix of configurations for CI testing and build containers.
-Provides importable argument parser.
+When called as a standalone script, prints a Docker image name based on the
+command line arguments. The Docker image name is of the form used in the GROMACS
+CI pipeline jobs.
+
+Example::
+
+ $ python3 -m utility --llvm --doxygen
+ gromacs/ci-ubuntu-18.04-llvm-7-docs
+
+See Also:
+ :file:`buildall.sh`
+
+As a module, provides importable argument parser and docker image name generator.
+
+Note that the parser is created with ``add_help=False`` to make it friendly as a
+parent parser, but this means that you must derive a new parser from it if you
+want to see the full generated command line help.
+
+Example::
+
+    import utility
+ # utility.parser does not support `-h` or `--help`
+ parser = argparse.ArgumentParser(
+ description='GROMACS CI image creation script',
+ parents=[utility.parser])
+ # ArgumentParser(add_help=True) is default, so parser supports `-h` and `--help`
+
+See Also:
+ :file:`scripted_gmx_docker_builds.py`
Authors:
* Paul Bauer <paul.bauer.q@gmail.com>
Instead, inherit from it with the *parents* argument to :py:class:`argparse.ArgumentParser`
"""
-parser.add_argument('--cmake', type=str, default='3.13.0',
+parser.add_argument('--cmake', nargs='*', type=str, default=['3.13.0', '3.15.7', '3.17.2'],
help='Selection of CMake version to provide to base image')
+
compiler_group = parser.add_mutually_exclusive_group()
compiler_group.add_argument('--gcc', type=int, nargs='?', const=7, default=7,
help='Select GNU compiler tool chain. (Default) '
parser.add_argument('--tsan', type=str, nargs='?', const='llvm', default=None,
help='Build special compiler versions with TSAN OpenMP support')
-parser.add_argument('--opencl', type=str, nargs='?', const='nvidia', default=None,
- help='Provide environment for OpenCL builds')
-
parser.add_argument('--clfft', type=str, nargs='?', const='master', default=None,
help='Add external clFFT libraries to the build image')
parser.add_argument('--doxygen', type=str, nargs='?', const='1.8.5', default=None,
help='Add doxygen environment for documentation builds. Also adds other requirements needed for final docs images.')
+
+# Supported Python versions for maintained branches.
+_python_versions = ['3.6.10', '3.7.7', '3.8.2']
+parser.add_argument('--venvs', nargs='*', type=str, default=_python_versions,
+ help='List of Python versions ("major.minor.patch") for which to install venvs. '
+ 'Default: {}'.format(' '.join(_python_versions)))
+
+
+def image_name(configuration: argparse.Namespace) -> str:
+ """Generate docker image name.
+
+ The configuration slug has the form::
+
+ <distro>-<version>-<compiler>-<major version>[-<gpusdk>-<version>][-<use case>]
+
+ Image name is prefixed by ``gromacs/ci-``
+
+ Arguments:
+ configuration: Docker image configuration as described by the parsed arguments.
+
+ """
+ elements = []
+ for distro in ('centos', 'ubuntu'):
+ version = getattr(configuration, distro, None)
+ if version is not None:
+ elements.append(distro + '-' + version)
+ break
+ for compiler in ('icc', 'llvm', 'gcc'):
+ version = getattr(configuration, compiler, None)
+ if version is not None:
+ elements.append(compiler + '-' + str(version).split('.')[0])
+ break
+ for gpusdk in ('cuda',):
+ version = getattr(configuration, gpusdk, None)
+ if version is not None:
+ elements.append(gpusdk + '-' + version)
+ if configuration.oneapi is not None:
+ elements.append('oneapi-' + configuration.oneapi)
+
+ # Check for special cases
+ # The following attribute keys indicate the image is built for the named
+ # special use case.
+ cases = {'doxygen': 'docs',
+ 'tsan': 'tsan'}
+ for attr in cases:
+ value = getattr(configuration, attr, None)
+ if value is not None:
+ elements.append(cases[attr])
+ slug = '-'.join(elements)
+ return 'gromacs/ci-' + slug
+
+
+if __name__ == "__main__":
+ args = argparse.ArgumentParser(parents=[parser]).parse_args()
+ print(image_name(args))
- .variables:default
- .rules:nightly-only-for-release
cache: {}
- # Docker image uploaded to dockerhub by user eriklindahl
- # TODO: Get DockerFile for admin/dockerfiles
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
stage: configure-build
variables:
KUBERNETES_CPU_LIMIT: 1
- .variables:default
- .rules:merge-and-post-merge-acceptance
cache: {}
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
stage: configure-build
variables:
KUBERNETES_CPU_LIMIT: 1
cache: {}
# Docker image uploaded to dockerhub by user eriklindahl
# TODO: Get DockerFile for admin/dockerfiles
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
stage: release-package
variables:
KUBERNETES_CPU_LIMIT: 1
- .use-ccache
- .before_script:default
- .docs:build
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
variables:
KUBERNETES_CPU_LIMIT: 4
KUBERNETES_CPU_REQUEST: 2
BUILD_DIR: build-package
release-verify:
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
stage: release-verify
extends:
- .variables:default
- .gromacs:base:configure
- .before_script:default
# TODO (#3480) this should be organized more like the current documentation.py script
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
stage: configure-build
cache: {}
variables:
- .before_script:default
- .rules:nightly-only-for-release
# TODO (#3480) this should be organized more like the current documentation.py script
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
stage: release-configure
cache: {}
variables:
.docs:build:
# TODO (#3480) this should be organized more like the current documentation.py script
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
script:
- cd $BUILD_DIR
- cmake --build . --target gmx -- -j8
- .gromacs:base:build
- .before_script:default
# TODO (#3480) this should be organized more like the current documentation.py script
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
cache: {}
variables:
KUBERNETES_CPU_LIMIT: 4
- .use-clang:base
- .rules:basic-push
stage: pre-build
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
KUBERNETES_CPU_LIMIT: 8
KUBERNETES_CPU_REQUEST: 4
KUBERNETES_MEMORY_REQUEST: 8Gi
COMPILER_MAJOR_VERSION: 9
BUILD_DIR: simple-build
script:
+ - CMAKE=${CMAKE:-$(which cmake)}
- echo $CMAKE_COMPILER_SCRIPT
- echo $CMAKE_EXTRA_OPTIONS
- echo $CMAKE_SIMD_OPTIONS
mkdir $BUILD_DIR ;
fi
- cd $BUILD_DIR
- - cmake ..
+ - $CMAKE ..
-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
$CMAKE_COMPILER_SCRIPT
$CMAKE_EXTRA_OPTIONS
2>&1 | tee cmakeLog.log
- awk '/CMake Warning/,/^--|^$/' cmakeLog.log | tee cmakeErrors.log
- if [ -s cmakeErrors.log ] ; then echo "Found CMake warning while processing build"; cat cmakeErrors.log ; exit 1; fi
- - cmake --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
- - cmake --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
+ - $CMAKE --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
+ - $CMAKE --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
- awk '/warning/,/warning.*generated|^$/' buildLogFile.log testBuildLogFile.log
| grep -v "CMake" | tee buildErrors.log || true
- grep "cannot be built" buildLogFile.log testBuildLogFile.log | tee -a buildErrors.log || true
- - cmake --build . --target install 2>&1 | tee installBuildLogFile.log
+ - $CMAKE --build . --target install 2>&1 | tee installBuildLogFile.log
- if [ -s buildErrors.log ] ; then echo "Found compiler warning during build"; cat buildErrors.log; exit 1; fi
- ctest -D ExperimentalTest --output-on-failure | tee ctestLog.log || true
- awk '/The following tests FAILED/,/^Errors while running CTest|^$/'
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=Debug"
CMAKE_GPU_OPTIONS: "-DGMX_GPU=OFF"
script:
+ - CMAKE=${CMAKE:-$(which cmake)}
- echo $CMAKE_COMPILER_SCRIPT
- echo $CMAKE_EXTRA_OPTIONS
- echo $CMAKE_SIMD_OPTIONS
mkdir $BUILD_DIR ;
fi
- cd $BUILD_DIR
- - cmake ..
+ - $CMAKE ..
-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
$CMAKE_COMPILER_SCRIPT
$CMAKE_EXTRA_OPTIONS
RELEASE_SOURCE: release-source-from-tarball
RELEASE_REGRESSIONTESTS: release-regressiontests-from-tarball
script:
+ - CMAKE=${CMAKE:-$(which cmake)}
- VERSION=`cat version.json |
python3 -c "import json,sys; print(json.load(sys.stdin)['version'])"`
- if [[ $GROMACS_RELEASE != "true" ]] ; then
- .use-gcc:base
- .use-opencl
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=AVX2_256"
CMAKE_EXTRA_OPTIONS: "-DGMX_EXTERNAL_CLFFT=ON -DGMX_INSTALL_LEGACY_API=ON"
COMPILER_MAJOR_VERSION: 7
+gromacs:clang-8-cuda-10.0:configure:
+ extends:
+ - .gromacs:base:configure
+ - .use-clang:base
+ - .use-cuda
+ - .rules:merge-and-post-merge-acceptance
+ image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+ CMAKE_SIMD_OPTIONS: "-DGMX_USE_SIMD_KERNELS=off"
+ CMAKE_EXTRA_OPTIONS: "-DGMX_CLANG_CUDA=ON"
+ CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
+ COMPILER_MAJOR_VERSION: 8
+
gromacs:gcc-8-cuda-11.0:configure:
extends:
- .gromacs:base:configure
- .use-gcc:base
- .use-cuda
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=SSE4.1"
COMPILER_MAJOR_VERSION: 8
- .use-cuda
- .use-mpi
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=SSE4.1"
COMPILER_MAJOR_VERSION: 8
- .gromacs:base:configure
- .use-clang:base
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
COMPILER_MAJOR_VERSION: 8
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=TSAN"
- .use-clang:base
- .use-mpi
- .rules:merge-requests
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
COMPILER_MAJOR_VERSION: 9
CMAKE_PRECISION_OPTIONS: -DGMX_DOUBLE=ON
- .gromacs:base:configure
- .use-clang:base
- .rules:merge-requests
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
variables:
- CMAKE_COMPILER_SCRIPT: "-DCMAKE_CXX_COMPILER=/usr/local/libexec/c++-analyzer -DCMAKE_C_COMPILER=gcc"
- CMAKE_EXTRA_OPTIONS: "-DGMX_CLANG_ANALYZER=ON -DGMX_OPENMP=OFF -DGMX_USE_RDTSCP=OFF -DGMX_FFT_LIBRARY=fftpack -DGMX_DEVELOPER_BUILD=ON"
- CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=None"
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+ CMAKE_COMPILER_SCRIPT: "-DCMAKE_CXX_COMPILER=/usr/local/libexec/c++-analyzer -DCMAKE_C_COMPILER=gcc"
+ CMAKE_EXTRA_OPTIONS: "-DGMX_CLANG_ANALYZER=ON -DGMX_OPENMP=OFF -DGMX_USE_RDTSCP=OFF -DGMX_FFT_LIBRARY=fftpack -DGMX_DEVELOPER_BUILD=ON"
+ CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=None"
gromacs:clang-ASAN:configure:
extends:
- .gromacs:base:configure
- .use-clang:base
- .rules:merge-requests
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
COMPILER_MAJOR_VERSION: 8
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=ASAN"
- .use-oneapi:base
- .use-opencl
- .rules:merge-requests
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
COMPILER_MAJOR_VERSION: 2021
gromacs:oneapi-2021.1-beta09-sycl:configure:
- .use-oneapi:base
- .use-sycl
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
COMPILER_MAJOR_VERSION: 2021
gromacs:clang-UBSAN:configure:
- .gromacs:base:configure
- .use-clang:base
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
COMPILER_MAJOR_VERSION: 8
CMAKE_BUILD_TYPE_OPTIONS: "-DCMAKE_BUILD_TYPE=UBSAN"
- .use-mpi
- .use-cuda
- .rules:nightly-only-for-release
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
COMPILER_MAJOR_VERSION: 8
RELEASE_BUILD_DIR: release-builds-gcc
CMAKE_EXTRA_OPTIONS: "-DGMX_BUILD_MDRUN_ONLY=ON"
- .use-gcc:base
- .use-opencl
- .rules:nightly-only-for-release
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
COMPILER_MAJOR_VERSION: 7
CMAKE_EXTRA_OPTIONS: "-DGMX_EXTERNAL_CLFFT=ON"
RELEASE_BUILD_DIR: release-builds-gcc
- .use-clang:base
- .use-mpi
- .rules:nightly-only-for-release
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
COMPILER_MAJOR_VERSION: 9
RELEASE_BUILD_DIR: release-builds-clang
CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=ON"
- .use-oneapi:base
- .use-opencl
- .rules:nightly-only-for-release
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
COMPILER_MAJOR_VERSION: 2021
RELEASE_BUILD_DIR: release-builds-oneapi
.gromacs:base:build:
stage: build
script:
+ - CMAKE=${CMAKE:-$(which cmake)}
- cd $BUILD_DIR
- - cmake --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
- - cmake --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
+ - $CMAKE --build . -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee buildLogFile.log
+ - $CMAKE --build . --target tests -- -j$KUBERNETES_CPU_LIMIT 2>&1 | tee testBuildLogFile.log
- awk '/warning/,/warning.*generated|^$/' buildLogFile.log testBuildLogFile.log
| grep -v "CMake" | tee buildErrors.log || true
- grep "cannot be built" buildLogFile.log testBuildLogFile.log | tee -a buildErrors.log || true
- - cmake --build . --target install 2>&1 | tee installBuildLogFile.log
+ - $CMAKE --build . --target install 2>&1 | tee installBuildLogFile.log
- if [ -s buildErrors.log ] ; then echo "Found compiler warning during build"; cat buildErrors.log; exit 1; fi
- for file in `find . -mindepth 1 -name "*.o" ! -type l` ; do echo $file ; rm $file ; done 2>&1 > remove-build-objects.log
- cd ..
.gromacs:static-analyzer-build:
stage: build
script:
+ - CMAKE=${CMAKE:-$(which cmake)}
- cd $BUILD_DIR
- - scan-build -o scan_html cmake --build . -- -j8 2>&1 | tee buildLogFile.log
+ - scan-build -o scan_html $CMAKE --build . -- -j8 2>&1 | tee buildLogFile.log
- awk '/warning/,/warning.*generated|^$/' buildLogFile.log
| grep -v "CMake" | tee buildErrors.log || true
- grep "cannot be built" buildLogFile.log | tee -a buildErrors.log || true
- .before_script:default
- .use-ccache
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
needs:
- job: gromacs:gcc-7:configure
- artifacts: true
+
+gromacs:clang-8-cuda-10.0:build:
+ extends:
+ - .variables:default
+ - .gromacs:base:build
+ - .use-clang:base
+ - .use-ccache
+ - .rules:merge-and-post-merge-acceptance
+ image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+ needs:
+ - job: gromacs:clang-8-cuda-10.0:configure
gromacs:gcc-8-cuda-11.0:build:
extends:
- .before_script:default
- .use-ccache
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
+ variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
needs:
- job: gromacs:gcc-8-cuda-11.0:configure
- .before_script:default
- .use-ccache
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
+ variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
needs:
- job: gromacs:gcc-8-cuda-11.0:configureMPI
- .use-clang:base
- .use-ccache
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
needs:
- job: gromacs:clang-TSAN:configure
- .use-clang:base
- .use-ccache
- .rules:merge-requests
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
tags:
- k8s-scilifelab
needs:
- .use-clang:base
- .use-ccache
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
tags:
- k8s-scilifelab
needs:
- .use-clang:base
- .use-ccache
- .rules:merge-requests
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
tags:
- k8s-scilifelab
needs:
- .use-ccache
- .use-oneapi:base
- .rules:merge-requests
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+ variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
needs:
- job: gromacs:oneapi-2021.1-beta09-opencl:configure
- .use-ccache
- .use-oneapi:base
- .rules:merge-and-post-merge-acceptance
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+ variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
needs:
- job: gromacs:oneapi-2021.1-beta09-sycl:configure
- .before_script:default
- .use-ccache
- .rules:merge-requests
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
+ variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
needs:
- job: gromacs:clang-9-mpi:configure
- .rules:nightly-only-for-release
stage: release-build
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
BUILD_DIR: release-builds-gcc
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
needs:
- job: gromacs:gcc-8-cuda-11.0:release:configure
- .rules:nightly-only-for-release
stage: release-build
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
BUILD_DIR: release-builds-gcc
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
needs:
- job: gromacs:gcc-7:release:configure
- .rules:nightly-only-for-release
stage: release-build
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
BUILD_DIR: release-builds-clang
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
needs:
- job: gromacs:clang-9:release:configure
- .rules:nightly-only-for-release
stage: release-build
variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
BUILD_DIR: release-builds-oneapi
COMPILER_MAJOR_VERSION: 2021
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
needs:
- job: gromacs:oneapi-2021.1-beta09-opencl:release:configure
variables:
CTEST_RUN_MODE: "ExperimentalTest"
script:
+ - CMAKE=${CMAKE:-$(which cmake)}
- cd $BUILD_DIR
- export UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1:suppressions=$CI_PROJECT_DIR/admin/ubsan-suppressions.txt
# Needed to run MPI enabled code in the docker images, until we set up different users
extends:
- .gromacs:base:test
- .rules:merge-requests
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
LD_LIBRARY_PATH: "/opt/rocm-3.5.0/opencl/lib"
needs:
- job: gromacs:gcc-7:build
+gromacs:clang-8-cuda-10.0:test:
+ extends:
+ - .gromacs:base:test
+ - .rules:post-merge-acceptance
+ image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+ KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
+ KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
+ tags:
+ - k8s-scilifelab
+ needs:
+ - job: gromacs:clang-8-cuda-10.0:build
+
gromacs:gcc-8-cuda-11.0:test:
extends:
- .gromacs:base:test
- .rules:merge-requests
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
tags:
extends:
- .gromacs:base:test
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
GMX_GPU_DD_COMMS: 1
extends:
- .gromacs:base:test
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
needs:
- job: gromacs:clang-TSAN:build
- .gromacs:base:test
- .use-clang:base
- .rules:merge-requests
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
CTEST_RUN_MODE: "ExperimentalMemCheck"
tags:
- k8s-scilifelab
- .gromacs:base:test
- .use-clang:base
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
tags:
- k8s-scilifelab
needs:
- .gromacs:base:test
- .use-oneapi:base
- .rules:merge-requests
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+ variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
needs:
- job: gromacs:oneapi-2021.1-beta09-opencl:build
- .gromacs:base:test
- .use-oneapi:base
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+ variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
needs:
- job: gromacs:oneapi-2021.1-beta09-sycl:build
extends:
- .gromacs:base:test
- .rules:merge-requests
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
+ variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
tags:
- k8s-scilifelab
needs:
extends:
- .gromacs:base:regressiontest
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
REGRESSIONTEST_PME_RANK_NUMBER: 0
- job: gromacs:gcc-7:build
- job: regressiontests:prepare
+gromacs:clang-8-cuda-10.0:regressiontest:
+ extends:
+ - .gromacs:base:regressiontest
+ - .rules:post-merge-acceptance
+ image: gromacs/ci-ubuntu-18.04-llvm-8-cuda-10.0
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
+ KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
+ KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
+ REGRESSIONTEST_PME_RANK_NUMBER: 0
+ REGRESSIONTEST_TOTAL_RANK_NUMBER: 2
+ REGRESSIONTEST_OMP_RANK_NUMBER: 1
+ tags:
+ - k8s-scilifelab
+ needs:
+ - job: gromacs:clang-8-cuda-10.0:build
+ - job: regressiontests:prepare
+
+
gromacs:gcc-8-cuda-11.0:regressiontest:
extends:
- .gromacs:base:regressiontest
- .rules:merge-requests
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
REGRESSIONTEST_PME_RANK_NUMBER: 0
extends:
- .gromacs:base:regressiontest
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 2
REGRESSIONTEST_PME_RANK_NUMBER: 0
when: always
expire_in: 1 week
-gromacs:gcc-8-cuda-11.0:regressiontest-gpucommupd-MPI:
+gromacs:gcc-8-cuda-11.0:regressiontest-upd-tMPI:
extends:
- .gromacs:base:regressiontest
- .rules:post-merge-acceptance
image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
variables:
+ KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
+ KUBERNETES_EXTENDED_RESOURCE_LIMIT: 2
+ REGRESSIONTEST_PME_RANK_NUMBER: 0
+ REGRESSIONTEST_TOTAL_RANK_NUMBER: 4
+ REGRESSIONTEST_OMP_RANK_NUMBER: 1
+ GMX_FORCE_UPDATE_DEFAULT_GPU: 1
+ tags:
+ - k8s-scilifelab
+ needs:
+ - job: gromacs:gcc-8-cuda-11.0:build
+ - job: regressiontests:prepare
+ artifacts:
+ paths:
+ - regressiontests
+ when: always
+ expire_in: 1 week
+
+gromacs:gcc-8-cuda-11.0:regressiontest-gpucommupd-MPI:
+ extends:
+ - .gromacs:base:regressiontest
+ - .rules:post-merge-acceptance
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
+ variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 2
REGRESSIONTEST_PME_RANK_NUMBER: 0
extends:
- .gromacs:base:regressiontest
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
tags:
- k8s-scilifelab
needs:
- .gromacs:base:regressiontest
- .use-clang:base
- .rules:merge-requests
- image: gromacs/cmake-3.13.0-llvm-8-tsan-master
+ image: gromacs/ci-ubuntu-18.04-llvm-8-tsan
+ variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
tags:
- k8s-scilifelab
needs:
extends:
- .gromacs:base:regressiontest
- .rules:merge-requests
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
REGRESSIONTEST_DOUBLE: "-double"
REGRESSIONTEST_PARALLEL: "-np"
tags:
- .gromacs:base:regressiontest
- .use-oneapi:base
- .rules:merge-requests
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+ variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
needs:
- job: gromacs:oneapi-2021.1-beta09-opencl:build
- job: regressiontests:prepare
- .gromacs:base:regressiontest
- .use-oneapi:base
- .rules:post-merge-acceptance
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
+ variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
needs:
- job: gromacs:oneapi-2021.1-beta09-sycl:build
- job: regressiontests:prepare
- .gromacs:base:test
- .rules:nightly-only-for-release
stage: release-tests
- image: gromacs/cmake-3.15.7-gcc-8-cuda-11.0-nvidiaopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-8-cuda-11.0
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
KUBERNETES_EXTENDED_RESOURCE_NAME: "nvidia.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
BUILD_DIR: release-builds-gcc
- .gromacs:base:test
- .rules:nightly-only-for-release
stage: release-tests
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
BUILD_DIR: release-builds-gcc
KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
- .gromacs:base:test
- .rules:nightly-only-for-release
stage: release-tests
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
BUILD_DIR: release-builds-clang
needs:
- job: gromacs:clang-9:release:configure
- .use-oneapi:base
- .rules:nightly-only-for-release
stage: release-tests
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
BUILD_DIR: release-builds-oneapi
needs:
- job: gromacs:oneapi-2021.1-beta09-opencl:release:configure
- .gromacs:base:regressiontest
- .rules:nightly-only-for-release
stage: release-tests
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
variables:
+ CMAKE: /usr/local/cmake-3.13.0/bin/cmake
BUILD_DIR: release-builds-gcc
KUBERNETES_EXTENDED_RESOURCE_NAME: "amd.com/gpu"
KUBERNETES_EXTENDED_RESOURCE_LIMIT: 1
- .gromacs:base:regressiontest
- .rules:nightly-only-for-release
stage: release-tests
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
BUILD_DIR: release-builds-clang
REGRESSIONTEST_DOUBLE: "-double"
REGRESSIONTEST_PARALLEL: "-np"
- .use-oneapi:base
- .rules:nightly-only-for-release
stage: release-tests
- image: gromacs/cmake-3.17.2-oneapi-2021.1-beta09-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7-oneapi-2021.1-beta09
variables:
+ CMAKE: /usr/local/cmake-3.17.2/bin/cmake
BUILD_DIR: release-builds-oneapi
REGRESSIONTEST_PME_RANK_NUMBER: 0
REGRESSIONTEST_TOTAL_RANK_NUMBER: 2
- .gromacs:base:configure
- .use-clang:base
- .rules:basic-push
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
COMPILER_MAJOR_VERSION: 9
BUILD_DIR: build-clang-tidy
CMAKE_EXTRA_OPTIONS: -DCLANG_TIDY=clang-tidy-$COMPILER_MAJOR_VERSION -DGMX_CLANG_TIDY=ON -DGMX_COMPILER_WARNINGS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
- .gromacs:base:configure
- .use-clang:base
- .rules:nightly-not-for-release
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
COMPILER_MAJOR_VERSION: 9
BUILD_DIR: build-clang-tidy
CMAKE_EXTRA_OPTIONS: -DCLANG_TIDY=clang-tidy-$COMPILER_MAJOR_VERSION -DGMX_CLANG_TIDY=ON -DGMX_COMPILER_WARNINGS=ON
- .variables:default
- .rules:nightly-not-for-release
stage: source-check
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
needs:
- job: clang-tidy:configure-schedule
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
BUILD_DIR: build-clang-tidy
clang-tidy:test:
- .variables:default
- .rules:basic-push
stage: source-check
- image: gromacs/cmake-3.15.7-llvm-9-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-llvm-9
needs:
- job: clang-tidy:configure-push
variables:
+ CMAKE: /usr/local/cmake-3.15.7/bin/cmake
COMPILER_MAJOR_VERSION: 9
BUILD_DIR: build-clang-tidy
EXTRA_INSTALLS: clang-tidy-$COMPILER_MAJOR_VERSION
- .rules:basic-push
cache: {}
stage: pre-build
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
variables:
COMPILER_MAJOR_VERSION: 7
KUBERNETES_CPU_LIMIT: 1
- .rules:basic-push
cache: {}
stage: pre-build
- image: gromacs/ci-docs-llvm-master
+ image: gromacs/ci-ubuntu-18.04-llvm-7-docs
variables:
KUBERNETES_CPU_LIMIT: 1
KUBERNETES_CPU_REQUEST: 1
KUBERNETES_MEMORY_REQUEST: 2Gi
BUILD_DIR: build-docs
script:
+ - CMAKE=${CMAKE:-$(which cmake)}
- cd $BUILD_DIR
- - cmake --build . --target check-source
+ - $CMAKE --build . --target check-source
- awk '/warning.*include style.*order/,/You can use.*rst|^$/' docs/doxygen/check-source.log | tee doxygenError.txt || true
- awk '/warning:.*includes/,/unnecessarily|^$/' docs/doxygen/check-source.log | tee -a doxygenError.txt || true
- awk '/Traceback/,/.*rror|^$/' docs/doxygen/doxygen*log docs/doxygen/check-source.log | tee -a doxygenError.txt || true
extends:
- .variables:default
- .use-clang:base
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
stage: test
variables:
KUBERNETES_CPU_LIMIT: 2
extends:
- .variables:default
- .use-clang:base
- image: gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi-master
+ image: gromacs/ci-ubuntu-18.04-gcc-7
stage: test
variables:
KUBERNETES_CPU_LIMIT: 2
EXTRA_INSTALLS: "curl libbz2-dev libffi-dev liblzma-dev libncurses5-dev libncursesw5-dev libreadline-dev libsqlite3-dev libssl-dev llvm python-openssl tk-dev zlib1g-dev"
script:
- source $INSTALL_DIR/bin/GMXRC
- - source $VENVPATH/bin/activate && INSTALL_DIR=$PWD/$INSTALL_DIR OMP_NUM_THREADS=1 bash admin/ci-scripts/build-and-test-sample_restraint-2020.sh
+ - source $VENVPATH/bin/activate && INSTALL_DIR=$PWD/$INSTALL_DIR OMP_NUM_THREADS=1 bash admin/ci-scripts/build-and-test-sample_restraint-2021.sh
artifacts:
reports:
junit:
# GROMACS is built against an MPI library.
# Clarification should be possible with resolution of #3672.
set(_gmx_mpi_type "library")
+ # Ref https://cmake.org/cmake/help/v3.13/module/FindMPI.html#variables-for-using-mpi
+ find_package(MPI COMPONENTS C)
+ if (MPI_C_FOUND)
+ target_link_libraries(gmxapi PRIVATE MPI::MPI_C)
+ else()
+ message(FATAL_ERROR "Building gmxapi for MPI-enabled GROMACS, but no MPI toolchain found.")
+ endif ()
elseif(GMX_THREAD_MPI)
# GROMACS is built with its internal thread-MPI implementation.
set(_gmx_mpi_type "tmpi")
#include "gromacs/commandline/filenm.h"
#include "gromacs/commandline/pargs.h"
#include "gromacs/gmxlib/network.h"
+#include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/hw_info.h"
#include "gromacs/mdlib/stophandler.h"
#include "gromacs/mdrunutility/logging.h"
#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/init.h"
-#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
#include "gmxapi/mpi/resourceassignment.h"
#include "gmxapi/exceptions.h"
}
ContextImpl::ContextImpl(MpiContextManager&& mpi) noexcept(std::is_nothrow_constructible_v<gmx::LegacyMdrunOptions>) :
- mpi_(std::move(mpi))
+ mpi_(std::move(mpi)),
+ hardwareInformation_(gmx_detect_hardware(
+ gmx::PhysicalNodeCommunicator(mpi_.communicator(), gmx_physicalnode_id_hash())))
{
// Confirm our understanding of the MpiContextManager invariant.
GMX_ASSERT(mpi_.communicator() == MPI_COMM_NULL ? !GMX_LIB_MPI : GMX_LIB_MPI,
auto mdModules = std::make_unique<MDModules>();
const char* desc[] = { "gmxapi placeholder text" };
- if (options_.updateFromCommandLine(argc, argv.data(), desc) == 0)
+
+ // LegacyMdrunOptions needs to be kept alive for the life of ContextImpl,
+ // so we use a data member for now.
+ gmx::LegacyMdrunOptions& options = options_;
+ if (options.updateFromCommandLine(argc, argv.data(), desc) == 0)
{
return nullptr;
}
ArrayRef<const std::string> multiSimDirectoryNames =
- opt2fnsIfOptionSet("-multidir", ssize(options_.filenames), options_.filenames.data());
+ opt2fnsIfOptionSet("-multidir", ssize(options.filenames), options.filenames.data());
+
// The SimulationContext is necessary with gmxapi so that
// resources owned by the client code can have suitable
// lifetime. The gmx wrapper binary uses the same infrastructure,
// but the lifetime is now trivially that of the invocation of the
// wrapper binary.
+ //
+ // For now, this should match the communicator used for hardware
+ // detection. There's no way to assert this is true.
auto communicator = mpi_.communicator();
// Confirm the precondition for simulationContext().
GMX_ASSERT(communicator == MPI_COMM_NULL ? !GMX_LIB_MPI : GMX_LIB_MPI,
SimulationContext simulationContext(communicator, multiSimDirectoryNames);
- StartingBehavior startingBehavior = StartingBehavior::NewSimulation;
- LogFilePtr logFileGuard = nullptr;
- gmx_multisim_t* ms = simulationContext.multiSimulation_.get();
- std::tie(startingBehavior, logFileGuard) =
- handleRestart(findIsSimulationMasterRank(ms, simulationContext.simulationCommunicator_),
- communicator, ms, options_.mdrunOptions.appendingBehavior,
- ssize(options_.filenames), options_.filenames.data());
+ StartingBehavior startingBehavior = StartingBehavior::NewSimulation;
+ LogFilePtr logFileGuard = nullptr;
+ gmx_multisim_t* ms = simulationContext.multiSimulation_.get();
+ std::tie(startingBehavior, logFileGuard) = handleRestart(
+ findIsSimulationMasterRank(ms, simulationContext.simulationCommunicator_),
+ simulationContext.simulationCommunicator_, ms, options.mdrunOptions.appendingBehavior,
+ ssize(options.filenames), options.filenames.data());
auto builder = MdrunnerBuilder(std::move(mdModules),
compat::not_null<SimulationContext*>(&simulationContext));
- builder.addSimulationMethod(options_.mdrunOptions, options_.pforce, startingBehavior);
- builder.addDomainDecomposition(options_.domdecOptions);
+ builder.addHardwareDetectionResult(hardwareInformation_.get());
+ builder.addSimulationMethod(options.mdrunOptions, options.pforce, startingBehavior);
+ builder.addDomainDecomposition(options.domdecOptions);
// \todo pass by value
- builder.addNonBonded(options_.nbpu_opt_choices[0]);
+ builder.addNonBonded(options.nbpu_opt_choices[0]);
// \todo pass by value
- builder.addElectrostatics(options_.pme_opt_choices[0], options_.pme_fft_opt_choices[0]);
- builder.addBondedTaskAssignment(options_.bonded_opt_choices[0]);
- builder.addUpdateTaskAssignment(options_.update_opt_choices[0]);
- builder.addNeighborList(options_.nstlist_cmdline);
- builder.addReplicaExchange(options_.replExParams);
+ builder.addElectrostatics(options.pme_opt_choices[0], options.pme_fft_opt_choices[0]);
+ builder.addBondedTaskAssignment(options.bonded_opt_choices[0]);
+ builder.addUpdateTaskAssignment(options.update_opt_choices[0]);
+ builder.addNeighborList(options.nstlist_cmdline);
+ builder.addReplicaExchange(options.replExParams);
// Need to establish run-time values from various inputs to provide a resource handle to Mdrunner
- builder.addHardwareOptions(options_.hw_opt);
+ builder.addHardwareOptions(options.hw_opt);
// \todo File names are parameters that should be managed modularly through further factoring.
- builder.addFilenames(options_.filenames);
+ builder.addFilenames(options.filenames);
// TODO: Remove `s` and `-cpi` from LegacyMdrunOptions before launch(). #3652
- auto simulationInput = makeSimulationInput(options_);
+ auto simulationInput = makeSimulationInput(options);
builder.addInput(simulationInput);
// Note: The gmx_output_env_t life time is not managed after the call to parse_common_args.
// \todo Implement lifetime management for gmx_output_env_t.
// \todo Output environment should be configured outside of Mdrunner and provided as a resource.
- builder.addOutputEnvironment(options_.oenv);
+ builder.addOutputEnvironment(options.oenv);
builder.addLogFile(logFileGuard.get());
// Note, creation is not mature enough to be exposed in the external API yet.
#include "gmxapi/context.h"
#include "gmxapi/session.h"
+struct gmx_hw_info_t;
+
namespace gmxapi
{
*/
const MpiContextManager mpi_;
+ /*! \brief Owning handle to the results of the hardware detection.
+ *
+ * The hardware is detected across the whole environment described
+ * by \c mpi_ */
+ std::unique_ptr<gmx_hw_info_t> hardwareInformation_;
+
private:
/*!
* \brief Basic constructor.
efCUB,
efXPM,
efRND,
+ efCSV,
efNR
};
COMMENT "Running nblib tests"
USES_TERMINAL VERBATIM)
-add_library(nblib SHARED "")
+set(NBLIB_MAJOR 0)
+set(NBLIB_MINOR 1)
+set(NBLIB_RELEASE ${NBLIB_MAJOR}.${NBLIB_MINOR}.0)
+
+add_library(nblib)
+set_target_properties(nblib PROPERTIES
+ VERSION_MAJOR ${NBLIB_MAJOR}
+ VERSION_MINOR ${NBLIB_MINOR}
+ SOVERSION ${NBLIB_MAJOR}
+ RELEASE ${NBLIB_RELEASE}
+ VERSION ${NBLIB_RELEASE}
+ LINKER_LANGUAGE CXX
+ OUTPUT_NAME "nblib")
target_sources(nblib
PRIVATE
gmx_target_compile_options(nblib)
-set_target_properties(nblib
- PROPERTIES
- LINKER_LANGUAGE CXX
- OUTPUT_NAME "nblib"
- )
-
target_link_libraries(nblib PRIVATE libgromacs)
target_include_directories(nblib PRIVATE ${PROJECT_SOURCE_DIR}/api)
include_directories(BEFORE ${CMAKE_SOURCE_DIR}/api)
kerneloptions.h
nblib.h
particletype.h
+ ppmap.h
simulationstate.h
topology.h
topologyhelpers.h
DESTINATION include/nblib)
endif()
+add_subdirectory(listed_forces)
add_subdirectory(samples)
add_subdirectory(util)
* \author Prashanth Kanduri <kanduri@cscs.ch>
* \author Sebastian Keller <keller@cscs.ch>
*/
+#include "nblib/exception.h"
#include "nblib/forcecalculator.h"
#include "nblib/gmxcalculator.h"
#include "nblib/gmxsetup.h"
void ForceCalculator::compute(gmx::ArrayRef<const Vec3> coordinates, gmx::ArrayRef<Vec3> forces)
{
- return gmxForceCalculator_->compute(coordinates, forces);
+ if (coordinates.size() != forces.size())
+ {
+ throw InputException("Coordinates array and force buffer size mismatch");
+ }
+
+ gmxForceCalculator_->compute(coordinates, forces);
}
void ForceCalculator::updatePairList(gmx::ArrayRef<const int> particleInfoAllVdW,
* costly to create this object since much of the SimulationState and NBKernelOptions has to be
* passed to the gromacs backend. However, once constructed, compute can be called repeatedly only
* paying the cost of the actual nonbonded force calculation. Repeated calls to compute on the same
- * coordinated will always return the same forces (within precision), so the user must update the
+ * coordinates will always return the same forces (within precision), so the user must update the
* positions using the forces generated here to advance a simulation. If the coordinates move
* sufficiently far from their positions at construction time, the efficiency of the calculation
* will suffer. To alleviate this, the user can call updatePairList.
// update the coordinates in the backend
nbv_->convertCoordinates(gmx::AtomLocality::Local, false, coordinateInput);
- // set forces to zero
- std::fill(forceOutput.begin(), forceOutput.end(), gmx::RVec{ 0, 0, 0 });
-
nbv_->dispatchNonbondedKernel(gmx::InteractionLocality::Local, *interactionConst_, *stepWork_,
enbvClearFYes, *forcerec_, enerd_.get(), nrnb_.get());
class SimulationState;
struct NBKernelOptions;
+/*! \brief GROMACS non-bonded force calculation backend
+ *
+ * This class hides the various GROMACS data structures and their interplay
+ * from the NBLIB user. The class is a private member of the ForceCalculator and
+ * is not intended for the public interface.
+ *
+ * Handles the task of storing the simulation problem description using the internal
+ * representation used within GROMACS. It currently supports short range non-bonded
+ * interactions (PP) on a single node CPU.
+ *
+ */
+
class GmxForceCalculator final
{
public:
const Box& box);
private:
+ //! Friend to allow setting up private members in this class
friend class NbvSetupUtil;
//! Non-Bonded Verlet object for force calculation
// Put everything together
auto nbv = std::make_unique<nonbonded_verlet_t>(std::move(pairlistSets), std::move(pairSearch),
- std::move(atomData), kernelSetup, nullptr, nullptr);
+ std::move(atomData), kernelSetup, nullptr,
+ nullWallcycle);
// Needs to be called with the number of unique ParticleTypes
nbnxn_atomdata_init(gmx::MDLogger(), nbv->nbat.get(), kernelSetup.kernelType, combinationRule,
namespace nblib
{
+/*! \brief Sets up the GROMACS data structures for the non-bonded force calculator
+ *
+ * This class initializes the GmxForceCalculator object, which internally
+ * contains various objects needed to perform non-bonded force calculations using
+ * the internal representation for the problem as required for GROMACS.
+ *
+ * The public functions of this class basically translate the problem description
+ * specified by the user in NBLIB. This ultimately returns the GmxForceCalculator
+ * object which is used by the ForceCalculator object in the user-facing library.
+ *
+ */
class NbvSetupUtil final
{
public:
//! Sets up t_forcerec object on the GmxForceCalculator
void setupForceRec(const matrix& box);
+    //! Returns a unique pointer to a GmxForceCalculator object
std::unique_ptr<GmxForceCalculator> getGmxForceCalculator()
{
return std::move(gmxForceCalculator_);
std::unique_ptr<GmxForceCalculator> gmxForceCalculator_;
};
+/*! \brief Calls the setup utilities needed to initialize a GmxForceCalculator object
+ *
+ * The GmxSetupDirector encapsulates the multi-stage setup of the GmxForceCalculator which
+ * is done using the public functions of the NbvSetupUtil. This separation ensures that the
+ * NbvSetupUtil object is temporary in scope. The function definition makes it easy for the
+ * developers to follow the sequence of calls and the dataflow involved in setting up
+ * the non-bonded force calculation backend. This is the only function needed to be called
+ * from the ForceCalculator during construction.
+ *
+ */
class GmxSetupDirector
{
public:
namespace nblib
{
-// NOLINTNEXTLINE(performance-unnecessary-value-param)
+
LeapFrog::LeapFrog(const Topology& topology, const Box& box) : box_(box)
{
inverseMasses_.resize(topology.numParticles());
--- /dev/null
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2020, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+#
+# \author Victor Holanda <victor.holanda@cscs.ch>
+# \author Joe Jordan <ejjordan@kth.se>
+# \author Prashanth Kanduri <kanduri@cscs.ch>
+# \author Sebastian Keller <keller@cscs.ch>
+#
+
+if(GMX_INSTALL_NBLIB_API)
+ install(FILES
+ bondtypes.h
+ calculator.h
+ definitions.h
+ DESTINATION include/nblib)
+endif()
+
+if(BUILD_TESTING)
+ add_subdirectory(tests)
+endif()
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \inpublicapi \file
+ * \brief
+ * Implements nblib supported bondtypes
+ *
+ * We choose to forward comparison operations to the
+ * corresponding std::tuple comparison operations.
+ * In order to do that without temporary copies,
+ * we employ std::tie, which requires lvalues as input.
+ * For this reason, bond type parameter getters are implemented
+ * with a const lvalue reference return.
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ */
+#ifndef NBLIB_LISTEDFORCES_BONDTYPES_H
+#define NBLIB_LISTEDFORCES_BONDTYPES_H
+
+#include <array>
+
+#include "nblib/particletype.h"
+#include "nblib/ppmap.h"
+#include "nblib/util/user.h"
+
+namespace nblib
+{
+using Name = std::string;
+using ForceConstant = real;
+using EquilDistance = real;
+using Exponent = real;
+
+using Degrees = StrongType<real, struct DegreeParameter>;
+using Radians = StrongType<real, struct RadianParameter>;
+
+/*! \brief Basic template for interactions with 2 parameters named forceConstant and equilDistance
+ *
+ * \tparam Phantom unused template parameter for type distinction
+ *
+ * Distinct bond types can be generated from this template with using declarations
+ * and declared, but undefined structs. For example:
+ * using HarmonicBondType = TwoParameterInteraction<struct HarmonicBondTypeParameter>;
+ * Note that HarmonicBondTypeParameter does not have to be defined.
+ */
+template<class Phantom>
+class TwoParameterInteraction
+{
+public:
+ TwoParameterInteraction() = default;
+ TwoParameterInteraction(ForceConstant f, EquilDistance d) : forceConstant_(f), equilDistance_(d)
+ {
+ }
+
+ [[nodiscard]] const ForceConstant& forceConstant() const { return forceConstant_; }
+ [[nodiscard]] const EquilDistance& equilDistance() const { return equilDistance_; }
+
+private:
+ ForceConstant forceConstant_;
+ EquilDistance equilDistance_;
+};
+
+template<class Phantom>
+inline bool operator<(const TwoParameterInteraction<Phantom>& a, const TwoParameterInteraction<Phantom>& b)
+{
+ return std::tie(a.forceConstant(), a.equilDistance())
+ < std::tie(b.forceConstant(), b.equilDistance());
+}
+
+template<class Phantom>
+inline bool operator==(const TwoParameterInteraction<Phantom>& a, const TwoParameterInteraction<Phantom>& b)
+{
+ return std::tie(a.forceConstant(), a.equilDistance())
+ == std::tie(b.forceConstant(), b.equilDistance());
+}
+
+/*! \brief harmonic bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, equilDistance) = 0.5 * forceConstant * (r - equilDistance)^2
+ */
+using HarmonicBondType = TwoParameterInteraction<struct HarmonicBondTypeParameter>;
+
+
+/*! \brief GROMOS bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, equilDistance) = 0.25 * forceConstant * (r^2 - equilDistance^2)^2
+ */
+using G96BondType = TwoParameterInteraction<struct G96BondTypeParameter>;
+
+
+/*! \brief FENE bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, equilDistance) = - 0.5 * forceConstant * equilDistance^2 * log( 1 - (r / equilDistance)^2)
+ */
+using FENEBondType = TwoParameterInteraction<struct FENEBondTypeParameter>;
+
+
+/*! \brief Half-attractive quartic bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, equilDistance) = 0.5 * forceConstant * (r - equilDistance)^4
+ */
+using HalfAttractiveQuarticBondType =
+ TwoParameterInteraction<struct HalfAttractiveQuarticBondTypeParameter>;
+
+
+/*! \brief Cubic bond type
+ *
+ * It represents the interaction of the form
+ * V(r; quadraticForceConstant, cubicForceConstant, equilDistance) = quadraticForceConstant * (r -
+ * equilDistance)^2 + quadraticForceConstant * cubicForceConstant * (r - equilDistance)^3
+ */
+struct CubicBondType
+{
+ CubicBondType() = default;
+ CubicBondType(ForceConstant fq, ForceConstant fc, EquilDistance d) :
+ quadraticForceConstant_(fq),
+ cubicForceConstant_(fc),
+ equilDistance_(d)
+ {
+ }
+
+ [[nodiscard]] const ForceConstant& quadraticForceConstant() const
+ {
+ return quadraticForceConstant_;
+ }
+ [[nodiscard]] const ForceConstant& cubicForceConstant() const { return cubicForceConstant_; }
+ [[nodiscard]] const EquilDistance& equilDistance() const { return equilDistance_; }
+
+private:
+ ForceConstant quadraticForceConstant_;
+ ForceConstant cubicForceConstant_;
+ EquilDistance equilDistance_;
+};
+
+inline bool operator<(const CubicBondType& a, const CubicBondType& b)
+{
+ return std::tie(a.quadraticForceConstant(), a.cubicForceConstant(), a.equilDistance())
+ < std::tie(b.quadraticForceConstant(), b.cubicForceConstant(), b.equilDistance());
+}
+
+inline bool operator==(const CubicBondType& a, const CubicBondType& b)
+{
+ return std::tie(a.quadraticForceConstant(), a.cubicForceConstant(), a.equilDistance())
+ == std::tie(b.quadraticForceConstant(), b.cubicForceConstant(), b.equilDistance());
+}
+
+/*! \brief Morse bond type
+ *
+ * It represents the interaction of the form
+ * V(r; forceConstant, exponent, equilDistance) = forceConstant * ( 1 - exp( -exponent * (r - equilDistance)))^2
+ */
+class MorseBondType
+{
+public:
+ MorseBondType() = default;
+ //! \brief construct from well depth (f), steepness exponent (e) and equilibrium distance (d)
+ MorseBondType(ForceConstant f, Exponent e, EquilDistance d) :
+ forceConstant_(f),
+ exponent_(e),
+ equilDistance_(d)
+ {
+ }
+
+ //! \brief access the force constant (well depth)
+ [[nodiscard]] const ForceConstant& forceConstant() const { return forceConstant_; }
+ //! \brief access the exponent (controls well steepness)
+ [[nodiscard]] const Exponent& exponent() const { return exponent_; }
+ //! \brief access the equilibrium distance
+ [[nodiscard]] const EquilDistance& equilDistance() const { return equilDistance_; }
+
+private:
+ ForceConstant forceConstant_;
+ Exponent exponent_;
+ EquilDistance equilDistance_;
+};
+
+//! \brief lexicographic ordering over (forceConstant, exponent, equilDistance)
+inline bool operator<(const MorseBondType& a, const MorseBondType& b)
+{
+ return std::tie(a.forceConstant(), a.exponent(), a.equilDistance())
+ < std::tie(b.forceConstant(), b.exponent(), b.equilDistance())
+}
+
+//! \brief member-wise equality over all three parameters
+inline bool operator==(const MorseBondType& a, const MorseBondType& b)
+{
+ return std::tie(a.forceConstant(), a.exponent(), a.equilDistance())
+ == std::tie(b.forceConstant(), b.exponent(), b.equilDistance())
+}
+
+
+/*! \brief default angle type
+ *
+ * Note: the angle is always stored as radians internally
+ */
+struct DefaultAngle : public TwoParameterInteraction<struct DefaultAngleParameter>
+{
+ DefaultAngle() = default;
+ //! \brief construct from angle given in radians
+ DefaultAngle(Radians angle, ForceConstant f) :
+ TwoParameterInteraction<struct DefaultAngleParameter>{ f, angle }
+ {
+ }
+
+ //! \brief construct from angle given in degrees; converted to radians for internal storage
+ DefaultAngle(Degrees angle, ForceConstant f) :
+ TwoParameterInteraction<struct DefaultAngleParameter>{ f, angle * DEG2RAD }
+ {
+ }
+};
+
+/*! \brief Proper Dihedral Implementation
+ */
+class ProperDihedral
+{
+public:
+ //! \brief periodicity of the dihedral potential
+ using Multiplicity = int;
+
+ ProperDihedral() = default;
+ //! \brief construct from phase angle in radians, force constant and multiplicity
+ ProperDihedral(Radians phi, ForceConstant f, Multiplicity m) :
+ phi_(phi),
+ forceConstant_(f),
+ multiplicity_(m)
+ {
+ }
+ //! \brief construct from phase angle in degrees; converted to radians for internal storage
+ ProperDihedral(Degrees phi, ForceConstant f, Multiplicity m) :
+ phi_(phi * DEG2RAD),
+ forceConstant_(f),
+ multiplicity_(m)
+ {
+ }
+
+ //! \brief returns the phase angle phi; NOTE(review): accessor is named equilDistance(),
+ //! presumably for uniform access from generic kernel code — confirm this is intentional
+ [[nodiscard]] const EquilDistance& equilDistance() const { return phi_; }
+ //! \brief access the force constant
+ [[nodiscard]] const ForceConstant& forceConstant() const { return forceConstant_; }
+ //! \brief access the multiplicity
+ [[nodiscard]] const Multiplicity& multiplicity() const { return multiplicity_; }
+
+private:
+ EquilDistance phi_;
+ ForceConstant forceConstant_;
+ Multiplicity multiplicity_;
+};
+
+//! \brief lexicographic ordering over (phi, forceConstant, multiplicity)
+inline bool operator<(const ProperDihedral& a, const ProperDihedral& b)
+{
+ return std::tie(a.equilDistance(), a.forceConstant(), a.multiplicity())
+ < std::tie(b.equilDistance(), b.forceConstant(), b.multiplicity())
+}
+
+//! \brief member-wise equality over all three parameters
+inline bool operator==(const ProperDihedral& a, const ProperDihedral& b)
+{
+ return std::tie(a.equilDistance(), a.forceConstant(), a.multiplicity())
+ == std::tie(b.equilDistance(), b.forceConstant(), b.multiplicity())
+}
+
+
+/*! \brief Improper Dihedral Implementation
+ */
+// NOTE(review): the tag type is spelled "ImproperDihdedralParameter" (typo for "Dihedral");
+// harmless since the tag is only used for type discrimination, but worth fixing before release
+struct ImproperDihedral : public TwoParameterInteraction<struct ImproperDihdedralParameter>
+{
+ ImproperDihedral() = default;
+ //! \brief construct from phase angle in radians and force constant
+ ImproperDihedral(Radians phi, ForceConstant f) :
+ TwoParameterInteraction<struct ImproperDihdedralParameter>{ f, phi }
+ {
+ }
+ //! \brief construct from phase angle in degrees; converted to radians for internal storage
+ ImproperDihedral(Degrees phi, ForceConstant f) :
+ TwoParameterInteraction<struct ImproperDihdedralParameter>{ f, phi * DEG2RAD }
+ {
+ }
+};
+
+/*! \brief Ryckaert-Belleman Dihedral Implementation
+ */
+class RyckaertBellemanDihedral
+{
+public:
+ RyckaertBellemanDihedral() = default;
+ //! \brief construct from the six Ryckaert-Bellemans cosine-expansion coefficients
+ RyckaertBellemanDihedral(real p1, real p2, real p3, real p4, real p5, real p6) :
+ parameters_{ p1, p2, p3, p4, p5, p6 }
+ {
+ }
+
+ //! \brief unchecked element access into the coefficient array (i must be < 6)
+ const real& operator[](std::size_t i) const { return parameters_[i]; }
+
+ //! \brief access all six coefficients
+ [[nodiscard]] const std::array<real, 6>& parameters() const { return parameters_; }
+
+ //! \brief number of coefficients (always 6)
+ [[nodiscard]] std::size_t size() const { return parameters_.size(); }
+
+private:
+ std::array<real, 6> parameters_;
+};
+
+//! \brief lexicographic ordering of the coefficient arrays
+inline bool operator<(const RyckaertBellemanDihedral& a, const RyckaertBellemanDihedral& b)
+{
+ return a.parameters() < b.parameters();
+}
+
+//! \brief element-wise equality of the coefficient arrays
+inline bool operator==(const RyckaertBellemanDihedral& a, const RyckaertBellemanDihedral& b)
+{
+ return a.parameters() == b.parameters();
+}
+
+
+/*! \brief Type for 5-center interaction (C-MAP)
+ *
+ * Note: no kernels currently implemented
+ */
+class Default5Center
+{
+public:
+ Default5Center() = default;
+ //! \brief construct from the two torsion angles and their respective force constants
+ Default5Center(Radians phi, Radians psi, ForceConstant fphi, ForceConstant fpsi) :
+ phi_(phi),
+ psi_(psi),
+ fphi_(fphi),
+ fpsi_(fpsi)
+ {
+ }
+
+ //! \brief access the phi angle
+ [[nodiscard]] const Radians& phi() const { return phi_; }
+ //! \brief access the psi angle
+ [[nodiscard]] const Radians& psi() const { return psi_; }
+ //! \brief access the force constant associated with phi
+ [[nodiscard]] const ForceConstant& fphi() const { return fphi_; }
+ //! \brief access the force constant associated with psi
+ [[nodiscard]] const ForceConstant& fpsi() const { return fpsi_; }
+
+private:
+ Radians phi_, psi_;
+ ForceConstant fphi_, fpsi_;
+};
+
+//! \brief lexicographic ordering over (phi, psi, fphi, fpsi)
+inline bool operator<(const Default5Center& a, const Default5Center& b)
+{
+ return std::tie(a.phi(), a.psi(), a.fphi(), a.fpsi())
+ < std::tie(b.phi(), b.psi(), b.fphi(), b.fpsi())
+}
+
+//! \brief member-wise equality over all four parameters
+inline bool operator==(const Default5Center& a, const Default5Center& b)
+{
+ return std::tie(a.phi(), a.psi(), a.fphi(), a.fpsi())
+ == std::tie(b.phi(), b.psi(), b.fphi(), b.fpsi())
+}
+
+
+} // namespace nblib
+#endif // NBLIB_LISTEDFORCES_BONDTYPES_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \inpublicapi \file
+ * \brief
+ * Implements a force calculator based on GROMACS data structures.
+ *
+ * Intended for internal use inside the ForceCalculator.
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ */
+
+#ifndef NBLIB_LISTEDFORCES_CALCULATOR_H
+#define NBLIB_LISTEDFORCES_CALCULATOR_H
+
+#include <memory>
+#include <unordered_map>
+
+#include "nblib/listed_forces/definitions.h"
+
+namespace gmx
+{
+template<typename T>
+class ArrayRef;
+} // namespace gmx
+
+namespace nblib
+{
+class Box;
+class PbcHolder;
+template<class T>
+class ForceBuffer;
+
+/*! \internal \brief object to calculate listed forces
+ *
+ */
+class ListedForceCalculator
+{
+public:
+ //! \brief one energy slot per supported listed interaction type
+ using EnergyType = std::array<real, std::tuple_size<ListedInteractionData>::value>;
+
+ /*! \brief construct from the interaction data, the size of the force buffer,
+ * the number of threads used for the force reduction, and the simulation box
+ */
+ ListedForceCalculator(const ListedInteractionData& interactions,
+ size_t bufferSize,
+ int numThreads,
+ const Box& box);
+
+ /*! \brief Dispatch the listed force kernels and reduce the forces
+ *
+ * This function adds the computed listed forces to all values in the passed in forces buffer,
+ * so it can be regarded as an output only param. In case this is being used in a simulation
+ * that uses the same force buffer for both non-bonded and listed forces, this call should be
+ * made only after the compute() call from the non-bonded ForceCalculator
+ *
+ * This function also stores the forces and energies from listed interactions in the internal
+ * buffer of the ListedForceCalculator object
+ *
+ * \param[in] coordinates to be used for the force calculation
+ * \param[out] forces buffer to store the output forces
+ */
+ void compute(gmx::ArrayRef<const Vec3> coordinates, gmx::ArrayRef<Vec3> forces, bool usePbc = false);
+
+ //! Alternative overload with the energies in an output buffer
+ void compute(gmx::ArrayRef<const Vec3> coordinates,
+ gmx::ArrayRef<Vec3> forces,
+ EnergyType& energies,
+ bool usePbc = false);
+
+ /*! \brief We need to declare the destructor here to move the (still default) implementation
+ * to the .cpp file. Omitting this declaration would mean an inline destructor
+ * which can't compile because the unique_ptr dtor needs ~ForceBuffer, which is not available
+ * here because it's incomplete.
+ */
+ ~ListedForceCalculator();
+
+private:
+ //! number of threads used for the threaded force reduction
+ //! NOTE(review): missing the trailing underscore used by every other private member — rename for consistency
+ int numThreads;
+
+ //! the main buffer to hold the final listed forces
+ std::vector<gmx::RVec> masterForceBuffer_;
+
+ //! holds the array of energies computed
+ EnergyType energyBuffer_;
+
+ //! holds the listed interactions split into groups for multithreading
+ std::vector<ListedInteractionData> threadedInteractions_;
+
+ //! reduction force buffers
+ std::vector<std::unique_ptr<ForceBuffer<gmx::RVec>>> threadedForceBuffers_;
+
+ //! PBC objects
+ std::unique_ptr<PbcHolder> pbcHolder_;
+
+ //! compute listed forces and energies, overwrites the internal buffers
+ void computeForcesAndEnergies(gmx::ArrayRef<const Vec3> x, bool usePbc = false);
+};
+
+} // namespace nblib
+
+#endif // NBLIB_LISTEDFORCES_CALCULATOR_H
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \inpublicapi \file
+ * \brief
+ * Definitions for supported nblib listed interaction data, such as bonds, angles, dihedrals, etc
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ *
+ * A note on the preprocessor (PP) usage in this file:
+ *
+ * The PP macros defined here are used exclusively to generate
+ * template instantiations declarations of the form "extern template function(X)"
+ * in header files and "template function(X)" in .cpp files.
+ * These declarations do not affect the program logic in any way and neither are they
+ * required to read and understand the behavior of the code as they do not
+ * result in any executable instructions.
+ * In fact, it would even be technically possible to omit these PP generated
+ * declarations in the header files and replace them with an unused static function
+ * in the .cpp file that calls the template function in question
+ * (e.g. Molecule::addInteraction) once with each type from the variadic template
+ * TypeLists declared in this file. This would be enough to create the required instantiations.
+ * It would, however, create more work for the compiler which then has to instantiate the
+ * templates in the header in each translation unit where the header is included.
+ * Doing this results in a compiler warning.
+ *
+ */
+#ifndef NBLIB_LISTEDFORCES_DEFINITIONS_H
+#define NBLIB_LISTEDFORCES_DEFINITIONS_H
+
+#include "nblib/util/user.h"
+#include "bondtypes.h"
+
+namespace nblib
+{
+
+//***********************************************************************************
+
+/*! \brief These macros define what interaction types are supported in
+ * -Molecule
+ * -Topology
+ * -ListedForceCalculator
+ *
+ * To enable force calculation for your new interaction type that you've added to bondtypes.h,
+ * list your new type here under the appropriate category and make sure that you've added
+ * a kernel in kernels.hpp
+ */
+
+//! \brief two-center (bond/pair) interaction types with force kernels
+#define SUPPORTED_TWO_CENTER_TYPES \
+ HarmonicBondType, G96BondType, CubicBondType, FENEBondType, HalfAttractiveQuarticBondType
+
+//! \brief three-center (angle) interaction types with force kernels
+#define SUPPORTED_THREE_CENTER_TYPES DefaultAngle
+
+//! \brief four-center (dihedral) interaction types with force kernels
+#define SUPPORTED_FOUR_CENTER_TYPES ProperDihedral, ImproperDihedral, RyckaertBellemanDihedral
+
+//! \brief five-center (CMAP) interaction types; no kernels implemented yet
+#define SUPPORTED_FIVE_CENTER_TYPES Default5Center
+
+//***********************************************************************************
+
+//! \brief concatenation of all supported interaction types, used to build ListedInteractionData
+#define SUPPORTED_LISTED_TYPES \
+ SUPPORTED_TWO_CENTER_TYPES, SUPPORTED_THREE_CENTER_TYPES, SUPPORTED_FOUR_CENTER_TYPES, \
+ SUPPORTED_FIVE_CENTER_TYPES
+
+// NOTE(review): spelled __attribute here, not the canonical __attribute__ — GCC/Clang accept
+// both, but this is GNU-only either way; confirm a portable fallback (e.g. for MSVC) is not needed
+#define NBLIB_ALWAYS_INLINE __attribute((always_inline))
+
+//! \brief encodes the number of integers needed to represent 2-center interactions (bonds, pairs)
+using TwoCenterInteractionIndex = std::array<int, 3>;
+//! \brief encodes the number of integers needed to represent 3-center interactions (angles)
+using ThreeCenterInteractionIndex = std::array<int, 4>;
+//! \brief encodes the number of integers needed to represent 4-center interactions (dihedrals)
+using FourCenterInteractionIndex = std::array<int, 5>;
+//! \brief encodes the number of integers needed to represent 5-center interactions (CMAP)
+using FiveCenterInteractionIndex = std::array<int, 6>;
+
+//! \brief data type for pairwise interactions, e.g. bonds
+template<class TwoCenterType>
+struct TwoCenterData
+{
+ using type = TwoCenterType;
+
+ // tuple format: <particleID i, particleID j, TwoCenterInstanceIndex>
+ std::vector<TwoCenterInteractionIndex> indices;
+ // vector of unique TwoCenterType instances
+ std::vector<TwoCenterType> parameters;
+};
+
+//! \brief data type for three-center interactions, e.g. angles
+template<class ThreeCenterType>
+struct ThreeCenterData
+{
+ using type = ThreeCenterType;
+
+ // tuple format: <particleID i, particleID j, particleID k, ThreeCenterInstanceIndex>
+ std::vector<ThreeCenterInteractionIndex> indices;
+ // vector of unique ThreeCenterType instances
+ std::vector<ThreeCenterType> parameters;
+};
+
+//! \brief data type for four-center interactions, e.g. dihedrals
+template<class FourCenterType>
+struct FourCenterData
+{
+ using type = FourCenterType;
+
+ // tuple format: <particleID i, particleID j, particleID k, particleID l, FourCenterInstanceIndex>
+ std::vector<FourCenterInteractionIndex> indices;
+ // vector of unique FourCenterType instances
+ std::vector<FourCenterType> parameters;
+};
+
+//! \brief data type for five-center interactions, e.g. CMAP
+template<class FiveCenterType>
+struct FiveCenterData
+{
+ using type = FiveCenterType;
+
+ // tuple format: <particleID i, particleID j, particleID k, particleID l, particleID m, FiveCenterInstanceIndex>
+ std::vector<FiveCenterInteractionIndex> indices;
+ // vector of unique FiveCenterType instances
+ std::vector<FiveCenterType> parameters;
+};
+
+
+using SupportedTwoCenterTypes = TypeList<SUPPORTED_TWO_CENTER_TYPES>;
+// std::tuple<TwoCenterData<TwoCenterType1>, ...>
+using TwoCenterInteractionData = Reduce<std::tuple, Map<TwoCenterData, SupportedTwoCenterTypes>>;
+
+using SupportedThreeCenterTypes = TypeList<SUPPORTED_THREE_CENTER_TYPES>;
+// std::tuple<AngleData<ThreeCenterType1>, ...>
+using ThreeCenterInteractionData = Reduce<std::tuple, Map<ThreeCenterData, SupportedThreeCenterTypes>>;
+
+using SupportedFourCenterTypes = TypeList<SUPPORTED_FOUR_CENTER_TYPES>;
+// std::tuple<FourCenterData<FourCenterType1>, ...>
+using FourCenterInteractionData = Reduce<std::tuple, Map<FourCenterData, SupportedFourCenterTypes>>;
+
+using SupportedFiveCenterTypes = TypeList<SUPPORTED_FIVE_CENTER_TYPES>;
+// std::tuple<FiveCenterData<FiveCenterType1>, ...>
+using FiveCenterInteractionData = Reduce<std::tuple, Map<FiveCenterData, SupportedFiveCenterTypes>>;
+
+//! This is the complete type that holds all listed interaction data
+using ListedInteractionData = decltype(std::tuple_cat(TwoCenterInteractionData{},
+ ThreeCenterInteractionData{},
+ FourCenterInteractionData{},
+ FiveCenterInteractionData{}));
+} // namespace nblib
+#endif // NBLIB_LISTEDFORCES_DEFINITIONS_H
--- /dev/null
+#
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2020, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+#
+# \author Victor Holanda <victor.holanda@cscs.ch>
+# \author Joe Jordan <ejjordan@kth.se>
+# \author Prashanth Kanduri <kanduri@cscs.ch>
+# \author Sebastian Keller <keller@cscs.ch>
+#
+
+# Make a static library for test infrastructure code that we re-use
+# in multiple test executables across the repository.
+
+# Name under which the test appears in CTest, and the executable target name
+set(testname "NbLibListedForcesTests")
+set(exename "nblib-listed-forces-test")
+
+gmx_add_gtest_executable(
+ ${exename}
+ CPP_SOURCE_FILES
+ # files with code for tests
+ bondtypes.cpp
+)
+# link against the nblib library under test plus the shared test infrastructure
+target_link_libraries(${exename} PRIVATE nblib_test_infrastructure nblib)
+gmx_register_gtest_test(${testname} ${exename} INTEGRATION_TEST)
+# ensure the test binary is built by the aggregate check-nblib target
+add_dependencies(check-nblib ${exename})
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * This implements basic nblib box tests
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ */
+#include "nblib/listed_forces/bondtypes.h"
+#include "nblib/util/internal.h"
+
+#include "testutils/testasserts.h"
+
+namespace nblib
+{
+
+namespace test_detail
+{
+
+//! \brief check operator== for a bond type with two constructor parameters
+//! \param deduceType dummy argument used only to deduce B; its value is ignored
+template<class B>
+void testTwoParameterBondEquality(const B& deduceType)
+{
+ ignore_unused(deduceType);
+ B a(1, 2);
+ B b(1, 2);
+ EXPECT_TRUE(a == b);
+
+ B c(1, 3);
+ EXPECT_FALSE(a == c);
+}
+
+//! \brief check operator== for a bond type with three constructor parameters
+template<class B>
+void testThreeParameterBondEquality(const B& deduceType)
+{
+ ignore_unused(deduceType);
+ B a(1, 2, 3);
+ B b(1, 2, 3);
+ EXPECT_TRUE(a == b);
+
+ B c(2, 3, 4);
+ EXPECT_FALSE(a == c);
+}
+
+//! \brief check that operator< is a strict ordering for a two-parameter bond type
+template<class B>
+void testTwoParameterBondLessThan(const B& deduceType)
+{
+ ignore_unused(deduceType);
+ // differ in the second parameter only
+ B a(1, 2);
+ B b(1, 3);
+ EXPECT_TRUE(a < b);
+ EXPECT_FALSE(b < a);
+
+ // equal instances must not compare less-than in either direction
+ B c(1, 2);
+ B d(1, 2);
+ EXPECT_FALSE(c < d);
+
+ // differ in the first parameter only
+ B e(2, 1);
+ B f(3, 1);
+ EXPECT_TRUE(e < f);
+ EXPECT_FALSE(f < e);
+}
+
+//! \brief check that operator< is a strict ordering for a three-parameter bond type
+template<class B>
+void testThreeParameterBondLessThan(const B& deduceType)
+{
+ ignore_unused(deduceType);
+ // differ in the second parameter only
+ B a(1, 2, 1);
+ B b(1, 3, 1);
+ EXPECT_TRUE(a < b);
+ EXPECT_FALSE(b < a);
+
+ // equal instances must not compare less-than in either direction
+ B c(1, 2, 3);
+ B d(1, 2, 3);
+ EXPECT_FALSE(c < d);
+
+ // first parameter dominates the lexicographic comparison
+ B e(4, 1, 3);
+ B f(5, 1, 2);
+ EXPECT_TRUE(e < f);
+ EXPECT_FALSE(f < e);
+}
+
+} // namespace test_detail
+
+//! \brief exercise operator== for all two- and three-parameter bond types
+TEST(NBlibTest, BondTypesOperatorEqualWorks)
+{
+ // bond types constructed from two parameters
+ auto bondList3 = std::make_tuple(HarmonicBondType(), G96BondType(), FENEBondType(),
+ HalfAttractiveQuarticBondType());
+ for_each_tuple([](const auto& b) { test_detail::testTwoParameterBondEquality(b); }, bondList3);
+
+ // bond types constructed from three parameters
+ auto bondList4 = std::make_tuple(CubicBondType(), MorseBondType());
+ for_each_tuple([](const auto& b) { test_detail::testThreeParameterBondEquality(b); }, bondList4);
+}
+
+//! \brief exercise operator< for all two- and three-parameter bond types
+TEST(NBlibTest, BondTypesLessThanWorks)
+{
+ // bond types constructed from two parameters
+ auto bondList3 = std::make_tuple(HarmonicBondType(), G96BondType(), FENEBondType(),
+ HalfAttractiveQuarticBondType());
+ for_each_tuple([](const auto& b) { test_detail::testTwoParameterBondLessThan(b); }, bondList3);
+
+ // bond types constructed from three parameters
+ auto bondList4 = std::make_tuple(CubicBondType(), MorseBondType());
+ for_each_tuple([](const auto& b) { test_detail::testThreeParameterBondLessThan(b); }, bondList4);
+}
+
+
+} // namespace nblib
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * These traits defined here for supported nblib listed interaction data types
+ * are used to control the dataflow in dataflow.hpp
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ * \author Artem Zhmurov <zhmurov@gmail.com>
+ */
+#ifndef NBLIB_LISTEDFORCES_TRAITS_H
+#define NBLIB_LISTEDFORCES_TRAITS_H
+
+#include <numeric>
+
+#include "nblib/util/internal.h"
+#include "bondtypes.h"
+#include "definitions.h"
+
+namespace nblib
+{
+
+namespace detail
+{
+
+//! \brief primary template; deliberately empty so unsupported types fail at compile time
+template<class InteractionType, class = void>
+struct CoordinateIndex_
+{
+};
+
+//! \brief two-center interactions need 2 coordinate indices
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedTwoCenterTypes>{}>>
+{
+ typedef std::array<int, 2> type;
+};
+
+//! \brief three-center interactions need 3 coordinate indices
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedThreeCenterTypes>{}>>
+{
+ typedef std::array<int, 3> type;
+};
+
+//! \brief four-center interactions need 4 coordinate indices
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFourCenterTypes>{}>>
+{
+ typedef std::array<int, 4> type;
+};
+
+//! \brief five-center interactions need 5 coordinate indices
+template<class InteractionType>
+struct CoordinateIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFiveCenterTypes>{}>>
+{
+ typedef std::array<int, 5> type;
+};
+
+} // namespace detail
+
+/*! \brief traits class to determine the coordinate index type for InteractionType
+ * \internal
+ *
+ * \tparam InteractionCategory
+ */
+template<class InteractionType>
+using CoordinateIndex = typename detail::CoordinateIndex_<InteractionType>::type;
+
+
+namespace detail
+{
+
+//! \brief primary template; deliberately empty so unsupported types fail at compile time
+template<class InteractionType, class = void>
+struct InteractionIndex_
+{
+};
+
+//! \brief two-center interactions use the 3-int index (2 particles + parameter index)
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedTwoCenterTypes>{}>>
+{
+ typedef TwoCenterInteractionIndex type;
+};
+
+//! \brief three-center interactions use the 4-int index (3 particles + parameter index)
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedThreeCenterTypes>{}>>
+{
+ typedef ThreeCenterInteractionIndex type;
+};
+
+//! \brief four-center interactions use the 5-int index (4 particles + parameter index)
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFourCenterTypes>{}>>
+{
+ typedef FourCenterInteractionIndex type;
+};
+
+//! \brief five-center interactions use the 6-int index (5 particles + parameter index)
+template<class InteractionType>
+struct InteractionIndex_<InteractionType, std::enable_if_t<Contains<InteractionType, SupportedFiveCenterTypes>{}>>
+{
+ typedef FiveCenterInteractionIndex type;
+};
+
+} // namespace detail
+
+/*! \brief traits class to determine the InteractionIndex type for InteractionType
+ * \internal
+ *
+ * \tparam InteractionType
+ */
+template<class InteractionType>
+using InteractionIndex = typename detail::InteractionIndex_<InteractionType>::type;
+
+
+//! \brief detects whether interaction type I declares a TwoCenterAggregateType member; default: no
+template<class I, class = void>
+struct HasTwoCenterAggregate : std::false_type
+{
+};
+
+//! \brief specialization selected via void_t when I::TwoCenterAggregateType exists
+template<class I>
+struct HasTwoCenterAggregate<I, std::void_t<typename I::TwoCenterAggregateType>> : std::true_type
+{
+};
+
+//! \brief detects whether interaction type I declares a ThreeCenterAggregateType member; default: no
+template<class I, class = void>
+struct HasThreeCenterAggregate : std::false_type
+{
+};
+
+//! \brief specialization selected via void_t when I::ThreeCenterAggregateType exists
+template<class I>
+struct HasThreeCenterAggregate<I, std::void_t<typename I::ThreeCenterAggregateType>> : std::true_type
+{
+};
+
+//! \internal \brief determines the energy storage location of the carrier part for InteractionTypes without aggregates
+template<class InteractionType, class = void>
+struct CarrierIndex :
+ std::integral_constant<size_t, FindIndex<InteractionType, ListedInteractionData>{}>
+{
+};
+
+//! \internal \brief determines the energy storage location of the carrier part for InteractionTypes with aggregates
+template<class InteractionType>
+struct CarrierIndex<InteractionType, std::void_t<typename InteractionType::CarrierType>> :
+ std::integral_constant<size_t, FindIndex<typename InteractionType::CarrierType, ListedInteractionData>{}>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 2-C aggregate part for InteractionTypes without aggregates
+//! NOTE(review): the fallback index is 0, i.e. the first slot of ListedInteractionData — confirm
+//! callers never read this value for non-aggregate types
+template<class InteractionType, class = void>
+struct TwoCenterAggregateIndex : std::integral_constant<size_t, 0>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 2-C aggregate part for InteractionTypes with 2-C aggregates
+template<class InteractionType>
+struct TwoCenterAggregateIndex<InteractionType, std::void_t<typename InteractionType::TwoCenterAggregateType>> :
+ std::integral_constant<size_t, FindIndex<typename InteractionType::TwoCenterAggregateType, ListedInteractionData>{}>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 3-C aggregate part for InteractionTypes without aggregates
+template<class InteractionType, class = void>
+struct ThreeCenterAggregateIndex : std::integral_constant<size_t, 0>
+{
+};
+
+//! \internal \brief determines the energy storage location of the 3-C aggregate part for InteractionTypes with 3-C aggregates
+template<class InteractionType>
+struct ThreeCenterAggregateIndex<InteractionType, std::void_t<typename InteractionType::ThreeCenterAggregateType>> :
+ std::integral_constant<size_t, FindIndex<typename InteractionType::ThreeCenterAggregateType, ListedInteractionData>{}>
+{
+};
+
+/*! \brief return type to hold the energies of the different overloads of "dispatchInteraction"
+ * \internal
+ *
+ * \tparam T
+ */
+template<class T>
+class KernelEnergy
+{
+public:
+ //! \brief all four energy slots start at zero
+ KernelEnergy() : energies_{ 0, 0, 0, 0 } {}
+
+ //! \brief energy of the carrier interaction (slot 0)
+ T& carrier() { return energies_[0]; }
+ const T& carrier() const { return energies_[0]; }
+
+ //! \brief energy of the 2-center aggregate part (slot 1)
+ T& twoCenterAggregate() { return energies_[1]; }
+ const T& twoCenterAggregate() const { return energies_[1]; }
+
+ //! \brief energy of the 3-center aggregate part (slot 2)
+ T& threeCenterAggregate() { return energies_[2]; }
+ const T& threeCenterAggregate() const { return energies_[2]; }
+
+ //! \brief free-energy derivative (slot 3)
+ T& freeEnergyDerivative() { return energies_[3]; }
+ const T& freeEnergyDerivative() const { return energies_[3]; }
+
+ //! \brief slot-wise accumulation of another KernelEnergy
+ KernelEnergy& operator+=(const KernelEnergy& other)
+ {
+ for (size_t i = 0; i < energies_.size(); ++i)
+ {
+ energies_[i] += other.energies_[i];
+ }
+ return *this;
+ }
+
+ //! \brief conversion to the scalar sum of all slots
+ //! NOTE(review): this sums all four entries, so the free-energy derivative is folded into the
+ //! returned "total energy" — confirm that is intended rather than summing only slots 0-2
+ operator T() const { return std::accumulate(begin(energies_), end(energies_), T{}); }
+
+private:
+ std::array<T, 4> energies_;
+};
+
+//! \brief extracts the scalar value type (e.g. real) from a BasicVector-like type
+template<class BasicVector>
+using BasicVectorValueType_t = std::remove_all_extents_t<typename BasicVector::RawArray>;
+
+} // namespace nblib
+#endif // NBLIB_LISTEDFORCES_TRAITS_H
}
}
-void Molecule::addExclusion(std::tuple<std::string, std::string> particle,
-                            std::tuple<std::string, std::string> particleToExclude)
+//! \brief Register an exclusion between two (particle name, residue name) pairs; resolved to indices in getExclusions()
+void Molecule::addExclusion(std::tuple<ParticleName, ResidueName> particle,
+                            std::tuple<ParticleName, ResidueName> particleToExclude)
 {
     // duplication for the swapped pair happens in getExclusions()
-    exclusionsByName_.emplace_back(std::make_tuple(std::get<0>(particle), std::get<1>(particle),
-                                                   std::get<1>(particleToExclude)));
+    // exclusionsByName_ stores 4-tuples (name, residue, nameToExclude, residueToExclude);
+    // the previous call dropped std::get<0>(particleToExclude) and stored only 3 of the 4 fields
+    exclusionsByName_.emplace_back(std::make_tuple(std::get<0>(particle), std::get<1>(particle),
+                                                   std::get<0>(particleToExclude),
+                                                   std::get<1>(particleToExclude)));
 }
-void Molecule::addExclusion(const std::string& particleName, const std::string& particleNameToExclude)
+//! \brief Convenience overload: forwards to the tuple overload with a default residue name
+void Molecule::addExclusion(const ParticleName& particleName, const ParticleName& particleNameToExclude)
 {
-    addExclusion(std::make_tuple(particleName, name_), std::make_tuple(particleNameToExclude, name_));
+    // name_ (the molecule name) doubles as the residue name when none is specified
+    addExclusion(std::make_tuple(particleName, ResidueName(name_)),
+                 std::make_tuple(particleNameToExclude, ResidueName(name_)));
 }
const ParticleType& Molecule::at(const std::string& particleTypeName) const
#include <unordered_map>
#include <vector>
+#include "nblib/listed_forces/definitions.h"
#include "nblib/particletype.h"
namespace nblib
{
+class TopologyBuilder;
+
//! Named type for unique identifier for a particle in a molecule
using ParticleName = StrongType<std::string, struct ParticleNameParameter>;
class Molecule final
{
+    //! \brief string based listed interaction data type used during construction
+    template<class TwoCenterType>
+    struct TwoCenterData
+    {
+        using type = TwoCenterType;
+
+        std::vector<TwoCenterType> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName>> interactions_;
+    };
+
+    //! \brief analogous to TwoCenterData, for interactions between three named particles
+    template<class ThreeCenterType>
+    struct ThreeCenterData
+    {
+        using type = ThreeCenterType;
+
+        std::vector<ThreeCenterType> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName>> interactions_;
+    };
+
+    //! \brief analogous to TwoCenterData, for interactions between four named particles
+    //! (renamed from FourCenterDataHolder for consistency with the 2- and 3-center variants)
+    template<class FourCenterType>
+    struct FourCenterData
+    {
+        using type = FourCenterType;
+
+        std::vector<FourCenterType> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName>> interactions_;
+    };
+
+    //! \brief analogous to TwoCenterData, for interactions between five named particles
+    //! (renamed from FiveCenterDataHolder for consistency with the 2- and 3-center variants)
+    template<class FiveCenterType>
+    struct FiveCenterData
+    {
+        using type = FiveCenterType;
+
+        std::vector<FiveCenterType> interactionTypes_;
+        std::vector<std::tuple<ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName, ParticleName, ResidueName>>
+                interactions_;
+    };
+
+    // TwoCenterContainerTypes is TypeList<TwoCenterData<HarmonicBondType>, ...>
+    using TwoCenterContainerTypes = Map<TwoCenterData, SupportedTwoCenterTypes>;
+
+    using ThreeCenterContainerTypes = Map<ThreeCenterData, SupportedThreeCenterTypes>;
+
+    using FourCenterContainerTypes = Map<FourCenterData, SupportedFourCenterTypes>;
+
+    using FiveCenterContainerTypes = Map<FiveCenterData, SupportedFiveCenterTypes>;
+
+    // InteractionTuple is std::tuple<TwoCenterData<HarmonicBondType>, ...>
+    using InteractionTuple = decltype(std::tuple_cat(Reduce<std::tuple, TwoCenterContainerTypes>{},
+                                                     Reduce<std::tuple, ThreeCenterContainerTypes>{},
+                                                     Reduce<std::tuple, FourCenterContainerTypes>{},
+                                                     Reduce<std::tuple, FiveCenterContainerTypes>{}));
+
public:
explicit Molecule(MoleculeName moleculeName);
void addExclusion(int particleIndex, int particleIndexToExclude);
//! Specify an exclusion with particle and residue names that have been added to molecule
- void addExclusion(std::tuple<std::string, std::string> particle,
- std::tuple<std::string, std::string> particleToExclude);
+ void addExclusion(std::tuple<ParticleName, ResidueName> particle,
+ std::tuple<ParticleName, ResidueName> particleToExclude);
//! Specify an exclusion with particle names that have been added to molecule
- void addExclusion(const std::string& particleName, const std::string& particleNameToExclude);
+ void addExclusion(const ParticleName& particleName, const ParticleName& particleNameToExclude);
+
+ // Add various types of interactions to the molecule
+    // Note: adding an interaction type not listed in the supported type lists
+    // (SUPPORTED_TWO_CENTER_TYPES, SUPPORTED_THREE_CENTER_TYPES, ...) results in a compilation error
+
+ //! For 2-particle interactions such as harmonic bonds
+ template<class Interaction>
+ void addInteraction(const ParticleName& particleNameI,
+ const ResidueName& residueNameI,
+ const ParticleName& particleNameJ,
+ const ResidueName& residueNameJ,
+ const Interaction& interaction);
+
+ //! Add 2-particle interactions with the default residue name
+ template<class Interaction>
+ void addInteraction(const ParticleName& particleNameI,
+ const ParticleName& particleNameJ,
+ const Interaction& interaction);
+
+ //! For 3-particle interactions such as angles
+ template<class Interaction>
+ void addInteraction(const ParticleName& particleNameI,
+ const ResidueName& residueNameI,
+ const ParticleName& particleNameJ,
+ const ResidueName& residueNameJ,
+ const ParticleName& particleNameK,
+ const ResidueName& residueNameK,
+ const Interaction& interaction);
+
+ //! Add 3-particle interactions with the default residue name
+ template<class Interaction>
+ void addInteraction(const ParticleName& particleNameI,
+ const ParticleName& particleNameJ,
+ const ParticleName& particleNameK,
+ const Interaction& interaction);
//! The number of molecules
int numParticlesInMolecule() const;
//! returns a sorted vector containing no duplicates of particles to exclude by indices
std::vector<std::tuple<int, int>> getExclusions() const;
+ //! Return all interactions stored in Molecule
+ const InteractionTuple& interactionData() const;
+
//! Return name of ith particle
ParticleName particleName(int i) const;
//! we cannot efficiently compute indices during the build-phase
//! so we delay the conversion until TopologyBuilder requests it
std::vector<std::tuple<std::string, std::string, std::string, std::string>> exclusionsByName_;
+
+ //! collection of data for all types of interactions
+ InteractionTuple interactionData_;
};
+//! \cond DO_NOT_DOCUMENT
+// extern template declarations: the addInteraction templates are explicitly instantiated
+// once in the corresponding source file, so including this header does not trigger
+// implicit instantiation in every translation unit
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x) \
+    extern template void Molecule::addInteraction( \
+        const ParticleName& particleNameI, const ResidueName& residueNameI, \
+        const ParticleName& particleNameJ, const ResidueName& residueNameJ, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_TWO_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+
+// 2-center overload with default residue names
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x) \
+    extern template void Molecule::addInteraction( \
+        const ParticleName& particleNameI, const ParticleName& particleNameJ, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_TWO_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+
+// 3-center overload with explicit residue names
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x) \
+    extern template void Molecule::addInteraction( \
+        const ParticleName& particleNameI, const ResidueName& residueNameI, \
+        const ParticleName& particleNameJ, const ResidueName& residueNameJ, \
+        const ParticleName& particleNameK, const ResidueName& residueNameK, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_THREE_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+
+// 3-center overload with default residue names
+#define ADD_INTERACTION_EXTERN_TEMPLATE(x) \
+    extern template void Molecule::addInteraction( \
+        const ParticleName& particleNameI, const ParticleName& particleNameJ, \
+        const ParticleName& particleNameK, const x& interaction);
+MAP(ADD_INTERACTION_EXTERN_TEMPLATE, SUPPORTED_THREE_CENTER_TYPES)
+#undef ADD_INTERACTION_EXTERN_TEMPLATE
+//! \endcond
+
} // namespace nblib
#endif // NBLIB_MOLECULES_H
#include "nblib/integrator.h"
#include "nblib/interactions.h"
#include "nblib/kerneloptions.h"
+#include "nblib/listed_forces/bondtypes.h"
+#include "nblib/listed_forces/calculator.h"
+#include "nblib/listed_forces/definitions.h"
#include "nblib/molecules.h"
#include "nblib/particletype.h"
+#include "nblib/ppmap.h"
#include "nblib/simulationstate.h"
#include "nblib/topology.h"
#include "nblib/topologyhelpers.h"
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*
+ * Copyright (C) 2012 William Swanson
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the names of the authors or
+ * their institutions shall not be used in advertising or otherwise to
+ * promote the sale, use or other dealings in this Software without
+ * prior written authorization from the authors.
+ */
+
+/*! \inpublicapi \file
+ * \brief
+ * Provides MAP and MAP_LIST to apply a macro to a variadic argument list
+ *
+ * The MAP and MAP_LIST macros implement calling a supplied macro with
+ * all of the subsequent arguments. For example:
+ * MAP(macro, x, y, z) expands to macro(x) macro(y) macro(z) while
+ * MAP_LIST(macro, x, y, z) expands to macro(x), macro(y), macro(z)
+ *
+ * Due to the limitations of the preprocessor, it is unfortunately not
+ * possible to implement this functionality in a more straight-forward way.
+ * Since this use-case is not too uncommon, Boost for example implements
+ * BOOST_PP_SEQ_FOR_EACH which provides equivalent functionality implemented
+ * with the same technique, but is more comprehensive in scope,
+ * beyond what's required here.
+ *
+ * A discussion of how and why this macro works can be found here:
+ * https://stackoverflow.com/questions/27765387/distributing-an-argument-in-a-variadic-macro
+ * and the original repository of this implementation is this one:
+ * https://github.com/swansontec/map-macro
+ * It also contains some useful explanations of how it works.
+ */
+
+#ifndef NBLIB_PPMAP_H
+#define NBLIB_PPMAP_H
+
+// The EVAL macros force the preprocessor to rescan their argument repeatedly
+// (three-fold per level), which is what drives the MAP "recursion" below
+#define EVAL0(...) __VA_ARGS__
+#define EVAL1(...) EVAL0(EVAL0(EVAL0(__VA_ARGS__)))
+#define EVAL2(...) EVAL1(EVAL1(EVAL1(__VA_ARGS__)))
+#define EVAL3(...) EVAL2(EVAL2(EVAL2(__VA_ARGS__)))
+#define EVAL4(...) EVAL3(EVAL3(EVAL3(__VA_ARGS__)))
+#define EVAL(...) EVAL4(EVAL4(EVAL4(__VA_ARGS__)))
+
+// Helpers used to detect the ()()() end-of-arguments sentinel and stop expanding
+#define MAP_END(...)
+#define MAP_OUT
+#define MAP_COMMA ,
+
+#define MAP_GET_END2() 0, MAP_END
+#define MAP_GET_END1(...) MAP_GET_END2
+#define MAP_GET_END(...) MAP_GET_END1
+#define MAP_NEXT0(test, next, ...) next MAP_OUT
+#define MAP_NEXT1(test, next) MAP_NEXT0(test, next, 0)
+#define MAP_NEXT(test, next) MAP_NEXT1(MAP_GET_END test, next)
+
+// MAP0 and MAP1 alternate to sidestep the preprocessor's self-recursion blocking
+#define MAP0(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP1)(f, peek, __VA_ARGS__)
+#define MAP1(f, x, peek, ...) f(x) MAP_NEXT(peek, MAP0)(f, peek, __VA_ARGS__)
+
+#define MAP_LIST_NEXT1(test, next) MAP_NEXT0(test, MAP_COMMA next, 0)
+#define MAP_LIST_NEXT(test, next) MAP_LIST_NEXT1(MAP_GET_END test, next)
+
+#define MAP_LIST0(f, x, peek, ...) f(x) MAP_LIST_NEXT(peek, MAP_LIST1)(f, peek, __VA_ARGS__)
+#define MAP_LIST1(f, x, peek, ...) f(x) MAP_LIST_NEXT(peek, MAP_LIST0)(f, peek, __VA_ARGS__)
+
+/**
+ * Applies the function macro `f` to each of the remaining parameters.
+ */
+#define MAP(f, ...) EVAL(MAP1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+/**
+ * Applies the function macro `f` to each of the remaining parameters and
+ * inserts commas between the results.
+ */
+#define MAP_LIST(f, ...) EVAL(MAP_LIST1(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0))
+
+
+/** The PP_NARG macro returns the number of arguments that have been
+ * passed to it.
+ *
+ * NOTE: as is typical of this counting technique, PP_NARG() with an empty
+ * argument list yields 1, not 0.
+ */
+#define PP_NARG(...) PP_NARG_(__VA_ARGS__, PP_RSEQ_N())
+#define PP_NARG_(...) PP_ARG_N(__VA_ARGS__)
+// PP_ARG_N returns its 64th argument; prepending the user arguments shifts the
+// reversed sequence below so that N lands on the argument count
+#define PP_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, \
+                 _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, \
+                 _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, \
+                 _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, N, ...) \
+    N
+#define PP_RSEQ_N() \
+    63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, \
+            40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, \
+            19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/** MAP_ENUMERATE macro:
+ * MAP_ENUMERATE(action, args...)
+ * like MAP, calls action with each argument, but also forwards the index of the argument to action
+ * (currently supports up to 5 arguments; extend the FE_N chain to allow more)
+ */
+#define FE_0(WHAT)
+#define FE_1(WHAT, N, X) WHAT(X, N - 1) // NOLINT bugprone-macro-parentheses
+#define FE_2(WHAT, N, X, ...) WHAT(X, N - 2) FE_1(WHAT, N, __VA_ARGS__)
+#define FE_3(WHAT, N, X, ...) WHAT(X, N - 3) FE_2(WHAT, N, __VA_ARGS__)
+#define FE_4(WHAT, N, X, ...) WHAT(X, N - 4) FE_3(WHAT, N, __VA_ARGS__)
+#define FE_5(WHAT, N, X, ...) WHAT(X, N - 5) FE_4(WHAT, N, __VA_ARGS__)
+
+// selects the FE_N overload matching the number of arguments supplied
+#define GET_MACRO(_0, _1, _2, _3, _4, _5, NAME, ...) NAME
+#define MAP_ENUMERATE(action, ...) \
+    GET_MACRO(_0, __VA_ARGS__, FE_5, FE_4, FE_3, FE_2, FE_1, FE_0, ) \
+    (action, PP_NARG(__VA_ARGS__), __VA_ARGS__)
+
+#endif // NBLIB_PPMAP_H
box_(box),
topology_(std::move(topology))
{
- if (!checkNumericValues(coordinates))
+ auto numParticles = topology_.numParticles();
+
+ if (int(coordinates.size()) != numParticles)
+ {
+ throw InputException("Coordinates array size mismatch");
+ }
+
+ if (int(velocities.size()) != numParticles)
+ {
+ throw InputException("Velocities array size mismatch");
+ }
+
+ if (int(forces.size()) != numParticles)
+ {
+ throw InputException("Force buffer array size mismatch");
+ }
+
+ if (!isRealValued(coordinates))
{
throw InputException("Input coordinates has at least one NaN");
}
coordinates_ = coordinates;
- if (!checkNumericValues(velocities))
+ if (!isRealValued(velocities))
{
throw InputException("Input velocities has at least one NaN");
}
Molecule water = waterMolecule.waterMoleculeWithoutExclusions();
//! Add the exclusions
- water.addExclusion("H1", "Oxygen");
- water.addExclusion("H2", "Oxygen");
+ water.addExclusion(ParticleName("H1"), ParticleName("Oxygen"));
+ water.addExclusion(ParticleName("H2"), ParticleName("Oxygen"));
water.addExclusion(1, 2);
std::vector<std::tuple<int, int>> exclusions = water.getExclusions();
gmx::ArrayRef<Vec3> forces(simState.forces());
forceCalculator.compute(simState.coordinates(), simState.forces());
+ // copy computed forces to another array
std::vector<Vec3> forces_1(forces.size());
std::copy(forces.begin(), forces.end(), begin(forces_1));
+ // zero original force buffer
+ zeroCartesianArray(forces);
+
// check if forces change without update step
forceCalculator.compute(simState.coordinates(), forces);
// update
integrator.integrate(1.0, simState.coordinates(), simState.velocities(), simState.forces());
+ // zero original force buffer
+ zeroCartesianArray(forces);
+
// step 2
- forceCalculator.compute(simState.coordinates(), simState.forces());
+ forceCalculator.compute(simState.coordinates(), forces);
std::vector<Vec3> forces_2(forces.size());
std::copy(forces.begin(), forces.end(), begin(forces_2));
 void WaterMoleculeBuilder::addExclusionsFromNames()
 {
-    water_.addExclusion("H1", "Oxygen");
-    water_.addExclusion("H2", "Oxygen");
-    water_.addExclusion("H1", "H2");
+    // each pair only needs to be specified once; the swapped duplicate
+    // is generated in Molecule::getExclusions()
+    water_.addExclusion(ParticleName("H1"), ParticleName("Oxygen"));
+    water_.addExclusion(ParticleName("H2"), ParticleName("Oxygen"));
+    water_.addExclusion(ParticleName("H1"), ParticleName("H2"));
 }
MethanolMoleculeBuilder::MethanolMoleculeBuilder() : methanol_(MoleculeName("MeOH"))
methanol_.addParticle(ParticleName("H3"), Charges.at("HMet"), library.type("H"));
// Add the exclusions
- methanol_.addExclusion("Me1", "O2");
- methanol_.addExclusion("Me1", "H3");
- methanol_.addExclusion("H3", "O2");
+ methanol_.addExclusion(ParticleName("Me1"), ParticleName("O2"));
+ methanol_.addExclusion(ParticleName("Me1"), ParticleName("H3"));
+ methanol_.addExclusion(ParticleName("H3"), ParticleName("O2"));
}
Molecule MethanolMoleculeBuilder::methanolMolecule()
size_t numMols = std::get<1>(molNumberTuple);
const auto& exclusions = molecule.getExclusions();
- assert((!exclusions.empty()
- && std::string("No exclusions found in the " + molecule.name().value() + " molecule.")
- .c_str()));
+ // Note this is a programming error as all particles should exclude at least themselves and empty topologies are not allowed.
+ const std::string message =
+ "No exclusions found in the " + molecule.name().value() + " molecule.";
+ assert((!exclusions.empty() && message.c_str()));
std::vector<gmx::ExclusionBlock> exclusionBlockPerMolecule =
detail::toGmxExclusionBlock(exclusions);
Topology TopologyBuilder::buildTopology()
{
+ assert((!(numParticles_ < 0) && "It should not be possible to have negative particles"));
+ if (numParticles_ == 0)
+ {
+ throw InputException("You cannot build a topology with no particles");
+ }
topology_.numParticles_ = numParticles_;
topology_.exclusions_ = createExclusionsListOfLists();
#include <vector>
#include "nblib/interactions.h"
+#include "nblib/listed_forces/definitions.h"
#include "nblib/molecules.h"
#include "nblib/topologyhelpers.h"
//! Returns a map of non-bonded force parameters indexed by ParticleType names
NonBondedInteractionMap getNonBondedInteractionMap() const;
+ //! Returns the interaction data
+ ListedInteractionData getInteractionData() const;
+
//! Returns the combination rule used to generate the NonBondedInteractionMap
CombinationRule getCombinationRule() const;
detail::ParticleSequencer particleSequencer_;
//! Map that should hold all nonbonded interactions for all particle types
NonBondedInteractionMap nonBondedInteractionMap_;
+ //! data about bonds for all supported types
+ ListedInteractionData interactionData_;
//! Combination Rule used to generate the nonbonded interactions
CombinationRule combinationRule_;
};
//! Builds a GROMACS-compliant performant exclusions list aggregating exclusions from all molecules
gmx::ListOfLists<int> createExclusionsListOfLists() const;
+ //! Gather interaction data from molecules
+ ListedInteractionData createInteractionData(const detail::ParticleSequencer&);
+
//! Helper function to extract quantities like mass, charge, etc from the system
template<typename T, class Extractor>
std::vector<T> extractParticleTypeQuantity(Extractor&& extractor);
return std::get<0>(tup1) < std::get<0>(tup2);
};
+ // Note this is a programming error as all particles should exclude at least themselves and empty topologies are not allowed.
+    // The same condition is also checked in the calling function, which reports a more informative error message.
+ assert((!tupleList.empty() && "No exclusions found.\n"));
+
// initialize pair of iterators delimiting the range of exclusions for
// the first particle in the list
- assert((!tupleList.empty() && "tupleList must not be empty\n"));
auto range = std::equal_range(std::begin(tupleList), std::end(tupleList), tupleList[0], firstLowerThan);
auto it1 = range.first;
auto it2 = range.second;
#include <vector>
#include "gromacs/utility/listoflists.h"
+#include "nblib/listed_forces/traits.h"
#include "nblib/molecules.h"
namespace gmx
//! Add offset to all indices in inBlock
std::vector<gmx::ExclusionBlock> offsetGmxBlock(std::vector<gmx::ExclusionBlock> inBlock, int offset);
+/*!
+ * \brief
+ * Extract all interactions of type I from a vector of molecules. The second tuple element
+ * specifies how many copies of the molecule given as the first tuple element are present.
+ * Let (S, V) denote the return value tuple, with S an index vector and V a vector of
+ * interaction parameter sets. Then J[i] = V[S[i]] for all i in [0, S.size()) is the full
+ * sequence of interaction instances as they occur in the input
+ *
+ */
+template<class I>
+std::tuple<std::vector<size_t>, std::vector<I>>
+collectInteractions(const std::vector<std::tuple<Molecule, int>>&);
+
+// extern template declarations: instantiated in the implementation file for each supported type
+#define COLLECT_BONDS_EXTERN_TEMPLATE(x) \
+    extern template std::tuple<std::vector<size_t>, std::vector<x>> collectInteractions( \
+        const std::vector<std::tuple<Molecule, int>>&);
+MAP(COLLECT_BONDS_EXTERN_TEMPLATE, SUPPORTED_TWO_CENTER_TYPES)
+#undef COLLECT_BONDS_EXTERN_TEMPLATE
+
+/*!
+ * \brief
+ * Return a list of unique BondType instances U and an index list S of size aggregatedBonds.size()
+ * such that the BondType instance at aggregatedBonds[i] is equal to U[S[i]]
+ * returns std::tuple(S, U)
+ *
+ */
+template<class I>
+std::tuple<std::vector<size_t>, std::vector<I>> eliminateDuplicateInteractions(const std::vector<I>& collectedBonds);
+
+/// \cond DO_NOT_DOCUMENT
+#define ELIMINATE_DUPLICATE_EXTERN_TEMPLATE(x) \
+ extern template std::tuple<std::vector<size_t>, std::vector<x>> eliminateDuplicateInteractions( \
+ const std::vector<x>& collectedBonds);
+MAP(ELIMINATE_DUPLICATE_EXTERN_TEMPLATE, SUPPORTED_LISTED_TYPES)
+#undef ELIMINATE_DUPLICATE_EXTERN_TEMPLATE
+/// \endcond
+
//! Helper class for Topology to keep track of particle IDs
class ParticleSequencer
{
DataType data_;
};
+//! \brief Build the coordinate-index sequences for every interaction of type B in the
+//! given (molecule, count) list, using the ParticleSequencer to translate particle and
+//! residue names into indices
+template<class B>
+std::vector<CoordinateIndex<B>> sequenceIDs(const std::vector<std::tuple<Molecule, int>>&,
+                                            const detail::ParticleSequencer&);
+
+/// \cond DO_NOT_DOCUMENT
+#define SEQUENCE_PAIR_ID_EXTERN_TEMPLATE(x) \
+    extern template std::vector<CoordinateIndex<x>> sequenceIDs<x>( \
+        const std::vector<std::tuple<Molecule, int>>&, const detail::ParticleSequencer&);
+MAP(SEQUENCE_PAIR_ID_EXTERN_TEMPLATE, SUPPORTED_LISTED_TYPES)
+#undef SEQUENCE_PAIR_ID_EXTERN_TEMPLATE
+/// \endcond
+
} // namespace detail
} // namespace nblib
#include <type_traits>
#include <vector>
-#include "nblib/basicdefinitions.h"
-#include "nblib/vector.h"
namespace nblib
{
template<auto...>
using void_value_t = void;
+//! \brief the std::tuple type obtained from concatenating the given tuple types
+template<class... Tuples>
+using tuple_cat_t = decltype(std::tuple_cat(Tuples{}...));
+
template<class T, class = void>
struct HasValueMember : std::false_type
{
template<class T>
using AccessTypeMemberIfPresent_t = typename AccessTypeMemberIfPresent<T>::type;
-//! this trait evaluates to std::true_type if T is the same as Tuple[N]
-//! OR if T is the same as the type member of Tuple[N]
+/*! \brief Comparison meta function that compares T to Tuple[N]
+ *
+ * This trait evaluates to std::true_type if T is the same as Tuple[N]
+ * OR if T is the same as the type member of Tuple[N]
+ */
template<int N, typename T, typename Tuple>
struct MatchTypeOrTypeMember :
std::disjunction<std::is_same<T, std::tuple_element_t<N, Tuple>>,
{
};
-//! recursion to check the next field N+1
-template<int N, class T, class Tuple, template<int, class, class> class Comparison, bool Match = false>
-struct MatchField_ :
-    std::integral_constant<size_t, MatchField_<N + 1, T, Tuple, Comparison, Comparison<N + 1, T, Tuple>{}>{}>
+//! \brief Recursion to check the next field N+1
+//! The trailing Match parameter (default void) exists only as a SFINAE hook
+//! for the terminating specialization below
+template<int N, class T, class Tuple, template<int, class, class> class Comparison, class Match = void>
+struct MatchField_ : std::integral_constant<size_t, MatchField_<N + 1, T, Tuple, Comparison>{}>
 {
 };
-//! recursion stop when Comparison<N, T, Tuple>::value is true
+//! \brief recursion stop when Comparison<N, T, Tuple>::value is true,
+//! i.e. when std::enable_if_t<...> is well-formed and names void
 template<int N, class T, class Tuple, template<int, class, class> class Comparison>
-struct MatchField_<N, T, Tuple, Comparison, true> : std::integral_constant<size_t, N>
+struct MatchField_<N, T, Tuple, Comparison, std::enable_if_t<Comparison<N, T, Tuple>{}>> :
+    std::integral_constant<size_t, N>
 {
 };
} // namespace detail
-/*! \brief The value member of this struct evaluates to the integral constant N for which
- * the value member of Comparison<N, T, Tuple> is true
- * and generates a compiler error if there is no such N
+
+/*! \brief Meta function to return the first index in Tuple whose type matches T
+ *
+ * If there are more than one, the first occurrence will be returned.
+ * If there is no such type, the size of Tuple will be returned.
+ * Note that the default comparison operation supplied here also matches if the type member Tuple[N]::type matches T
+ */
+// the primary template is deliberately empty; only the partial specialization below,
+// for class template instantiations such as std::tuple or TypeList, provides a value
-template<class T, class Tuple, template<int, class, class> class Comparison>
-struct MatchField : detail::MatchField_<0, T, Tuple, Comparison, Comparison<0, T, Tuple>{}>
+template<typename T, class TL, template<int, class, class> class Comparison = detail::MatchTypeOrTypeMember>
+struct FindIndex
+{
 };
-/*! \brief Function to return the index in Tuple whose type matches T
- * - If there are more than one, the first occurrence will be returned
- * - If there is no such type, a compiler error from accessing a tuple out of range is generated
- * Note that the default comparison operation supplied here also matches if the type member of Tuple[N] matches T
+/*! \brief Specialization to only enable this trait if TL has template parameters
+ *
+ * \tparam T a type to look for in the template parameters of TL
+ * \tparam TL a template template parameter, e.g. std::tuple or nblib::TypeList
+ * \tparam Ts template parameters of TL
+ * \tparam Comparison comparison operation
+ *
+ * Note that \a T is added to \a TL as a sentinel to terminate the recursion
+ * and prevent an out of bounds tuple access compiler error.
+ */
-template<typename T, typename Tuple, template<int, class, class> class Comparison = detail::MatchTypeOrTypeMember>
-struct FindIndex : std::integral_constant<size_t, MatchField<T, Tuple, Comparison>{}>
+template<typename T, template<class...> class TL, class... Ts, template<int, class, class> class Comparison>
+struct FindIndex<T, TL<Ts...>, Comparison> : detail::MatchField_<0, T, std::tuple<Ts..., T>, Comparison>
 {
 };
-//! Function to return the element in Tuple whose type matches T
-//! Note: if there are more than one, the first occurrence will be returned
+/*! \brief Meta function to return the element in Tuple whose type matches T
+ *
+ * If there are more than one, the first occurrence will be returned
+ * If there is no such type, a compiler error is generated due to accessing
+ * the tuple out of bounds
+ */
 template<typename T, typename Tuple>
 decltype(auto) pickType(Tuple& tup)
 {
-    return std::get<FindIndex<T, Tuple>{}>(tup);
+    // decay Tuple so that const- and reference-qualified tuples resolve to the same index
+    return std::get<FindIndex<T, std::decay_t<Tuple>>{}>(tup);
 }
+//! \brief template meta function to determine whether T is contained in TL
+template<class T, class TL>
+struct Contains
+{
+};
+
+//! \brief specialization for class templates: true iff FindIndex locates T before the end sentinel
+//! The comparison is parenthesized so the "<" cannot be misread as opening a template
+//! argument list, which also avoids the clang-format mangling of the unparenthesized form
+template<class T, template<class...> class TL, class... Ts>
+struct Contains<T, TL<Ts...>> : std::bool_constant<(FindIndex<T, TL<Ts...>>{} < sizeof...(Ts))>
+{
+};
+
+
//! Utility to call function with each element in tuple_
template<class F, class... Ts>
void for_each_tuple(F&& func, std::tuple<Ts...>& tuple_)
gmx_add_gtest_executable(
${exename}
CPP_SOURCE_FILES
+ internal.cpp
user.cpp
)
target_link_libraries(${exename} PRIVATE nblib_test_infrastructure nblib)
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief
+ * This implements basic nblib utility tests
+ *
+ * \author Victor Holanda <victor.holanda@cscs.ch>
+ * \author Joe Jordan <ejjordan@kth.se>
+ * \author Prashanth Kanduri <kanduri@cscs.ch>
+ * \author Sebastian Keller <keller@cscs.ch>
+ */
+
+#include "nblib/tests/testhelpers.h"
+#include "nblib/util/internal.h"
+#include "nblib/util/user.h"
+
+namespace nblib
+{
+
+// NOTE: the "outOfRange" expectations below pin down the sentinel behavior of
+// FindIndex: when the requested type is absent, the size of the tuple/type list
+// (one past the last valid index) is returned
+TEST(NblibInternalUtils, FindIndexTuple1)
+{
+    using TupleType = std::tuple<float>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, outOfRange);
+}
+
+TEST(NblibInternalUtils, FindIndexTuple2)
+{
+    using TupleType = std::tuple<float, int>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+    constexpr int intIndex = FindIndex<int, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, intIndex);
+    EXPECT_EQ(2, outOfRange);
+}
+
+// FindIndex behaves identically on nblib::TypeList and std::tuple
+TEST(NblibInternalUtils, FindIndexTypeList1)
+{
+    using ListType = TypeList<float>;
+
+    constexpr int floatIndex = FindIndex<float, ListType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, ListType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, outOfRange);
+}
+
+TEST(NblibInternalUtils, FindIndexTypeList2)
+{
+    using ListType = TypeList<float, int>;
+
+    constexpr int floatIndex = FindIndex<float, ListType>{};
+    constexpr int intIndex = FindIndex<int, ListType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, ListType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(1, intIndex);
+    EXPECT_EQ(2, outOfRange);
+}
+
+
+// Contains<T, TL> is true exactly when FindIndex finds T before the end sentinel
+TEST(NblibInternalUtils, Contains)
+{
+    using ListType = TypeList<float, int>;
+
+    constexpr bool hasFloat = Contains<float, ListType>{};
+    constexpr bool hasInt = Contains<int, ListType>{};
+    constexpr bool hasUint = Contains<unsigned, ListType>{};
+
+    EXPECT_TRUE(hasFloat);
+    EXPECT_TRUE(hasInt);
+    EXPECT_FALSE(hasUint);
+}
+
+// with duplicated types, the first occurrence wins
+TEST(NblibInternalUtils, FindIndexTupleRepeated)
+{
+    using TupleType = std::tuple<float, float, int>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+
+    constexpr int intIndex = FindIndex<int, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(2, intIndex);
+    EXPECT_EQ(3, outOfRange);
+}
+
+TEST(NblibInternalUtils, FindIndexTypeListRepeated)
+{
+    using TupleType = TypeList<float, float, int>;
+
+    constexpr int floatIndex = FindIndex<float, TupleType>{};
+
+    constexpr int intIndex = FindIndex<int, TupleType>{};
+
+    constexpr int outOfRange = FindIndex<unsigned, TupleType>{};
+
+    EXPECT_EQ(0, floatIndex);
+    EXPECT_EQ(2, intIndex);
+    EXPECT_EQ(3, outOfRange);
+}
+
+
+} // namespace nblib
namespace
{
-TEST(NBlibTest, checkNumericValues)
+TEST(NBlibTest, isRealValued)
 {
+    // a buffer containing only finite coordinates must be accepted
     std::vector<Vec3> vec;
     vec.emplace_back(1., 1., 1.);
     vec.emplace_back(2., 2., 2.);
-    bool ret = checkNumericValues(vec);
+    bool ret = isRealValued(vec);
     EXPECT_EQ(ret, true);
 }
vec.emplace_back(NAN, NAN, NAN);
- bool ret = checkNumericValues(vec);
+ bool ret = isRealValued(vec);
EXPECT_EQ(ret, false);
}
vec.emplace_back(INFINITY, INFINITY, INFINITY);
- bool ret = checkNumericValues(vec);
+ bool ret = isRealValued(vec);
EXPECT_EQ(ret, false);
}
constexpr int N = 10;
std::vector<real> masses(N, 1.0);
auto out = generateVelocity(300.0, 1, masses);
- bool ret = checkNumericValues(out);
+ bool ret = isRealValued(out);
EXPECT_EQ(ret, true);
}
#include "nblib/util/user.h"
#include "gromacs/random/tabulatednormaldistribution.h"
#include "gromacs/random/threefry.h"
+#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/fatalerror.h"
namespace nblib
}
//! Check within the container of gmx::RVecs for a NaN or inf
-bool checkNumericValues(const std::vector<Vec3>& values)
+bool isRealValued(gmx::ArrayRef<const Vec3> values)
{
for (auto val : values)
{
return true;
}
+//! Set every element of \c cartesianArray to the zero vector (0, 0, 0)
+void zeroCartesianArray(gmx::ArrayRef<Vec3> cartesianArray)
+{
+ std::fill(cartesianArray.begin(), cartesianArray.end(), Vec3{ 0, 0, 0 });
+}
+
} // namespace nblib
#include "nblib/basicdefinitions.h"
#include "nblib/vector.h"
+namespace gmx
+{
+template<typename T>
+class ArrayRef;
+} // namespace gmx
+
namespace nblib
{
std::vector<Vec3> generateVelocity(real Temperature, unsigned int seed, std::vector<real> const& masses);
//! Check within the container of gmx::RVecs for a NaN or inf
-bool checkNumericValues(const std::vector<Vec3>& values);
+bool isRealValued(gmx::ArrayRef<const Vec3> values);
+
+//! Zero a cartesian buffer
+void zeroCartesianArray(gmx::ArrayRef<Vec3> cartesianArray);
//! Used to ignore unused arguments of a lambda functions
inline void ignore_unused() {}
# FindCUDA.cmake is unaware of the mechanism used by cmake to embed
# the compiler flag for the required C++ standard in the generated
# build files, so we have to pass it ourselves
-if (CUDA_VERSION VERSION_LESS 10.2)
- # CUDA doesn't formally support C++17 until version 10.2, so for
+if (CUDA_VERSION VERSION_LESS 11.0)
+ # CUDA doesn't formally support C++17 until version 11.0, so for
# now host-side code that compiles with CUDA is restricted to
# C++14. This needs to be expressed formally for older CUDA
# versions.
list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX14_STANDARD_COMPILE_OPTION}")
else()
- list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX17_STANDARD_COMPILE_OPTION}")
+ # gcc-7 pre-dated C++17, so uses the -std=c++1z compiler flag for it,
+ # which modern nvcc does not recognize. So we work around that by
+ # compiling in C++14 mode. Clang doesn't have this problem because nvcc
+ # only supports versions of clang that already understood -std=c++17
+ if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8)
+ list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX14_STANDARD_COMPILE_OPTION}")
+ else()
+ list(APPEND GMX_CUDA_NVCC_FLAGS "${CMAKE_CXX17_STANDARD_COMPILE_OPTION}")
+ endif()
endif()
# assemble the CUDA flags
Images are (re)built manually by |Gromacs| project staff and pushed to
repositories at https://hub.docker.com/u/gromacs
+Refer to :file:`buildall.sh` in the ``master`` branch for the set of images
+currently being built.
+
Utilities
=========
+:file:`utility.py`
+------------------
+
.. automodule:: utility
:members:
-HPC container maker
--------------------
-
-We use the `NVidia HPC Container Maker <https://github.com/NVIDIA/hpc-container-maker>`__
-package for scripted Dockerfile generation.
-See :file:`admin/containers/scripted_gmx_docker_builds.py`.
+:file:`scripted_gmx_docker_builds.py`
+-------------------------------------
-.. todo:: :issue:`3272` Insert tool documentation.
- E.g. ``.. automodule:: scripted_gmx_docker_builds``
+.. automodule:: scripted_gmx_docker_builds
-GitLab
-======
+GitLab CI Pipeline Execution
+============================
The repository contains DockerFiles and GitLab Runner configuration
files to support automated testing and documentation builds.
.. todo:: Expand this documentation to resolve :issue:`3275`
-Pipeline execution
-------------------
-
.. todo:: Discuss the distinct characteristics of |Gromacs| CI pipelines to relevant to job configuration.
+ (:issue:`3472` and :issue:`3617`)
-.. todo:: Comment on the number of pipelines that can be or which are likely to be running at the same time.
+.. todo:: (:issue:`3472` and :issue:`3617`) Comment on the number of pipelines that can be or which are likely to be running at the same time.
.. note::
sufficient testing before acceptance.
Configuration files
-~~~~~~~~~~~~~~~~~~~
+-------------------
At the root of the repository, :file:`.gitlab-ci.yml` defines the stages and
some default parameters, then includes files from :file:`admin/gitlab-ci/` to
via the `*extends* job property <https://docs.gitlab.com/ee/ci/yaml/#extends>`_.
Job parameters
-~~~~~~~~~~~~~~
+--------------
Refer to https://docs.gitlab.com/ee/ci/yaml for complete documentation on
GitLab CI job parameters, but note the following GROMACS-specific conventions.
to `cache:key <https://docs.gitlab.com/ee/ci/yaml/#cachekey>`__
image
- Part of the tool chain configuration. Instead of setting *image*
- directly, *extend* a *.use_<toolchain>* template from
- :file:`admin/gitlab-ci/global.gitlab-ci.yml`
+ See :doc:`/dev-manual/containers` for more about the Docker images used for the
+ CI pipelines. If a job depends on artifacts from previous jobs, be sure
+ to use the same (or a compatible) image as the dependency!
rules
only
for details of the merging behavior. Refer to :ref:`variables` for local usage.
Schedules and triggers
-~~~~~~~~~~~~~~~~~~~~~~
+----------------------
Pipeline `schedules <https://gitlab.com/help/ci/pipelines/schedules>`__ are
configured through the GitLab web interface.
through the Gitlab web interface.
Global templates
-~~~~~~~~~~~~~~~~
+----------------
In addition to the templates in the main job definition files,
common "mix-in" functionality and behavioral templates are defined in
:file:`admin/gitlab-ci/global.gitlab-ci.yml`
Job names
-~~~~~~~~~
+---------
Job names should
.. _variables:
Updating regression tests
-~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------
Changes in |Gromacs| that require changes in regression-tests are notoriously hard,
because a merge request that tests against the non-updated version of the
The solution is a new regression-test branch or commit, uploaded to gitlab.
Then set that regression test branch with REGRESSIONTESTBRANCH or
the specific commit with REGRESSIONTESTCOMMIT when
-running the specific pipeline that requires the regressiontest-update.
+running the specific pipeline that requires the regressiontest-update.
See below on how to set variables for specific pipelines.
Variables
-~~~~~~~~~
+---------
The GitLab CI framework, GitLab Runner, plugins, and our own scripts set and
use several `variables <https://docs.gitlab.com/ee/ci/variables/README.html>`__.
Integer version number provided by toolchain mix-in for convenience and
internal use.
+ CMAKE
+ ``gromacs/ci-...`` Docker images built after October 2020 have several
+ versions of CMake installed. The most recent version of CMake in the
+ container will be appear first in ``PATH``. To allow individual jobs to
+ use specific versions of CMake, please write the job *script* sections
+ using ``$CMAKE`` instead of ``cmake`` and begin the *script* section with
+ a line such as ``- CMAKE=${CMAKE:-$(which cmake)}``. Specify a CMake
+ version by setting the *CMAKE* variable to the full executable path for
+ the CMake version you would like to use. See also :doc:`containers`.
+
CMAKE_COMPILER_SCRIPT
CMake command line options for a tool chain. A definition is provided by
the mix-in toolchain definitions (e.g. ``.use-gcc8``) to be appended to
pipeline execution time.
REGRESSIONTESTBRANCH
- Use this branch of the regressiontests rather than master to allow for
+ Use this branch of the regressiontests rather than master to allow for
merge requests that require updated regression tests with valid CI tests.
REGRESSIONTESTCOMMIT
- Use this commit to the regressiontests rather than the head on master to
- allow for merge requests that require updated regression tests with
+ Use this commit to the regressiontests rather than the head on master to
+ allow for merge requests that require updated regression tests with
valid CI tests.
POST_MERGE_ACCEPTANCE
``BUILD_DIR``, ``INSTALL_DIR``, ``CACHE_FALLBACK_KEY``, ...
Setting variables
-~~~~~~~~~~~~~~~~~
+-----------------
Variables for individual piplelines are set in the gitlab interface under
``CI/CD``; ``Pipelines``. Then chose in the top right corner ``Run Piplelines``.
been deprecated for years, it is not supported.
For example, depending on your actual MPI library, use ``cmake
--DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DGMX_MPI=on``.
+-DMPI_C_COMPILER=mpicc -DGMX_MPI=on``.
CMake
--- /dev/null
+Design goals and motivation for the data format of bonded forces in NB-LIB
+--------------------------------------------------------------------------
+
+
+The current format for listed forces in GROMACS looks like this:
+
+.. code:: cpp
+
+ struct InteractionDefinitions
+ {
+ std::vector<t_iparams> iparams;
+ std::array<std::vector<int>, F_NRE> il;
+ };
+
+The format covers all interaction types, i.e. \ ``t_iparams`` is a union
+type which can hold the parameters of any type.
+The other member called ``il`` contains the
+indices for each interaction type, where ``F_NRE`` is the number of
+interaction types that GROMACS supports. More precisely, each
+member of ``il``, a ``std::vector<int>``, is a flattened list of all
+interactions for a given interaction type. The vector contains ``N+1`` integer indices
+for each interaction, where ``N`` is the number of particles that are
+involved in the interaction. An additional index is needed to retrieve
+the correct parameters in ``iparams``, hence the total number of indices sums up
+to ``N+1`` per interaction.
+
+The big advantage of storing all types in a union data type is (was),
+that it allows looping over all types with a simple for-loop.
+In pre C++11 and perhaps even pre C++14 times, looping over different
+types was a big hassle and the union data type approach likely was the
+only practicable solution. One downside of this approach, however, is
+that with just a single (union) type, one can't leverage the compiler's
+type system, most importantly static branching, for example with overload resolution.
+As a consequence, only dynamic branching with ``if`` statements remains.
+
+Consider, for instance, the implementation of the top-level
+``calc_listed(const InteractionDefinitions& idef, ...)`` in GROMACS, which in its essence,
+looks like this:
+
+.. code:: cpp
+
+ void calc_listed(const InteractionDefinitions& idef, ...)
+ {
+ // manage timing and multi-threading
+
+ for (int ftype = 0; ftype < F_NRE; ++type)
+ {
+ // branch out and descend stack for 2 intermediate functions based on
+ // the type of interaction that ftype corresponds to
+ // then call a function from a pointer table
+
+ bondFunction* bonded = bondedInteractionFunctions[ftype];
+
+ // compute all forces for ftype
+ bonded(idef.iparams, idef.il[ftype], ...);
+ }
+
+ // reduce thread output
+ }
+
+GROMACS supports a lot of different listed interaction types, such as different
+types of bonds, angles and proper and improper dihedrals. These different types
+require different handling and finally the right force kernel chosen from a table
+of function pointers.
+The handling code required to correctly branch out to all the different cases
+results in quite a deep call stack, a lot of branching logic and ends up accounting
+for a fair part of the overall complexity, which should ideally just consist of
+the type-specific force calculation implementations.
+
+
+A type-aware approach to listed forces
+--------------------------------------
+
+NB-LIB aims to reduce the overall code complexity with a type-aware data format
+where each interaction type is implemented as a separate (C++)-type.
+The format for a given interaction type looks like this:
+
+.. code:: cpp
+
+ template <class Interaction>
+ struct InteractionData
+ {
+ std::vector<Index<Interaction>> indices;
+ std::vector<Interaction> parameters;
+ };
+
+For each type of interaction, we store the interaction indices plus the
+interaction parameters. While the (C++)-types are different, the actual data stored is
+exactly the same: ``N+1`` integer indices per ``N``-center interaction plus the unique parameters.
+An example for ``Interaction`` would be ``HarmonicBond``, the public part of which looks like this:
+
+.. code:: cpp
+
+ class HarmonicBond
+ {
+ public:
+ // return lvalue ref for use with std::tie
+ // in order to leverage std::tuple comparison ops
+ const real& forceConstant();
+ const real& equilDistance();
+ };
+
+The ``Index`` traits class deduces to ``std::array<int, 3>``, because
+for each harmonic bond, we need two ``int``\ s for the coordinate
+indices and a third ``int`` to look up the bond parameters in the
+``parameters`` vector. For angles and dihedrals, the ``Index`` trait
+would add an additional one or two ``int``\ s to hold the additional
+coordinate indices.
+
+Finally, we gather all types of interactions in a
+``std::tuple``, such that the complete definition for listed forces
+in NB-LIB looks like this:
+
+.. code:: cpp
+
+ using ListedInteractions = std::tuple<InteractionData<HarmonicBond>, ..., InteractionData<HarmonicAngle>, ...>;
+
+One important property of ``ListedInteractions`` is that it stores exactly the same information as ``InteractionDefinitions``
+and therefore conversion in either direction is easy to implement.
+
+
+The NB-LIB listed forces pipeline
+---------------------------------
+
+Given the listed interaction data provided in the format described above,
+the steps required to calculate the corresponding forces
+are, in brief:
+
+ * Loop over all interaction types
+ * Loop over all interactions for given type
+ * Call interaction type kernel, store forces and return energy
+
+
+This procedure is identical to the current implementation in GROMACS.
+In actual code, the first step looks like this:
+
+.. code:: cpp
+
+ template<class Buffer, class Pbc>
+ auto reduceListedForces(const ListedInteractions& interactions,
+ const std::vector<gmx::RVec>& x,
+ Buffer* forces,
+ const Pbc& pbc)
+ {
+ std::array<real, std::tuple_size<ListedInteractions>::value> energies;
+
+ // lambda function, will be applied to each type
+ auto computeForceType = [forces, &x, &energies, &pbc](const auto& ielem) {
+ real energy = computeForces(ielem.indices, ielem.parameters, x, forces, pbc);
+        energies[FindIndex<std::decay_t<decltype(ielem)>, ListedInteractions>{}] = energy;
+ };
+
+ // apply the lambda to all bond types
+ for_each_tuple(computeForceType, interactions);
+
+ return energies;
+ }
+
+With the help of a generic lambda and C++17’s ``std::apply`` in the
+one-liner ``for_each_tuple``, we can generate the loop over the
+different types in the tuple quite effortlessly. While
+``reduceListedForces`` implements a loop over the interaction types, the
+next layer, ``computeForces`` implements a loop over all interactions of
+a given type:
+
+.. code:: cpp
+
+ template <class Index, class InteractionType, class Buffer, class Pbc>
+ real computeForces(const std::vector<Index>& indices,
+ const std::vector<InteractionType>& iParams,
+ const std::vector<gmx::RVec>& x,
+ Buffer* forces,
+ const Pbc& pbc)
+ {
+ real Epot = 0.0;
+
+ for (const auto& index : indices)
+ {
+            Epot += dispatchInteraction(index, iParams, x, forces, pbc);
+ }
+
+ return Epot;
+ }
+
+Compared to the union data type approach where this loop has been manually
+implemented for all interaction types, in NB-LIB, only a single implementation
+is required.
+
+We’re now down to the level of individual bonds, angles and dihedrals.
+At this point, the next steps depend on the actual type of the
+interaction. But instead of dispatching each harmonic bond, cubic bond,
+harmonic angle and so on to their separate paths just yet, we just
+differentiate based on the number of interaction centers for now.
+Through overload resolution, the appropriate version
+``dispatchInteraction`` gets called now, such as this one for the case
+of 2-center interactions:
+
+.. code:: cpp
+
+ template <class Buffer, class TwoCenterType, class Pbc>
+ std::enable_if_t<IsTwoCenter<TwoCenterType>::value, real>
+ dispatchInteraction(const InteractionIndex<TwoCenterType>& index,
+ const std::vector<TwoCenterType>& bondInstances,
+ const std::vector<gmx::RVec>& x,
+ Buffer* forces,
+ const Pbc& pbc)
+ {
+ int i = std::get<0>(index);
+ int j = std::get<1>(index);
+ const gmx::RVec& x1 = x[i];
+ const gmx::RVec& x2 = x[j];
+ const TwoCenterType& bond = bondInstances[std::get<2>(index)];
+
+ gmx::RVec dx;
+ // calculate x1 - x2 modulo pbc
+ pbc.dxAiuc(x1, x2, dx);
+ real dr2 = dot(dx, dx);
+ real dr = std::sqrt(dr2);
+
+ auto [force, energy] = bondKernel(dr, bond);
+
+ // avoid division by 0
+ if (dr2 != 0.0)
+ {
+ force /= dr;
+ detail::spreadTwoCenterForces(force, dx, &(*forces)[i], &(*forces)[j]);
+ }
+
+ return energy;
+ }
+
+We can again observe that common parts among different 2-center interaction types
+are reused. The common parts are
+
+ * coordinate retrieval
+ * computation of the scalar distance
+ * spreading of the scalar part of the force to the two centers
+
+The only remaining thing to do now is to call the actual
+kernel to compute the force. Since ``bond`` has a distinct type, we can
+again use overload resolution:
+
+.. code:: cpp
+
+ template <class T>
+ auto bondKernel(T dr, const HarmonicBond& bond)
+ {
+ return harmonicScalarForce(bond.forceConstant(), bond.equilDistance(), dr);
+ }
+
+and call the actual kernel, which in its simplest form for a harmonic
+bond looks like this:
+
+.. code:: cpp
+
+ template <class T>
+ std::tuple<T, T> harmonicScalarForce(T k, T x0, T x)
+ {
+ real dx = x - x0;
+ real dx2 = dx * dx;
+
+ real force = -k * dx;
+ real epot = 0.5 * k * dx2;
+
+ return std::make_tuple(force, epot);
+
+ /* That was 6 flops */
+ }
+
+That’s it! The approach outlined here manages to reuse (between different types)
+a significant part of the code that feeds input data to force kernels.
+Notably, not a single ``if(ftype)`` is required to implement the control flow.
+The remaining parts for a feature complete implementation are
+overloads of ``dispatchInteraction`` for the 3- to 5-center interactions and
+the type-aware wrappers for all the different kernels implemented in
+GROMACS. They have been omitted for brevity.
+
+A note on **multithreading**: multithreading is handled above the top-level
+``reduceListedForces`` described here. For parallel execution, the
+input ``ListedInteractions`` tuple is split into ``nThreads`` parts and a
+``Buffer`` object is set up for each thread. ``reduceListedForces`` is then
+called once by each thread with the assigned fraction of ``ListedInteractions``
+and the ``Buffer`` as argument.
+The lifetime of the ``ListedInteractions`` splits is coupled to the domain decomposition.
+
+Summary
+-------
+
+NB-LIB listed forces employs a (C++)-type aware data format that
+is otherwise equivalent to its counter-part in GROMACS.
+The type-aware data format is then used to simplify the "routing" layer that
+connects data input to the appropriate kernels. Thanks to static branching and polymorphism,
+increased code reuse and simplified branching logic could be achieved.
+**The force kernels themselves do not need to be changed and NB-LIB refers to
+GROMACS for their implementation.**
+
+
+Outlook
+-------
+
+The data flow management for listed forces described here allows further
+improvements to be implemented:
+
+* Aggregate interaction types: fuse interactions of different types into
+ aggregated types. For example, a dihedral interaction and the bonds and angles
+ that are present among the same four particle indices can be combined into a single
+ aggregated interaction. This allows to reuse the particle coordinates loaded from memory
+ for multiple types and also combines the store operations for the forces.
+ Type aggregates also likely simplify an efficient GPU implementation of listed forces.
+
+* Separation of a topology containing both parameter sets for a system state A and B into two
+ separate topologies for the A and B system states.
--- /dev/null
+Adding New Listed-Interaction Types in NB-LIB
+=============================================
+
+NB-LIB currently has code paths for listed interactions that occur between two, three, four and five different particles.
+To extend NB-LIB to support more types of particle interactions, modify the following three files.
+
+Two center interactions must use the distance between the centers as an input to the force kernel.
+Three center interactions take the form ``(particleI, particleJ, ParticleK)``.
+In this case, the middle particle, ``particleJ`` is taken as the center around which the angle is computed.
+This angle must be an input to a three center force kernel.
+Likewise for four center interactions, the dihedral angle phi must be an input to the force kernel.
+Accepting these constraints, it is possible to add a new kernel by modifying the following three files.
+
+1) bondtypes.h_
+2) definitions.h_
+3) kernels.hpp_
+
+.. _bondtypes.h:
+
+1) bondtypes.h
+---------------
+
+This file contains one C++ type to store the parameters for each interaction type.
+New interaction types are added here as separate C++ types.
+The interface of these types is completely unrestricted.
+The only requirements are equality and less than comparison, and that the interface be
+compatible with the corresponding (user-added) kernel.
+
+.. _definitions.h:
+
+2) definitions.h
+------------------------
+
+This file begins with pre-processor macro lists that classify concrete interaction types into two, three, four and five center types.
+To add a new type, the user must add the interaction type parameter struct name to the macro of the correct center number.
+In this case, ``NewBondType`` is an example of a two center interaction.
+As such it would get added to the ``SUPPORTED_TWO_CENTER_TYPES`` macro.
+Assuming that the only other two center interaction is called ``DefaultBond``, the result would look like the following snippet.
+
+.. code:: cpp
+
+ #define SUPPORTED_TWO_CENTER_TYPES DefaultBond, NewBondType
+
+.. _kernels.hpp:
+
+Adding ``NewBondType`` to this macro ensures that the NBLIB ``molecule``
+class ``addInteraction`` function supports adding the new bond type
+and includes it in the listed interaction data that the ``topology`` class
+provides. The ``SUPPORTED_TWO_CENTER_TYPES`` macro is immediately converted into a
+C++ type list that is implemented as a variadic template. The type list
+is then used to define all the dependent data structures. Apart from creating
+the type list, the only place where the macro is needed is explicit template instantiation.
+
+Note that, as of C++17, there's no alternative to preprocessor macros for adding
+the required template instantiations controlled through the macros described here.
+(Other than manually adding the template instantiations, which would require the instantiation list
+of several templates to be updated each time a new interaction type is added. Compared to the preprocessor
+based solution where just a single macro has to be extended, this would clearly be an inferior solution.)
+In NBLIB, the design decision we took was that we did not want to expose a templated
+interface in a user header and it is for this reason that we explicitly need
+to instantiate the interface with all the supported listed interaction types defined
+in this macro.
+
+3) kernels.hpp
+---------------------
+
+In this file the actual force kernels for each interaction type are implemented.
+Each kernel call is templated to allow various precisions and is
+accessed through an overload ``bondKernel`` that extracts the relevant
+parameters from a ``const NewBondType&`` argument.
+The kernel return type is always an ``std::tuple`` of the force and the potential.
+
+.. code:: cpp
+
+ /*! \brief kernel to calculate the new bond type force
+ *
+ * \param k Force constant
+ * \param x0 Equilibrium distance
+ * \param scale The scaling factor
+ * \param x Input bond length
+ *
+ * \return tuple<force, potential energy>
+ */
+ template <class T>
+ std::tuple<T, T> newBondForce(T k, T x0, T scale, T x)
+ {
+ real exponent = std::exp( (x - x0) / scale);
+ real epot = k * exponent;
+ real force = epot / scale;
+ return std::make_tuple(force, epot);
+ }
+
+ template <class T>
+ inline std::tuple<T, T> bondKernel(T dr, const NewBondType& bond)
+ {
+ return newBondForce(bond.forceConstant(), bond.equilDistance(), bond.scaleFactor(), dr);
+ }
+
estimate used by AWH to initialize :math:`N` in terms of more meaningful
quantities
-.. math:: \frac{1}{N_0} = \frac{1}{N_0(\varepsilon_0, D)} \sim D\varepsilon_0^2.
+.. math:: \frac{1}{N_0} = \frac{1}{N_0(\varepsilon_0, D)} = \frac{1}{\Delta
+ t_\mathrm{sample}} \max_d \frac{L_d^2}{2D_d} \varepsilon_0^2
:label: eqawhn0
-Essentially, this tells us that a slower system (small :math:`D`)
+where :math:`L_d` is the length of the interval and :math:`D_d` is
+the diffusion constant along dimension :math:`d` of the AWH bias.
+For one dimension, :math:`L^2/2D` is the average time to diffuse
+over a distance of :math:`L`. We then take the maximum crossing
+time over all dimensions involved in the bias.
+Essentially, this formula tells us that a slower system (small :math:`D`)
requires more samples (larger :math:`N^0`) to attain the same level of
accuracy (:math:`\varepsilon_0`) at a given sampling rate. Conversely,
for a system of given diffusion, how to choose the initial biasing rate
maximum free energy difference of the PMF estimate. If this is much
larger than the expected magnitude of the free energy barriers that
should be crossed, then the system is probably being pulled too hard and
-:math:`D` should be decreased. :math:`\varepsilon_0` on the other hand,
-would only be tweaked when starting an AWH simulation using a fairly
-accurate guess of the PMF as input.
+:math:`D` should be decreased. An accurate estimate of the diffusion
+can be obtained from an AWH simulation with the :ref:`gmx awh` tool.
+:math:`\varepsilon_0` on the other hand, should be a rough estimate
+of the initial error.
Tips for efficient sampling
^^^^^^^^^^^^^^^^^^^^^^^^^^^
H atoms in particular.
:issue:`3469`
+
+Corrected AWH initial histogram size
+""""""""""""""""""""""""""""""""""""
+
+The initial histogram size for AWH biases depended (weakly) on the force
+constant. This dependence has been removed, which increases the histogram
+size by about a factor of 3. In practice this has only a minor effect
+on the time to solution. For multiple dimensions, the histogram size was
+underestimated, in particular with a combination of slower and faster
+dimensions. The, now simplified, formula for the initial histogram size is
+given in the reference manual.
+
+:issue:`3751`
SIMD acceleration for bonds slightly improves performance for systems
with H-bonds only constrained or no constraints. This gives a significant
improvement with multiple time stepping.
+
+Allow offloading GPU update and constraints without direct GPU communication
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+Allow domain-decomposition and separate PME rank parallel runs to offload update and
+constraints to a GPU with CUDA without requiring the (experimental) direct GPU
+communication features to be also enabled.
|Gromacs| now builds correctly on Windows with MSVC even when the path
to the source or build directory has a space in it.
+Builds with MSVC 2019 correctly detect the proper static linking setup
+during CMake configuration.
+
RDTSCP usage and reporting
""""""""""""""""""""""""""
Use a multiple timing-stepping integrator to evaluate some forces, as specified
by :mdp:`mts-level2-forces` every :mdp:`mts-level2-factor` integration
- steps. All other forces are evaluated at every step.
+ steps. All other forces are evaluated at every step. MTS is currently
+ only supported with :mdp-value:`integrator=md`.
.. mdp:: mts-levels
old_IFS="$IFS"
IFS=":"
-# First remove gromacs part of ld_library_path
-tmppath=""
-for i in $@LD_LIBRARY_PATH@; do
- if test "$i" != "$GMXLDLIB"; then
- tmppath="${tmppath}${tmppath:+:}${i}"
- fi
-done
-@LD_LIBRARY_PATH@=$tmppath
-
-# remove gromacs part of PKG_CONFIG_PATH
-tmppath=""
-for i in $PKG_CONFIG_PATH; do
- if test "$i" != "$GMXLDLIB/pkgconfig"; then
- tmppath="${tmppath}${tmppath:+:}${i}"
- fi
-done
-PKG_CONFIG_PATH=$tmppath
-
-# remove gromacs part of path
-tmppath=""
-for i in $PATH; do
- if test "$i" != "$GMXBIN"; then
- tmppath="${tmppath}${tmppath:+:}${i}"
+replace_in_path() {
+ # Parse PATH-like variable $1, and return a copy of it with any instances of $3 removed and $2 added to the beginning.
+ # If $3 is empty, do not remove anything.
+ local tmppath oldpath to_remove to_add old_shell_opts
+ oldpath="$1"
+ to_add="$2"
+ to_remove="$3"
+ # Empty input: the result is just the entry being added
+ if test -z "${oldpath}"; then
+ echo "${to_add}"
+ else
+ # A lone ":" denotes two empty entries; keep the trailing colon
+ if test "${oldpath}" = ":"; then
+ echo "${to_add}:"
+ else
+ tmppath="${to_add}"
+ # Remember the current shell option flags ($-) so the noglob state can be restored
+ old_shell_opts="$-"
+ # Disable globbing so path tokens containing wildcard characters are not expanded
+ set -o noglob
+ set -- ${oldpath}"" # Will put tokens to $@, including empty ones
+ # If did not have noglob ("f") enabled before, disable it back
+ if test -n "${old_shell_opts##*f*}"; then
+ set +o noglob
+ fi
+ # Keep every token except exact matches of the entry being removed
+ for i in "$@"; do
+ if test \( -z "${to_remove}" \) -o \( "$i" != "${to_remove}" \); then
+ tmppath="${tmppath}:${i}"
+ fi
+ done
+ echo "${tmppath}"
+ fi
fi
-done
-PATH=$tmppath
+}
-# and remove the gmx part of manpath
-tmppath=""
-for i in $MANPATH; do
- if test "$i" != "$GMXMAN"; then
- tmppath="${tmppath}${tmppath:+:}${i}"
- fi
-done
-MANPATH=$tmppath
+# Keep current values to remove later
+OLD_GMXLDLIB="$GMXLDLIB"
+OLD_GMXBIN="$GMXBIN"
+OLD_GMXMAN="$GMXMAN"
##########################################################
# This is the real configuration part. We save the Gromacs
GMXTOOLCHAINDIR=${GMXPREFIX}/@GMX_INSTALL_CMAKEDIR@
GROMACS_DIR=${GMXPREFIX}
-@LD_LIBRARY_PATH@=${GMXLDLIB}${@LD_LIBRARY_PATH@:+:}${@LD_LIBRARY_PATH@}
-PKG_CONFIG_PATH=${GMXLDLIB}/pkgconfig${PKG_CONFIG_PATH:+:}${PKG_CONFIG_PATH}
-PATH=${GMXBIN}${PATH:+:}${PATH}
-#debian/ubuntu needs a : at the end
-MANPATH=${GMXMAN}:${MANPATH}
+@LD_LIBRARY_PATH@=$(replace_in_path "${@LD_LIBRARY_PATH@}" "${GMXLDLIB}" "${OLD_GMXLDLIB}")
+PKG_CONFIG_PATH=$(replace_in_path "${PKG_CONFIG_PATH}" "${GMXLDLIB}/pkgconfig" "${OLD_GMXLDLIB}/pkgconfig")
+PATH=$(replace_in_path "${PATH}" "${GMXBIN}" "${OLD_GMXBIN}")
+MANPATH=$(replace_in_path "${MANPATH}" "${GMXMAN}" "${OLD_GMXMAN}")
# export should be separate, so /bin/sh understands it
export GMXBIN GMXLDLIB GMXMAN GMXDATA @LD_LIBRARY_PATH@ PATH MANPATH
endif()
set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES)
-set_property(GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
+set_property(GLOBAL PROPERTY CUDA_SOURCES)
set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE)
# Mark some shared GPU implementation files to compile with CUDA if needed
if (GMX_GPU_CUDA)
- get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES)
- set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
-endif()
-
-# set up CUDA compilation with clang
-if (GMX_CLANG_CUDA)
- foreach (_file ${LIBGROMACS_SOURCES})
- get_filename_component(_ext ${_file} EXT)
- get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT)
- if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format)
- gmx_compile_cuda_file_with_clang(${_file})
- endif()
- endforeach()
+ get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES)
+ set_source_files_properties(${CUDA_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
endif()
if (GMX_GPU_CUDA)
# must suppress them.
GMX_TEST_CXXFLAG(CXXFLAGS_NO_ZERO_AS_NULL_POINTER_CONSTANT "-Wno-zero-as-null-pointer-constant" NVCC_CLANG_SUPPRESSIONS_CXXFLAGS)
- get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES)
foreach(_compile_flag ${NVCC_CLANG_SUPPRESSIONS_CXXFLAGS})
- set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS ${_compile_flag})
+ set(GMX_CUDA_CLANG_FLAGS "${GMX_CUDA_CLANG_FLAGS} ${_compile_flag}")
endforeach()
+ if (GMX_CLANG_CUDA)
+ foreach (_file ${LIBGROMACS_SOURCES})
+ get_filename_component(_ext ${_file} EXT)
+ get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT)
+ if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format)
+ gmx_compile_cuda_file_with_clang(${_file})
+ endif()
+ endforeach()
+ else()
+ get_property(CUDA_SOURCES GLOBAL PROPERTY CUDA_SOURCES)
+ set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS ${GMX_CUDA_CLANG_FLAGS})
+ endif()
endif()
# Only add the -fsycl flag to sources that really need it
return numStepsCheck;
}
-/*! \brief
- * Returns an approximation of the geometry factor used for initializing the AWH update size.
- *
- * The geometry factor is defined as the following sum of Gaussians:
- * sum_{k!=0} exp(-0.5*(k*pi*x)^2)/(pi*k)^2,
- * where k is a xArray.size()-dimensional integer vector with k_i in {0,1,..}.
- *
- * \param[in] xArray Array to evaluate.
- * \returns the geometry factor.
- */
-double gaussianGeometryFactor(gmx::ArrayRef<const double> xArray)
-{
- /* For convenience we give the geometry factor function a name: zeta(x) */
- constexpr size_t tableSize = 5;
- std::array<const double, tableSize> xTabulated = { { 1e-5, 1e-4, 1e-3, 1e-2, 1e-1 } };
- std::array<const double, tableSize> zetaTable1d = { { 0.166536811948, 0.16653116886, 0.166250075882,
- 0.162701098306, 0.129272430287 } };
- std::array<const double, tableSize> zetaTable2d = { { 2.31985974274, 1.86307292523, 1.38159772648,
- 0.897554759158, 0.405578211115 } };
-
- gmx::ArrayRef<const double> zetaTable;
-
- if (xArray.size() == 1)
- {
- zetaTable = zetaTable1d;
- }
- else if (xArray.size() == 2)
- { // NOLINT bugprone-branch-clone
- zetaTable = zetaTable2d;
- }
- else
- {
- /* TODO... but this is anyway a rough estimate and > 2 dimensions is not so popular.
- * Remove the above NOLINT when addressing this */
- zetaTable = zetaTable2d;
- }
-
- /* TODO. Really zeta is a function of an ndim-dimensional vector x and we shoudl have a ndim-dimensional lookup-table.
- Here we take the geometric average of the components of x which is ok if the x-components are not very different. */
- double xScalar = 1;
- for (const double& x : xArray)
- {
- xScalar *= x;
- }
-
- GMX_ASSERT(!xArray.empty(), "We should have a non-empty input array");
- xScalar = std::pow(xScalar, 1.0 / xArray.size());
-
- /* Look up zeta(x) */
- size_t xIndex = 0;
- while ((xIndex < xTabulated.size()) && (xScalar > xTabulated[xIndex]))
- {
- xIndex++;
- }
-
- double zEstimate;
- if (xIndex == xTabulated.size())
- {
- /* Take last value */
- zEstimate = zetaTable[xTabulated.size() - 1];
- }
- else if (xIndex == 0)
- {
- zEstimate = zetaTable[xIndex];
- }
- else
- {
- /* Interpolate */
- double x0 = xTabulated[xIndex - 1];
- double x1 = xTabulated[xIndex];
- double w = (xScalar - x0) / (x1 - x0);
- zEstimate = w * zetaTable[xIndex - 1] + (1 - w) * zetaTable[xIndex];
- }
-
- return zEstimate;
-}
-
/*! \brief
* Estimate a reasonable initial reference weight histogram size.
*
- * \param[in] dimParams Parameters for the dimensions of the coordinate.
* \param[in] awhBiasParams Bias parameters.
* \param[in] gridAxis The BiasGrid axes.
* \param[in] beta 1/(k_B T).
* \param[in] samplingTimestep Sampling frequency of probability weights.
* \returns estimate of initial histogram size.
*/
-double getInitialHistogramSizeEstimate(const std::vector<DimParams>& dimParams,
- const AwhBiasParams& awhBiasParams,
- const std::vector<GridAxis>& gridAxis,
- double beta,
- double samplingTimestep)
+double getInitialHistogramSizeEstimate(const AwhBiasParams& awhBiasParams,
+ const std::vector<GridAxis>& gridAxis,
+ double beta,
+ double samplingTimestep)
{
/* Get diffusion factor */
- double crossingTime = 0.;
+ double maxCrossingTime = 0.;
std::vector<double> x;
for (size_t d = 0; d < gridAxis.size(); d++)
{
- double axisLength = gridAxis[d].isFepLambdaAxis() ? 1.0 : gridAxis[d].length();
- if (axisLength > 0)
- {
- crossingTime += awhBiasParams.dimParams[d].diffusion / (axisLength * axisLength);
- /* The sigma of the Gaussian distribution in the umbrella */
- double sigma = 1.;
- if (dimParams[d].isPullDimension())
- {
- GMX_RELEASE_ASSERT(dimParams[d].pullDimParams().betak != 0,
- "beta*k cannot be zero");
- sigma /= std::sqrt(dimParams[d].pullDimParams().betak);
- }
- x.push_back(sigma / axisLength);
- }
+ GMX_RELEASE_ASSERT(awhBiasParams.dimParams[d].diffusion > 0, "We need positive diffusion");
+ // With diffusion it takes on average T = L^2/2D time to cross length L
+ double axisLength = gridAxis[d].isFepLambdaAxis() ? 1.0 : gridAxis[d].length();
+ double crossingTime = (axisLength * axisLength) / (2 * awhBiasParams.dimParams[d].diffusion);
+ maxCrossingTime = std::max(maxCrossingTime, crossingTime);
}
- GMX_RELEASE_ASSERT(crossingTime > 0, "We need at least one dimension with non-zero length");
+ GMX_RELEASE_ASSERT(maxCrossingTime > 0, "We need at least one dimension with non-zero length");
double errorInitialInKT = beta * awhBiasParams.errorInitial;
- double histogramSize = gaussianGeometryFactor(x)
- / (crossingTime * gmx::square(errorInitialInKT) * samplingTimestep);
+ double histogramSize = maxCrossingTime / (gmx::square(errorInitialInKT) * samplingTimestep);
return histogramSize;
}
updateWeight(numSamplesUpdateFreeEnergy_ * numSharedUpdate),
localWeightScaling(eTarget == eawhtargetLOCALBOLTZMANN ? temperatureScaleFactor : 1),
initialErrorInKT(beta * awhBiasParams.errorInitial),
- initialHistogramSize(getInitialHistogramSizeEstimate(dimParams,
- awhBiasParams,
- gridAxis,
- beta,
- numStepsSampleCoord_ * mdTimeStep)),
+ initialHistogramSize(
+ getInitialHistogramSizeEstimate(awhBiasParams, gridAxis, beta, numStepsSampleCoord_ * mdTimeStep)),
convolveForce(awhParams.ePotential == eawhpotentialCONVOLVED),
biasIndex(biasIndex),
disableUpdateSkips_(disableUpdateSkips == DisableUpdateSkips::yes)
{
if (grid.axis(d).isFepLambdaAxis())
{
- /* TODO: Verify that a threshold of 1.0 is OK. With a very high sample weight 1.0 can be
- * reached quickly even in regions with low probability. Should the sample weight be
- * taken into account here? */
+ /* Do not modify the weight threshold based on a FEP lambda axis. The spread
+ * of the sampling weights does not depend on a Gaussian distribution (like
+ * below). */
weightThreshold *= 1.0;
}
else
{
+ /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
+ * approximately (given that the spacing can be modified if the dimension is periodic)
+ * proportional to sqrt(1/(2*pi)). */
weightThreshold *= grid.axis(d).spacing()
* std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
}
AwhDimParams& awhDimParams = params.awhDimParams;
- awhDimParams.period = 0;
- awhDimParams.diffusion = 0.1;
+ awhDimParams.period = 0;
+ // Correction for removal of GaussianGeometryFactor/2 in histogram size
+ awhDimParams.diffusion = 0.1 / (0.144129616073222 * 2);
awhDimParams.origin = 0.5;
awhDimParams.end = 1.5;
awhDimParams.coordValueInit = awhDimParams.origin;
AwhDimParams& awhDimParams = params.awhDimParams;
- awhDimParams.period = 0;
- awhDimParams.diffusion = 1e-4;
+ awhDimParams.period = 0;
+ // Correction for removal of GaussianGeometryFactor/2 in histogram size
+ awhDimParams.diffusion = 1e-4 / (0.12927243028700 * 2);
awhDimParams.origin = 0;
awhDimParams.end = numLambdaStates - 1;
awhDimParams.coordValueInit = awhDimParams.origin;
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+
+/*! \libinternal \file
+ * \brief Provides ported functions/classes from boost::mp11
+ *
+ * Adapted from the Boost Library 1.67
+ *
+ * \author Roland Schulz <roland.schulz@intel.com>
+ * \ingroup module_compat
+ * \inlibraryapi
+ */
+#ifndef GMX_COMPAT_MP11_H
+#define GMX_COMPAT_MP11_H
+
+#include <utility>
+
+#include "gromacs/utility/exceptions.h"
+
+namespace gmx
+{
+namespace compat
+{
+
+/** \internal \brief Simplified analogue of boost::mp11::mp_with_index, compatible only with C++17 and up.
+ *
+ * \c mp_with_index<N>(i, f) calls \p f with \c mp_size_t<i>() and returns the result.
+ * \p i must be less than \p N.
+ *
+ * Example usage:
+ * \code
+ constexpr int foo_max = 3;
+ template<int i, typename = std::enable_if_t<(i < foo_max)>>
+ bool constexpr foo();
+
+ bool bar(int i)
+ {
+ return mp_with_index<foo_max>(i, [](auto i) {
+ return foo<i>();
+ });
+ }
+ * \endcode
+ */
+template<std::size_t N, class F, typename std::enable_if<(N <= 1)>::type* = nullptr>
+static auto mp_with_index(std::size_t i, F&& f)
+{
+ // Last step of recursion. Must have one active "return" for proper type deduction.
+ if (i == N - 1)
+ {
+ return std::forward<F>(f)(std::integral_constant<std::size_t, N - 1>());
+ }
+ else
+ {
+ const std::string errorMessage =
+ "Invalid arguments of mp_with_index (i=" + std::to_string(i) + ")";
+ GMX_THROW(InternalError(errorMessage));
+ }
+}
+
+template<std::size_t N, class F, typename std::enable_if<(N > 1)>::type* = nullptr>
+static auto mp_with_index(std::size_t i, F&& f)
+{
+ if (i == N - 1)
+ {
+ return std::forward<F>(f)(std::integral_constant<std::size_t, N - 1>());
+ }
+ else
+ {
+ return mp_with_index<N - 1>(i, std::forward<F>(f));
+ }
+}
+
+
+} // namespace compat
+} // namespace gmx
+
+#endif
class not_null
{
public:
- static_assert(std::is_assignable_v<T&, std::nullptr_t>, "T cannot be assigned nullptr.");
+ static_assert(std::is_assignable<T&, std::nullptr_t>::value, "T cannot be assigned nullptr.");
//! Move constructor. Asserts in debug mode if \c is nullptr.
- template<typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+ template<typename U, typename = std::enable_if_t<std::is_convertible<U, T>::value>>
constexpr explicit not_null(U&& u) : ptr_(std::forward<U>(u))
{
Expects(ptr_ != nullptr);
}
//! Simple constructor. Asserts in debug mode if \c u is nullptr.
- template<typename = std::enable_if_t<!std::is_same_v<std::nullptr_t, T>>>
+ template<typename = std::enable_if_t<!std::is_same<std::nullptr_t, T>::value>>
constexpr explicit not_null(T u) : ptr_(u)
{
Expects(ptr_ != nullptr);
}
//! Copy constructor.
- template<typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
+ template<typename U, typename = std::enable_if_t<std::is_convertible<U, T>::value>>
constexpr not_null(const not_null<U>& other) : not_null(other.get())
{
}
if (GMX_BUILD_UNITTESTS)
gmx_add_unit_test(CompatibilityHelpersTests compat-test
CPP_SOURCE_FILES
+ mp11.cpp
pointers.cpp
)
# Maintainer note: The files here may be borrowed from other projects, and
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+#include "gmxpre.h"
+
+#include "gromacs/compat/mp11.h"
+
+#include <gtest/gtest.h>
+
+// Defining some dummy functions to use later
+
+namespace gmx
+{
+namespace compat
+{
+namespace test
+{
+enum class Options
+{
+ Op0 = 0,
+ Op1 = 1,
+ Op2 = 2,
+ Count = 3
+};
+
+template<int i>
+static int testIncrement()
+{
+ return i + 1;
+}
+
+template<bool i>
+static bool testNot()
+{
+ return !i;
+}
+
+template<Options i, Options j>
+static int testEnumTwoIPlusJPlusK(int k)
+{
+ return 2 * int(i) + int(j) + k;
+}
+
+TEST(TemplateMPTest, MpWithIndexInt)
+{
+ static constexpr int maxArgValue = 4;
+ int inc_0 = mp_with_index<maxArgValue>(0, [](auto i) { return testIncrement<i>(); });
+ EXPECT_EQ(inc_0, 1);
+ int inc_3 = mp_with_index<maxArgValue>(3, [](auto i) { return testIncrement<i>(); });
+ EXPECT_EQ(inc_3, 4);
+}
+
+TEST(TemplateMPTest, MpWithIndexIntBad)
+{
+ static constexpr int maxArgValue = 4;
+ int i = maxArgValue;
+ // Function requirement: i < maxArgValue
+ EXPECT_THROW(mp_with_index<maxArgValue>(i, [](auto i) { return testIncrement<i>(); }),
+ gmx::InternalError);
+}
+
+TEST(TemplateMPTest, MpWithIndexBool)
+{
+ bool not_true = mp_with_index<2>(size_t(true), [](auto i) { return testNot<i>(); });
+ EXPECT_FALSE(not_true);
+ bool not_false = mp_with_index<2>(size_t(false), [](auto i) { return testNot<i>(); });
+ EXPECT_TRUE(not_false);
+}
+
+TEST(TemplateMPTest, MpWithIndexEnum)
+{
+ int five = 5;
+ int two1plus2plus5 = mp_with_index<static_cast<size_t>(Options::Count)>(
+ static_cast<size_t>(Options::Op2), [=](auto i) {
+ return testEnumTwoIPlusJPlusK<Options::Op1, static_cast<Options>(size_t(i))>(five);
+ });
+ EXPECT_EQ(two1plus2plus5, 9);
+}
+
+} // namespace test
+} // namespace compat
+} // namespace gmx
int pulse,
gmx_wallcycle* wcycle) :
dd_(dd),
- dimIndex_(dimIndex),
sendRankX_(dd->neighbor[dimIndex][1]),
recvRankX_(dd->neighbor[dimIndex][0]),
sendRankF_(dd->neighbor[dimIndex][0]),
deviceContext_(deviceContext),
localStream_(localStream),
nonLocalStream_(nonLocalStream),
+ dimIndex_(dimIndex),
pulse_(pulse),
wcycle_(wcycle)
{
int dimIndex_ = 0;
//! The pulse corresponding to this halo exchange instance
int pulse_ = 0;
- //! Number of zones. Always 1 for 1-D case.
- const int nzone_ = 1;
//! The wallclock counter
gmx_wallcycle* wcycle_ = nullptr;
//! The atom offset for receive (x) or send (f) for dimension index and pulse corresponding to this halo exchange instance
localatomsetmanager.cpp
)
-gmx_add_mpi_unit_test(DomDecMpiTests domdec-mpi-test 4
- CPP_SOURCE_FILES
+gmx_add_mpi_unit_test(DomDecMpiTests domdec-mpi-test 4 HARDWARE_DETECTION
+ GPU_CPP_SOURCE_FILES
haloexchange_mpi.cpp
)
/*! \internal \file
* \brief Tests for the halo exchange
*
- * The test sets up a 2D rank topology and performs a coordinate halo
- * exchange (using the pre-existing CPU codepath), with 2 pulses in
- * the first dimension and 1 pulse in the second. Each pulse involves
- * a few non-contiguous indices. The sending rank, atom number and
- * spatial 3D index are encoded in the x values, to allow correctness
- * checking following the halo exchange.
+ * The test sets up the rank topology and performs a coordinate halo
+ * exchange (for both CPU and GPU codepaths) for several 1D and 2D
+ * pulse configurations. Each pulse involves a few non-contiguous
+ * indices. The sending rank, atom number and spatial 3D index are
+ * encoded in the x values, to allow correctness checking following
+ * the halo exchange.
*
- * \todo Add more test variations
- * \todo Port to GPU codepath
+ * \todo Add 3D case
*
* \author Alan Gray <alang@nvidia.com>
* \ingroup module_domdec
#include "gmxpre.h"
+#include "config.h"
+
#include <array>
#include <gtest/gtest.h>
#include "gromacs/domdec/atomdistribution.h"
#include "gromacs/domdec/domdec_internal.h"
#include "gromacs/domdec/gpuhaloexchange.h"
+#if GMX_GPU_CUDA
+# include "gromacs/gpu_utils/device_stream.h"
+# include "gromacs/gpu_utils/devicebuffer.h"
+# include "gromacs/gpu_utils/gpueventsynchronizer.cuh"
+#endif
+#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/mdtypes/inputrec.h"
#include "testutils/mpitest.h"
+#include "testutils/test_hardware_environment.h"
namespace gmx
{
+namespace test
+{
namespace
{
}
}
+/*! \brief Perform GPU halo exchange, including required setup and data transfers
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] box Box matrix
+ * \param [inout] h_x       Atom coordinate data array on host; halo results are copied back into it
+ * \param [in] numAtomsTotal Total number of atoms, including halo
+ */
+void gpuHalo(gmx_domdec_t* dd, matrix box, RVec* h_x, int numAtomsTotal)
+{
+#if (GMX_GPU_CUDA && GMX_THREAD_MPI)
+ // Set up GPU hardware environment and assign this MPI rank to a device
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ int numDevices = getTestHardwareEnvironment()->getTestDeviceList().size();
+ const auto& testDevice = getTestHardwareEnvironment()->getTestDeviceList()[rank % numDevices];
+ const auto& deviceContext = testDevice->deviceContext();
+ setActiveDevice(testDevice->deviceInfo());
+ DeviceStream deviceStream(deviceContext, DeviceStreamPriority::Normal, false);
+
+ // Set up GPU buffer and copy input data from host
+ DeviceBuffer<RVec> d_x;
+ int d_x_size = -1;
+ int d_x_size_alloc = -1;
+ reallocateDeviceBuffer(&d_x, numAtomsTotal, &d_x_size, &d_x_size_alloc, deviceContext);
+
+ copyToDeviceBuffer(&d_x, h_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+
+ GpuEventSynchronizer coordinatesReadyOnDeviceEvent;
+ coordinatesReadyOnDeviceEvent.markEvent(deviceStream);
+
+ // Perform GPU halo exchange
+ for (int d = 0; d < dd->ndim; d++)
+ {
+ for (int pulse = 0; pulse < dd->comm->cd[d].numPulses(); pulse++)
+ {
+ GpuHaloExchange gpuHaloExchange(dd, d, MPI_COMM_WORLD, deviceContext, deviceStream,
+ deviceStream, pulse, nullptr);
+ gpuHaloExchange.reinitHalo(d_x, nullptr);
+ gpuHaloExchange.communicateHaloCoordinates(box, &coordinatesReadyOnDeviceEvent);
+ }
+ }
+
+ GpuEventSynchronizer haloCompletedEvent;
+ haloCompletedEvent.markEvent(deviceStream);
+ haloCompletedEvent.waitForEvent();
+
+ // Copy results back to host
+ copyFromDeviceBuffer(h_x, &d_x, 0, numAtomsTotal, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+
+ freeDeviceBuffer(d_x);
+#else
+ GMX_UNUSED_VALUE(dd);
+ GMX_UNUSED_VALUE(box);
+ GMX_UNUSED_VALUE(h_x);
+ GMX_UNUSED_VALUE(numAtomsTotal);
+#endif
+}
+
+/*! \brief Define 1D rank topology with 4 MPI tasks
+ *
+ * \param [in] dd Domain decomposition object
+ */
+void define1dRankTopology(gmx_domdec_t* dd)
+{
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ dd->neighbor[0][0] = (rank + 1) % 4;
+ dd->neighbor[0][1] = (rank == 0) ? 3 : rank - 1;
+}
+
/*! \brief Define 2D rank topology with 4 MPI tasks
*
* -----
}
}
+/*! \brief Define a 1D halo with 1 pulse
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define1dHaloWith1Pulse(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 1;
+ int nzone = 1;
+ int dimIndex = 0;
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // Single pulse involves (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 1D halo with 2 pulses
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define1dHaloWith2Pulses(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 1;
+ int nzone = 1;
+ int dimIndex = 0;
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // First pulse involves (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ // Add another pulse with (arbitrary) indices 4,5,7
+ indexvec.clear();
+
+ indexvec.push_back(4);
+ indexvec.push_back(5);
+ indexvec.push_back(7);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 3;
+ ind.nrecv[nzone + 1] = 3;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+}
+
+/*! \brief Define a 2D halo with 1 pulse in each dimension
+ *
+ * \param [in] dd Domain decomposition object
+ * \param [in] indvec Vector of index vectors
+ */
+void define2dHaloWith1PulseInEachDim(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
+{
+
+ int rank;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+ std::vector<int> indexvec;
+ gmx_domdec_ind_t ind;
+
+ dd->ndim = 2;
+ int nzone = 1;
+ for (int dimIndex = 0; dimIndex < dd->ndim; dimIndex++)
+ {
+
+ // Set up indices involved in halo
+ indexvec.clear();
+ indvec->clear();
+
+ dd->comm->cd[dimIndex].receiveInPlace = true;
+ dd->dim[dimIndex] = 0;
+ dd->ci[dimIndex] = rank;
+
+ // Single pulse involving (arbitrary) indices 1 and 3
+ indexvec.push_back(1);
+ indexvec.push_back(3);
+
+ ind.index = indexvec;
+ ind.nsend[nzone + 1] = 2;
+ ind.nrecv[nzone + 1] = 2;
+ indvec->push_back(ind);
+
+ dd->comm->cd[dimIndex].ind = *indvec;
+
+ nzone += nzone;
+ }
+}
+
/*! \brief Define a 2D halo with 2 pulses in the first dimension
*
* \param [in] dd Domain decomposition object
* \param [in] indvec Vector of index vectors
*/
-void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t> indvec)
+void define2dHaloWith2PulsesInDim1(gmx_domdec_t* dd, std::vector<gmx_domdec_ind_t>* indvec)
{
int rank;
// Set up indices involved in halo
indexvec.clear();
- indvec.clear();
+ indvec->clear();
dd->comm->cd[dimIndex].receiveInPlace = true;
dd->dim[dimIndex] = 0;
ind.index = indexvec;
ind.nsend[nzone + 1] = 2;
ind.nrecv[nzone + 1] = 2;
- indvec.push_back(ind);
+ indvec->push_back(ind);
if (dimIndex == 0) // Add another pulse with (arbitrary) indices 4,5,7
{
indexvec.clear();
- dd->comm->cd[dimIndex].ind = indvec;
-
indexvec.push_back(4);
indexvec.push_back(5);
indexvec.push_back(7);
ind.index = indexvec;
ind.nsend[nzone + 1] = 3;
ind.nrecv[nzone + 1] = 3;
- indvec.push_back(ind);
+ indvec->push_back(ind);
}
- dd->comm->cd[dimIndex].ind = indvec;
+ dd->comm->cd[dimIndex].ind = *indvec;
nzone += nzone;
}
}
+/*! \brief Check results for above-defined 1D halo with 1 pulse
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults1dHaloWith1Pulse(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ }
+}
+
+/*! \brief Check results for above-defined 1D halo with 2 pulses
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults1dHaloWith2Pulses(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ // Second Pulse in first dim: atoms 4,5,7 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[0][0], 4, j));
+ EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[0][0], 5, j));
+ EXPECT_EQ(x[numHomeAtoms + 4][j], encodedValue(dd->neighbor[0][0], 7, j));
+ }
+}
+
+/*! \brief Check results for above-defined 2D halo with 1 pulse in each dimension
+ *
+ * \param [in] x Atom coordinate data array
+ * \param [in] dd Domain decomposition object
+ * \param [in] numHomeAtoms Number of home atoms
+ */
+void checkResults2dHaloWith1PulseInEachDim(const RVec* x, const gmx_domdec_t* dd, const int numHomeAtoms)
+{
+ // Check results are expected from values encoded in x data
+ for (int j = 0; j < DIM; j++)
+ {
+ // First Pulse in first dim: atoms 1 and 3 from forward horizontal neighbour
+ EXPECT_EQ(x[numHomeAtoms][j], encodedValue(dd->neighbor[0][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 1][j], encodedValue(dd->neighbor[0][0], 3, j));
+ // First Pulse in second dim: atoms 1 and 3 from forward vertical neighbour
+ EXPECT_EQ(x[numHomeAtoms + 2][j], encodedValue(dd->neighbor[1][0], 1, j));
+ EXPECT_EQ(x[numHomeAtoms + 3][j], encodedValue(dd->neighbor[1][0], 3, j));
+ }
+}
+
/*! \brief Check results for above-defined 2D halo with 2 pulses in the first dimension
*
* \param [in] x Atom coordinate data array
}
}
+TEST(HaloExchangeTest, Coordinates1dHaloWith1Pulse)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 2;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define1dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define1dHaloWith1Pulse(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+
+ // Check results
+ checkResults1dHaloWith1Pulse(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+TEST(HaloExchangeTest, Coordinates1dHaloWith2Pulses)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 5;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define1dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define1dHaloWith2Pulses(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+
+ // Check results
+ checkResults1dHaloWith2Pulses(h_x.data(), &dd, numHomeAtoms);
+ }
+}
+
+
+TEST(HaloExchangeTest, Coordinates2dHaloWith1PulseInEachDim)
+{
+ GMX_MPI_TEST(4);
+
+ // Set up atom data
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 4;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Set up dd
+ t_inputrec ir;
+ gmx_domdec_t dd(ir);
+ dd.mpi_comm_all = MPI_COMM_WORLD;
+ gmx_domdec_comm_t comm;
+ dd.comm = &comm;
+ dd.unitCellInfo.haveScrewPBC = false;
+
+ DDAtomRanges atomRanges;
+ atomRanges.setEnd(DDAtomRanges::Type::Home, numHomeAtoms);
+ dd.comm->atomRanges = atomRanges;
+
+ define2dRankTopology(&dd);
+
+ std::vector<gmx_domdec_ind_t> indvec;
+ define2dHaloWith1PulseInEachDim(&dd, &indvec);
+
+ // Perform halo exchange
+ matrix box = { { 0., 0., 0. } };
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+
+ if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ {
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
+
+ // Check results
+ checkResults2dHaloWith1PulseInEachDim(h_x.data(), &dd, numHomeAtoms);
+ }
+}
TEST(HaloExchangeTest, Coordinates2dHaloWith2PulsesInDim1)
{
GMX_MPI_TEST(4);
// Set up atom data
- const int numHomeAtoms = 10;
- const int numHaloAtoms = 7;
- const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
- RVec x[numAtomsTotal];
- initHaloData(x, numHomeAtoms, numAtomsTotal);
+ const int numHomeAtoms = 10;
+ const int numHaloAtoms = 7;
+ const int numAtomsTotal = numHomeAtoms + numHaloAtoms;
+ HostVector<RVec> h_x;
+ changePinningPolicy(&h_x, PinningPolicy::PinnedIfSupported);
+ h_x.resize(numAtomsTotal);
+
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
// Set up dd
t_inputrec ir;
define2dRankTopology(&dd);
std::vector<gmx_domdec_ind_t> indvec;
- define2dHaloWith2PulsesInDim1(&dd, indvec);
+ define2dHaloWith2PulsesInDim1(&dd, &indvec);
// Perform halo exchange
matrix box = { { 0., 0., 0. } };
- dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(x), nullptr);
+ dd_move_x(&dd, box, static_cast<ArrayRef<RVec>>(h_x), nullptr);
+
+ // Check results
+ checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+
+#if (GMX_GPU_CUDA && GMX_THREAD_MPI) // repeat with GPU halo codepath
+ // Re-initialize input
+ initHaloData(h_x.data(), numHomeAtoms, numAtomsTotal);
+
+ // Perform GPU halo exchange
+ gpuHalo(&dd, box, h_x.data(), numAtomsTotal);
// Check results
- checkResults2dHaloWith2PulsesInDim1(x, &dd, numHomeAtoms);
+ checkResults2dHaloWith2PulsesInDim1(h_x.data(), &dd, numHomeAtoms);
+#endif
}
} // namespace
+} // namespace test
} // namespace gmx
GPU_FUNC_QUALIFIER void pme_gpu_launch_spread(gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
GpuEventSynchronizer* GPU_FUNC_ARGUMENT(xReadyOnDevice),
gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
- const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+ real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
/*! \brief
* Launches middle stages of PME (FFT R2C, solving, FFT C2R) either on GPU or on CPU, depending on the run mode.
*/
GPU_FUNC_QUALIFIER void pme_gpu_launch_gather(const gmx_pme_t* GPU_FUNC_ARGUMENT(pme),
gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
- const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+ real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
/*! \brief
* Attempts to complete PME GPU tasks.
gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
gmx_enerdata_t* GPU_FUNC_ARGUMENT(enerd),
- const real GPU_FUNC_ARGUMENT(lambdaQ),
+ real GPU_FUNC_ARGUMENT(lambdaQ),
GpuTaskCompletion GPU_FUNC_ARGUMENT(completionKind))
GPU_FUNC_TERM_WITH_RETURN(false);
gmx_wallcycle* GPU_FUNC_ARGUMENT(wcycle),
gmx::ForceWithVirial* GPU_FUNC_ARGUMENT(forceWithVirial),
gmx_enerdata_t* GPU_FUNC_ARGUMENT(enerd),
- const real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
+ real GPU_FUNC_ARGUMENT(lambdaQ)) GPU_FUNC_TERM;
/*! \brief
* The PME GPU reinitialization function that is called both at the end of any PME computation and on any load balancing.
* \author Aleksei Iupinov <a.yupinov@gmail.com>
*/
-#include "pme_gpu_types.h"
#include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
#ifndef COMPILE_GATHER_HELPERS_ONCE
# define COMPILE_GATHER_HELPERS_ONCE
if (numGrids == 2)
{
barrier(CLK_LOCAL_MEM_FENCE);
- fx = 0.0f;
- fy = 0.0f;
- fz = 0.0f;
+ fx = 0.0F;
+ fy = 0.0F;
+ fz = 0.0F;
chargeCheck = pme_gpu_check_atom_charge(gm_coefficientsB[atomIndexGlobal]);
if (chargeCheck)
{
#pragma unroll
for (int i = 0; i < numIter; i++)
{
- const int outputIndexLocal = i * iterThreads + threadLocalId;
- const int outputIndexGlobal = get_group_id(XX) * blockForcesSize + outputIndexLocal;
+ const int outputIndexLocal = i * iterThreads + threadLocalId;
+ const int outputIndexGlobal = (int)get_group_id(XX) * blockForcesSize + outputIndexLocal;
const float outputForceComponent = sm_forces[outputIndexLocal];
gm_forces[outputIndexGlobal] += outputForceComponent;
}
// time needed for that checking, but do not yet record that the
// gather has occurred.
bool needToSynchronize = true;
- constexpr bool c_streamQuerySupported = bool(GMX_GPU_CUDA);
+ constexpr bool c_streamQuerySupported = GMX_GPU_CUDA;
// TODO: implement c_streamQuerySupported with an additional GpuEventSynchronizer per stream (#2521)
if ((completionKind == GpuTaskCompletion::Check) && c_streamQuerySupported)
* \param[in] pmeGpu The PME GPU structure.
* \param[in] gridIndex The index of the grid on which to perform the calculations.
*/
- GpuParallel3dFft(const PmeGpu* pmeGpu, const int gridIndex);
+ GpuParallel3dFft(const PmeGpu* pmeGpu, int gridIndex);
/*! \brief Destroys the FFT plans. */
~GpuParallel3dFft();
/*! \brief Performs the FFT transform in given direction
__device__ inline void assertIsFinite(T arg);
template<>
-__device__ inline void assertIsFinite(float3 arg)
+__device__ inline void assertIsFinite(float3 gmx_unused arg)
{
assert(isfinite(float(arg.x)));
assert(isfinite(float(arg.y)));
}
template<typename T>
-__device__ inline void assertIsFinite(T arg)
+__device__ inline void assertIsFinite(T gmx_unused arg)
{
assert(isfinite(float(arg)));
}
float** GPU_FUNC_ARGUMENT(h_grids),
bool GPU_FUNC_ARGUMENT(computeSplines),
bool GPU_FUNC_ARGUMENT(spreadCharges),
- const real GPU_FUNC_ARGUMENT(lambda)) GPU_FUNC_TERM;
+ real GPU_FUNC_ARGUMENT(lambda)) GPU_FUNC_TERM;
/*! \libinternal \brief
* 3D FFT R2C/C2R routine.
__global float2* __restrict__ gm_grid)
{
/* This kernel supports 2 different grid dimension orderings: YZX and XYZ */
- int majorDim;
- int middleDim;
- int minorDim;
+ int majorDim = 0;
+ int middleDim = 0;
+ int minorDim = 0;
if (gridOrdering == YZX)
{
majorDim = YY;
/* We should skip the k-space point (0,0,0) */
const bool notZeroPoint = (kMinor > 0) | (kMajor > 0) | (kMiddle > 0);
- float mX;
- float mY;
- float mZ;
+ float mX = 0.0F;
+ float mY = 0.0F;
+ float mZ = 0.0F;
if (gridOrdering == YZX)
{
mX = mMinor;
#include "gromacs/gpu_utils/vectype_ops.clh"
-#include "pme_gpu_types.h"
#include "pme_gpu_calculate_splines.clh"
+#include "pme_gpu_types.h"
/*
* This define affects the spline calculation behaviour in the kernel.
/* Indices interpolation */
if (orderIndex == 0)
{
- int tableIndex;
- int tInt;
- float n;
- float t;
- const float3 x = vload3(atomIndexLocal, sm_coordinates);
+ int tableIndex = 0;
+ float n = 0.0F;
+ float t = 0.0F;
+ const float3 x = vload3(atomIndexLocal, sm_coordinates);
/* Accessing fields in fshOffset/nXYZ/recipbox/... with dimIndex offset
* puts them into local memory(!) instead of accessing the constant memory directly.
/* Fractional coordinates along box vectors, adding a positive shift to ensure t is positive for triclinic boxes */
t = (t + shift) * n;
- tInt = (int)t;
+ const int tInt = (int)t;
sm_fractCoords[sharedMemoryIndex] = t - (float)tInt;
tableIndex += tInt;
assert(tInt >= 0);
const int chargeCheck = pme_gpu_check_atom_charge(sm_coefficients[atomIndexLocal]);
if (chargeCheck)
{
- float div;
int o = orderIndex; // This is an index that is set once for PME_GPU_PARALLEL_SPLINE == 1
const float dr = sm_fractCoords[sharedMemoryIndex];
# pragma unroll order
for (int k = 3; k < order; k++)
{
- div = 1.0F / ((float)k - 1.0F);
+ const float div = 1.0F / ((float)k - 1.0F);
*SPLINE_DATA_PTR(k - 1) = div * dr * SPLINE_DATA(k - 2);
# pragma unroll
for (int l = 1; l < (k - 1); l++)
gm_dtheta[thetaGlobalIndex] = dtheta;
}
- div = 1.0F / (order - 1.0F);
+ const float div = 1.0F / (order - 1.0F);
*SPLINE_DATA_PTR(order - 1) = div * dr * SPLINE_DATA(order - 2);
# pragma unroll
for (int k = 1; k < (order - 1); k++)
{ eftASC, ".edi", "sam", nullptr, "ED sampling input" },
{ eftASC, ".cub", "pot", nullptr, "Gaussian cube file" },
{ eftASC, ".xpm", "root", nullptr, "X PixMap compatible matrix file" },
- { eftASC, "", "rundir", nullptr, "Run directory" }
+ { eftASC, "", "rundir", nullptr, "Run directory" },
+ { eftASC, ".csv", "bench", nullptr, "CSV data file" }
};
const char* ftp2ext(int ftp)
gmx::ssize(pullGroup.ind));
}
+ pullGroup.pbcatom_input = pullGroup.pbcatom;
if (pullGroup.ind.size() == 1)
{
/* No pbc is required for this group */
gmx::formatString("cudaStreamSynchronize failed (CUDA error %d: %s).", stat,
cudaGetErrorString(stat))
.c_str());
-}
\ No newline at end of file
+}
* \param[in,out] deviceBuffer Device buffer to store data in.
*/
template<typename ValueType>
-void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, DeviceTexture& /* deviceTexture*/)
+void destroyParamLookupTable(DeviceBuffer<ValueType>* deviceBuffer, const DeviceTexture& /* deviceTexture*/)
{
freeDeviceBuffer(deviceBuffer);
}
device_stream_manager.cpp
hostallocator.cpp
pinnedmemorychecker.cpp
- typecasts.cpp
GPU_CPP_SOURCE_FILES
device_buffer.cpp
+ typecasts.cpp
CUDA_CU_SOURCE_FILES
devicetransfers.cu
detecthardware.cpp
device_management_common.cpp
hardwaretopology.cpp
+ prepare_detection.cpp
printhardware.cpp
identifyavx512fmaunits.cpp
)
#include <algorithm>
#include <array>
-#include <chrono>
#include <memory>
#include <string>
-#include <thread>
#include <vector>
-#include "gromacs/compat/pointers.h"
#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/hardware/device_management.h"
#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/inmemoryserializer.h"
#include "gromacs/utility/logger.h"
-#include "gromacs/utility/mutex.h"
#include "gromacs/utility/physicalnodecommunicator.h"
#include "architecture.h"
#include "device_information.h"
+#include "prepare_detection.h"
#ifdef HAVE_UNISTD_H
# include <unistd.h> // sysconf()
# define _SC_NPROCESSORS_CONF _SC_NPROC_CONF
#endif
-/*! \brief Information about the hardware of all nodes (common to all threads in this process).
+/*! \brief The result of device detection
*
- * This information is constructed only when required, but thereafter
- * its lifetime is that of the whole process, potentially across
- * multiple successive simulation parts. It's wise to ensure that only
- * one thread can create the information, but thereafter they can all
- * read it without e.g. needing a std::shared_ptr to ensure its
- * lifetime exceeds that of the thread. */
-static std::unique_ptr<gmx_hw_info_t> g_hardwareInfo;
-//! A mutex to protect the hwinfo structure
-static Mutex g_hardwareInfoMutex;
-
-//! Detect GPUs, if that makes sense to attempt.
-static void gmx_detect_gpus(const gmx::MDLogger& mdlog,
- const PhysicalNodeCommunicator& physicalNodeComm,
- compat::not_null<gmx_hw_info_t*> hardwareInfo)
+ * Note that non-functional device detection still produces
+ * a detection result, ie. of no devices. This might not be
+ * what the user wanted, so it makes sense to log later when
+ * that is possible. */
+struct DeviceDetectionResult
{
+ //! The device information detected
+ std::vector<std::unique_ptr<DeviceInformation>> deviceInfoList_;
+ //! Container of possible warnings to issue when that is possible
+ std::vector<std::string> deviceDetectionWarnings_;
+};
+
+/*! \brief Detect GPUs when that makes sense to attempt.
+ *
+ * \param[in] physicalNodeComm The communicator across this physical node
+ * \return The result of the detection, perhaps including diagnostic messages
+ * to issue later.
+ *
+ * \todo Coordinating the efficient detection of devices across
+ * multiple ranks per node should be separated from the lower-level
+ * hardware detection. See
+ * https://gitlab.com/gromacs/gromacs/-/issues/3650.
+ */
+static DeviceDetectionResult detectAllDeviceInformation(const PhysicalNodeCommunicator& physicalNodeComm)
+{
+ DeviceDetectionResult deviceDetectionResult;
+
if (!isDeviceDetectionEnabled())
{
- return;
+ return deviceDetectionResult;
}
std::string errorMessage;
#if GMX_LIB_MPI
isMasterRankOfPhysicalNode = (physicalNodeComm.rank_ == 0);
#else
- // We choose to run the detection only once with thread-MPI and
- // use a mutex to enforce it.
+ // Without an MPI library, this process is trivially the only one
+ // on the physical node. This code runs before e.g. thread-MPI
+ // ranks are spawned, so detection is race-free by construction.
+ // Read-only access is enforced with providing those ranks with a
+ // handle to a const object, so usage is also free of races.
GMX_UNUSED_VALUE(physicalNodeComm);
- isMasterRankOfPhysicalNode = true;
+ isMasterRankOfPhysicalNode = true;
#endif
- /* The OpenCL support requires us to run detection on all ranks.
+ /* The SYCL and OpenCL support requires us to run detection on all
+ * ranks.
+ *
* With CUDA we don't need to, and prefer to detect on one rank
- * and send the information to the other ranks over MPI. */
+ * and send the information to the other ranks over MPI. This
+ * avoids creating a start-up bottleneck with each MPI rank on a
+ * node making the same GPU API calls. */
constexpr bool allRanksMustDetectGpus = (GMX_GPU_OPENCL != 0 || GMX_GPU_SYCL != 0);
bool gpusCanBeDetected = false;
if (isMasterRankOfPhysicalNode || allRanksMustDetectGpus)
gpusCanBeDetected = isDeviceDetectionFunctional(&errorMessage);
if (!gpusCanBeDetected)
{
- GMX_LOG(mdlog.warning)
- .asParagraph()
- .appendTextFormatted(
- "NOTE: Detection of GPUs failed. The API reported:\n"
- " %s\n"
- " GROMACS cannot run tasks on a GPU.",
- errorMessage.c_str());
+ deviceDetectionResult.deviceDetectionWarnings_.emplace_back(
+ "Detection of GPUs failed. The API reported:\n" + errorMessage);
}
}
if (gpusCanBeDetected)
{
- hardwareInfo->deviceInfoList = findDevices();
+ deviceDetectionResult.deviceInfoList_ = findDevices();
// No need to tell the user anything at this point, they get a
// hardware report later.
}
if (isMasterRankOfPhysicalNode)
{
gmx::InMemorySerializer writer;
- serializeDeviceInformations(hardwareInfo->deviceInfoList, &writer);
+ serializeDeviceInformations(deviceDetectionResult.deviceInfoList_, &writer);
buffer = writer.finishAndGetBuffer();
sizeOfBuffer = buffer.size();
}
if (!isMasterRankOfPhysicalNode)
{
gmx::InMemoryDeserializer reader(buffer, false);
- hardwareInfo->deviceInfoList = deserializeDeviceInformations(&reader);
+ deviceDetectionResult.deviceInfoList_ = deserializeDeviceInformations(&reader);
}
}
}
#endif
+ return deviceDetectionResult;
}
//! Reduce the locally collected \p hardwareInfo over MPI ranks
-static void gmx_collect_hardware_mpi(const gmx::CpuInfo& cpuInfo,
- const PhysicalNodeCommunicator& physicalNodeComm,
- compat::not_null<gmx_hw_info_t*> hardwareInfo)
+static void gmx_collect_hardware_mpi(const gmx::CpuInfo& cpuInfo,
+ const PhysicalNodeCommunicator& physicalNodeComm,
+ gmx_hw_info_t* hardwareInfo)
{
const int ncore = hardwareInfo->hardwareTopology->numberOfCores();
/* Zen1 is assumed for:
hardwareInfo->bIdenticalGPUs = (maxMinReduced[4] == -maxMinReduced[9]);
hardwareInfo->haveAmdZen1Cpu = (maxMinReduced[10] > 0);
#else
- /* All ranks use the same pointer, protected by a mutex in the caller */
hardwareInfo->nphysicalnode = 1;
hardwareInfo->ncore_tot = ncore;
hardwareInfo->ncore_min = ncore;
#endif
}
-/*! \brief Utility that does dummy computing for max 2 seconds to spin up cores
- *
- * This routine will check the number of cores configured and online
- * (using sysconf), and the spins doing dummy compute operations for up to
- * 2 seconds, or until all cores have come online. This can be used prior to
- * hardware detection for platforms that take unused processors offline.
- *
- * This routine will not throw exceptions. In principle it should be
- * declared noexcept, but at least icc 19.1 and 21-beta08 with the
- * libstdc++-7.5 has difficulty implementing a std::vector of
- * std::thread started with this function when declared noexcept. It
- * is not clear whether the problem is the compiler or the standard
- * library. Fortunately, this function is not performance sensitive,
- * and only runs on platforms other than x86 and POWER (ie ARM),
- * so the possible overhead introduced by omitting noexcept is not
- * important.
- */
-static void spinUpCore()
-{
-#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
- float dummy = 0.1;
- int countConfigured = sysconf(_SC_NPROCESSORS_CONF); // noexcept
- auto start = std::chrono::steady_clock::now(); // noexcept
-
- while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured
- && std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
- {
- for (int i = 1; i < 10000; i++)
- {
- dummy /= i;
- }
- }
-
- if (dummy < 0)
- {
- printf("This cannot happen, but prevents loop from being optimized away.");
- }
-#endif
-}
-
-/*! \brief Prepare the system before hardware topology detection
- *
- * This routine should perform any actions we want to put the system in a state
- * where we want it to be before detecting the hardware topology. For most
- * processors there is nothing to do, but some architectures (in particular ARM)
- * have support for taking configured cores offline, which will make them disappear
- * from the online processor count.
- *
- * This routine checks if there is a mismatch between the number of cores
- * configured and online, and in that case we issue a small workload that
- * attempts to wake sleeping cores before doing the actual detection.
- *
- * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only
- * been disabled in the kernel (rather than bios). Since those cores will never
- * come online automatically, we currently skip this test for x86 & PowerPC to
- * avoid wasting 2 seconds. We also skip the test if there is no thread support.
- *
- * \note Cores will sleep relatively quickly again, so it's important to issue
- * the real detection code directly after this routine.
- */
-static void hardwareTopologyPrepareDetection()
-{
-#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) \
- && (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
-
- // Modify this conditional when/if x86 or PowerPC starts to sleep some cores
- if (c_architecture != Architecture::X86 && c_architecture != Architecture::PowerPC)
- {
- int countConfigured = sysconf(_SC_NPROCESSORS_CONF);
- std::vector<std::thread> workThreads(countConfigured);
-
- for (auto& t : workThreads)
- {
- t = std::thread(spinUpCore);
- }
-
- for (auto& t : workThreads)
- {
- t.join();
- }
- }
-#endif
-}
-
-/*! \brief Sanity check hardware topology and print some notes to log
- *
- * \param mdlog Logger.
- * \param hardwareTopology Reference to hardwareTopology object.
- */
-static void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
- const gmx::HardwareTopology gmx_unused& hardwareTopology)
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger gmx_unused& mdlog,
+ const gmx::HardwareTopology gmx_unused& hardwareTopology)
{
#if defined HAVE_SYSCONF && defined(_SC_NPROCESSORS_CONF)
if (hardwareTopology.supportLevel() < gmx::HardwareTopology::SupportLevel::LogicalProcessorCount)
"performance.");
}
}
+#else
+ GMX_UNUSED_VALUE(mdlog);
+ GMX_UNUSED_VALUE(hardwareTopology);
#endif
}
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog, const PhysicalNodeCommunicator& physicalNodeComm)
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm)
{
- // By construction, only one thread ever runs hardware detection,
- // but we may as well prevent issues arising if that would change.
- // Taking the lock early ensures that exactly one thread can
- // attempt to construct g_hardwareInfo.
- lock_guard<Mutex> lock(g_hardwareInfoMutex);
-
- // If we already have the information, just return a handle to it.
- if (g_hardwareInfo != nullptr)
- {
- return g_hardwareInfo.get();
- }
-
- // Make the new hardwareInfo in a temporary.
+ // Ensure all cores have spun up, where applicable.
hardwareTopologyPrepareDetection();
// TODO: We should also do CPU hardware detection only once on each
std::make_unique<CpuInfo>(CpuInfo::detect()),
std::make_unique<HardwareTopology>(HardwareTopology::detect()));
- // If we detected the topology on this system, double-check that it makes sense
- if (hardwareInfo->hardwareTopology->isThisSystem())
- {
- hardwareTopologyDoubleCheckDetection(mdlog, *hardwareInfo->hardwareTopology);
- }
-
// TODO: Get rid of this altogether.
hardwareInfo->nthreads_hw_avail = hardwareInfo->hardwareTopology->machine().logicalProcessorCount;
// Detect GPUs
- gmx_detect_gpus(mdlog, physicalNodeComm, compat::make_not_null(hardwareInfo));
- gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, compat::make_not_null(hardwareInfo));
+ // Open a nested scope so no temporary variables can
+ // be mis-used later.
+ {
+ DeviceDetectionResult deviceDetectionResult = detectAllDeviceInformation(physicalNodeComm);
+ hardwareInfo->deviceInfoList.swap(deviceDetectionResult.deviceInfoList_);
+ std::swap(hardwareInfo->hardwareDetectionWarnings_, deviceDetectionResult.deviceDetectionWarnings_);
+ }
- // Now that the temporary is fully constructed, swap it to become
- // the real thing.
- g_hardwareInfo.swap(hardwareInfo);
+ gmx_collect_hardware_mpi(*hardwareInfo->cpuInfo, physicalNodeComm, hardwareInfo.get());
+
+ return hardwareInfo;
+}
- return g_hardwareInfo.get();
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation)
+{
+ for (const std::string& warningString : hardwareInformation.hardwareDetectionWarnings_)
+ {
+ GMX_LOG(mdlog.warning).asParagraph().appendText(warningString);
+ }
}
} // namespace gmx
#ifndef GMX_HARDWARE_DETECTHARDWARE_H
#define GMX_HARDWARE_DETECTHARDWARE_H
+#include <memory>
+
struct gmx_hw_info_t;
namespace gmx
{
+class HardwareTopology;
class MDLogger;
class PhysicalNodeCommunicator;
-/*! \brief Run detection, consistency checks, and make consistent
+/*! \brief Run detection and make correct and consistent
* hardware information available on all ranks.
*
- * This routine constructs the global hwinfo structure and returns a pointer to
- * it. It will run a preamble before executing cpu and hardware checks, and
- * then run consistency checks afterwards. The results will also be made
- * available on all nodes.
- *
* May do communication on MPI_COMM_WORLD when compiled with real MPI.
*
- * All processes in a physical node need to coordinate calling this
- * routine. With thread-MPI only the first call leads to detection
- * work, and any subsequent call receives the same handle. With real
- * MPI, communication is needed to coordinate the results. In all
- * cases, any thread within a process may use the returned handle. */
-gmx_hw_info_t* gmx_detect_hardware(const gmx::MDLogger& mdlog,
- const PhysicalNodeCommunicator& physicalNodeComm);
+ * This routine is designed to be called once on each process. In a
+ * thread-MPI configuration, it may only be called before the threads
+ * are spawned. With real MPI, communication is needed to coordinate
+ * the results. In all cases, any thread within a process may use the
+ * returned handle.
+ *
+ * \todo Replace the use of MPI_COMM_WORLD e.g. by using a libraryCommWorld
+ * argument. See https://gitlab.com/gromacs/gromacs/-/issues/3650
+ */
+std::unique_ptr<gmx_hw_info_t> gmx_detect_hardware(const PhysicalNodeCommunicator& physicalNodeComm);
+
+/*! \brief Sanity check hardware topology and print some notes to log
+ *
+ * \param mdlog Logger.
+ * \param hardwareTopology Reference to hardwareTopology object.
+ */
+void hardwareTopologyDoubleCheckDetection(const gmx::MDLogger& mdlog,
+ const gmx::HardwareTopology& hardwareTopology);
+
+/*! \brief Issue warnings to mdlog that were decided during detection
+ *
+ * \param[in] mdlog Logger
+ * \param[in] hardwareInformation The hardwareInformation */
+void logHardwareDetectionWarnings(const gmx::MDLogger& mdlog, const gmx_hw_info_t& hardwareInformation);
} // namespace gmx
gmx_bool bIdenticalGPUs; /* TRUE if all ranks have the same type(s) and order of GPUs */
bool haveAmdZen1Cpu; /* TRUE when at least one CPU in any of the nodes is AMD Zen of the first generation */
+
+ //! Container of warning strings to log later when that is possible.
+ std::vector<std::string> hardwareDetectionWarnings_;
};
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Defines routine for activating potentially deactivated cores
+ * so they can be detected.
+ *
+ * The use of std::thread makes for brittle interaction with std
+ * library headers. Its caller also handles GPU detection and
+ * allocation of device-specific data structures. This is more
+ * manageable when separated into two distinct translation units.
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_hardware
+ */
+#include "gmxpre.h"
+
+#include "prepare_detection.h"
+
+#include "config.h"
+
+#include <cstdio>
+
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include "architecture.h"
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h> // sysconf()
+#endif
+
+namespace gmx
+{
+
+/*! \brief Utility that does dummy computing for max 2 seconds to spin up cores
+ *
+ * This routine will check the number of cores configured and online
+ * (using sysconf), and then spins doing dummy compute operations for up to
+ * 2 seconds, or until all cores have come online. This can be used prior to
+ * hardware detection for platforms that take unused processors offline.
+ *
+ * This routine will not throw exceptions. In principle it should be
+ * declared noexcept, but at least icc 19.1 and 21-beta08 with the
+ * libstdc++-7.5 have difficulty implementing a std::vector of
+ * std::thread started with this function when declared noexcept. It
+ * is not clear whether the problem is the compiler or the standard
+ * library. Fortunately, this function is not performance sensitive,
+ * and only runs on platforms other than x86 and POWER (ie ARM),
+ * so the possible overhead introduced by omitting noexcept is not
+ * important.
+ */
+static void spinUpCore()
+{
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) && defined(_SC_NPROCESSORS_ONLN)
+ float dummy = 0.1;
+ int countConfigured = sysconf(_SC_NPROCESSORS_CONF); // noexcept
+ auto start = std::chrono::steady_clock::now(); // noexcept
+
+ // Busy-work until all configured cores report online, or 2 s elapse.
+ while (sysconf(_SC_NPROCESSORS_ONLN) < countConfigured
+ && std::chrono::steady_clock::now() - start < std::chrono::seconds(2))
+ {
+ for (int i = 1; i < 10000; i++)
+ {
+ dummy /= i;
+ }
+ }
+
+ // dummy never goes negative; this check only prevents the compiler
+ // from optimizing the busy-work loop away.
+ if (dummy < 0)
+ {
+ printf("This cannot happen, but prevents loop from being optimized away.");
+ }
+#endif
+}
+
+// Wakes any offlined/sleeping cores (notably on ARM platforms) by
+// spinning one worker thread per configured core, so that the
+// subsequent hardware-topology detection sees all of them.
+void hardwareTopologyPrepareDetection()
+{
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF) \
+ && (defined(THREAD_PTHREADS) || defined(THREAD_WINDOWS))
+
+ // Modify this conditional when/if x86 or PowerPC starts to sleep some cores
+ if (c_architecture != Architecture::X86 && c_architecture != Architecture::PowerPC)
+ {
+ int countConfigured = sysconf(_SC_NPROCESSORS_CONF);
+ // One worker per configured core, so every core gets exercised.
+ std::vector<std::thread> workThreads(countConfigured);
+
+ for (auto& t : workThreads)
+ {
+ t = std::thread(spinUpCore);
+ }
+
+ // Wait for all spin-up workers before detection starts, since
+ // cores may go back to sleep quickly again.
+ for (auto& t : workThreads)
+ {
+ t.join();
+ }
+ }
+#endif
+}
+
+} // namespace gmx
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal
+ * \file
+ * \brief Declares routine for activating potentially deactivated
+ * cores so they can be detected.
+ *
+ * \author Erik Lindahl <erik.lindahl@scilifelab.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \ingroup module_hardware
+ */
+#ifndef GMX_HARDWARE_PREPAREDETECTION_H
+#define GMX_HARDWARE_PREPAREDETECTION_H
+
+namespace gmx
+{
+
+/*! \brief Prepare the system before hardware topology detection
+ *
+ * This routine should perform any actions we want to put the system in a state
+ * where we want it to be before detecting the hardware topology. For most
+ * processors there is nothing to do, but some architectures (in particular ARM)
+ * have support for taking configured cores offline, which will make them disappear
+ * from the online processor count.
+ *
+ * This routine checks if there is a mismatch between the number of cores
+ * configured and online, and in that case we issue a small workload that
+ * attempts to wake sleeping cores before doing the actual detection.
+ *
+ * This type of mismatch can also occur for x86 or PowerPC on Linux, if SMT has only
+ * been disabled in the kernel (rather than bios). Since those cores will never
+ * come online automatically, we currently skip this test for x86 & PowerPC to
+ * avoid wasting 2 seconds. We also skip the test if there is no thread support.
+ *
+ * \note Cores will sleep relatively quickly again, so it's important to issue
+ * the real detection code directly after this routine.
+ */
+void hardwareTopologyPrepareDetection();
+
+} // namespace gmx
+
+#endif
selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
idefSelection_.ilsort = domainIdef.ilsort;
+
+ if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
+ {
+ idefSelection_.iparams_posres = domainIdef.iparams_posres;
+ idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
+ }
+ else
+ {
+ idefSelection_.iparams_posres.clear();
+ idefSelection_.iparams_fbposres.clear();
+ }
}
setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
{
None, //!< Do not apply velocity scaling (not a PR-coupling run or step)
Diagonal, //!< Apply velocity scaling using a diagonal matrix
- Full //!< Apply velocity scaling using a full matrix
};
class LeapFrogGpu
}
set_lincs_matrix(li, invmass, lambda);
+
+ li->rmsdData[0] = 0.0;
+ li->rmsdData[1] = 0.0;
}
//! Issues a warning when LINCS constraints cannot be satisfied.
AtomLocality::Local, simulationWork, stepWork)
: nullptr;
+ // Copy coordinate from the GPU if update is on the GPU and there
+ // are forces to be computed on the CPU, or for the computation of
+ // virial, or if host-side data will be transferred from this task
+ // to a remote task for halo exchange or PME-PP communication. At
+ // search steps the current coordinates are already on the host,
+ // hence copy is not needed.
+ const bool haveHostPmePpComms =
+ !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication;
+
+ GMX_ASSERT(simulationWork.useGpuHaloExchange
+ == ((cr->dd != nullptr) && (!cr->dd->gpuHaloExchange[0].empty())),
+ "The GPU halo exchange is active, but it has not been constructed.");
+ const bool haveHostHaloExchangeComms =
+ havePPDomainDecomposition(cr) && !simulationWork.useGpuHaloExchange;
+
+ bool gmx_used_in_debug haveCopiedXFromGpu = false;
+ if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch
+ && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial
+ || haveHostPmePpComms || haveHostHaloExchangeComms))
+ {
+ stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local);
+ haveCopiedXFromGpu = true;
+ }
+
// If coordinates are to be sent to PME task from CPU memory, perform that send here.
// Otherwise the send will occur after H2D coordinate transfer.
if (GMX_MPI && !thisRankHasDuty(cr, DUTY_PME) && !pmeSendCoordinatesFromGpu && stepWork.computeSlowForces)
/* Send particle coordinates to the pme nodes */
if (!stepWork.doNeighborSearch && simulationWork.useGpuUpdate)
{
- GMX_RELEASE_ASSERT(false,
- "GPU update and separate PME ranks are only supported with GPU "
- "direct communication!");
- // TODO: when this code-path becomes supported add:
- // stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
}
gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL],
}
}
- // Copy coordinate from the GPU if update is on the GPU and there
- // are forces to be computed on the CPU, or for the computation of
- // virial, or if host-side data will be transferred from this task
- // to a remote task for halo exchange or PME-PP communication. At
- // search steps the current coordinates are already on the host,
- // hence copy is not needed.
- const bool haveHostPmePpComms =
- !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication;
-
- GMX_ASSERT(simulationWork.useGpuHaloExchange
- == ((cr->dd != nullptr) && (!cr->dd->gpuHaloExchange[0].empty())),
- "The GPU halo exchange is active, but it has not been constructed.");
- const bool haveHostHaloExchangeComms =
- havePPDomainDecomposition(cr) && !simulationWork.useGpuHaloExchange;
-
- bool gmx_used_in_debug haveCopiedXFromGpu = false;
- if (simulationWork.useGpuUpdate && !stepWork.doNeighborSearch
- && (runScheduleWork->domainWork.haveCpuLocalForceWork || stepWork.computeVirial
- || haveHostPmePpComms || haveHostHaloExchangeComms))
- {
- GMX_ASSERT(stateGpu != nullptr, "stateGpu should not be null");
- stateGpu->copyCoordinatesFromGpu(x.unpaddedArrayRef(), AtomLocality::Local);
- haveCopiedXFromGpu = true;
- }
-
// If coordinates are to be sent to PME task from GPU memory, perform that send here.
// Otherwise the send will occur before the H2D coordinate transfer.
if (!thisRankHasDuty(cr, DUTY_PME) && pmeSendCoordinatesFromGpu)
}
else
{
- // Note: GPU update + DD without direct communication is not supported,
- // a waitCoordinatesReadyOnHost() should be issued if it will be.
- GMX_ASSERT(!simulationWork.useGpuUpdate,
- "GPU update is not supported with CPU halo exchange");
+ if (simulationWork.useGpuUpdate)
+ {
+ GMX_ASSERT(haveCopiedXFromGpu,
+ "a wait should only be triggered if copy has been scheduled");
+ stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
+ }
dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle);
}
wallcycle_stop(wcycle, ewcFORCE);
}
- // If on GPU PME-PP comms or GPU update path, receive forces from PME before GPU buffer ops
+ // If on GPU PME-PP comms path, receive forces from PME before GPU buffer ops
// TODO refactor this and unify with below default-path call to the same function
if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && stepWork.computeSlowForces
- && (simulationWork.useGpuPmePpCommunication || simulationWork.useGpuUpdate))
+ && simulationWork.useGpuPmePpCommunication)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
// copy call done in sim_utils(...) for the output.
// NOTE: If there are virtual sites, the forces are modified on host after this D2H copy. Hence,
// they should not be copied in do_md(...) for the output.
- if (!simulationWork.useGpuUpdate || vsite)
+ if (!simulationWork.useGpuUpdate
+ || (simulationWork.useGpuUpdate && DOMAINDECOMP(cr) && haveHostPmePpComms) || vsite)
{
stateGpu->copyForcesFromGpu(forceWithShift, AtomLocality::Local);
stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
// TODO refactor this and unify with above GPU PME-PP / GPU update path call to the same function
if (PAR(cr) && !thisRankHasDuty(cr, DUTY_PME) && !simulationWork.useGpuPmePpCommunication
- && !simulationWork.useGpuUpdate && stepWork.computeSlowForces)
+ && stepWork.computeSlowForces)
{
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
#include "gromacs/gpu_utils/devicebuffer_datatype.h"
#include "gromacs/mdtypes/group.h"
+#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/classhelpers.h"
/*! \brief Create Update-Constrain object.
*
* The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain
- * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that modify
- * coordinates. The event is maintained outside this class and also passed to all (if any) consumers
- * of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr because the
- * markEvent(...) method is called unconditionally.
+ * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
+ * modify coordinates. The event is maintained outside this class and also passed to all (if
+ * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr
+ * because the markEvent(...) method is called unconditionally.
*
* \param[in] ir Input record data: LINCS takes number of iterations and order of
* projection from it.
* and target O-H and H-H distances from this object.
* \param[in] deviceContext GPU device context.
* \param[in] deviceStream GPU stream to use.
- * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done on the GPU.
+ * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done
+ * on the GPU.
+ * \param[in] wcycle The wallclock counter
*/
UpdateConstrainGpu(const t_inputrec& ir,
const gmx_mtop_t& mtop,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
- GpuEventSynchronizer* xUpdatedOnDevice);
+ GpuEventSynchronizer* xUpdatedOnDevice,
+ gmx_wallcycle* wcycle);
~UpdateConstrainGpu();
const gmx_mtop_t& /* mtop */,
const DeviceContext& /* deviceContext */,
const DeviceStream& /* deviceStream */,
- GpuEventSynchronizer* /* xUpdatedOnDevice */) :
+ GpuEventSynchronizer* /* xUpdatedOnDevice */,
+ gmx_wallcycle* /*wcycle*/) :
impl_(nullptr)
{
GMX_ASSERT(!impl_,
#include "gromacs/mdlib/settle_gpu.cuh"
#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/timing/wallcycle.h"
namespace gmx
{
const float dtPressureCouple,
const matrix prVelocityScalingMatrix)
{
+ wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+ wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
// Clearing virial matrix
// TODO There is no point in having separate virial matrix for constraints
clear_mat(virial);
coordinatesReady_->markEvent(deviceStream_);
+ wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+ wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
+
return;
}
void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
{
+ wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+ wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
ScalingMatrix mu;
mu.xx = scalingMatrix[XX][XX];
mu.yy = scalingMatrix[YY][YY];
// TODO: Although this only happens on the pressure coupling steps, this synchronization
// can affect the performance if nstpcouple is small.
deviceStream_.synchronize();
+
+ wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+ wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
}
void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix)
{
+ wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+ wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
ScalingMatrix mu;
mu.xx = scalingMatrix[XX][XX];
mu.yy = scalingMatrix[YY][YY];
// TODO: Although this only happens on the pressure coupling steps, this synchronization
// can affect the performance if nstpcouple is small.
deviceStream_.synchronize();
+
+ wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+ wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
}
UpdateConstrainGpu::Impl::Impl(const t_inputrec& ir,
const gmx_mtop_t& mtop,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
- GpuEventSynchronizer* xUpdatedOnDevice) :
+ GpuEventSynchronizer* xUpdatedOnDevice,
+ gmx_wallcycle* wcycle) :
deviceContext_(deviceContext),
deviceStream_(deviceStream),
- coordinatesReady_(xUpdatedOnDevice)
+ coordinatesReady_(xUpdatedOnDevice),
+ wcycle_(wcycle)
{
GMX_ASSERT(xUpdatedOnDevice != nullptr, "The event synchronizer can not be nullptr.");
const t_mdatoms& md,
const int numTempScaleValues)
{
+ // TODO wallcycle
+ wallcycle_start_nocount(wcycle_, ewcLAUNCH_GPU);
+ wallcycle_sub_start(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+
GMX_ASSERT(d_x != nullptr, "Coordinates device buffer should not be null.");
GMX_ASSERT(d_v != nullptr, "Velocities device buffer should not be null.");
GMX_ASSERT(d_f != nullptr, "Forces device buffer should not be null.");
coordinateScalingKernelLaunchConfig_.gridSize[0] =
(numAtoms_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
+
+ wallcycle_sub_stop(wcycle_, ewcsLAUNCH_GPU_UPDATE_CONSTRAIN);
+ wallcycle_stop(wcycle_, ewcLAUNCH_GPU);
}
void UpdateConstrainGpu::Impl::setPbc(const PbcType pbcType, const matrix box)
{
+ // TODO wallcycle
setPbcAiuc(numPbcDimensions(pbcType), box, &pbcAiuc_);
}
const gmx_mtop_t& mtop,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
- GpuEventSynchronizer* xUpdatedOnDevice) :
- impl_(new Impl(ir, mtop, deviceContext, deviceStream, xUpdatedOnDevice))
+ GpuEventSynchronizer* xUpdatedOnDevice,
+ gmx_wallcycle* wcycle) :
+ impl_(new Impl(ir, mtop, deviceContext, deviceStream, xUpdatedOnDevice, wcycle))
{
}
/*! \brief Create Update-Constrain object.
*
* The constructor is given a non-nullptr \p deviceStream, in which all the update and constrain
- * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that modify
- * coordinates. The event is maintained outside this class and also passed to all (if any) consumers
- * of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr because the
- * markEvent(...) method is called unconditionally.
+ * routines are executed. \p xUpdatedOnDevice should mark the completion of all kernels that
+ * modify coordinates. The event is maintained outside this class and also passed to all (if
+ * any) consumers of the updated coordinates. The \p xUpdatedOnDevice also can not be a nullptr
+ * because the markEvent(...) method is called unconditionally.
*
* \param[in] ir Input record data: LINCS takes number of iterations and order of
* projection from it.
* and target O-H and H-H distances from this object.
* \param[in] deviceContext GPU device context.
* \param[in] deviceStream GPU stream to use.
- * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that update is done on the GPU.
+ * \param[in] xUpdatedOnDevice The event synchronizer to use to mark that
+ * update is done on the GPU.
+ * \param[in] wcycle The wallclock counter
*/
Impl(const t_inputrec& ir,
const gmx_mtop_t& mtop,
const DeviceContext& deviceContext,
const DeviceStream& deviceStream,
- GpuEventSynchronizer* xUpdatedOnDevice);
+ GpuEventSynchronizer* xUpdatedOnDevice,
+ gmx_wallcycle* wcycle);
~Impl();
//! An pointer to the event to indicate when the update of coordinates is complete
GpuEventSynchronizer* coordinatesReady_;
+ //! The wallclock counter
+ gmx_wallcycle* wcycle_ = nullptr;
};
} // namespace gmx
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011-2019, by the GROMACS development team, led by
+ * Copyright (c) 2011-2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* support for the CLI and API without duplicating code. It should be
* eliminated following the TODOs below.
*
+ * \warning Instances provide lifetime scope for members that do not have
+ * effective lifetime management or which are frequently accessed unsafely.
+ * The caller is responsible for keeping a LegacyMdrunOptions object alive
+ * for as long as any consumers, direct or transitive.
+ *
* \todo Modules in mdrun should acquire proper option handling so
- * that all of these declarations and defaults are local to the
- * modules.
+ * that all of these declarations and defaults are local to the
+ * modules.
*
* \todo Contextual aspects, such as working directory
- * and environment variable handling are more properly
- * the role of SimulationContext, and should be moved there */
+ * and environment variable handling are more properly
+ * the role of SimulationContext, and should be moved there.
+ */
class LegacyMdrunOptions
{
public:
integrator = std::make_unique<UpdateConstrainGpu>(
*ir, *top_global, fr->deviceStreamManager->context(),
fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
- stateGpu->xUpdatedOnDevice());
+ stateGpu->xUpdatedOnDevice(), wcycle);
integrator->setPbc(PbcType::Xyz, state->box);
}
if (useGpuForUpdate)
{
+ wallcycle_stop(wcycle, ewcUPDATE);
+
if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
{
integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
}
- // If the buffer ops were not offloaded this step, the forces are on the host and have to be copied
- if (!runScheduleWork->stepWork.useGpuFBufferOps)
+ if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
+ && !thisRankHasDuty(cr, DUTY_PME))
+ {
+ // The PME forces were received on the host, so they have to be copied
+ stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
+ }
+ else if (!runScheduleWork->stepWork.useGpuFBufferOps)
{
+ // The buffer ops were not offloaded this step, so the forces are on the
+ // host and have to be copied
stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
}
#include "gromacs/hardware/cpuinfo.h"
#include "gromacs/hardware/detecthardware.h"
#include "gromacs/hardware/device_management.h"
+#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/hardware/printhardware.h"
#include "gromacs/imd/imd.h"
#include "gromacs/listed_forces/disre.h"
newRunner.hw_opt = hw_opt;
newRunner.filenames = filenames;
+ newRunner.hwinfo_ = hwinfo_;
newRunner.oenv = oenv;
newRunner.mdrunOptions = mdrunOptions;
newRunner.domdecOptions = domdecOptions;
gmx_wallcycle_t wcycle;
gmx_walltime_accounting_t walltime_accounting = nullptr;
MembedHolder membedHolder(filenames.size(), filenames.data());
- gmx_hw_info_t* hwinfo = nullptr;
/* CAUTION: threads may be started later on in this function, so
cr doesn't reflect the final parallel state right now */
gmx::LoggerOwner logOwner(buildLogger(fplog, isSimulationMasterRank));
gmx::MDLogger mdlog(logOwner.logger());
- // TODO The thread-MPI master rank makes a working
- // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks
- // after the threads have been launched. This works because no use
- // is made of that communicator until after the execution paths
- // have rejoined. But it is likely that we can improve the way
- // this is expressed, e.g. by expressly running detection only the
- // master rank for thread-MPI, rather than relying on the mutex
- // and reference count.
- PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
- hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm);
-
- gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo);
+ gmx_print_detected_hardware(fplog, isSimulationMasterRank && isMasterSim(ms), mdlog, hwinfo_);
- std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo->deviceInfoList, hw_opt.gpuIdsAvailable);
+ std::vector<int> gpuIdsToUse = makeGpuIdsToUse(hwinfo_->deviceInfoList, hw_opt.gpuIdsAvailable);
const int numDevicesToUse = gmx::ssize(gpuIdsToUse);
// Print citation requests after all software/hardware printing
gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, GMX_THREAD_MPI),
hw_opt.nthreads_tmpi);
useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi(
- useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo,
+ useGpuForNonbonded, pmeTarget, numDevicesToUse, userGpuTaskAssignment, *hwinfo_,
*inputrec, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
* prevent any possible subsequent checks from working
* correctly. */
hw_opt.nthreads_tmpi =
- get_nthreads_mpi(hwinfo, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
+ get_nthreads_mpi(hwinfo_, &hw_opt, numDevicesToUse, useGpuForNonbonded, useGpuForPme,
inputrec.get(), &mtop, mdlog, membedHolder.doMembed());
// Now start the threads for thread MPI.
spawnThreads(hw_opt.nthreads_tmpi);
// The spawned threads enter mdrunner() and execution of
// master and spawned threads joins at the end of this block.
- physicalNodeComm =
- PhysicalNodeCommunicator(libraryWorldCommunicator, gmx_physicalnode_id_hash());
}
GMX_RELEASE_ASSERT(ms || simulationCommunicator != MPI_COMM_NULL,
t_commrec* cr = crHandle.get();
GMX_RELEASE_ASSERT(cr != nullptr, "Must have valid commrec");
+ PhysicalNodeCommunicator physicalNodeComm(libraryWorldCommunicator, gmx_physicalnode_id_hash());
+
+ // If we detected the topology on this system, double-check that it makes sense
+ if (hwinfo_->hardwareTopology->isThisSystem())
+ {
+ hardwareTopologyDoubleCheckDetection(mdlog, *hwinfo_->hardwareTopology);
+ }
+
if (PAR(cr))
{
/* now broadcast everything to the non-master nodes/threads: */
bool useGpuForPme = false;
bool useGpuForBonded = false;
bool useGpuForUpdate = false;
- bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0;
+ bool gpusWereDetected = hwinfo_->ngpu_compatible_tot > 0;
try
{
// It's possible that there are different numbers of GPUs on
nonbondedTarget, userGpuTaskAssignment, emulateGpuNonbonded, canUseGpuForNonbonded,
gpuAccelerationOfNonbondedIsUseful(mdlog, *inputrec, !GMX_THREAD_MPI), gpusWereDetected);
useGpuForPme = decideWhetherToUseGpusForPme(
- useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo, *inputrec,
+ useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, *hwinfo_, *inputrec,
cr->sizeOfDefaultCommunicator, domdecOptions.numPmeRanks, gpusWereDetected);
- auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr)
- && inputSupportsGpuBondeds(*inputrec, mtop, nullptr);
- useGpuForBonded = decideWhetherToUseGpusForBonded(
- useGpuForNonbonded, useGpuForPme, bondedTarget, canUseGpuForBonded,
- EVDW_PME(inputrec->vdwtype), EEL_PME_EWALD(inputrec->coulombtype),
- domdecOptions.numPmeRanks, gpusWereDetected);
+ useGpuForBonded = decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme,
+ bondedTarget, *inputrec, mtop,
+ domdecOptions.numPmeRanks, gpusWereDetected);
}
GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
*/
prepare_verlet_scheme(fplog, cr, inputrec.get(), nstlist_cmdline, &mtop, box,
useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes),
- *hwinfo->cpuInfo);
+ *hwinfo_->cpuInfo);
// This builder is necessary while we have multi-part construction
// of DD. Before DD is constructed, we use the existence of
// Produce the task assignment for this rank - done after DD is constructed
GpuTaskAssignments gpuTaskAssignments = GpuTaskAssignmentsBuilder::build(
- gpuIdsToUse, userGpuTaskAssignment, *hwinfo, simulationCommunicator, physicalNodeComm,
+ gpuIdsToUse, userGpuTaskAssignment, *hwinfo_, simulationCommunicator, physicalNodeComm,
nonbondedTarget, pmeTarget, bondedTarget, updateTarget, useGpuForNonbonded,
useGpuForPme, thisRankHasDuty(cr, DUTY_PP),
// TODO cr->duty & DUTY_PME should imply that a PME
// that existing affinity setting was from OpenMP or something
// else, so we run this code both before and after we initialize
// the OpenMP support.
- gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+ gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, FALSE);
/* Check and update the number of OpenMP threads requested */
- checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_,
+ checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo_, cr, ms, physicalNodeComm.size_,
pmeRunMode, mtop, *inputrec);
- gmx_omp_nthreads_init(mdlog, cr, hwinfo->nthreads_hw_avail, physicalNodeComm.size_,
+ gmx_omp_nthreads_init(mdlog, cr, hwinfo_->nthreads_hw_avail, physicalNodeComm.size_,
hw_opt.nthreads_omp, hw_opt.nthreads_omp_pme, !thisRankHasDuty(cr, DUTY_PP));
// Enable FP exception detection, but not in
}
/* Now that we know the setup is consistent, check for efficiency */
- check_resource_division_efficiency(hwinfo, gpuTaskAssignments.thisRankHasAnyGpuTask(),
+ check_resource_division_efficiency(hwinfo_, gpuTaskAssignments.thisRankHasAnyGpuTask(),
mdrunOptions.ntompOptionIsSet, cr, mdlog);
/* getting number of PP/PME threads on this MPI / tMPI rank.
*/
const int numThreadsOnThisRank = thisRankHasDuty(cr, DUTY_PP) ? gmx_omp_nthreads_get(emntNonbonded)
: gmx_omp_nthreads_get(emntPME);
- checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo->hardwareTopology,
+ checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, *hwinfo_->hardwareTopology,
physicalNodeComm, mdlog);
// Enable Peer access between GPUs where available
* - which indicates that probably the OpenMP library has changed it
* since we first checked).
*/
- gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo->nthreads_hw_avail, TRUE);
+ gmx_check_thread_affinity_set(mdlog, &hw_opt, hwinfo_->nthreads_hw_avail, TRUE);
int numThreadsOnThisNode, intraNodeThreadOffset;
analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode,
&intraNodeThreadOffset);
/* Set the CPU affinity */
- gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, numThreadsOnThisRank,
+ gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo_->hardwareTopology, numThreadsOnThisRank,
numThreadsOnThisNode, intraNodeThreadOffset, nullptr);
}
deviceStreamManager->stream(DeviceStreamType::PmePpTransfer));
}
- fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo,
+ fr->nbv = Nbnxm::init_nb_verlet(mdlog, inputrec.get(), fr, cr, *hwinfo_,
runScheduleWork.simulationWork.useGpuNonbonded,
deviceStreamManager.get(), &mtop, box, wcycle);
// TODO: Move the logic below to a GPU bonded builder
sfree(disresdata);
sfree(oriresdata);
- if (!hwinfo->deviceInfoList.empty())
+ if (!hwinfo_->deviceInfoList.empty())
{
/* stop the GPU profiler (only CUDA) */
stopGpuProfiler();
real forceWarningThreshold,
StartingBehavior startingBehavior);
+ void addHardwareDetectionResult(const gmx_hw_info_t* hwinfo);
+
void addDomdec(const DomdecOptions& options);
void addInput(SimulationInputHandle inputHolder);
//! The modules that comprise the functionality of mdrun.
std::unique_ptr<MDModules> mdModules_;
+ //! Detected hardware.
+ const gmx_hw_info_t* hwinfo_ = nullptr;
+
//! \brief Parallelism information.
gmx_hw_opt_t hardwareOptions_;
// nullptr is a valid value for the multisim handle
newRunner.ms = multiSimulation_;
+ if (hwinfo_)
+ {
+ newRunner.hwinfo_ = hwinfo_;
+ }
+ else
+ {
+ GMX_THROW(gmx::APIError(
+ "MdrunnerBuilder::addHardwareDetectionResult() is required before build()"));
+ }
+
if (inputHolder_)
{
newRunner.inputHolder_ = std::move(inputHolder_);
return newRunner;
}
+void Mdrunner::BuilderImplementation::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
+{
+ hwinfo_ = hwinfo;
+}
+
void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt)
{
nbpu_opt_ = nbpu_opt;
MdrunnerBuilder::~MdrunnerBuilder() = default;
+MdrunnerBuilder& MdrunnerBuilder::addHardwareDetectionResult(const gmx_hw_info_t* hwinfo)
+{
+ impl_->addHardwareDetectionResult(hwinfo);
+ return *this;
+}
+
MdrunnerBuilder& MdrunnerBuilder::addSimulationMethod(const MdrunOptions& options,
real forceWarningThreshold,
const StartingBehavior startingBehavior)
//! The modules that comprise mdrun.
std::unique_ptr<MDModules> mdModules_;
+ //! Non-owning handle to the results of the hardware detection.
+ const gmx_hw_info_t* hwinfo_ = nullptr;
+
/*!
* \brief Holds simulation input specification provided by client, if any.
*
*/
Mdrunner build();
+ /*!
+ * \brief Supply the result of hardware detection to the gmx::Mdrunner
+ *
+ * \param hwinfo Non-owning, non-null handle to the result of hardware detection.
+ *
+ * \todo It would be better to express this as either a not-null const pointer or
+ * a const reference, but neither of those is consistent with incremental
+ * building of an object. This motivates future work to be able to make a deep copy
+ * of the detection result. See https://gitlab.com/gromacs/gromacs/-/issues/3650 */
+ MdrunnerBuilder& addHardwareDetectionResult(const gmx_hw_info_t* hwinfo);
+
/*!
* \brief Set up non-bonded short-range force calculations.
*
/*! \brief Struct that defines a pull coordinate */
struct t_pull_coord
{
- int eType; /**< The pull type: umbrella, constraint, ... */
- std::string externalPotentialProvider; /**< Name of the module providing the external potential, only used with eType==epullEXTERNAL */
- int eGeom; /**< The pull geometry */
- int ngroup; /**< The number of groups, depends on eGeom */
- std::array<int, c_pullCoordNgroupMax> group; /**< The pull groups: indices into the group arrays in pull_t and pull_params_t, ngroup indices are used */
- gmx::IVec dim; /**< Used to select components for constraint */
- gmx::RVec origin; /**< The origin for the absolute reference */
- gmx::RVec vec; /**< The pull vector, direction or position */
- bool bStart; /**< Set init based on the initial structure */
- real init; /**< Initial reference displacement (nm) or (deg) */
- real rate; /**< Rate of motion (nm/ps) or (deg/ps) */
- real k; /**< Force constant (kJ/(mol nm^2) or kJ/(mol rad^2) for umbrella pull type, or kJ/(mol nm) or kJ/(mol rad) for constant force pull type */
- real kB; /**< Force constant for state B */
+ //! The pull type: umbrella, constraint, ...
+ int eType = 0;
+ //! Name of the module providing the external potential, only used with eType==epullEXTERNAL
+ std::string externalPotentialProvider;
+ //! The pull geometry
+ int eGeom = 0;
+ //! The number of groups, depends on eGeom
+ int ngroup = 0;
+ /*! \brief The pull groups:
+ *
+ * indices into the group arrays in pull_t and pull_params_t,
+ * ngroup indices are used
+ */
+ std::array<int, c_pullCoordNgroupMax> group;
+ //! Used to select components for constraint
+ gmx::IVec dim = { 0, 0, 0 };
+ //! The origin for the absolute reference
+ gmx::RVec origin = { 0, 0, 0 };
+ //! The pull vector, direction or position
+ gmx::RVec vec = { 0, 0, 0 };
+ //! Set init based on the initial structure
+ bool bStart = false;
+ //! Initial reference displacement (nm) or (deg)
+ real init = 0.0;
+ //! Rate of motion (nm/ps) or (deg/ps)
+ real rate = 0.0;
+ /*! \brief Force constant
+ *
+ * For umbrella pull type this is (kJ/(mol nm^2) or kJ/(mol rad^2).
+ * For constant force pull type it is kJ/(mol nm) or kJ/(mol rad).
+ */
+ real k = 0.0;
+ //! Force constant for state B
+ real kB = 0.0;
};
/*! \brief Struct containing all pull parameters */
struct pull_params_t
{
- int ngroup; /**< Number of pull groups */
- int ncoord; /**< Number of pull coordinates */
- real cylinder_r; /**< Radius of cylinder for dynamic COM (nm) */
- real constr_tol; /**< Absolute tolerance for constraints in (nm) */
- bool bPrintCOM; /**< Print coordinates of COM for each coord */
- bool bPrintRefValue; /**< Print the reference value for each coord */
- bool bPrintComp; /**< Print cartesian components for each coord with geometry=distance */
- bool bSetPbcRefToPrevStepCOM; /**< Use the COM of each group from the previous step as reference */
- int nstxout; /**< Output interval for pull x */
- int nstfout; /**< Output interval for pull f */
- bool bXOutAverage; /**< Write the average coordinate during the output interval */
- bool bFOutAverage; /**< Write the average force during the output interval */
-
- std::vector<t_pull_group> group; /**< groups to pull/restrain/etc/ */
- std::vector<t_pull_coord> coord; /**< the pull coordinates */
+ //! Number of pull groups
+ int ngroup = 0;
+ //! Number of pull coordinates
+ int ncoord = 0;
+ //! Radius of cylinder for dynamic COM (nm)
+ real cylinder_r = 0.0;
+ //! Absolute tolerance for constraints in (nm)
+ real constr_tol = 0.0;
+ //! Print coordinates of COM for each coord
+ bool bPrintCOM = false;
+ //! Print the reference value for each coord
+ bool bPrintRefValue = false;
+ //! Print cartesian components for each coord with geometry=distance
+ bool bPrintComp = false;
+ //! Use the COM of each group from the previous step as reference
+ bool bSetPbcRefToPrevStepCOM = false;
+ //! Output interval for pull x
+ int nstxout = 0;
+ //! Output interval for pull f
+ int nstfout = 0;
+ //! Write the average coordinate during the output interval
+ bool bXOutAverage = false;
+ //! Write the average force during the output interval
+ bool bFOutAverage = false;
+ //! groups to pull/restrain/etc/
+ std::vector<t_pull_group> group;
+ //! the pull coordinates
+ std::vector<t_pull_coord> coord;
};
/*! \endcond */
return "the requested SIMD kernel was not set up at configuration time";
}
+ if (options.reportTime && (0 > gmx_cycles_calibrate(1.0)))
+ {
+ return "the -time option is not supported on this system";
+ }
+
return {};
}
options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
options.useHalfLJOptimization ? "half" : "all",
combruleNames[options.ljCombinationRule].c_str(), kernelNames[options.nbnxmSimd].c_str());
+ if (!options.outputFile.empty())
+ {
+ fprintf(system.csv,
+ "\"%d\",\"%zu\",\"%g\",\"%d\",\"%d\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%"
+ "s\",",
+#if GMX_SIMD
+ (options.nbnxmSimd != BenchMarkKernels::SimdNo) ? GMX_SIMD_REAL_WIDTH : 0,
+#else
+ 0,
+#endif
+ system.coordinates.size(), options.pairlistCutoff, options.numThreads,
+ options.numIterations, options.computeVirialAndEnergy ? "yes" : "no",
+ (options.coulombType != BenchMarkCoulomb::ReactionField)
+ ? ((options.nbnxmSimd == BenchMarkKernels::SimdNo || options.useTabulatedEwaldCorr)
+ ? "table"
+ : "analytical")
+ : "",
+ options.coulombType == BenchMarkCoulomb::Pme ? "Ewald" : "RF",
+ options.useHalfLJOptimization ? "half" : "all",
+ combruleNames[options.ljCombinationRule].c_str(),
+ kernelNames[options.nbnxmSimd].c_str());
+ }
}
// Run pre-iteration to avoid cache misses
cycles = gmx_cycles_read() - cycles;
if (!doWarmup)
{
- const double dCycles = static_cast<double>(cycles);
- if (options.cyclesPerPair)
+ if (options.reportTime)
{
- fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", cycles * 1e-6,
- dCycles / options.numIterations * 1e-6, dCycles / (options.numIterations * numPairs),
- dCycles / (options.numIterations * numUsefulPairs));
+ const double uSec = static_cast<double>(cycles) * gmx_cycles_calibrate(1.0) * 1.e6;
+ if (options.cyclesPerPair)
+ {
+ fprintf(stdout, "%13.2f %13.3f %10.3f %10.3f\n", uSec, uSec / options.numIterations,
+ uSec / (options.numIterations * numPairs),
+ uSec / (options.numIterations * numUsefulPairs));
+ if (!options.outputFile.empty())
+ {
+ fprintf(system.csv, "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n", uSec,
+ uSec / options.numIterations, uSec / (options.numIterations * numPairs),
+ uSec / (options.numIterations * numUsefulPairs));
+ }
+ }
+ else
+ {
+ fprintf(stdout, "%13.2f %13.3f %10.3f %10.3f\n", uSec, uSec / options.numIterations,
+ options.numIterations * numPairs / uSec,
+ options.numIterations * numUsefulPairs / uSec);
+ if (!options.outputFile.empty())
+ {
+ fprintf(system.csv, "\"%.3f\",\"%.4f\",\"%.4f\",\"%.4f\"\n", uSec,
+ uSec / options.numIterations, options.numIterations * numPairs / uSec,
+ options.numIterations * numUsefulPairs / uSec);
+ }
+ }
}
else
{
- fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", dCycles * 1e-6,
- dCycles / options.numIterations * 1e-6, options.numIterations * numPairs / dCycles,
- options.numIterations * numUsefulPairs / dCycles);
+ const double dCycles = static_cast<double>(cycles);
+ if (options.cyclesPerPair)
+ {
+ fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", cycles * 1e-6,
+ dCycles / options.numIterations * 1e-6,
+ dCycles / (options.numIterations * numPairs),
+ dCycles / (options.numIterations * numUsefulPairs));
+ }
+ else
+ {
+ fprintf(stdout, "%10.3f %10.4f %8.4f %8.4f\n", dCycles * 1e-6,
+ dCycles / options.numIterations * 1e-6, options.numIterations * numPairs / dCycles,
+ options.numIterations * numUsefulPairs / dCycles);
+ }
}
}
}
gmx_omp_nthreads_set(emntPairsearch, options.numThreads);
gmx_omp_nthreads_set(emntNonbonded, options.numThreads);
- const gmx::BenchmarkSystem system(sizeFactor);
+ const gmx::BenchmarkSystem system(sizeFactor, options.outputFile);
real minBoxSize = norm(system.box[XX]);
for (int dim = YY; dim < DIM; dim++)
setupAndRunInstance(system, optionsList[0], true);
}
- fprintf(stdout, "Coulomb LJ comb. SIMD Mcycles Mcycles/it. %s\n",
- options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
- fprintf(stdout, " total useful\n");
+ if (options.reportTime)
+ {
+ fprintf(stdout, "Coulomb LJ comb. SIMD usec usec/it. %s\n",
+ options.cyclesPerPair ? "usec/pair" : "pairs/usec");
+ if (!options.outputFile.empty())
+ {
+ fprintf(system.csv,
+ "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
+ "energy\",\"Ewald excl. "
+ "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"usec\",\"usec/it\",\"total "
+ "pairs/usec\",\"useful pairs/usec\"\n");
+ }
+ fprintf(stdout,
+ " total useful\n");
+ }
+ else
+ {
+ fprintf(stdout, "Coulomb LJ comb. SIMD Mcycles Mcycles/it. %s\n",
+ options.cyclesPerPair ? "cycles/pair" : "pairs/cycle");
+ if (!options.outputFile.empty())
+ {
+ fprintf(system.csv,
+ "\"width\",\"atoms\",\"cut-off radius\",\"threads\",\"iter\",\"compute "
+ "energy\",\"Ewald excl. "
+ "corr.\",\"Coulomb\",\"LJ\",\"comb\",\"SIMD\",\"Mcycles\",\"Mcycles/"
+ "it\",\"total "
+ "total cycles/pair\",\"total cycles per useful pair\"\n");
+ }
+ fprintf(stdout, " total useful\n");
+ }
for (const auto& optionsInstance : optionsList)
{
setupAndRunInstance(system, optionsInstance, false);
}
+
+ if (!options.outputFile.empty())
+ {
+ fclose(system.csv);
+ }
}
} // namespace Nbnxm
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_NBNXN_BENCH_SETUP_H
#define GMX_NBNXN_BENCH_SETUP_H
+#include <string>
+
#include "gromacs/utility/real.h"
namespace Nbnxm
int numWarmupIterations = 0;
//! Print cycles/pair instead of pairs/cycle
bool cyclesPerPair = false;
+ //! Report in micro seconds instead of cycles
+ bool reportTime = false;
+ //! Also report into a csv file
+ std::string outputFile;
};
/*! \brief
}
}
-BenchmarkSystem::BenchmarkSystem(const int multiplicationFactor)
+BenchmarkSystem::BenchmarkSystem(const int multiplicationFactor, const std::string& outputFile)
{
numAtomTypes = 2;
nonbondedParameters.resize(numAtomTypes * numAtomTypes * 2, 0);
forceRec.nbfp = nonbondedParameters;
snew(forceRec.shift_vec, SHIFTS);
calc_shifts(box, forceRec.shift_vec);
+ if (!outputFile.empty())
+ {
+ csv = fopen(outputFile.c_str(), "w+");
+ }
}
} // namespace gmx
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2019, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_NBNXN_BENCH_SYSTEM_H
#define GMX_NBNXN_BENCH_SYSTEM_H
+#include <string>
#include <vector>
#include "gromacs/math/vectypes.h"
* with 3000 atoms total.
*
* \param[in] multiplicationFactor Should be a power of 2, is checked
+ * \param[in] outputFile          The name of the csv file to write benchmark results to
*/
- BenchmarkSystem(int multiplicationFactor);
+ BenchmarkSystem(int multiplicationFactor, const std::string& outputFile);
//! Number of different atom types in test system.
int numAtomTypes;
matrix box;
//! Forcerec with only the entries used in the benchmark set
t_forcerec forceRec;
+ //! csv output file
+ FILE* csv;
};
} // namespace gmx
}
}
}
-
-/*! \brief CUDA kernel to sum up the force components
- *
- * \tparam accumulateForce If the initial forces in \p gm_fTotal should be saved.
- * \tparam addPmeForce Whether the PME force should be added to the total.
- *
- * \param[in] gm_fNB Non-bonded forces in nbnxm format.
- * \param[in] gm_fPme PME forces.
- * \param[in,out] gm_fTotal Force buffer to be reduced into.
- * \param[in] cell Cell index mapping.
- * \param[in] atomStart Start atom index.
- * \param[in] numAtoms Number of atoms.
- */
-template<bool accumulateForce, bool addPmeForce>
-static __global__ void nbnxn_gpu_add_nbat_f_to_f_kernel(const float3* __restrict__ gm_fNB,
-                                                        const float3* __restrict__ gm_fPme,
-                                                        float3*                    gm_fTotal,
-                                                        const int* __restrict__ gm_cell,
-                                                        const int atomStart,
-                                                        const int numAtoms)
-{
-
-    /* map particle-level parallelism to 1D CUDA thread and block index */
-    const int threadIndex = blockIdx.x * blockDim.x + threadIdx.x;
-
-    /* perform addition for each particle*/
-    if (threadIndex < numAtoms)
-    {
-
-        const int i        = gm_cell[atomStart + threadIndex];
-        float3*   gm_fDest = &gm_fTotal[atomStart + threadIndex];
-        float3    temp;
-
-        if (accumulateForce)
-        {
-            temp = *gm_fDest;
-            temp += gm_fNB[i];
-        }
-        else
-        {
-            temp = gm_fNB[i];
-        }
-        if (addPmeForce)
-        {
-            temp += gm_fPme[atomStart + threadIndex];
-        }
-        *gm_fDest = temp;
-    }
-    return;
-}
-Xclang -finclude-default-header -D_${VENDOR}_SOURCE_
-DGMX_OCL_FASTGEN ${ELEC_DEF} ${VDW_DEF}
-Dc_nbnxnGpuClusterSize=${CLUSTER_SIZE}
- -Dc_nbnxnMinDistanceSquared=3.82e-07F
+ -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=3.82e-07
-Dc_nbnxnGpuNumClusterPerSupercluster=8
-Dc_nbnxnGpuJgroupSize=4
-DIATYPE_SHMEM
/* DtoH f */
GMX_ASSERT(sizeof(*nbatom->out[0].f.data()) == sizeof(float),
"The host force buffer should be in single precision to match device data size.");
- copyFromDeviceBuffer(&nbatom->out[0].f.data()[adat_begin * DIM], &adat->f, adat_begin * DIM,
+ copyFromDeviceBuffer(&nbatom->out[0].f[adat_begin * DIM], &adat->f, adat_begin * DIM,
adat_len * DIM, deviceStream, GpuApiCallBehavior::Async,
bDoTime ? t->xf[aloc].nb_d2h.fetchNextEvent() : nullptr);
}
/* Free kernels */
+ // NOLINTNEXTLINE(bugprone-sizeof-expression)
int kernel_count = sizeof(nb->kernel_ener_noprune_ptr) / sizeof(nb->kernel_ener_noprune_ptr[0][0]);
free_kernels(nb->kernel_ener_noprune_ptr[0], kernel_count);
+ // NOLINTNEXTLINE(bugprone-sizeof-expression)
kernel_count = sizeof(nb->kernel_ener_prune_ptr) / sizeof(nb->kernel_ener_prune_ptr[0][0]);
free_kernels(nb->kernel_ener_prune_ptr[0], kernel_count);
+ // NOLINTNEXTLINE(bugprone-sizeof-expression)
kernel_count = sizeof(nb->kernel_noener_noprune_ptr) / sizeof(nb->kernel_noener_noprune_ptr[0][0]);
free_kernels(nb->kernel_noener_noprune_ptr[0], kernel_count);
+ // NOLINTNEXTLINE(bugprone-sizeof-expression)
kernel_count = sizeof(nb->kernel_noener_prune_ptr) / sizeof(nb->kernel_noener_prune_ptr[0][0]);
free_kernels(nb->kernel_noener_prune_ptr[0], kernel_count);
/* Here we pass macros and static const/constexpr int variables defined
* in include files outside the opencl as macros, to avoid
* including those files in the plain-C JIT compilation that happens
- * at runtime. */
+ * at runtime.
+ * Note that we need to re-add the suffix to the floating point literals
+ * passed to the kernel to avoid type ambiguity.
+ */
extraDefines += gmx::formatString(
" -Dc_nbnxnGpuClusterSize=%d"
- " -Dc_nbnxnMinDistanceSquared=%g"
+ " -DNBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT=%g"
" -Dc_nbnxnGpuNumClusterPerSupercluster=%d"
" -Dc_nbnxnGpuJgroupSize=%d"
"%s",
#endif
barrier(CLK_LOCAL_MEM_FENCE);
- float3 fci_buf[c_nbnxnGpuNumClusterPerSupercluster]; /* i force buffer */
+ fvec fci_buf[c_nbnxnGpuNumClusterPerSupercluster]; /* i force buffer */
for (int ci_offset = 0; ci_offset < c_nbnxnGpuNumClusterPerSupercluster; ci_offset++)
{
- fci_buf[ci_offset] = (float3)(0.0F);
+ fci_buf[ci_offset][0] = 0.0F;
+ fci_buf[ci_offset][1] = 0.0F;
+ fci_buf[ci_offset][2] = 0.0F;
}
#ifdef LJ_EWALD
const float c12 = ljcp_i.y * ljcp_j.y;
# else
/* LJ 2^(1/6)*sigma and 12*epsilon */
- const float sigma = ljcp_i.x + ljcp_j.x;
- const float epsilon = ljcp_i.y * ljcp_j.y;
+ const float sigma = ljcp_i.x + ljcp_j.x;
+ const float epsilon = ljcp_i.y * ljcp_j.y;
# if defined CALC_ENERGIES || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
- float c6, c12;
- convert_sigma_epsilon_to_c6_c12(sigma, epsilon, &c6, &c12);
+ const float2 c6c12 = convert_sigma_epsilon_to_c6_c12(sigma, epsilon);
+ const float c6 = c6c12.x;
+ const float c12 = c6c12.y;
# endif
# endif /* LJ_COMB_GEOM */
#endif /* LJ_COMB */
// Ensure distance do not become so small that r^-12 overflows.
- // Cast to float to ensure the correct built-in max() function
- // is called.
- r2 = max(r2, (float)c_nbnxnMinDistanceSquared);
+ r2 = max(r2, c_nbnxnMinDistanceSquared);
const float inv_r = rsqrt(r2);
const float inv_r2 = inv_r * inv_r;
fcj_buf -= f_ij;
/* accumulate i forces in registers */
- fci_buf[i] += f_ij;
+ fci_buf[i][0] += f_ij.x;
+ fci_buf[i][1] += f_ij.y;
+ fci_buf[i][2] += f_ij.z;
}
}
/* loop over the j clusters = seen by any of the atoms in the current super-cluster */
for (int j4 = cij4_start + tidxz; j4 < cij4_end; j4 += NTHREAD_Z)
{
- unsigned int imaskFull, imaskCheck, imaskNew;
+ unsigned int imaskFull = 0, imaskCheck = 0, imaskNew = 0;
if (haveFreshList)
{
# define gmx_unused
# endif
+/*! \brief Single precision floating point short vector type (as rvec in the CPU codebase).
+ * Currently only used to avoid float3 register arrays.
+ */
+typedef float fvec[3];
+
// Data structures shared between OpenCL device code and OpenCL host code
// TODO: review, improve
// Replaced real by float for now, to avoid including any other header
/*! i-cluster interaction mask for a super-cluster with all c_nbnxnGpuNumClusterPerSupercluster bits set */
__constant unsigned supercl_interaction_mask = ((1U << c_nbnxnGpuNumClusterPerSupercluster) - 1U);
+/*! Minimum single precision threshold for r^2 to avoid r^-12 overflow. */
+__constant float c_nbnxnMinDistanceSquared = NBNXM_MIN_DISTANCE_SQUARED_VALUE_FLOAT;
+
gmx_opencl_inline void preloadCj4Generic(__local int* sm_cjPreload,
const __global int* gm_cj,
int tidxi,
}
/*! Convert LJ sigma,epsilon parameters to C6,C12. */
-gmx_opencl_inline void convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon, float* c6, float* c12)
-{
- float sigma2, sigma6;
-
- sigma2 = sigma * sigma;
- sigma6 = sigma2 * sigma2 * sigma2;
- *c6 = epsilon * sigma6;
- *c12 = *c6 * sigma6;
+gmx_opencl_inline float2 convert_sigma_epsilon_to_c6_c12(const float sigma, const float epsilon)
+{
+ const float sigma2 = sigma * sigma;
+ const float sigma6 = sigma2 * sigma2 * sigma2;
+ const float c6 = epsilon * sigma6;
+ const float2 c6c12 = (float2)(c6, /* c6 */
+ c6 * sigma6); /* c12 */
+ return c6c12;
}
float r2,
float* F_invr)
{
- float r, r_switch;
-
/* force switch constants */
- float disp_shift_V2 = nbparam->dispersion_shift.c2;
- float disp_shift_V3 = nbparam->dispersion_shift.c3;
- float repu_shift_V2 = nbparam->repulsion_shift.c2;
- float repu_shift_V3 = nbparam->repulsion_shift.c3;
+ const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+ const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+ const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+ const float repu_shift_V3 = nbparam->repulsion_shift.c3;
- r = r2 * inv_r;
- r_switch = r - nbparam->rvdw_switch;
- r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+ const float r = r2 * inv_r;
+ float r_switch = r - nbparam->rvdw_switch;
+ r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
*F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
+ c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
float* F_invr,
float* E_lj)
{
- float r, r_switch;
-
/* force switch constants */
- float disp_shift_V2 = nbparam->dispersion_shift.c2;
- float disp_shift_V3 = nbparam->dispersion_shift.c3;
- float repu_shift_V2 = nbparam->repulsion_shift.c2;
- float repu_shift_V3 = nbparam->repulsion_shift.c3;
+ const float disp_shift_V2 = nbparam->dispersion_shift.c2;
+ const float disp_shift_V3 = nbparam->dispersion_shift.c3;
+ const float repu_shift_V2 = nbparam->repulsion_shift.c2;
+ const float repu_shift_V3 = nbparam->repulsion_shift.c3;
- float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
- float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
- float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
- float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
+ const float disp_shift_F2 = nbparam->dispersion_shift.c2 / 3;
+ const float disp_shift_F3 = nbparam->dispersion_shift.c3 / 4;
+ const float repu_shift_F2 = nbparam->repulsion_shift.c2 / 3;
+ const float repu_shift_F3 = nbparam->repulsion_shift.c3 / 4;
- r = r2 * inv_r;
- r_switch = r - nbparam->rvdw_switch;
- r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+ const float r = r2 * inv_r;
+ float r_switch = r - nbparam->rvdw_switch;
+ r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
*F_invr += -c6 * (disp_shift_V2 + disp_shift_V3 * r_switch) * r_switch * r_switch * inv_r
+ c12 * (repu_shift_V2 + repu_shift_V3 * r_switch) * r_switch * r_switch * inv_r;
float* F_invr,
const float* E_lj)
{
- float r, r_switch;
- float sw, dsw;
-
/* potential switch constants */
- float switch_V3 = nbparam->vdw_switch.c3;
- float switch_V4 = nbparam->vdw_switch.c4;
- float switch_V5 = nbparam->vdw_switch.c5;
- float switch_F2 = nbparam->vdw_switch.c3;
- float switch_F3 = nbparam->vdw_switch.c4;
- float switch_F4 = nbparam->vdw_switch.c5;
+ const float switch_V3 = nbparam->vdw_switch.c3;
+ const float switch_V4 = nbparam->vdw_switch.c4;
+ const float switch_V5 = nbparam->vdw_switch.c5;
+ const float switch_F2 = nbparam->vdw_switch.c3;
+ const float switch_F3 = nbparam->vdw_switch.c4;
+ const float switch_F4 = nbparam->vdw_switch.c5;
- r = r2 * inv_r;
- r_switch = r - nbparam->rvdw_switch;
+ const float r = r2 * inv_r;
+ const float r_switch = r - nbparam->rvdw_switch;
/* Unlike in the F+E kernel, conditional is faster here */
if (r_switch > 0.0F)
{
- sw = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
- dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+ const float sw = 1.0F
+ + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch
+ * r_switch * r_switch;
+ const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
*F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
}
float* F_invr,
float* E_lj)
{
- float r, r_switch;
- float sw, dsw;
-
/* potential switch constants */
- float switch_V3 = nbparam->vdw_switch.c3;
- float switch_V4 = nbparam->vdw_switch.c4;
- float switch_V5 = nbparam->vdw_switch.c5;
- float switch_F2 = nbparam->vdw_switch.c3;
- float switch_F3 = nbparam->vdw_switch.c4;
- float switch_F4 = nbparam->vdw_switch.c5;
+ const float switch_V3 = nbparam->vdw_switch.c3;
+ const float switch_V4 = nbparam->vdw_switch.c4;
+ const float switch_V5 = nbparam->vdw_switch.c5;
+ const float switch_F2 = nbparam->vdw_switch.c3;
+ const float switch_F3 = nbparam->vdw_switch.c4;
+ const float switch_F4 = nbparam->vdw_switch.c5;
- r = r2 * inv_r;
- r_switch = r - nbparam->rvdw_switch;
- r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
+ const float r = r2 * inv_r;
+ float r_switch = r - nbparam->rvdw_switch;
+ r_switch = r_switch >= 0.0F ? r_switch : 0.0F;
/* Unlike in the F-only kernel, masking is faster here */
- sw = 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
- dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
+ const float sw =
+ 1.0F + (switch_V3 + (switch_V4 + switch_V5 * r_switch) * r_switch) * r_switch * r_switch * r_switch;
+ const float dsw = (switch_F2 + (switch_F3 + switch_F4 * r_switch) * r_switch) * r_switch * r_switch;
*F_invr = (*F_invr) * sw - inv_r * (*E_lj) * dsw;
*E_lj *= sw;
float lje_coeff6_6,
float* F_invr)
{
- float c6grid, inv_r6_nm, cr2, expmcr2, poly;
-
- c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+ const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
/* Recalculate inv_r6 without exclusion mask */
- inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
- cr2 = lje_coeff2 * r2;
- expmcr2 = exp(-cr2);
- poly = 1.0F + cr2 + HALF_F * cr2 * cr2;
+ const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+ const float cr2 = lje_coeff2 * r2;
+ const float expmcr2 = exp(-cr2);
+ const float poly = 1.0F + cr2 + HALF_F * cr2 * cr2;
/* Subtract the grid force from the total LJ force */
*F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
float* F_invr,
float* E_lj)
{
- float c6grid, inv_r6_nm, cr2, expmcr2, poly, sh_mask;
-
- c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
+ const float c6grid = nbfp_comb_climg2d[2 * typei] * nbfp_comb_climg2d[2 * typej];
/* Recalculate inv_r6 without exclusion mask */
- inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
- cr2 = lje_coeff2 * r2;
- expmcr2 = exp(-cr2);
- poly = 1.0F + cr2 + HALF_F * cr2 * cr2;
+ const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+ const float cr2 = lje_coeff2 * r2;
+ const float expmcr2 = exp(-cr2);
+ const float poly = 1.0F + cr2 + HALF_F * cr2 * cr2;
/* Subtract the grid force from the total LJ force */
*F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
/* Shift should be applied only to real LJ pairs */
- sh_mask = nbparam->sh_lj_ewald * int_bit;
+ const float sh_mask = nbparam->sh_lj_ewald * int_bit;
*E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
}
float* F_invr,
float* E_lj)
{
- float c6grid, inv_r6_nm, cr2, expmcr2, poly;
- float sigma, sigma2, epsilon;
-
/* sigma and epsilon are scaled to give 6*C6 */
- sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
+ const float sigma = nbfp_comb_climg2d[2 * typei] + nbfp_comb_climg2d[2 * typej];
- epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
+ const float epsilon = nbfp_comb_climg2d[2 * typei + 1] * nbfp_comb_climg2d[2 * typej + 1];
- sigma2 = sigma * sigma;
- c6grid = epsilon * sigma2 * sigma2 * sigma2;
+ const float sigma2 = sigma * sigma;
+ const float c6grid = epsilon * sigma2 * sigma2 * sigma2;
/* Recalculate inv_r6 without exclusion mask */
- inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
- cr2 = lje_coeff2 * r2;
- expmcr2 = exp(-cr2);
- poly = 1.0F + cr2 + HALF_F * cr2 * cr2;
+ const float inv_r6_nm = inv_r2 * inv_r2 * inv_r2;
+ const float cr2 = lje_coeff2 * r2;
+ const float expmcr2 = exp(-cr2);
+ const float poly = 1.0F + cr2 + HALF_F * cr2 * cr2;
/* Subtract the grid force from the total LJ force */
*F_invr += c6grid * (inv_r6_nm - expmcr2 * (inv_r6_nm * poly + lje_coeff6_6)) * inv_r2;
if (with_E_lj)
{
- float sh_mask;
/* Shift should be applied only to real LJ pairs */
- sh_mask = nbparam->sh_lj_ewald * int_bit;
+ const float sh_mask = nbparam->sh_lj_ewald * int_bit;
*E_lj += ONE_SIXTH_F * c6grid * (inv_r6_nm * (1.0F - expmcr2 * poly) + sh_mask);
}
}
const float FD1 = 0.50736591960530292870F;
const float FD0 = 1.0F;
- float z4;
- float polyFN0, polyFN1, polyFD0, polyFD1;
-
- z4 = z2 * z2;
+ const float z4 = z2 * z2;
- polyFD0 = FD4 * z4 + FD2;
- polyFD1 = FD3 * z4 + FD1;
- polyFD0 = polyFD0 * z4 + FD0;
- polyFD0 = polyFD1 * z2 + polyFD0;
+ float polyFD0 = FD4 * z4 + FD2;
+ float polyFD1 = FD3 * z4 + FD1;
+ polyFD0 = polyFD0 * z4 + FD0;
+ polyFD0 = polyFD1 * z2 + polyFD0;
polyFD0 = 1.0F / polyFD0;
- polyFN0 = FN6 * z4 + FN4;
- polyFN1 = FN5 * z4 + FN3;
- polyFN0 = polyFN0 * z4 + FN2;
- polyFN1 = polyFN1 * z4 + FN1;
- polyFN0 = polyFN0 * z4 + FN0;
- polyFN0 = polyFN1 * z2 + polyFN0;
+ float polyFN0 = FN6 * z4 + FN4;
+ float polyFN1 = FN5 * z4 + FN3;
+ polyFN0 = polyFN0 * z4 + FN2;
+ polyFN1 = polyFN1 * z4 + FN1;
+ polyFN0 = polyFN0 * z4 + FN0;
+ polyFN0 = polyFN1 * z2 + polyFN0;
return polyFN0 * polyFD0;
}
}
# if REDUCE_SHUFFLE
-gmx_opencl_inline void reduce_force_i_and_shift_shfl(float3* fci_buf,
+gmx_opencl_inline void reduce_force_i_and_shift_shfl(__private fvec fci_buf[],
__global float* fout,
bool bCalcFshift,
int tidxi,
for (int ci_offset = 0; ci_offset < c_nbnxnGpuNumClusterPerSupercluster; ci_offset++)
{
int aidx = (sci * c_nbnxnGpuNumClusterPerSupercluster + ci_offset) * CL_SIZE + tidxi;
- float3 fin = fci_buf[ci_offset];
+ float3 fin = (float3)(fci_buf[ci_offset][0], fci_buf[ci_offset][1], fci_buf[ci_offset][2]);
fin.x += intel_sub_group_shuffle_down(fin.x, fin.x, CL_SIZE);
fin.y += intel_sub_group_shuffle_up(fin.y, fin.y, CL_SIZE);
fin.z += intel_sub_group_shuffle_down(fin.z, fin.z, CL_SIZE);
* array sizes.
*/
gmx_opencl_inline void reduce_force_i_and_shift_pow2(volatile __local float* f_buf,
- float3* fci_buf,
- __global float* fout,
- bool bCalcFshift,
- int tidxi,
- int tidxj,
- int sci,
- int shift,
- __global float* fshift)
+ __private fvec fci_buf[],
+ __global float* fout,
+ bool bCalcFshift,
+ int tidxi,
+ int tidxj,
+ int sci,
+ int shift,
+ __global float* fshift)
{
float fshift_buf = 0;
for (int ci_offset = 0; ci_offset < c_nbnxnGpuNumClusterPerSupercluster; ci_offset++)
int aidx = (sci * c_nbnxnGpuNumClusterPerSupercluster + ci_offset) * CL_SIZE + tidxi;
int tidx = tidxi + tidxj * CL_SIZE;
/* store i forces in shmem */
- f_buf[tidx] = fci_buf[ci_offset].x;
- f_buf[FBUF_STRIDE + tidx] = fci_buf[ci_offset].y;
- f_buf[2 * FBUF_STRIDE + tidx] = fci_buf[ci_offset].z;
+ f_buf[tidx] = fci_buf[ci_offset][0];
+ f_buf[FBUF_STRIDE + tidx] = fci_buf[ci_offset][1];
+ f_buf[2 * FBUF_STRIDE + tidx] = fci_buf[ci_offset][2];
barrier(CLK_LOCAL_MEM_FENCE);
/* Reduce the initial CL_SIZE values for each i atom to half
/*! Final i-force reduction
*/
gmx_opencl_inline void reduce_force_i_and_shift(__local float gmx_unused* f_buf,
- float3* fci_buf,
- __global float* f,
- bool bCalcFshift,
- int tidxi,
- int tidxj,
- int sci,
- int shift,
- __global float* fshift)
+ __private fvec fci_buf[],
+ __global float* f,
+ bool bCalcFshift,
+ int tidxi,
+ int tidxj,
+ int sci,
+ int shift,
+ __global float* fshift)
{
# if REDUCE_SHUFFLE
reduce_force_i_and_shift_shfl(fci_buf, f, bCalcFshift, tidxi, tidxj, sci, shift, fshift);
volatile __global float* e_el,
int tidx)
{
- int j;
-
int i = WARP_SIZE / 2;
/* Can't just use i as loop variable because than nvcc refuses to unroll. */
- for (j = WARP_SIZE_LOG2 - 1; j > 0; j--)
+ for (int j = WARP_SIZE_LOG2 - 1; j > 0; j--)
{
if (tidx < i)
{
#if !GMX_DOUBLE
return rbb2;
#else
- return (float)((1 + GMX_FLOAT_EPS) * rbb2);
+ return static_cast<float>((1 + GMX_FLOAT_EPS) * rbb2);
#endif
}
const FileTypeMapping c_fileTypeMapping[] = { { eftTopology, efTPS }, { eftRunInput, efTPR },
{ eftTrajectory, efTRX }, { eftEnergy, efEDR },
{ eftPDB, efPDB }, { eftIndex, efNDX },
- { eftPlot, efXVG }, { eftGenericData, efDAT } };
+ { eftPlot, efXVG }, { eftGenericData, efDAT },
+ { eftCsv, efCSV } };
/********************************************************************
* FileTypeHandler
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2010,2011,2012,2015,2019, by the GROMACS development team, led by
+ * Copyright (c) 2010,2011,2012,2015,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
eftIndex,
eftPlot,
eftGenericData,
+ eftCsv,
eftOptionFileType_NR
};
EXPECT_TRUE(value.empty());
}
+TEST(FileNameOptionTest, HandlesRequiredCsvValueWithoutExtension)
+{
+ gmx::Options options;
+ std::string value;
+ ASSERT_NO_THROW_GMX(options.addOption(
+ FileNameOption("f").store(&value).required().filetype(gmx::eftCsv).outputFile().defaultBasename("testfile")));
+ EXPECT_EQ("testfile.csv", value);
+
+ gmx::OptionsAssigner assigner(&options);
+ EXPECT_NO_THROW_GMX(assigner.start());
+ EXPECT_NO_THROW_GMX(assigner.finish());
+ EXPECT_NO_THROW_GMX(options.finish());
+
+ EXPECT_EQ("testfile.csv", value);
+}
+
+TEST(FileNameOptionTest, HandlesRequiredCsvOptionWithoutValue)
+{
+ gmx::Options options;
+ std::string value;
+ ASSERT_NO_THROW_GMX(options.addOption(
+ FileNameOption("f").store(&value).required().filetype(gmx::eftCsv).outputFile().defaultBasename("testfile")));
+ EXPECT_EQ("testfile.csv", value);
+
+ gmx::OptionsAssigner assigner(&options);
+ EXPECT_NO_THROW_GMX(assigner.start());
+ EXPECT_NO_THROW_GMX(assigner.startOption("f"));
+ EXPECT_NO_THROW_GMX(assigner.finishOption());
+ EXPECT_NO_THROW_GMX(assigner.finish());
+ EXPECT_NO_THROW_GMX(options.finish());
+
+ EXPECT_EQ("testfile.csv", value);
+}
+
} // namespace
// shifting. Currently up to 8 is accelerated. Could be accelerated for any
// number with a constexpr log2 function.
template<int n>
-SimdDInt32 fastMultiply(SimdDInt32 x)
+static inline SimdDInt32 fastMultiply(SimdDInt32 x)
{
if (n == 2)
{
// shifting. Currently up to 8 is accelerated. Could be accelerated for any
// number with a constexpr log2 function.
template<int n>
-SimdFInt32 fastMultiply(SimdFInt32 x)
+static inline SimdFInt32 fastMultiply(SimdFInt32 x)
{
if (n == 2)
{
#include "gromacs/hardware/detecthardware.h"
#include "gromacs/hardware/hardwaretopology.h"
#include "gromacs/hardware/hw_info.h"
+#include "gromacs/listed_forces/gpubonded.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdtypes/commrec.h"
}
}
-bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded,
- const bool useGpuForPme,
- const TaskTarget bondedTarget,
- const bool canUseGpuForBonded,
- const bool usingLJPme,
- const bool usingElecPmeOrEwald,
- const int numPmeRanksPerSimulation,
- const bool gpusWereDetected)
+bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded,
+ bool useGpuForPme,
+ TaskTarget bondedTarget,
+ const t_inputrec& inputrec,
+ const gmx_mtop_t& mtop,
+ int numPmeRanksPerSimulation,
+ bool gpusWereDetected)
{
if (bondedTarget == TaskTarget::Cpu)
{
return false;
}
- if (!canUseGpuForBonded)
+ std::string errorMessage;
+
+ if (!buildSupportsGpuBondeds(&errorMessage))
{
if (bondedTarget == TaskTarget::Gpu)
{
- GMX_THROW(InconsistentInputError(
- "Bonded interactions on the GPU were required, but not supported for these "
- "simulation settings. Change your settings, or do not require using GPUs."));
+ GMX_THROW(InconsistentInputError(errorMessage.c_str()));
+ }
+
+ return false;
+ }
+
+ if (!inputSupportsGpuBondeds(inputrec, mtop, &errorMessage))
+ {
+ if (bondedTarget == TaskTarget::Gpu)
+ {
+ GMX_THROW(InconsistentInputError(errorMessage.c_str()));
}
return false;
// Note that here we assume that the auto setting of PME ranks will not
// choose seperate PME ranks when nonBonded are assigned to the GPU.
bool usingOurCpuForPmeOrEwald =
- (usingLJPme || (usingElecPmeOrEwald && !useGpuForPme && numPmeRanksPerSimulation <= 0));
+ (EVDW_PME(inputrec.vdwtype)
+ || (EEL_PME_EWALD(inputrec.coulombtype) && !useGpuForPme && numPmeRanksPerSimulation <= 0));
return gpusWereDetected && usingOurCpuForPmeOrEwald;
}
if (isDomainDecomposition)
{
- if (!devFlags.enableGpuHaloExchange)
+ if (hasAnyConstraints && !useUpdateGroups)
{
- errorMessage += "Domain decomposition without GPU halo exchange is not supported.\n ";
+ errorMessage +=
+ "Domain decomposition is only supported with constraints when update "
+ "groups "
+ "are used. This means constraining all bonds is not supported, except for "
+ "small molecules, and box sizes close to half the pair-list cutoff are not "
+ "supported.\n ";
}
- else
- {
- if (hasAnyConstraints && !useUpdateGroups)
- {
- errorMessage +=
- "Domain decomposition is only supported with constraints when update "
- "groups "
- "are used. This means constraining all bonds is not supported, except for "
- "small molecules, and box sizes close to half the pair-list cutoff are not "
- "supported.\n ";
- }
- if (pmeUsesCpu)
- {
- errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
- }
+ if (pmeUsesCpu)
+ {
+ errorMessage += "With domain decomposition, PME must run fully on the GPU.\n";
}
}
{
errorMessage += "With separate PME rank(s), PME must run fully on the GPU.\n";
}
-
- if (!devFlags.enableGpuPmePPComm)
- {
- errorMessage += "With separate PME rank(s), PME must use direct communication.\n";
- }
}
if (inputrec.useMts)
* \param[in] useGpuForNonbonded Whether GPUs will be used for nonbonded interactions.
* \param[in] useGpuForPme Whether GPUs will be used for PME interactions.
* \param[in] bondedTarget The user's choice for mdrun -bonded for where to assign tasks.
- * \param[in] canUseGpuForBonded Whether the bonded interactions can run on a GPU
- * \param[in] usingLJPme Whether Vdw interactions use LJ-PME.
- * \param[in] usingElecPmeOrEwald Whether a PME or Ewald type method is used for electrostatics.
+ * \param[in] inputrec The user input.
+ * \param[in] mtop The global topology.
* \param[in] numPmeRanksPerSimulation The number of PME ranks in each simulation, can be -1 for auto.
* \param[in] gpusWereDetected Whether compatible GPUs were detected on any node.
*
*
* \throws std::bad_alloc If out of memory
* InconsistentInputError If the user requirements are inconsistent. */
-bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded,
- bool useGpuForPme,
- TaskTarget bondedTarget,
- bool canUseGpuForBonded,
- bool usingLJPme,
- bool usingElecPmeOrEwald,
- int numPmeRanksPerSimulation,
- bool gpusWereDetected);
+bool decideWhetherToUseGpusForBonded(bool useGpuForNonbonded,
+ bool useGpuForPme,
+ TaskTarget bondedTarget,
+ const t_inputrec& inputrec,
+ const gmx_mtop_t& mtop,
+ int numPmeRanksPerSimulation,
+ bool gpusWereDetected);
/*! \brief Decide whether to use GPU for update.
*
*/
double gmx_cycles_calibrate(double sampletime)
{
-#ifdef _MSC_VER
+ /* On ARM and recent-generation x86-64, we can use the more accurate cycle counters
+ * that allow better timing for things that depend on it (e.g. load balancing, profiling).
+ */
+#if ((defined __aarch64__) \
+ && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
+ /* 64-bit ARM cycle counters with GCC inline assembly */
+ unsigned long cycles;
+ __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(cycles));
+ /* Only first 32 bits are significant */
+ cycles &= 0xFFFFFFFF;
+ return 1. / cycles;
+ GMX_UNUSED_VALUE(sampletime);
+#else
+# if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
+ && defined(__x86_64__) && !defined(_CRAYC))
+ long gmx_unused tmp;
+ int cpuid1;
+ int gmx_unused cpuid2;
+ const int l0 = 0x0;
+ const int l16 = 0x16;
+ gmx_cycles_t cycles;
+
+ /* cpuid clobbers ebx but it must be restored for -fPIC so save
+ * then restore ebx */
+ __asm__ volatile(
+ "xchg %%rbx, %2\n"
+ "cpuid\n"
+ "xchg %%rbx, %2\n"
+ : "=a"(cpuid1), "=d"(cpuid2), "=r"(tmp)
+ : "a"(l0)
+ : "ecx", "ebx");
+ if (cpuid1 >= 0x16)
+ {
+ /* This CPU is recent enough so the timer frequency can be directly queried */
+ __asm__ volatile(
+ "xchg %%rbx, %2\n"
+ "cpuid\n"
+ "xchg %%rbx, %2\n"
+ : "=a"(cpuid1), "=d"(cpuid2), "=r"(tmp)
+ : "a"(l16)
+ : "ecx", "ebx");
+ cycles = static_cast<gmx_cycles_t>(cpuid1) * static_cast<gmx_cycles_t>(1000000);
+ return 1. / cycles;
+ }
+# endif
+# ifdef _MSC_VER
/* Windows does not have gettimeofday, but it provides a special
* routine that returns the cycle counter frequency.
return 1.0 / static_cast<double>(i.QuadPart);
/* end of MS Windows implementation */
-#elif HAVE_GETTIMEOFDAY
+# elif HAVE_GETTIMEOFDAY
/* generic implementation with gettimeofday() */
struct timeval t1, t2;
return -1;
}
-# if (defined(__alpha__) || defined(__alpha))
+# if (defined(__alpha__) || defined(__alpha))
/* Alpha cannot count to more than 4e9, but I don't expect
* that the architecture will go over 2GHz before it dies, so
* up to 2.0 seconds of sampling should be safe.
{
sampletime = 2.0;
}
-# endif
+# endif
/* Start a timing loop. We want this to be largely independent
* of machine speed, so we need to start with a very small number
/* Return seconds per cycle */
return timediff / cyclediff;
-#else
+# else
/* No timing function available */
return -1;
GMX_UNUSED_VALUE(sampletime);
+# endif
#endif
}
"Launch GPU NB F buffer ops.",
"Launch GPU Comm. coord.",
"Launch GPU Comm. force.",
+ "Launch GPU update",
"Test subcounter",
};
ewcsLAUNCH_GPU_NB_F_BUF_OPS,
ewcsLAUNCH_GPU_MOVEX,
ewcsLAUNCH_GPU_MOVEF,
+ ewcsLAUNCH_GPU_UPDATE_CONSTRAIN,
ewcsTEST,
ewcsNR
};
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief
+ * Declares utilities for template metaprogramming
+ *
+ * \author Roland Schulz <roland.schulz@intel.com>
+ *
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TEMPLATE_MP_H
+#define GMX_UTILITY_TEMPLATE_MP_H
+
+#include <cassert>
+#include <cstddef>
+
+#include <utility>
+
+#include "gromacs/compat/mp11.h"
+
+namespace gmx
+{
+
+/*! \internal \brief Base case of the recursive dispatcher below.
+ *
+ * Reached when no runtime enum values remain to be converted; all of
+ * them have already been bound into \c f as compile-time constants, so
+ * the callable is simply invoked and its result returned. */
+template<class Function>
+auto dispatchTemplatedFunction(Function&& f)
+{
+    return std::forward<Function>(f)();
+}
+
+/** \internal \brief Helper function to select appropriate template based on runtime values.
+ *
+ * Can only use enums for template parameters.
+ * These enums must have a member \c Count indicating the total number of valid values.
+ *
+ * Example usage:
+ * \code
+    enum class Options {
+        Op1 = 0,
+        Op2 = 1,
+        Count = 2
+    };
+
+    template<Options p1, Options p2>
+    bool foo(int i);
+
+    bool bar(Options p1, Options p2, int i) {
+        return dispatchTemplatedFunction(
+            [=](auto p1, auto p2) {
+                return foo<p1, p2>(i);
+            },
+            p1, p2);
+    }
+ * \endcode
+ */
+template<class Function, class Enum, class... Enums>
+auto dispatchTemplatedFunction(Function&& f, Enum e, Enums... es)
+{
+    // Peel off the first runtime value \c e: mp_with_index instantiates the
+    // inner lambda for each index in [0, Enum::Count) and calls the one
+    // matching \c e, so e_ is a compile-time constant there. That constant is
+    // wrapped in a std::integral_constant of the enum type and prepended to
+    // the arguments, then recursion handles the remaining enums until the
+    // base-case overload above invokes \c f.
+    return dispatchTemplatedFunction(
+            [&](auto... es_) {
+                return compat::mp_with_index<size_t(Enum::Count)>(size_t(e), [&](auto e_) {
+                    return std::forward<Function>(f)(
+                            std::integral_constant<Enum, static_cast<Enum>(size_t(e_))>(), es_...);
+                });
+            },
+            es...);
+}
+
+} // namespace gmx
+
+#endif // GMX_UTILITY_TEMPLATE_MP_H
range.cpp
strconvert.cpp
stringutil.cpp
+ template_mp.cpp
textreader.cpp
textwriter.cpp
typetraits.cpp
--- /dev/null
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2020, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Tests for dispatchTemplatedFunction in template_mp.h
+ */
+#include "gmxpre.h"
+
+#include "gromacs/utility/template_mp.h"
+
+#include <gtest/gtest.h>
+
+namespace gmx
+{
+namespace
+{
+
+// Test enum providing the \c Count member that dispatchTemplatedFunction
+// requires of its enum template parameters.
+enum class Options
+{
+    Op0 = 0,
+    Op1 = 1,
+    Op2 = 2,
+    Count = 3
+};
+
+//! Returns a value that encodes both template parameters and the runtime
+//! argument, so the test can tell which instantiation was dispatched.
+template<Options i, Options j>
+static int testEnumTwoIPlusJPlusK(int k)
+{
+    return 2 * int(i) + int(j) + k;
+}
+
+TEST(TemplateMPTest, DispatchTemplatedFunction)
+{
+    int five = 5;
+    int two1plus2plus5 = dispatchTemplatedFunction(
+            [=](auto p1, auto p2) { return testEnumTwoIPlusJPlusK<p1, p2>(five); }, Options::Op1,
+            Options::Op2);
+    // Op1 == 1, Op2 == 2, so the dispatched instantiation computes
+    // 2*1 + 2 + 5 == 9.
+    EXPECT_EQ(two1plus2plus5, 9);
+}
+
+} // anonymous namespace
+} // namespace gmx
add_library(gmx_objlib OBJECT ${GMX_MAIN_SOURCES})
target_link_libraries(gmx_objlib PRIVATE legacy_api)
target_include_directories(gmx_objlib SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
+ target_include_directories(gmx_objlib SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
add_executable(gmx
$<TARGET_OBJECTS:gmx_objlib>
$<TARGET_OBJECTS:mdrun_objlib>
#include "gromacs/commandline/pargs.h"
#include "gromacs/domdec/options.h"
#include "gromacs/fileio/gmxfio.h"
+#include "gromacs/hardware/detecthardware.h"
#include "gromacs/mdrun/legacymdrunoptions.h"
#include "gromacs/mdrun/runner.h"
#include "gromacs/mdrun/simulationcontext.h"
#include "gromacs/mdrunutility/logging.h"
#include "gromacs/mdrunutility/multisim.h"
#include "gromacs/utility/arrayref.h"
-#include "gromacs/utility/smalloc.h"
+#include "gromacs/utility/basenetwork.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
#include "mdrun_main.h"
namespace gmx
{
-//! Implements C-style main function for mdrun
int gmx_mdrun(int argc, char* argv[])
+{
+ // Set up the communicator, where possible (see docs for
+ // SimulationContext).
+ MPI_Comm communicator = GMX_LIB_MPI ? MPI_COMM_WORLD : MPI_COMM_NULL;
+ PhysicalNodeCommunicator physicalNodeCommunicator(communicator, gmx_physicalnode_id_hash());
+ std::unique_ptr<gmx_hw_info_t> hwinfo = gmx_detect_hardware(physicalNodeCommunicator);
+ return gmx_mdrun(communicator, *hwinfo, argc, argv);
+}
+
+int gmx_mdrun(MPI_Comm communicator, const gmx_hw_info_t& hwinfo, int argc, char* argv[])
{
auto mdModules = std::make_unique<MDModules>();
ArrayRef<const std::string> multiSimDirectoryNames =
opt2fnsIfOptionSet("-multidir", ssize(options.filenames), options.filenames.data());
- // Set up the communicator, where possible (see docs for
- // SimulationContext).
- MPI_Comm communicator = GMX_LIB_MPI ? MPI_COMM_WORLD : MPI_COMM_NULL;
// The SimulationContext is necessary with gmxapi so that
// resources owned by the client code can have suitable
// lifetime. The gmx wrapper binary uses the same infrastructure,
*/
auto builder = MdrunnerBuilder(std::move(mdModules),
compat::not_null<SimulationContext*>(&simulationContext));
+ builder.addHardwareDetectionResult(&hwinfo);
builder.addSimulationMethod(options.mdrunOptions, options.pforce, startingBehavior);
builder.addDomainDecomposition(options.domdecOptions);
// \todo pass by value
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2013,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2013,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
+/*! \internal \file
+ *
+ * \brief This file declares C-style entrypoints for mdrun
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ *
+ * \ingroup module_mdrun
+ */
#ifndef GMX_PROGRAMS_MDRUN_MDRUN_H
#define GMX_PROGRAMS_MDRUN_MDRUN_H
+#include "gromacs/utility/gmxmpi.h"
+
+struct gmx_hw_info_t;
+
namespace gmx
{
+/*! \brief Implements C-style main function for mdrun
+ *
+ * This implementation detects hardware itself, as suits
+ * the gmx wrapper binary.
+ *
+ * \param[in] argc Number of C-style command-line arguments
+ * \param[in] argv C-style command-line argument strings
+ */
int gmx_mdrun(int argc, char* argv[]);
+/*! \brief Implements C-style main function for mdrun
+ *
+ * This implementation facilitates reuse of infrastructure. This
+ * includes the information about the hardware detected across the
+ * given \c communicator. That suits e.g. efficient implementation of
+ * test fixtures.
+ *
+ * \param[in] communicator The communicator to use for the simulation
+ * \param[in] hwinfo Describes the hardware detected on the physical nodes of the communicator
+ * \param[in] argc Number of C-style command-line arguments
+ * \param[in] argv C-style command-line argument strings
+ *
+ * \todo Progress on https://gitlab.com/gromacs/gromacs/-/issues/3774
+ * will remove the need of test binaries to call gmx_mdrun in a way
+ * that is different from the command-line and gmxapi.
+ */
+int gmx_mdrun(MPI_Comm communicator, const gmx_hw_info_t& hwinfo, int argc, char* argv[]);
+
} // namespace gmx
#endif
options->addOption(BooleanOption("cycles")
.store(&benchmarkOptions_.cyclesPerPair)
.description("Report cycles/pair instead of pairs/cycle"));
+ options->addOption(
+ BooleanOption("time").store(&benchmarkOptions_.reportTime).description("Report micro-seconds instead of cycles"));
+ options->addOption(FileNameOption("o")
+ .filetype(eftCsv)
+ .outputFile()
+ .store(&benchmarkOptions_.outputFile)
+ .defaultBasename("nonbonded-benchmark")
+ .description("Also output results in csv format"));
}
void NonbondedBenchmark::optionsFinished()
#include <cstdio>
+#include <utility>
+
#include "gromacs/gmxana/gmx_ana.h"
#include "gromacs/gmxpreprocess/grompp.h"
#include "gromacs/hardware/detecthardware.h"
+#include "gromacs/hardware/hw_info.h"
#include "gromacs/options/basicoptions.h"
#include "gromacs/options/ioptionscontainer.h"
#include "gromacs/tools/convert_tpr.h"
#include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/physicalnodecommunicator.h"
#include "gromacs/utility/textwriter.h"
#include "programs/mdrun/mdrun_main.h"
{
#if GMX_LIB_MPI
GMX_RELEASE_ASSERT(gmx_mpi_initialized(), "MPI system not initialized for mdrun tests");
+
+ // It would be better to also detect this in a thread-MPI build,
+ // but there is no way to do that currently, and it is also not a
+ // problem for such a build. Any code based on such an invalid
+ // test fixture will be found in CI testing, however.
+ GMX_RELEASE_ASSERT(MdrunTestFixtureBase::communicator_ != MPI_COMM_NULL,
+ "SimulationRunner may only be used from a test fixture that inherits from "
+ "MdrunTestFixtureBase");
#endif
}
// Make sure rank zero has written the .tpr file before other
// ranks try to read it. Thread-MPI and serial do this just fine
// on their own.
- MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Barrier(MdrunTestFixtureBase::communicator_);
#endif
return returnValue;
}
caller.addOption("-ntomp", g_numOpenMPThreads);
#endif
- return gmx_mdrun(caller.argc(), caller.argv());
+ return gmx_mdrun(MdrunTestFixtureBase::communicator_, *MdrunTestFixtureBase::hwinfo_,
+ caller.argc(), caller.argv());
}
int SimulationRunner::callMdrun()
// ====
+// static
+MPI_Comm MdrunTestFixtureBase::communicator_ = MPI_COMM_NULL;
+// static
+std::unique_ptr<gmx_hw_info_t> MdrunTestFixtureBase::hwinfo_;
+
+// static
+// Per-test-case setup: chooses the communicator for the whole test case and
+// runs hardware detection once over it, so every test in the case can re-use
+// the (expensive) detection result instead of repeating it.
+void MdrunTestFixtureBase::SetUpTestCase()
+{
+    communicator_ = MPI_COMM_WORLD;
+    auto newHwinfo =
+            gmx_detect_hardware(PhysicalNodeCommunicator{ communicator_, gmx_physicalnode_id_hash() });
+    // Transfer ownership of the detection result into the static member.
+    std::swap(hwinfo_, newHwinfo);
+}
+
+// static
+// Per-test-case tear down: releases the shared hardware-detection result.
+void MdrunTestFixtureBase::TearDownTestCase()
+{
+    hwinfo_.reset(nullptr);
+}
+
MdrunTestFixtureBase::MdrunTestFixtureBase()
{
#if GMX_LIB_MPI
{
#if GMX_LIB_MPI
// fileManager_ should only clean up after all the ranks are done.
- MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Barrier(MdrunTestFixtureBase::communicator_);
#endif
}
#include <gtest/gtest.h>
#include "gromacs/utility/classhelpers.h"
+#include "gromacs/utility/gmxmpi.h"
#include "testutils/cmdlinetest.h"
#include "testutils/testfilemanager.h"
+struct gmx_hw_info_t;
+
namespace gmx
{
namespace test
* \brief Helper object for running grompp and mdrun in
* integration tests of mdrun functionality
*
- * Objects of this class are intended to be owned by
- * IntegrationTestFixture objects, and an IntegrationTestFixture
- * object might own more than one SimulationRunner.
+ * Objects of this class must be owned by objects descended from
+ * MdrunTestFixtureBase, which sets up necessary infrastructure for
+ * it. Such an object may own more than one SimulationRunner.
*
* The setup phase creates various temporary files for input and
* output that are common for mdrun tests, using the file manager
std::string mdpInputContents_;
private:
+ //! The file manager used to manage I/O
TestFileManager& fileManager_;
GMX_DISALLOW_COPY_AND_ASSIGN(SimulationRunner);
* \brief Declares test fixture base class for
* integration tests of mdrun functionality
*
- * Derived fixture classes (or individual test cases) that might have
- * specific requirements should assert that behaviour, rather than
- * hard-code the requirements. A test that (for example) can't run
- * with more than one thread should report that as a diagnostic, so the
- * person running the test (or designing the test harness) can get
- * feedback on what tests need what conditions without having to read
- * the code of lots of tests.
- *
- * Specifying the execution context (such as numbers of threads and
- * processors) is normally sensible to specify from the test harness
- * (i.e. when CMake/CTest/the user runs a test executable), because
- * only there is information about the hardware available. The default
- * values for such context provided in test fixtures for mdrun should
- * mirror the defaults for mdrun, but currently mdrun.c hard-codes
- * those in a gmx_hw_opt_t.
- *
- * Any method in this class may throw std::bad_alloc if out of memory.
+ * Heavyweight resources are set up here and shared
+ * across all tests in the test case fixture, e.g.
+ * the MPI communicator for the tests and the hardware
+ * detected that is available to it.
*
* \ingroup module_mdrun_integration_tests
*/
class MdrunTestFixtureBase : public ::testing::Test
{
public:
+ //! Per-test-case setup for lengthy processes that need run only once.
+ static void SetUpTestCase();
+ //! Per-test-case tear down
+ static void TearDownTestCase();
+
MdrunTestFixtureBase();
~MdrunTestFixtureBase() override;
+
+ //! Communicator over which the test fixture works
+ static MPI_Comm communicator_;
+ /*! \brief Hardware information object
+ *
+ * Detected within \c communicator_ and available to re-use
+ * over all tests in the test case of this text fixture. */
+ static std::unique_ptr<gmx_hw_info_t> hwinfo_;
};
/*! \internal
*
* \ingroup module_mdrun_integration_tests
*/
-class MdrunTestFixture : public ::testing::Test
+class MdrunTestFixture : public MdrunTestFixtureBase
{
public:
MdrunTestFixture();
*
* \ingroup module_mdrun_integration_tests
*/
-class MultiSimTest : public ::testing::Test, public ::testing::WithParamInterface<const char*>
+class MultiSimTest : public MdrunTestFixtureBase, public ::testing::WithParamInterface<const char*>
{
public:
MultiSimTest();
#include "gromacs/trajectory/energyframe.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/loggerbuilder.h"
#include "gromacs/utility/physicalnodecommunicator.h"
#include "gromacs/utility/stringutil.h"
EXPECT_NONFATAL_FAILURE(rootChecker.checkUnusedEntries(), ""); // skip checks on other ranks
}
- auto hardwareInfo_ = gmx_detect_hardware(
- MDLogger{}, PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()));
+ auto hardwareInfo_ =
+ gmx_detect_hardware(PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()));
for (const auto& mode : runModes)
{
if (NOT GMX_CLANG_CUDA)
gmx_cuda_add_library(testutils ${TESTUTILS_SOURCES})
else()
+ set_source_files_properties(test_device.cpp PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+ gmx_compile_cuda_file_with_clang(test_device.cpp)
add_library(testutils STATIC ${TESTUTILS_SOURCES})
+ target_link_libraries(testutils PRIVATE ${GMX_CUDA_CLANG_LINK_LIBS})
endif()
target_link_libraries(testutils PRIVATE ${CUDA_CUFFT_LIBRARIES})
else()
${ARG_GPU_CPP_SOURCE_FILES})
set_source_files_properties(${ARG_GPU_CPP_SOURCE_FILES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
gmx_compile_cuda_file_with_clang(${ARG_CUDA_CU_SOURCE_FILES})
+ gmx_compile_cuda_file_with_clang(${ARG_GPU_CPP_SOURCE_FILES})
if(ARG_CUDA_CU_SOURCE_FILES OR ARG_GPU_CPP_SOURCE_FILES)
target_link_libraries(${EXENAME} PRIVATE ${GMX_EXTRA_LIBRARIES})
endif()
#include "gromacs/hardware/hw_info.h"
#include "gromacs/utility/basenetwork.h"
#include "gromacs/utility/exceptions.h"
-#include "gromacs/utility/loggerbuilder.h"
#include "gromacs/utility/physicalnodecommunicator.h"
namespace gmx
getTestHardwareEnvironment();
}
-//! Simple hardware initialization
-static gmx_hw_info_t* hardwareInit()
+// Detect the available hardware on this physical node once, when the test
+// environment is constructed; ownership is held by the unique_ptr member.
+TestHardwareEnvironment::TestHardwareEnvironment() :
+    hardwareInfo_(gmx_detect_hardware(PhysicalNodeCommunicator{ MPI_COMM_WORLD, gmx_physicalnode_id_hash() }))
{
-    PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash());
-    return gmx_detect_hardware(MDLogger{}, physicalNodeComm);
}
void TestHardwareEnvironment::SetUp()
{
testDeviceList_.clear();
- hardwareInfo_ = hardwareInit();
// Constructing contexts for all compatible GPUs - will be empty on non-GPU builds
for (const DeviceInformation& compatibleDeviceInfo : getCompatibleDevices(hardwareInfo_->deviceInfoList))
{
void TestHardwareEnvironment::TearDown()
{
testDeviceList_.clear();
+ /* In OneAPI 2021.1-beta9 and beta10, there is a bug that cause a
+ * segfault when a sycl::device is destructed too late. So, we
+ * explicitly destroy device handles here by resetting
+ * hardwareInfo_, which does no harm to anything else. */
+ hardwareInfo_.reset(nullptr);
}
} // namespace test
*/
#include <map>
+#include <memory>
#include <vector>
#include <gtest/gtest.h>
{
private:
//! General hardware info
- gmx_hw_info_t* hardwareInfo_;
+ std::unique_ptr<gmx_hw_info_t> hardwareInfo_;
//! Storage of hardware contexts
std::vector<std::unique_ptr<TestDevice>> testDeviceList_;
public:
+ TestHardwareEnvironment();
//! This is called by GTest framework once to query the hardware
void SetUp() override;
//! This is called by GTest framework once release the hardware
{
return testDeviceList_;
}
+ //! Whether the available hardware has any compatible devices
bool hasCompatibleDevices() const { return !testDeviceList_.empty(); }
//! Get available hardware information.
- const gmx_hw_info_t* hwinfo() const { return hardwareInfo_; }
+ const gmx_hw_info_t* hwinfo() const { return hardwareInfo_.get(); }
};
//! Get the test environment