--- /dev/null
+include:
+ - local: '/admin/ci-templates/.include-template.yml'
+
+.build-extends-template:
+ extends:
+ - .build-template
+ - .build-cache-template
+ - .variables-template
+ - .tags-template
+
+.test-extends-template:
+ extends:
+ - .test-base-template
+ - .test-cache-template
+ - .test-script-template
+ - .variables-template
+ - .tags-template
+
+before_script:
+ - mkdir -p ccache
+ - export CCACHE_BASEDIR=${PWD}
+ - export CCACHE_DIR=${PWD}/ccache
+
+# CI is organized into stages so that if early stages fail
+# the later ones are not run.
+stages:
+ # Preparation for building and testing (getting regressiontest repo right now)
+ - pre-build
+ # Build GROMACS in multiple configurations
+ - build
+ # Test each configuration, perhaps in multiple ways
+ - test
+ # Linting, docs, static analysis, coverage, code analysis...
+ - post-test
+
+.build-gcc:
+ extends:
+ - .build-extends-template
+ - .build-gcc-template
+
+.test-gcc:
+ extends:
+ - .test-extends-template
+ - .test-gcc-template
+
+.build-gcc-mpi:
+ extends:
+ - .build-gcc
+ - .build-mpi-template
+
+.build-clang:
+ extends:
+ - .build-extends-template
+ - .clang-before-script-template
+ - .build-clang-template
+
+.test-clang:
+ extends:
+ - .test-extends-template
+ - .clang-before-script-template
+ - .test-clang-template
+
+.build-docs:
+ extends:
+ - .build-extends-template
+ - .build-docs-template
+ - .build-clang-template
+
+.build-cuda-gcc:
+ extends:
+ - .build-gcc
+ - .build-cuda-template
+
+prepare-regressiontests:
+ extends: .regressiontests-template
+
+
+build-gcc-7:
+ extends: .build-gcc
+
+ variables:
+ COMPILER_MAJOR_VERSION: 7
+
+test-gcc-7:
+ extends: .test-gcc
+
+ dependencies:
+ - build-gcc-7
+ - prepare-regressiontests
+
+##build-gcc-7-mpi:
+## extends: .build-gcc-mpi
+##
+## variables:
+## COMPILER_MAJOR_VERSION: 7
+##
+build-gcc-7-double:
+ extends: .build-gcc
+
+ variables:
+ COMPILER_MAJOR_VERSION: 7
+ CMAKE_PRECISION_OPTIONS: -DGMX_DOUBLE=ON
+
+test-gcc-7-double:
+ allow_failure: true
+ extends: .test-gcc
+
+ dependencies:
+ - build-gcc-7-double
+ - prepare-regressiontests
+
+
+build-gcc-8:
+ extends: .build-gcc
+
+ variables:
+ COMPILER_MAJOR_VERSION: 8
+
+test-gcc-8:
+ extends: .test-gcc
+
+ dependencies:
+ - build-gcc-8
+ - prepare-regressiontests
+
+build-gcc-7-simd-sse41:
+ extends: .build-gcc
+ variables:
+ COMPILER_MAJOR_VERSION: 7
+ CMAKE_SIMD_OPTIONS: "-DGMX_SIMD=SSE4.1"
+
+test-gcc-7-simd-sse41:
+ extends: .test-gcc
+
+ dependencies:
+ - build-gcc-7-simd-sse41
+ - prepare-regressiontests
+
+##build-gcc-8-cuda-10:
+## extends: .build-cuda-gcc
+##
+## variables:
+## COMPILER_MAJOR_VERSION: 8
+## CUDA_BASE_VERSION: "10.1"
+
+build-clang-7:
+ extends: .build-clang
+
+ variables:
+ COMPILER_MAJOR_VERSION: 7
+
+test-clang-7:
+ extends: .test-clang
+
+ dependencies:
+ - build-clang-7
+ - prepare-regressiontests
+
+# For version of clang before 7, we need to always provide the full version
+# Use something like this instead in the download script:
+# APT_COMPILER_VERSION=$(($COMPILER_MAJOR_VERSION < 7 ? "${COMPILER_MAJOR_VERSION}.0" : $COMPILER_MAJOR_VERSION))
+build-clang-6:
+ extends: .build-clang
+
+ variables:
+ COMPILER_MAJOR_VERSION: "6.0"
+
+test-clang-6:
+ extends: .test-clang
+
+ dependencies:
+ - build-clang-6
+ - prepare-regressiontests
+
+# TODO turn on openmp when updating master head
+# TODO needs to be set to not allow failure
+build-clang-tidy:
+ allow_failure: true
+ extends: .build-clang
+ stage: post-test
+ dependencies: []
+ variables:
+ BUILD_DIR: build-clang-tidy
+ COMPILER_MAJOR_VERSION: 7
+ CMAKE_EXTRA_OPTIONS: -DCLANG_TIDY=clang-tidy-$COMPILER_MAJOR_VERSION -DCMAKE_BUILD_TYPE=Reference -DGMX_CLANG_TIDY=ON -DGMX_COMPILER_WARNINGS=ON -DGMX_OPENMP=off
+
+build-docs:
+ extends: .build-docs
+ variables:
+ BUILD_DIR: build-docs
+ CMAKE_EXTRA_OPTIONS: -DGMX_BUILD_HELP=on -DGMX_BUILD_MANUAL=on -DCMAKE_BUILD_TYPE=Debug -DGMX_SIMD=None -DGMX_GPU=off
+
+run-check-source:
+ extends: .build-docs
+ stage: test
+ dependencies:
+ - build-docs
+ variables:
+ BUILD_DIR: build-docs
+ script:
+ - cd $BUILD_DIR
+ - cmake --build . --target check-source
+ artifacts:
+ name: docs-artifacts-$CI_COMMIT_REF_SLUG
+ when: always
+ expire_in: 1 week
+ paths:
+ - $BUILD_DIR/docs/doxygen/doxygen-xml.log
+ - $BUILD_DIR/docs/doxygen/check-source.log
+
+# The manual build is built separately so that errors in converting
+# Sphinx to LaTeX and compiling can always be found in the
+# all-output.txt file, while avoiding many thousands of lines of spam
+# from pdflatex for normal builds. This does reduce the available
+# parallel utilization, and so increases the build time.
+#
+# TODO why are the doxygen and sphinx log files disappearing
+# TODO use a feature from gitlab runner instead of using pipefail to get timings for the job
+webpage:
+ extends: .build-docs
+ stage: post-test
+ dependencies:
+ - build-docs
+ # NOTE(review): the dependency must use the job's actual name,
+ # "run-check-source" (defined above), not "check-source" —
+ # GitLab fails the pipeline on an undefined dependency.
+ - run-check-source
+ variables:
+ BUILD_DIR: build-docs
+ before_script:
+ - apt-get -yqq install moreutils
+ script:
+ - cd $BUILD_DIR
+ - set -o pipefail && cmake --build . --target manual 2>&1 > docs/manual/all-output.txt | ts "%Y/%m/%d %H:%M:%S"
+ - set -o pipefail && cmake --build . --target webpage | ts "%Y/%m/%d %H:%M:%S"
+ artifacts:
+ name: docs-artifacts-$CI_COMMIT_REF_SLUG
+ when: always
+ expire_in: 1 week
+ paths:
+ - $BUILD_DIR/docs/html
+ - $BUILD_DIR/docs/manual/gromacs.log
+ - $BUILD_DIR/docs/manual/all-output.txt
+ - $BUILD_DIR/docs/doxygen/doxygen-user.log
+ - $BUILD_DIR/docs/doxygen/doxygen-lib.log
+ - $BUILD_DIR/docs/doxygen/doxygen-full.log
+ - $BUILD_DIR/docs/sphinx-html.log
+
+# TODO do this only in release pipeline
+#linkchecker:
+# extends: .build-docs
+# stage: post-test
+# when: manual
+# dependencies:
+# - webpage
+# variables:
+# BUILD_DIR: build-docs
+# script:
+# - cd $BUILD_DIR
+# - linkchecker docs/html/index.html -f ../docs/linkcheckerrc -Fxml
+# artifacts:
+# reports:
+# junit: $BUILD_DIR/docs/linkchecker-out.xml
+
--- /dev/null
+.build-docs-template:
+ # Built by admin/dockerfiles/ci-docs
+ # TODO this should be organized more like the current documentation.py script
+ image: gromacs/gromacs:ci-docs
+ variables:
+ KUBERNETES_CPU_REQUEST: 4
+ CMAKE_COMPILER_SCRIPT: ""
+ CMAKE_EXTRA_OPTIONS: ""
+
+ script:
+ - echo $BUILD_DIR
+ - echo $CMAKE_COMPILER_SCRIPT
+ - echo $CMAKE_EXTRA_OPTIONS
+ - mkdir -p $BUILD_DIR
+ - cd $BUILD_DIR
+ - cmake ..
+ $CMAKE_COMPILER_SCRIPT
+ $CMAKE_EXTRA_OPTIONS
+ - cmake --build . --target gmx -- -j$KUBERNETES_CPU_REQUEST
+ - cd ..
+ artifacts:
+ name: docs-artifacts-$CI_COMMIT_REF_SLUG
+ when: always
+ expire_in: 1 week
+ paths:
+ - $BUILD_DIR
--- /dev/null
+.build-template:
+ # Dockerfiles are from dockerhub, user eriklindahl
+ # image in admin/dockerimages/ci-docs-py27
+ image: biophysics/gcc-gromacs
+ stage: build
+ variables:
+ CMAKE_COMPILER_SCRIPT: ""
+ CMAKE_EXTRA_OPTIONS: ""
+ CMAKE_SIMD_OPTIONS: ""
+ CMAKE_MPI_OPTIONS: ""
+ CMAKE_PRECISION_OPTIONS: ""
+
+ script:
+ - echo $BUILD_DIR
+ - echo $CMAKE_COMPILER_SCRIPT
+ - echo $CMAKE_EXTRA_OPTIONS
+ - echo $CMAKE_SIMD_OPTIONS
+ - echo $CMAKE_MPI_OPTIONS
+ - echo $CMAKE_PRECISION_OPTIONS
+ - echo $INSTALL_DIR
+ - mkdir -p $BUILD_DIR
+ - cd $BUILD_DIR
+ - cmake ..
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+ $CMAKE_COMPILER_SCRIPT
+ $CMAKE_EXTRA_OPTIONS
+ $CMAKE_SIMD_OPTIONS
+ $CMAKE_MPI_OPTIONS
+ $CMAKE_PRECISION_OPTIONS
+ -DCMAKE_INSTALL_PREFIX=../$INSTALL_DIR -DGMX_COMPILER_WARNINGS=ON
+ - cmake --build . -- -j$KUBERNETES_CPU_REQUEST
+ - cmake --build . --target tests -- -j$KUBERNETES_CPU_REQUEST
+ - cmake --build . --target install
+ - cd ..
+ - tar czf gmx-build.tar.gz $BUILD_DIR/
+ - tar czf gmx-install.tar.gz $INSTALL_DIR/
+ artifacts:
+ paths:
+ - gmx-build.tar.gz
+ - gmx-install.tar.gz
--- /dev/null
+.build-cache-template:
+ variables:
+ BUILD_DIR: build
+ cache:
+ key: "$CI_JOB_STAGE-$CI_COMMIT_REF_SLUG"
+ paths:
+ - ccache/
+ - $BUILD_DIR/CMakeCache.txt
+
--- /dev/null
+.test-cache-template:
+ cache:
+ # Read but don't update the cache
+ policy: pull
+ paths:
+ - ccache/
--- /dev/null
+.clang-before-script-template:
+ before_script:
+ - time apt-get -qq update
+ - DEBIAN_FRONTEND=noninteractive apt-get -y -q=2 --no-install-suggests --no-install-recommends install
+ apt-utils wget software-properties-common gpg-agent
+ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
+ - apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-$COMPILER_MAJOR_VERSION main"
+ - time apt-get -qq update
+ - time apt-get -qqy --no-install-suggests --no-install-recommends install
+ build-essential cmake git ninja-build ccache
+ clang++-$COMPILER_MAJOR_VERSION clang-tools-$COMPILER_MAJOR_VERSION
+ clang-tidy-$COMPILER_MAJOR_VERSION
+ libfftw3-dev libhwloc-dev liblapack-dev
+ - mkdir -p ccache
+ - export CCACHE_BASEDIR=${PWD}
+ - export CCACHE_DIR=${PWD}/ccache
+
--- /dev/null
+.build-clang-template:
+ variables:
+ DEBIAN_FRONTEND: noninteractive
+ COMPILER_MAJOR_VERSION: "6.0"
+ BUILD_DIR: build-clang-$COMPILER_MAJOR_VERSION
+ CMAKE_COMPILER_SCRIPT: -DCMAKE_C_COMPILER=clang-$COMPILER_MAJOR_VERSION -DCMAKE_CXX_COMPILER=clang++-$COMPILER_MAJOR_VERSION
+
--- /dev/null
+.test-clang-template:
+ variables:
+ COMPILER_MAJOR_VERSION: 7
+ BUILD_DIR: build-clang-$COMPILER_MAJOR_VERSION
+ dependencies:
+ - build-clang
--- /dev/null
+.build-cuda-template:
+ variables:
+ CMAKE_PRECISION_OPTIONS: "-DGMX_DOUBLE=OFF"
+ CUDA_BASE_VERSION: "10.1"
+ BUILD_DIR: build-cuda-$CUDA_BASE_VERSION
+ CMAKE_EXTRA_OPTIONS: -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ -DGMX_GPU=ON
+
+ image: nvidia/cuda:${CUDA_BASE_VERSION}-devel
+
--- /dev/null
+.build-gcc-template:
+ variables:
+ COMPILER_MAJOR_VERSION: 7
+ BUILD_DIR: build-gcc-$COMPILER_MAJOR_VERSION
+ CMAKE_COMPILER_SCRIPT: -DCMAKE_C_COMPILER=gcc-$COMPILER_MAJOR_VERSION -DCMAKE_CXX_COMPILER=g++-$COMPILER_MAJOR_VERSION
+ EXTRA_INSTALLS: ""
+
+ before_script:
+ - time apt-get -qq update
+ - time apt-get -qqy --no-install-suggests --no-install-recommends install
+ build-essential cmake gcc-$COMPILER_MAJOR_VERSION g++-$COMPILER_MAJOR_VERSION
+ $EXTRA_INSTALLS
+ - mkdir -p ccache
+ - export CCACHE_BASEDIR=${PWD}
+ - export CCACHE_DIR=${PWD}/ccache
+
--- /dev/null
+.test-gcc-template:
+ variables:
+ COMPILER_MAJOR_VERSION: 7
+ BUILD_DIR: build-gcc-$COMPILER_MAJOR_VERSION
+ dependencies:
+ - build-gcc
--- /dev/null
+include:
+ - local: '/admin/ci-templates/.cache-build-template.yml'
+ - local: '/admin/ci-templates/.cache-test-template.yml'
+ - local: '/admin/ci-templates/.variables-template.yml'
+ - local: '/admin/ci-templates/.build-template.yml'
+ - local: '/admin/ci-templates/.test-runner-template.yml'
+ - local: '/admin/ci-templates/.gcc-build-template.yml'
+ - local: '/admin/ci-templates/.gcc-test-template.yml'
+ - local: '/admin/ci-templates/.clang-build-template.yml'
+ - local: '/admin/ci-templates/.clang-test-template.yml'
+ - local: '/admin/ci-templates/.clang-before-script.yml'
+ - local: '/admin/ci-templates/.tags-template.yml'
+ - local: '/admin/ci-templates/.script-template.yml'
+ - local: '/admin/ci-templates/.build-docs-template.yml'
+ - local: '/admin/ci-templates/.cuda-gcc-build-template.yml'
+ - local: '/admin/ci-templates/.mpi-build-template.yml'
+ - local: '/admin/ci-templates/.regressiontest-template.yml'
--- /dev/null
+.build-mpi-template:
+ variables:
+ CMAKE_MPI_OPTIONS: "-DGMX_MPI=ON"
+ BUILD_DIR: mpi-$BUILD_DIR
+ EXTRA_INSTALLS: libopenmpi-dev
--- /dev/null
+.regressiontests-template:
+ # TODO Where is this Dockerfile? Combine it with ci-basic-dependencies
+ # image in admin/dockerimages/ci-docs-py27
+ image: biophysics/gcc-gromacs
+ stage: pre-build
+
+ script:
+ - git clone https://gerrit.gromacs.org/regressiontests.git
+ - cd regressiontests
+ - git checkout master
+ - cd ..
+ - tar czf gmx-regressiontests.tar.gz regressiontests
+ artifacts:
+ paths:
+ - gmx-regressiontests.tar.gz
+ # NOTE(review): gmx-install.tar.gz is not produced by this job (it only
+ # creates gmx-regressiontests.tar.gz), so it was removed from the
+ # artifact paths to avoid a missing-artifact warning.
--- /dev/null
+.test-script-template:
+ script:
+ - tar xzf gmx-build.tar.gz
+ - tar xzf gmx-install.tar.gz
+ - tar xzf gmx-regressiontests.tar.gz
+ - cd $BUILD_DIR
+ - ctest -D ExperimentalTest --output-on-failure
+ - xsltproc scripts/CTest2JUnit.xsl Testing/`head -n 1 < Testing/TAG`/Test.xml > JUnitTestResults.xml
+ - cd ..
+ - source $INSTALL_DIR/bin/GMXRC
+ artifacts:
+ reports:
+ junit: $BUILD_DIR/JUnitTestResults.xml
+ # TODO Why is this file in junit and paths?
+ # NOTE(review): removed a duplicate "artifacts:" key here — YAML forbids
+ # duplicate mapping keys and most parsers silently keep only the last
+ # value, which would have discarded the junit report above. The
+ # paths/when/expire_in entries below now belong to the single
+ # artifacts block.
+ paths:
+ - $BUILD_DIR/JUnitTestResults.xml
+ when: always
+ expire_in: 1 week
--- /dev/null
+.tags-template:
+ tags:
+ - k8s-scilifelab
--- /dev/null
+.test-base-template:
+ variables:
+ BUILD_DIR: build
+ image: biophysics/gcc-gromacs
+ stage: test
+ dependencies:
+ - build
+
--- /dev/null
+.variables-template:
+ variables:
+ KUBERNETES_CPU_LIMIT: 16
+ KUBERNETES_MEMORY_LIMIT: 32Gi
+ CACHE_FALLBACK_KEY: master
+ BUILD_DIR: build
+ INSTALL_DIR: install
+
if [ -z "$CLANG_FORMAT" ]
then
echo "Please set the path to clang-format using the git hook"
- echo "git config hooks.clang_formatpath /path/to/clang-format"
+ echo "git config hooks.clangformatpath /path/to/clang-format"
echo "or by setting an environment variable, e.g."
echo "CLANG_FORMAT=/path/to/clang-format"
echo "See docs/dev-manual/code-formatting.rst for how to get clang-format."
--- /dev/null
+#!/bin/bash
+
+if [ -z "$*" ]
+then
+ echo "usage: `basename "$0"` name-of-target"
+ exit
+fi
+
+docker login
+# Quote expansions so a target name with unusual characters cannot be
+# word-split or glob-expanded by the shell.
+docker build "$1" --target "$1" -t "gromacs/gromacs:$1"
+docker push "gromacs/gromacs:$1"
--- /dev/null
+# Make a base container useful for all others
+
+FROM ubuntu:18.04 as base
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update
+
+# Make a container that can build a static Doxygen 1.8.5 that other
+# containers will be able to use.
+
+FROM base as doxygen-builder
+RUN apt-get install -y \
+ gcc \
+ build-essential \
+ m4 \
+ bison \
+ wget
+WORKDIR /tmp
+RUN wget https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/flex/2.5.35-10ubuntu3/flex_2.5.35.orig.tar.gz && \
+ tar xf flex_2.5.35.orig.tar.gz && \
+ cd flex-2.5.35 && \
+ ./configure --prefix=/tmp/install-of-flex --disable-shared && \
+ make -j && make install && cd .. && rm -rf flex*
+RUN wget https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/doxygen/1.8.5-1/doxygen_1.8.5.orig.tar.gz && \
+ tar xf doxygen_1.8.5.orig.tar.gz && \
+ cd doxygen-1.8.5 && \
+ ./configure --flex /tmp/install-of-flex/bin/flex --static && \
+ make -j && make install && cd .. && rm -rf doxygen*
+
+# Make an image that has the basic dependencies for building GROMACS.
+
+# TODO Remove git dependency?
+
+FROM base as ci-basic-dependencies
+WORKDIR /tmp
+RUN apt-get install -y \
+ cmake \
+ git \
+ ninja-build \
+ ccache
+
+# Make an image that has the dependencies for building GROMACS documentation.
+
+# The ImageMagick package from apt has highly secure settings by
+# default, suitable for use behind a webserver, which we don't
+# need. So we use sed to remove those.
+
+# Some optional GROMACS dependencies are obtained from the
+# distribution, e.g. fftw3, hwloc, blas and lapack so that the build
+# is as fast as possible.
+
+FROM ci-basic-dependencies as ci-docs
+WORKDIR /tmp
+COPY --from=doxygen-builder /usr/local/bin/* /usr/local/bin/
+RUN apt-get install -y \
+ texlive-latex-base \
+ texlive-fonts-recommended \
+ texlive-fonts-extra \
+ texlive-latex-extra
+RUN \
+ apt-get install -y \
+ clang \
+ graphviz \
+ imagemagick \
+ linkchecker \
+ mscgen \
+ liblapack-dev \
+ libfftw3-dev \
+ libhwloc-dev \
+ python3-pip && \
+ sed -i \
+ '/\"XPS\"/d;/\"PDF\"/d;/\"PS\"/d;/\"EPS\"/d;/disable ghostscript format types/d' \
+ /etc/ImageMagick-6/policy.xml && \
+ pip3 install sphinx==1.6.1
# GROMACS 2018 3
# GROMACS 2019 4
# GROMACS 2020 5
+# GROMACS 2021 6
# LIBRARY_SOVERSION_MINOR so minor version for the built libraries.
# Should be increased for each release that changes only the implementation.
# In GROMACS, the typical policy is to increase it for each patch version
# The GROMACS convention is that these are the version number of the next
# release that is going to be made from this branch.
-set(GMX_VERSION_MAJOR 2020)
+set(GMX_VERSION_MAJOR 2021)
set(GMX_VERSION_PATCH 0)
# The suffix, on the other hand, is used mainly for betas and release
# candidates, where it signifies the most recent such release from
# this branch; it will be empty before the first such release, as well
# as after the final release is out.
-set(GMX_VERSION_SUFFIX "-beta3")
+set(GMX_VERSION_SUFFIX "")
# Conventionally with libtool, any ABI change must change the major
# version number, the minor version number should change if it's just
# here. The important thing is to minimize the chance of third-party
# code being able to dynamically link with a version of libgromacs
# that might not work.
-set(LIBRARY_SOVERSION_MAJOR 5)
+set(LIBRARY_SOVERSION_MAJOR 6)
set(LIBRARY_SOVERSION_MINOR 0)
set(LIBRARY_VERSION ${LIBRARY_SOVERSION_MAJOR}.${LIBRARY_SOVERSION_MINOR}.0)
endif()
set(REGRESSIONTEST_VERSION "${GMX_VERSION_STRING}")
-set(REGRESSIONTEST_BRANCH "refs/heads/release-2020")
+set(REGRESSIONTEST_BRANCH "refs/heads/master")
# Run the regressiontests packaging job with the correct pakage
# version string, and the release box checked, in order to have it
# build the regressiontests tarball with all the right naming. The
how-to/visualize.rst
install-guide/index.rst
release-notes/index.rst
+ release-notes/2021/major/highlights.rst
+ release-notes/2021/major/features.rst
+ release-notes/2021/major/performance.rst
+ release-notes/2021/major/tools.rst
+ release-notes/2021/major/bugs-fixed.rst
+ release-notes/2021/major/removed-functionality.rst
+ release-notes/2021/major/deprecated-functionality.rst
+ release-notes/2021/major/portability.rst
+ release-notes/2021/major/miscellaneous.rst
release-notes/2020/major/highlights.rst
release-notes/2020/major/features.rst
release-notes/2020/major/performance.rst
version 8.0.* with libstdc++<7 or libc++ is supported. Others might miss tests or give false positives.
It is run automatically on Jenkins for each commit. Many checks have fixes which can automatically be
applied. To run it, the build has to be configured with
- ``cmake -DGMX_CLANG_TIDY=ON -DGMX_OPENMP=no -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=on``.
+ ``cmake -DGMX_CLANG_TIDY=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=on``.
Any ``CMAKE_BUILD_TYPE`` which enables asserts (e.g. ASAN) works. Such a configured build will
run both the compiler as well as clang-tidy when building. The name of the clang-tidy executable is set with
``-DCLANG_TIDY=...``, and the full path to it can be set with ``-DCLANG_TIDY_EXE=...``.
--- /dev/null
+Bugs fixed
+^^^^^^^^^^
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
--- /dev/null
+.. _anticipated-changes:
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
+Changes anticipated to |Gromacs| 2021 functionality
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Functionality deprecated in |Gromacs| 2021
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
--- /dev/null
+New and improved features
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
--- /dev/null
+Highlights
+^^^^^^^^^^
+
+|Gromacs| 2021 was released on INSERT DATE HERE. Patch releases may
+have been made since then, please use the updated versions! Here are
+some highlights of what you can expect, along with more detail in the
+links below!
+
+As always, we've got several useful performance improvements, with or
+without GPUs, all enabled and automated by default. In addition,
+several new features are available for running simulations. We are extremely
+interested in your feedback on how well the new release works on your
+simulations and hardware. The new features are:
+
+* Cool quote autogenerator
+
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
--- /dev/null
+Miscellaneous
+^^^^^^^^^^^^^
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
--- /dev/null
+Performance improvements
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
--- /dev/null
+Portability
+^^^^^^^^^^^
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
--- /dev/null
+Removed functionality
+^^^^^^^^^^^^^^^^^^^^^
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
--- /dev/null
+Improvements to |Gromacs| tools
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. Note to developers!
+ Please use """"""" to underline the individual entries for fixed issues in the subfolders,
+ otherwise the formatting on the webpage is messed up.
+ Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+ a space between the colon and number!
+
functionality supported, whereas patch releases contain only fixes for
issues identified in the corresponding major releases.
-Two versions of |Gromacs| are under active maintenance, the 2020
-series and the 2019 series. In the latter, only highly conservative
+Two versions of |Gromacs| are under active maintenance, the 2021
+series and the 2020 series. In the latter, only highly conservative
fixes will be made, and only to address issues that affect scientific
correctness. Naturally, some of those releases will be made after the
-year 2019 ends, but we keep 2018 in the name so users understand how
+year 2020 ends, but we keep 2020 in the name so users understand how
up to date their version is. Such fixes will also be incorporated into
-the 2020 release series, as appropriate. Around the time the 2021
-release is made, the 2019 series will no longer be maintained.
+the 2021 release series, as appropriate. Around the time the 2022
+release is made, the 2020 series will no longer be maintained.
Where issue numbers are reported in these release notes, more details
can be found at https://redmine.gromacs.org at that issue number.
+|Gromacs| 2021 series
+---------------------
+
+Major release
+^^^^^^^^^^^^^
+
+.. toctree::
+ :maxdepth: 1
+
+ 2021/major/highlights
+ 2021/major/features
+ 2021/major/performance
+ 2021/major/tools
+ 2021/major/bugs-fixed
+ 2021/major/deprecated-functionality
+ 2021/major/removed-functionality
+ 2021/major/portability
+ 2021/major/miscellaneous
+
+
|Gromacs| 2020 series
---------------------
"-Wno-double-promotion")
string(REPLACE " " ";" IGNORED_CLANG_ALL_WARNINGS "${IGNORED_CLANG_ALL_WARNINGS}")
+option(GMX_CLANG_TIDY "Use clang-tidy" OFF)
if (GMX_CLANG_TIDY)
+ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+ elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithAssert")
+ elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
+ elseif("${CMAKE_BUILD_TYPE}" STREQUAL "ASAN")
+ else()
+ message(FATAL_ERROR "Can only use clang-tidy with build type containing asserts: Debug, RelWithAssert, RelWithDebInfo, ASAN.")
+ endif()
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+ mark_as_advanced(CMAKE_EXPORT_COMPILE_COMMANDS)
set(CLANG_TIDY "clang-tidy" CACHE STRING "Name of clang-tidy executable")
find_program(CLANG_TIDY_EXE NAMES "${CLANG_TIDY}"
DOC "Path to clang-tidy executable")
if(NOT CLANG_TIDY_EXE)
message(FATAL_ERROR "clang-tidy not found.")
endif()
+ mark_as_advanced(CLANG_TIDY)
+ mark_as_advanced(CLANG_TIDY_EXE)
endif()
-#####
add_subdirectory(external)
}
}
-void dd_init_bondeds(FILE* fplog,
- gmx_domdec_t* dd,
- const gmx_mtop_t* mtop,
- const gmx_vsite_t* vsite,
- const t_inputrec* ir,
- gmx_bool bBCheck,
- cginfo_mb_t* cginfo_mb)
+void dd_init_bondeds(FILE* fplog,
+ gmx_domdec_t* dd,
+ const gmx_mtop_t& mtop,
+ const gmx_vsite_t* vsite,
+ const t_inputrec* ir,
+ gmx_bool bBCheck,
+ gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
{
gmx_domdec_comm_t* comm;
- dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
+ dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
comm = dd->comm;
bool is1DAnd1PulseDD(const gmx_domdec_t& dd);
/*! \brief Initialize data structures for bonded interactions */
-void dd_init_bondeds(FILE* fplog,
- gmx_domdec_t* dd,
- const gmx_mtop_t* mtop,
- const gmx_vsite_t* vsite,
- const t_inputrec* ir,
- gmx_bool bBCheck,
- cginfo_mb_t* cginfo_mb);
+void dd_init_bondeds(FILE* fplog,
+ gmx_domdec_t* dd,
+ const gmx_mtop_t& mtop,
+ const gmx_vsite_t* vsite,
+ const t_inputrec* ir,
+ gmx_bool bBCheck,
+ gmx::ArrayRef<cginfo_mb_t> cginfo_mb);
/*! \brief Returns whether molecules are always whole, i.e. not broken by PBC */
bool dd_moleculesAreAlwaysWhole(const gmx_domdec_t& dd);
*
* Also stores whether atoms are linked in \p cginfo_mb.
*/
-t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb);
+t_blocka* makeBondedLinks(const gmx_mtop_t& mtop, gmx::ArrayRef<cginfo_mb_t> cginfo_mb);
/*! \brief Calculate the maximum distance involved in 2-body and multi-body bonded interactions */
void dd_bonded_cg_distance(const gmx::MDLogger& mdlog,
}
}
-t_blocka* makeBondedLinks(const gmx_mtop_t* mtop, cginfo_mb_t* cginfo_mb)
+t_blocka* makeBondedLinks(const gmx_mtop_t& mtop, gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
{
t_blocka* link;
cginfo_mb_t* cgi_mb;
*/
reverse_ilist_t ril_intermol;
- if (mtop->bIntermolecularInteractions)
+ if (mtop.bIntermolecularInteractions)
{
t_atoms atoms;
- atoms.nr = mtop->natoms;
+ atoms.nr = mtop.natoms;
atoms.atom = nullptr;
- GMX_RELEASE_ASSERT(mtop->intermolecular_ilist,
+ GMX_RELEASE_ASSERT(mtop.intermolecular_ilist,
"We should have an ilist when intermolecular interactions are on");
- make_reverse_ilist(*mtop->intermolecular_ilist, &atoms, FALSE, FALSE, FALSE, TRUE, &ril_intermol);
+ make_reverse_ilist(*mtop.intermolecular_ilist, &atoms, FALSE, FALSE, FALSE, TRUE, &ril_intermol);
}
snew(link, 1);
- snew(link->index, mtop->natoms + 1);
+ snew(link->index, mtop.natoms + 1);
link->nalloc_a = 0;
link->a = nullptr;
link->index[0] = 0;
int cg_offset = 0;
int ncgi = 0;
- for (size_t mb = 0; mb < mtop->molblock.size(); mb++)
+ for (size_t mb = 0; mb < mtop.molblock.size(); mb++)
{
- const gmx_molblock_t& molb = mtop->molblock[mb];
+ const gmx_molblock_t& molb = mtop.molblock[mb];
if (molb.nmol == 0)
{
continue;
}
- const gmx_moltype_t& molt = mtop->moltype[molb.type];
+ const gmx_moltype_t& molt = mtop.moltype[molb.type];
/* Make a reverse ilist in which the interactions are linked
* to all atoms, not only the first atom as in gmx_reverse_top.
* The constraints are discarded here.
cgi_mb = &cginfo_mb[mb];
int mol;
- for (mol = 0; mol < (mtop->bIntermolecularInteractions ? molb.nmol : 1); mol++)
+ for (mol = 0; mol < (mtop.bIntermolecularInteractions ? molb.nmol : 1); mol++)
{
for (int a = 0; a < molt.atoms.nr; a++)
{
i += nral_rt(ftype);
}
- if (mtop->bIntermolecularInteractions)
+ if (mtop.bIntermolecularInteractions)
{
int i = ril_intermol.index[cg_gl];
while (i < ril_intermol.index[cg_gl + 1])
if (debug)
{
- fprintf(debug, "Of the %d atoms %d are linked via bonded interactions\n", mtop->natoms, ncgi);
+ fprintf(debug, "Of the %d atoms %d are linked via bonded interactions\n", mtop.natoms, ncgi);
}
return link;
{
if (fr != nullptr)
{
- const cginfo_mb_t* cginfo_mb = fr->cginfo_mb;
- gmx::ArrayRef<int> cginfo = fr->cginfo;
+ gmx::ArrayRef<cginfo_mb_t> cginfo_mb = fr->cginfo_mb;
+ gmx::ArrayRef<int> cginfo = fr->cginfo;
for (int cg = cg0; cg < cg1; cg++)
{
const int* recv_i,
gmx::ArrayRef<gmx::RVec> x,
gmx::ArrayRef<const gmx::RVec> recv_vr,
- cginfo_mb_t* cginfo_mb,
+ gmx::ArrayRef<cginfo_mb_t> cginfo_mb,
gmx::ArrayRef<int> cginfo)
{
gmx_domdec_ind_t *ind, *ind_p;
gmx_domdec_comm_t* comm;
gmx_domdec_zones_t* zones;
gmx_domdec_comm_dim_t* cd;
- cginfo_mb_t* cginfo_mb;
gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
dd_corners_t corners;
rvec * normal, *v_d, *v_0 = nullptr, *v_1 = nullptr;
v_1 = ddbox->v[dim1];
}
- zone_cg_range = zones->cg_range;
- cginfo_mb = fr->cginfo_mb;
+ zone_cg_range = zones->cg_range;
+ gmx::ArrayRef<cginfo_mb_t> cginfo_mb = fr->cginfo_mb;
zone_cg_range[0] = 0;
zone_cg_range[1] = dd->ncg_home;
/* We reuse the intBuffer without reacquiring since we are in the same scope */
DDBufferAccess<int>& flagBuffer = moveBuffer;
- const cginfo_mb_t* cginfo_mb = fr->cginfo_mb;
+ gmx::ArrayRef<const cginfo_mb_t> cginfo_mb = fr->cginfo_mb;
/* Temporarily store atoms passed to our rank at the end of the range */
int home_pos_cg = dd->ncg_home;
#include "gromacs/gpu_utils/hostallocator.h"
#include "gromacs/mdtypes/forcerec.h"
+#include "gromacs/utility/arrayref.h"
#include "domdec_internal.h"
void check_screw_box(const matrix box);
/*! \brief Return the charge group information flags for charge group cg */
-static inline int ddcginfo(const cginfo_mb_t* cginfo_mb, int cg)
+static inline int ddcginfo(gmx::ArrayRef<const cginfo_mb_t> cginfo_mb, int cg)
{
- while (cg >= cginfo_mb->cg_end)
+ size_t index = 0;
+ while (cg >= cginfo_mb[index].cg_end)
{
- cginfo_mb++;
+ index++;
}
+ const cginfo_mb_t& cgimb = cginfo_mb[index];
- return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
+ return cgimb.cginfo[(cg - cgimb.cg_start) % cgimb.cg_mod];
};
/*! \brief Returns the number of MD steps for which load has been recorded */
const int* typeA = mdatoms->typeA;
const int* typeB = mdatoms->typeB;
const int ntype = fr->ntype;
- const real* nbfp = fr->nbfp;
+ const real* nbfp = fr->nbfp.data();
const real* nbfp_grid = fr->ljpme_c6grid;
real* Vv = kernel_data->energygrp_vdw;
const real lambda_coul = kernel_data->lambda[efptCOUL];
/* do QMMM first if requested */
if (fr->bQMMM)
{
- enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr);
+ enerd->term[F_EQM] = calculate_QMMM(cr, &forceOutputs->forceWithShiftForces(), fr->qr);
}
/* Call the short range functions all in one go. */
static const bool c_enableGpuPmePpComms =
(getenv("GMX_GPU_PME_PP_COMMS") != nullptr) && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
-static real* mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM)
+static std::vector<real> mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM)
{
- real* nbfp;
- int i, j, k, atnr;
+ std::vector<real> nbfp;
+ int atnr;
atnr = idef->atnr;
if (bBHAM)
{
- snew(nbfp, 3 * atnr * atnr);
- for (i = k = 0; (i < atnr); i++)
+ nbfp.resize(3 * atnr * atnr);
+ int k = 0;
+ for (int i = 0; (i < atnr); i++)
{
- for (j = 0; (j < atnr); j++, k++)
+ for (int j = 0; (j < atnr); j++, k++)
{
BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
}
else
{
- snew(nbfp, 2 * atnr * atnr);
- for (i = k = 0; (i < atnr); i++)
+ nbfp.resize(2 * atnr * atnr);
+ int k = 0;
+ for (int i = 0; (i < atnr); i++)
{
- for (j = 0; (j < atnr); j++, k++)
+ for (int j = 0; (j < atnr); j++, k++)
{
/* nbfp now includes the 6.0/12.0 derivative prefactors */
C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6 * 6.0;
acSETTLE
};
-static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, gmx_bool* bFEP_NonBonded)
+static std::vector<cginfo_mb_t> init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, gmx_bool* bFEP_NonBonded)
{
- cginfo_mb_t* cginfo_mb;
- gmx_bool* type_VDW;
- int* cginfo;
- int* a_con;
-
- snew(cginfo_mb, mtop->molblock.size());
+ gmx_bool* type_VDW;
+ int* a_con;
snew(type_VDW, fr->ntype);
for (int ai = 0; ai < fr->ntype; ai++)
*bFEP_NonBonded = FALSE;
- int a_offset = 0;
+ std::vector<cginfo_mb_t> cginfoPerMolblock;
+ int a_offset = 0;
for (size_t mb = 0; mb < mtop->molblock.size(); mb++)
{
const gmx_molblock_t& molb = mtop->molblock[mb];
}
}
- cginfo_mb[mb].cg_start = a_offset;
- cginfo_mb[mb].cg_end = a_offset + molb.nmol * molt.atoms.nr;
- cginfo_mb[mb].cg_mod = (bId ? 1 : molb.nmol) * molt.atoms.nr;
- snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
- cginfo = cginfo_mb[mb].cginfo;
+ cginfo_mb_t cginfo_mb;
+ cginfo_mb.cg_start = a_offset;
+ cginfo_mb.cg_end = a_offset + molb.nmol * molt.atoms.nr;
+ cginfo_mb.cg_mod = (bId ? 1 : molb.nmol) * molt.atoms.nr;
+ cginfo_mb.cginfo.resize(cginfo_mb.cg_mod);
+ gmx::ArrayRef<int> cginfo = cginfo_mb.cginfo;
/* Set constraints flags for constrained atoms */
snew(a_con, molt.atoms.nr);
sfree(a_con);
+ cginfoPerMolblock.push_back(cginfo_mb);
+
a_offset += molb.nmol * molt.atoms.nr;
}
sfree(type_VDW);
- return cginfo_mb;
+ return cginfoPerMolblock;
}
-static std::vector<int> cginfo_expand(const int nmb, const cginfo_mb_t* cgi_mb)
+static std::vector<int> cginfo_expand(const int nmb, gmx::ArrayRef<const cginfo_mb_t> cgi_mb)
{
const int ncg = cgi_mb[nmb - 1].cg_end;
return cginfo;
}
-static void done_cginfo_mb(cginfo_mb_t* cginfo_mb, int numMolBlocks)
-{
- if (cginfo_mb == nullptr)
- {
- return;
- }
- for (int mb = 0; mb < numMolBlocks; ++mb)
- {
- sfree(cginfo_mb[mb].cginfo);
- }
- sfree(cginfo_mb);
-}
-
/* Sets the sum of charges (squared) and C6 in the system in fr.
* Returns whether the system has a net charge.
*/
fr->shiftForces.resize(SHIFTS);
- if (fr->nbfp == nullptr)
+ if (fr->nbfp.empty())
{
fr->ntype = mtop->ffparams.atnr;
fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
if (ir->eDispCorr != edispcNO)
{
fr->dispersionCorrection = std::make_unique<DispersionCorrection>(
- *mtop, *ir, fr->bBHAM, fr->ntype,
- gmx::arrayRefFromArray(fr->nbfp, fr->ntype * fr->ntype * 2), *fr->ic, tabfn);
+ *mtop, *ir, fr->bBHAM, fr->ntype, fr->nbfp, *fr->ic, tabfn);
fr->dispersionCorrection->print(mdlog);
}
t_forcerec::t_forcerec() = default;
-t_forcerec::~t_forcerec() = default;
-
-/* Frees GPU memory and sets a tMPI node barrier.
- *
- * Note that this function needs to be called even if GPUs are not used
- * in this run because the PME ranks have no knowledge of whether GPUs
- * are used or not, but all ranks need to enter the barrier below.
- * \todo Remove physical node barrier from this function after making sure
- * that it's not needed anymore (with a shared GPU run).
- */
-void free_gpu_resources(t_forcerec* fr,
- const gmx::PhysicalNodeCommunicator& physicalNodeCommunicator,
- const gmx_gpu_info_t& gpu_info)
+t_forcerec::~t_forcerec()
{
- bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->useGpu();
-
- /* stop the GPU profiler (only CUDA) */
- if (gpu_info.n_dev > 0)
- {
- stopGpuProfiler();
- }
-
- if (isPPrankUsingGPU)
- {
- /* Free data in GPU memory and pinned memory before destroying the GPU context */
- fr->nbv.reset();
-
- delete fr->gpuBonded;
- fr->gpuBonded = nullptr;
- }
-
- /* With tMPI we need to wait for all ranks to finish deallocation before
- * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
- * GPU and context.
- *
- * This is not a concern in OpenCL where we use one context per rank which
- * is freed in nbnxn_gpu_free().
- *
- * Note: it is safe to not call the barrier on the ranks which do not use GPU,
- * but it is easier and more futureproof to call it on the whole node.
- */
- if (GMX_THREAD_MPI)
- {
- physicalNodeCommunicator.barrier();
- }
-}
-
-void done_forcerec(t_forcerec* fr, int numMolBlocks)
-{
- if (fr == nullptr)
- {
- // PME-only ranks don't have a forcerec
- return;
- }
- done_cginfo_mb(fr->cginfo_mb, numMolBlocks);
- sfree(fr->nbfp);
- delete fr->ic;
- sfree(fr->shift_vec);
- sfree(fr->ewc_t);
- tear_down_bonded_threading(fr->bondedThreading);
- GMX_RELEASE_ASSERT(fr->gpuBonded == nullptr, "Should have been deleted earlier, when used");
- fr->bondedThreading = nullptr;
- delete fr;
+ /* Note: This code will disappear when types are converted to C++ */
+ sfree(shift_vec);
+ sfree(ewc_t);
+ tear_down_bonded_threading(bondedThreading);
}
class PhysicalNodeCommunicator;
} // namespace gmx
-//! Destroy a forcerec.
-void done_forcerec(t_forcerec* fr, int numMolBlocks);
-
/*! \brief Print the contents of the forcerec to a file
*
* \param[in] fplog The log file to print to
*/
void forcerec_set_excl_load(t_forcerec* fr, const gmx_localtop_t* top);
-void free_gpu_resources(t_forcerec* fr,
- const gmx::PhysicalNodeCommunicator& physicalNodeCommunicator,
- const gmx_gpu_info_t& gpu_info);
-
#endif
freeDeviceBuffer(&d_inverseMasses_);
}
-void LeapFrogCuda::setPbc(const t_pbc* pbc)
-{
- setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);
-}
-
void LeapFrogCuda::set(const t_mdatoms& md, const int numTempScaleValues, const unsigned short* tempScaleGroups)
{
numAtoms_ = md.nr;
LeapFrogCuda(CommandStream commandStream);
~LeapFrogCuda();
- /*! \brief
- * Update PBC data.
- *
- * Converts PBC data from t_pbc into the PbcAiuc format and stores the latter.
- *
- * \param[in] pbc The PBC data in t_pbc format.
- */
- void setPbc(const t_pbc* pbc);
-
/*! \brief Integrate
*
* Integrates the equation of motion using Leap-Frog algorithm.
CommandStream commandStream_;
//! CUDA kernel launch config
KernelLaunchConfig kernelLaunchConfig_;
- //! Periodic boundary data
- PbcAiuc pbcAiuc_;
//! Number of atoms
int numAtoms_;
* using CUDA, including class initialization, data-structures management
* and GPU kernel.
*
- * \note Management of periodic boundary should be unified with SETTLE and
- * removed from here.
* \todo Reconsider naming, i.e. "cuda" suffics should be changed to "gpu".
*
* \author Artem Zhmurov <zhmurov@gmail.com>
float3* d_v,
const real invdt,
const bool computeVirial,
- tensor virialScaled)
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc)
{
ensureNoPendingCudaError("In CUDA version of LINCS");
}
config.stream = commandStream_;
+ kernelParams_.pbcAiuc = pbcAiuc;
+
const auto kernelArgs =
prepareGpuKernelArguments(kernelPtr, config, &kernelParams_, &d_x, &d_xp, &d_v, &invdt);
GpuApiCallBehavior::Sync, nullptr);
}
-void LincsCuda::setPbc(const t_pbc* pbc)
-{
- setPbcAiuc(pbc->ndim_ePBC, pbc->box, &kernelParams_.pbcAiuc);
-}
-
} // namespace gmx
* Applies LINCS to coordinates and velocities, stored on GPU.
* The results are not automatically copied back to the CPU memory.
* Method uses this class data structures which should be updated
- * when needed using set() and setPbc() method.
+ * when needed using set() method.
*
* \param[in] d_x Coordinates before timestep (in GPU memory)
* \param[in,out] d_xp Coordinates after timestep (in GPU memory). The
* multipliers when velocities are updated)
* \param[in] computeVirial If virial should be updated.
* \param[in,out] virialScaled Scaled virial tensor to be updated.
+ * \param[in] pbcAiuc PBC data.
*/
void apply(const float3* d_x,
float3* d_xp,
float3* d_v,
const real invdt,
const bool computeVirial,
- tensor virialScaled);
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc);
/*! \brief
* Update data-structures (e.g. after NB search step).
*/
void set(const t_idef& idef, const t_mdatoms& md);
- /*! \brief
- * Update PBC data.
- *
- * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter.
- *
- * \todo Remove this method. LINCS should not manage PBC.
- *
- * \param[in] pbc The PBC data in t_pbc format.
- */
- void setPbc(const t_pbc* pbc);
-
/*! \brief
* Returns whether the maximum number of coupled constraints is supported
* by the CUDA LINCS code.
#include "gromacs/math/units.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/force.h"
-#include "gromacs/mdlib/forcerec.h"
#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/cstringutil.h"
}
-static void write_gaussian_SH_input(int step, gmx_bool swap, const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm)
+static void write_gaussian_SH_input(int step, gmx_bool swap, const t_QMMMrec* QMMMrec, t_QMrec* qm, t_MMrec* mm)
{
- int i;
- gmx_bool bSA;
- FILE* out;
- t_QMMMrec* QMMMrec;
- QMMMrec = fr->qr;
- bSA = (qm->SAstep > 0);
-
- out = fopen("input.com", "w");
+ int i;
+ bool bSA = (qm->SAstep > 0);
+ FILE* out = fopen("input.com", "w");
/* write the route */
fprintf(out, "%s", "%scr=input\n");
fprintf(out, "%s", "%rwf=input\n");
fclose(out);
} /* write_gaussian_SH_input */
-static void write_gaussian_input(int step, const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm)
+static void write_gaussian_input(int step, const t_QMMMrec* QMMMrec, t_QMrec* qm, t_MMrec* mm)
{
- int i;
- t_QMMMrec* QMMMrec;
- FILE* out;
+ int i;
- QMMMrec = fr->qr;
- out = fopen("input.com", "w");
+ FILE* out = fopen("input.com", "w");
/* write the route */
if (qm->QMmethod >= eQMmethodRHF)
}
}
-real call_gaussian(const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[])
+real call_gaussian(const t_QMMMrec* qmmm, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[])
{
/* normal gaussian jobs */
static int step = 0;
snew(QMgrad, qm->nrQMatoms);
snew(MMgrad, mm->nrMMatoms);
- write_gaussian_input(step, fr, qm, mm);
+ write_gaussian_input(step, qmmm, qm, mm);
do_gaussian(step, exe);
QMener = read_gaussian_output(QMgrad, MMgrad, qm, mm);
/* put the QMMM forces in the force array and to the fshift
} /* call_gaussian */
-real call_gaussian_SH(const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[])
+real call_gaussian_SH(const t_QMMMrec* qmmm, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[])
{
/* a gaussian call routine intended for doing diabatic surface
* "sliding". See the manual for the theoretical background of this
/* if(!step)
* qr->bSA=FALSE;*/
/* temporray set to step + 1, since there is a chk start */
- write_gaussian_SH_input(step, swapped, fr, qm, mm);
+ write_gaussian_SH_input(step, swapped, qmmm, qm, mm);
do_gaussian(step, exe);
QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, qm, mm);
}
if (swap) /* change surface, so do another call */
{
- write_gaussian_SH_input(step, swapped, fr, qm, mm);
+ write_gaussian_SH_input(step, swapped, qmmm, qm, mm);
do_gaussian(step, exe);
QMener = read_gaussian_SH_output(QMgrad, MMgrad, step, qm, mm);
}
/*! \brief
* Call gaussian to do qm calculation.
*
- * \param[in] fr Global forcerec.
- * \param[in] qm QM part of forcerec.
- * \param[in] mm mm part of forcerec.
- * \param[in] f force vector.
+ * \param[in] qmmm QMMM part of forcerec.
+ * \param[in] qm QM part of forcerec.
+ * \param[in] mm mm part of forcerec.
+ * \param[in] f force vector.
* \param[in] fshift shift of force vector.
*/
-real call_gaussian(const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[]);
+real call_gaussian(const t_QMMMrec* qmmm, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[]);
/*! \brief
* Call gaussian SH(?) to do qm calculation.
*
- * \param[in] fr Global forcerec.
- * \param[in] qm QM part of forcerec.
- * \param[in] mm mm part of forcerec.
- * \param[in] f force vector.
+ * \param[in] qmmm QMMM part of forcerec.
+ * \param[in] qm QM part of forcerec.
+ * \param[in] mm mm part of forcerec.
+ * \param[in] f force vector.
* \param[in] fshift shift of force vector.
*/
-real call_gaussian_SH(const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[]);
+real call_gaussian_SH(const t_QMMMrec* qmmm, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[]);
#endif
#include "gromacs/math/units.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/qmmm.h"
-#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
}
-static void write_orca_input(const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm)
+static void write_orca_input(const t_QMMMrec* QMMMrec, t_QMrec* qm, t_MMrec* mm)
{
- int i;
- t_QMMMrec* QMMMrec;
- FILE * out, *pcFile, *addInputFile;
- char * buf, *orcaInput, *addInputFilename, *pcFilename;
-
- QMMMrec = fr->qr;
+ int i;
+ FILE *pcFile, *addInputFile;
+ char *buf, *orcaInput, *addInputFilename, *pcFilename;
/* write the first part of the input-file */
snew(orcaInput, 200);
sprintf(orcaInput, "%s.inp", qm->orca_basename);
- out = fopen(orcaInput, "w");
+ FILE* out = fopen(orcaInput, "w");
snew(addInputFilename, 200);
sprintf(addInputFilename, "%s.ORCAINFO", qm->orca_basename);
fclose(out);
} /* write_orca_input */
-static real read_orca_output(rvec QMgrad[], rvec MMgrad[], const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm)
+static real read_orca_output(rvec QMgrad[], rvec MMgrad[], const t_QMMMrec* QMMMrec, t_QMrec* qm, t_MMrec* mm)
{
- int i, j;
- char buf[300], orca_pcgradFilename[300], orca_engradFilename[300];
- real QMener;
- FILE * pcgrad, *engrad;
- int k;
- t_QMMMrec* QMMMrec;
- QMMMrec = fr->qr;
+ int i, j;
+ char buf[300], orca_pcgradFilename[300], orca_engradFilename[300];
+ real QMener;
+ FILE *pcgrad, *engrad;
+ int k;
/* the energy and gradients for the QM part are stored in the engrad file
* and the gradients for the point charges are stored in the pc file.
}
}
-real call_orca(const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[])
+real call_orca(const t_QMMMrec* qmmm, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[])
{
/* normal orca jobs */
static int step = 0;
snew(QMgrad, qm->nrQMatoms);
snew(MMgrad, mm->nrMMatoms);
- write_orca_input(fr, qm, mm);
+ write_orca_input(qmmm, qm, mm);
do_orca(qm->orca_dir, qm->orca_basename);
- QMener = read_orca_output(QMgrad, MMgrad, fr, qm, mm);
+ QMener = read_orca_output(QMgrad, MMgrad, qmmm, qm, mm);
/* put the QMMM forces in the force array and to the fshift
*/
for (i = 0; i < qm->nrQMatoms; i++)
void init_orca(t_QMrec* qm);
-real call_orca(const t_forcerec* fr, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[]);
+real call_orca(const t_QMMMrec* qmmm, t_QMrec* qm, t_MMrec* mm, rvec f[], rvec fshift[]);
#endif
}
static real call_QMroutine(const t_commrec gmx_unused* cr,
- const t_forcerec gmx_unused* fr,
+ const t_QMMMrec gmx_unused* qmmm,
t_QMrec gmx_unused* qm,
t_MMrec gmx_unused* mm,
rvec gmx_unused f[],
{
if (GMX_QMMM_GAUSSIAN)
{
- return call_gaussian_SH(fr, qm, mm, f, fshift);
+ return call_gaussian_SH(qmmm, qm, mm, f, fshift);
}
else
{
}
else if (GMX_QMMM_GAUSSIAN)
{
- return call_gaussian(fr, qm, mm, f, fshift);
+ return call_gaussian(qmmm, qm, mm, f, fshift);
}
else if (GMX_QMMM_ORCA)
{
- return call_orca(fr, qm, mm, f, fshift);
+ return call_orca(qmmm, qm, mm, f, fshift);
}
else
{
}
} /* update_QMMM_rec */
-real calculate_QMMM(const t_commrec* cr, gmx::ForceWithShiftForces* forceWithShiftForces, const t_forcerec* fr)
+real calculate_QMMM(const t_commrec* cr, gmx::ForceWithShiftForces* forceWithShiftForces, const t_QMMMrec* qr)
{
real QMener = 0.0;
/* a selection for the QM package depending on which is requested
* (Gaussian, GAMESS-UK, MOPAC or ORCA) needs to be implemented here. Now
* it works through defines.... Not so nice yet
*/
- t_QMMMrec* qr;
- t_QMrec * qm, *qm2;
- t_MMrec* mm = nullptr;
- rvec * forces = nullptr, *fshift = nullptr, *forces2 = nullptr,
+ t_QMrec *qm, *qm2;
+ t_MMrec* mm = nullptr;
+ rvec * forces = nullptr, *fshift = nullptr, *forces2 = nullptr,
*fshift2 = nullptr; /* needed for multilayer ONIOM */
int i, j, k;
/* make a local copy the QMMMrec pointer
*/
- qr = fr->qr;
mm = qr->mm;
/* now different procedures are carried out for one layer ONION and
qm = qr->qm[0];
snew(forces, (qm->nrQMatoms + mm->nrMMatoms));
snew(fshift, (qm->nrQMatoms + mm->nrMMatoms));
- QMener = call_QMroutine(cr, fr, qm, mm, forces, fshift);
+ QMener = call_QMroutine(cr, qr, qm, mm, forces, fshift);
for (i = 0; i < qm->nrQMatoms; i++)
{
for (j = 0; j < DIM; j++)
srenew(fshift, qm->nrQMatoms);
/* we need to re-initialize the QMroutine every step... */
init_QMroutine(cr, qm, mm);
- QMener += call_QMroutine(cr, fr, qm, mm, forces, fshift);
+ QMener += call_QMroutine(cr, qr, qm, mm, forces, fshift);
/* this layer at the lower level of theory */
srenew(forces2, qm->nrQMatoms);
srenew(fshift2, qm->nrQMatoms);
init_QMroutine(cr, qm2, mm);
- QMener -= call_QMroutine(cr, fr, qm2, mm, forces2, fshift2);
+ QMener -= call_QMroutine(cr, qr, qm2, mm, forces2, fshift2);
/* E = E1high-E1low The next layer includes the current layer at
* the lower level of theory, which provides + E2low
* this is similar for gradients
init_QMroutine(cr, qm, mm);
srenew(forces, qm->nrQMatoms);
srenew(fshift, qm->nrQMatoms);
- QMener += call_QMroutine(cr, fr, qm, mm, forces, fshift);
+ QMener += call_QMroutine(cr, qr, qm, mm, forces, fshift);
for (i = 0; i < qm->nrQMatoms; i++)
{
for (j = 0; j < DIM; j++)
* routine should be called at every step, since it updates the MM
* elements of the t_QMMMrec struct.
*/
-real calculate_QMMM(const t_commrec* cr, gmx::ForceWithShiftForces* forceWithShiftForces, const t_forcerec* fr);
+real calculate_QMMM(const t_commrec* cr, gmx::ForceWithShiftForces* forceWithShiftForces, const t_QMMMrec* qmmm);
/* QMMM computes the QM forces. This routine makes either function
* calls to gmx QM routines (derived from MOPAC7 (semi-emp.) and MPQC
* using CUDA, including class initialization, data-structures management
* and GPU kernel.
*
- * \note Management of CUDA stream and periodic boundary should be unified with LINCS
- * and removed from here once constraints are fully integrated with update module.
* \todo Reconsider naming to use "gpu" suffix instead of "cuda".
*
* \author Artem Zhmurov <zhmurov@gmail.com>
* \param [in] gm_x Coordinates of atoms before the timestep.
* \param [in,out] gm_x Coordinates of atoms after the timestep (constrained coordinates will be
* saved here).
- * \param [in] pbcAiuc Periodic boundary conditions data.
* \param [in] invdt Reciprocal timestep.
* \param [in] gm_v Velocities of the particles.
* \param [in] gm_virialScaled Virial tensor.
+ * \param [in] pbcAiuc Periodic boundary conditions data.
*/
template<bool updateVelocities, bool computeVirial>
__launch_bounds__(c_maxThreadsPerBlock) __global__
const SettleParameters pars,
const float3* __restrict__ gm_x,
float3* __restrict__ gm_xprime,
- const PbcAiuc pbcAiuc,
- float invdt,
+ float invdt,
float3* __restrict__ gm_v,
- float* __restrict__ gm_virialScaled)
+ float* __restrict__ gm_virialScaled,
+ const PbcAiuc pbcAiuc)
{
/* ******************************************************************* */
/* ** */
float3* d_v,
const real invdt,
const bool computeVirial,
- tensor virialScaled)
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc)
{
ensureNoPendingCudaError("In CUDA version SETTLE");
config.stream = commandStream_;
const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, config, &numSettles_, &d_atomIds_,
- &settleParameters_, &d_x, &d_xp, &pbcAiuc_,
- &invdt, &d_v, &d_virialScaled_);
+ &settleParameters_, &d_x, &d_xp, &invdt, &d_v,
+ &d_virialScaled_, &pbcAiuc);
launchGpuKernel(kernelPtr, config, nullptr, "settle_kernel<updateVelocities, computeVirial>", kernelArgs);
GpuApiCallBehavior::Sync, nullptr);
}
-void SettleCuda::setPbc(const t_pbc* pbc)
-{
- setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);
-}
-
} // namespace gmx
* multipliers when velocities are updated)
* \param[in] computeVirial If virial should be updated.
* \param[in,out] virialScaled Scaled virial tensor to be updated.
+ * \param[in] pbcAiuc PBC data.
*/
void apply(const float3* d_x,
float3* d_xp,
float3* d_v,
const real invdt,
const bool computeVirial,
- tensor virialScaled);
+ tensor virialScaled,
+ const PbcAiuc pbcAiuc);
/*! \brief
* Update data-structures (e.g. after NB search step).
*/
void set(const t_idef& idef, const t_mdatoms& md);
- /*! \brief
- * Update PBC data.
- *
- * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter.
- *
- * \todo PBC should not be handled by constraints.
- *
- * \param[in] pbc The PBC data in t_pbc format.
- */
- void setPbc(const t_pbc* pbc);
-
-
private:
//! CUDA stream
CommandStream commandStream_;
- //! Periodic boundary data
- PbcAiuc pbcAiuc_;
//! Scaled virial tensor (9 reals, GPU)
std::vector<float> h_virialScaled_;
/* \brief Launch end-of-step GPU tasks: buffer clearing and rolling pruning.
*
- * TODO: eliminate the \p useGpuNonbonded and \p useGpuNonbonded when these are
+ * TODO: eliminate \p useGpuPmeOnThisRank when this is
* incorporated in DomainLifetimeWorkload.
*/
static void launchGpuEndOfStepTasks(nonbonded_verlet_t* nbv,
gmx_pme_t* pmedata,
gmx_enerdata_t* enerd,
const gmx::MdrunScheduleWorkload& runScheduleWork,
- bool useGpuNonbonded,
- bool useGpuPme,
+ bool useGpuPmeOnThisRank,
int64_t step,
gmx_wallcycle_t wcycle)
{
- if (useGpuNonbonded)
+ if (runScheduleWork.simulationWork.useGpuNonbonded)
{
/* Launch pruning before buffer clearing because the API overhead of the
* clear kernel launches can leave the GPU idle while it could be running
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
- if (useGpuPme)
+ if (useGpuPmeOnThisRank)
{
pme_gpu_reinit_computation(pmedata, wcycle);
}
}
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
- }
- if (stepWork.doNeighborSearch)
- {
// Need to run after the GPU-offload bonded interaction lists
// are set up to be able to determine whether there is bonded work.
runScheduleWork->domainWork = setupDomainLifetimeWorkload(
}
launchGpuEndOfStepTasks(nbv, fr->gpuBonded, fr->pmedata, enerd, *runScheduleWork,
- simulationWork.useGpuNonbonded, useGpuPmeOnThisRank, step, wcycle);
+ useGpuPmeOnThisRank, step, wcycle);
if (DOMAINDECOMP(cr))
{
float3 *d_x, *d_xp, *d_v;
lincsCuda->set(testData->idef_, testData->md_);
- lincsCuda->setPbc(&pbc);
+ PbcAiuc pbcAiuc;
+ setPbcAiuc(pbc.ndim_ePBC, pbc.box, &pbcAiuc);
allocateDeviceBuffer(&d_x, numAtoms, nullptr);
allocateDeviceBuffer(&d_xp, numAtoms, nullptr);
GpuApiCallBehavior::Sync, nullptr);
}
lincsCuda->apply(d_x, d_xp, updateVelocities, d_v, testData->invdt_, testData->computeVirial_,
- testData->virialScaled_);
+ testData->virialScaled_, pbcAiuc);
copyFromDeviceBuffer((float3*)(testData->xPrime_.data()), &d_xp, 0, numAtoms, nullptr,
GpuApiCallBehavior::Sync, nullptr);
GMX_RELEASE_ASSERT(canPerformGpuDetection(), "Can't detect CUDA-capable GPUs.");
auto settleCuda = std::make_unique<SettleCuda>(testData->mtop_, nullptr);
- settleCuda->setPbc(&pbc);
+
settleCuda->set(testData->idef_, testData->mdatoms_);
+ PbcAiuc pbcAiuc;
+ setPbcAiuc(pbc.ndim_ePBC, pbc.box, &pbcAiuc);
int numAtoms = testData->mdatoms_.homenr;
copyToDeviceBuffer(&d_v, (float3*)h_v, 0, numAtoms, nullptr, GpuApiCallBehavior::Sync, nullptr);
}
settleCuda->apply(d_x, d_xp, updateVelocities, d_v, testData->reciprocalTimeStep_, calcVirial,
- testData->virial_);
+ testData->virial_, pbcAiuc);
copyFromDeviceBuffer((float3*)h_xp, &d_xp, 0, numAtoms, nullptr, GpuApiCallBehavior::Sync, nullptr);
if (updateVelocities)
// Constraints need both coordinates before (d_x_) and after (d_xp_) update. However, after constraints
// are applied, the d_x_ can be discarded. So we intentionally swap the d_x_ and d_xp_ here to avoid the
// d_xp_ -> d_x_ copy after constraints. Note that the integrate saves them in the wrong order as well.
- lincsCuda_->apply(d_xp_, d_x_, updateVelocities, d_v_, 1.0 / dt, computeVirial, virial);
- settleCuda_->apply(d_xp_, d_x_, updateVelocities, d_v_, 1.0 / dt, computeVirial, virial);
+ lincsCuda_->apply(d_xp_, d_x_, updateVelocities, d_v_, 1.0 / dt, computeVirial, virial, pbcAiuc_);
+ settleCuda_->apply(d_xp_, d_x_, updateVelocities, d_v_, 1.0 / dt, computeVirial, virial, pbcAiuc_);
// scaledVirial -> virial (methods above returns scaled values)
float scaleFactor = 0.5f / (dt * dt);
void UpdateConstrainCuda::Impl::setPbc(const t_pbc* pbc)
{
setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);
- integrator_->setPbc(pbc);
- lincsCuda_->setPbc(pbc);
- settleCuda_->setPbc(pbc);
}
GpuEventSynchronizer* UpdateConstrainCuda::Impl::getCoordinatesReadySync()
const int nwall = ir.nwall;
const int ngid = ir.opts.ngener;
const int ntype = fr.ntype;
- const real* nbfp = fr.nbfp;
+ const real* nbfp = fr.nbfp.data();
const int* egp_flags = fr.egp_flags;
for (int w = 0; w < nwall; w++)
/* This call is not included in init_domain_decomposition mainly
* because fr->cginfo_mb is set later.
*/
- dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec,
+ dd_init_bondeds(fplog, cr->dd, mtop, vsite.get(), inputrec,
domdecOptions.checkBondedInteractions, fr->cginfo_mb);
}
}
// FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x,
- // before we destroy the GPU context(s) in free_gpu_resources().
+ // before we destroy the GPU context(s) in free_gpu().
// Pinned buffers are associated with contexts in CUDA.
// As soon as we destroy GPU contexts after mdrunner() exits, these lines should go.
mdAtoms.reset(nullptr);
globalState.reset(nullptr);
mdModules_.reset(nullptr); // destruct force providers here as they might also use the GPU
+ /* Free pinned buffers in *fr */
+ delete fr;
+ fr = nullptr;
+
+ if (hwinfo->gpu_info.n_dev > 0)
+ {
+ /* stop the GPU profiler (only CUDA) */
+ stopGpuProfiler();
+ }
+
+ /* With tMPI we need to wait for all ranks to finish deallocation before
+ * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
+ * GPU and context.
+ *
+ * This is not a concern in OpenCL where we use one context per rank which
+ * is freed in nbnxn_gpu_free().
+ *
+ * Note: it is safe to not call the barrier on the ranks which do not use GPU,
+ * but it is easier and more futureproof to call it on the whole node.
+ *
+ * Note that this function needs to be called even if GPUs are not used
+ * in this run because the PME ranks have no knowledge of whether GPUs
+ * are used or not, but all ranks need to enter the barrier below.
+ * \todo Remove this physical node barrier after making sure
+ * that it's not needed anymore (with a shared GPU run).
+ */
+ if (GMX_THREAD_MPI)
+ {
+ physicalNodeComm.barrier();
+ }
- /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */
- free_gpu_resources(fr, physicalNodeComm, hwinfo->gpu_info);
free_gpu(nonbondedDeviceInfo);
free_gpu(pmeDeviceInfo);
- done_forcerec(fr, mtop.molblock.size());
sfree(fcd);
if (doMembed)
struct cginfo_mb_t
{
- int cg_start;
- int cg_end;
- int cg_mod;
- int* cginfo;
+ int cg_start = 0;
+ int cg_end = 0;
+ int cg_mod = 0;
+ std::vector<int> cginfo;
};
real sc_sigma6_min = 0;
/* Information about atom properties for the molecule blocks in the system */
- struct cginfo_mb_t* cginfo_mb = nullptr;
+ std::vector<cginfo_mb_t> cginfo_mb;
/* Information about atom properties for local and non-local atoms */
std::vector<int> cginfo;
std::vector<gmx::RVec> shiftForces;
/* Non bonded Parameter lists */
- int ntype = 0; /* Number of atom types */
- gmx_bool bBHAM = FALSE;
- real* nbfp = nullptr;
- real* ljpme_c6grid = nullptr; /* C6-values used on grid in LJPME */
+ int ntype = 0; /* Number of atom types */
+ gmx_bool bBHAM = FALSE;
+ std::vector<real> nbfp;
+ real* ljpme_c6grid = nullptr; /* C6-values used on grid in LJPME */
/* Energy group pair flags */
int* egp_flags = nullptr;
GMX_UNUSED_VALUE(dataSize);
+ GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
+
GMX_ASSERT(dataSize >= 0, "Trying to copy to device buffer before it was allocated.");
+ GMX_ASSERT(commandStream != nullptr,
+ "No stream is valid for copying with given atom locality.");
+
int atomsStartAt, numAtomsToCopy;
std::tie(atomsStartAt, numAtomsToCopy) = getAtomRangesFromAtomLocality(atomLocality);
GMX_UNUSED_VALUE(dataSize);
+ GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
+
GMX_ASSERT(dataSize >= 0, "Trying to copy from device buffer before it was allocated.");
+ GMX_ASSERT(commandStream != nullptr,
+ "No stream is valid for copying with given atom locality.");
+
int atomsStartAt, numAtomsToCopy;
std::tie(atomsStartAt, numAtomsToCopy) = getAtomRangesFromAtomLocality(atomLocality);
void StatePropagatorDataGpu::Impl::copyCoordinatesToGpu(const gmx::ArrayRef<const gmx::RVec> h_x,
AtomLocality atomLocality)
{
- GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
- CommandStream commandStream = xCopyStreams_[atomLocality];
- GMX_ASSERT(commandStream != nullptr,
- "No stream is valid for copying positions with given atom locality.");
-
- copyToDevice(d_x_, h_x, d_xSize_, atomLocality, commandStream);
+ copyToDevice(d_x_, h_x, d_xSize_, atomLocality, xCopyStreams_[atomLocality]);
// markEvent is skipped in OpenCL as:
// - it's not needed, copy is done in the same stream as the only consumer task (PME)
// TODO: remove this by adding an event-mark free flavor of this function
if (GMX_GPU == GMX_GPU_CUDA)
{
- xReadyOnDevice_[atomLocality].markEvent(commandStream);
+ xReadyOnDevice_[atomLocality].markEvent(xCopyStreams_[atomLocality]);
}
}
void StatePropagatorDataGpu::Impl::copyCoordinatesFromGpu(gmx::ArrayRef<gmx::RVec> h_x, AtomLocality atomLocality)
{
- GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
- CommandStream commandStream = xCopyStreams_[atomLocality];
- GMX_ASSERT(commandStream != nullptr,
- "No stream is valid for copying positions with given atom locality.");
-
- copyFromDevice(h_x, d_x_, d_xSize_, atomLocality, commandStream);
+ copyFromDevice(h_x, d_x_, d_xSize_, atomLocality, xCopyStreams_[atomLocality]);
// Note: unlike copyCoordinatesToGpu this is not used in OpenCL, and the conditional is not needed.
- xReadyOnHost_[atomLocality].markEvent(commandStream);
+ xReadyOnHost_[atomLocality].markEvent(xCopyStreams_[atomLocality]);
}
void StatePropagatorDataGpu::Impl::waitCoordinatesReadyOnHost(AtomLocality atomLocality)
void StatePropagatorDataGpu::Impl::copyVelocitiesToGpu(const gmx::ArrayRef<const gmx::RVec> h_v,
AtomLocality atomLocality)
{
- GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
- CommandStream commandStream = vCopyStreams_[atomLocality];
- GMX_ASSERT(commandStream != nullptr,
- "No stream is valid for copying velocities with given atom locality.");
-
- copyToDevice(d_v_, h_v, d_vSize_, atomLocality, commandStream);
- vReadyOnDevice_[atomLocality].markEvent(commandStream);
+ copyToDevice(d_v_, h_v, d_vSize_, atomLocality, vCopyStreams_[atomLocality]);
+ vReadyOnDevice_[atomLocality].markEvent(vCopyStreams_[atomLocality]);
}
GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getVelocitiesReadyOnDeviceEvent(AtomLocality atomLocality)
void StatePropagatorDataGpu::Impl::copyVelocitiesFromGpu(gmx::ArrayRef<gmx::RVec> h_v, AtomLocality atomLocality)
{
- GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
- CommandStream commandStream = vCopyStreams_[atomLocality];
- GMX_ASSERT(commandStream != nullptr,
- "No stream is valid for copying velocities with given atom locality.");
-
- copyFromDevice(h_v, d_v_, d_vSize_, atomLocality, commandStream);
- vReadyOnHost_[atomLocality].markEvent(commandStream);
+ copyFromDevice(h_v, d_v_, d_vSize_, atomLocality, vCopyStreams_[atomLocality]);
+ vReadyOnHost_[atomLocality].markEvent(vCopyStreams_[atomLocality]);
}
void StatePropagatorDataGpu::Impl::waitVelocitiesReadyOnHost(AtomLocality atomLocality)
void StatePropagatorDataGpu::Impl::copyForcesToGpu(const gmx::ArrayRef<const gmx::RVec> h_f,
AtomLocality atomLocality)
{
- GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
- CommandStream commandStream = fCopyStreams_[atomLocality];
- GMX_ASSERT(commandStream != nullptr,
- "No stream is valid for copying forces with given atom locality.");
-
- copyToDevice(d_f_, h_f, d_fSize_, atomLocality, commandStream);
- fReadyOnDevice_[atomLocality].markEvent(commandStream);
+ copyToDevice(d_f_, h_f, d_fSize_, atomLocality, fCopyStreams_[atomLocality]);
+ fReadyOnDevice_[atomLocality].markEvent(fCopyStreams_[atomLocality]);
}
GpuEventSynchronizer* StatePropagatorDataGpu::Impl::getForcesReadyOnDeviceEvent(AtomLocality atomLocality,
void StatePropagatorDataGpu::Impl::copyForcesFromGpu(gmx::ArrayRef<gmx::RVec> h_f, AtomLocality atomLocality)
{
- GMX_ASSERT(atomLocality < AtomLocality::Count, "Wrong atom locality.");
- CommandStream commandStream = fCopyStreams_[atomLocality];
- GMX_ASSERT(commandStream != nullptr,
- "No stream is valid for copying forces with given atom locality.");
-
- copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, commandStream);
- fReadyOnHost_[atomLocality].markEvent(commandStream);
+ copyFromDevice(h_f, d_f_, d_fSize_, atomLocality, fCopyStreams_[atomLocality]);
+ fReadyOnHost_[atomLocality].markEvent(fCopyStreams_[atomLocality]);
}
void StatePropagatorDataGpu::Impl::waitForcesReadyOnHost(AtomLocality atomLocality)
const Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype,
- const real* nbfp,
+ ArrayRef<const real> nbfp,
int n_energygroups)
{
real c6, c12, tol;
const Nbnxm::KernelType kernelType,
int enbnxninitcombrule,
int ntype,
- const real* nbfp,
+ ArrayRef<const real> nbfp,
int n_energygroups,
int nout)
{
* to the atom data structure.
* enbnxninitcombrule sets what combination rule data gets stored in nbat.
*/
-void nbnxn_atomdata_init(const gmx::MDLogger& mdlog,
- nbnxn_atomdata_t* nbat,
- Nbnxm::KernelType kernelType,
- int enbnxninitcombrule,
- int ntype,
- const real* nbfp,
- int n_energygroups,
- int nout);
+void nbnxn_atomdata_init(const gmx::MDLogger& mdlog,
+ nbnxn_atomdata_t* nbat,
+ Nbnxm::KernelType kernelType,
+ int enbnxninitcombrule,
+ int ntype,
+ gmx::ArrayRef<const real> nbfp,
+ int n_energygroups,
+ int nout);
void nbnxn_atomdata_set(nbnxn_atomdata_t* nbat,
const Nbnxm::GridSet& gridSet,
std::move(atomData), kernelSetup, nullptr, nullptr);
nbnxn_atomdata_init(gmx::MDLogger(), nbv->nbat.get(), kernelSetup.kernelType, combinationRule,
- system.numAtomTypes, system.nonbondedParameters.data(), 1, numThreads);
+ system.numAtomTypes, system.nonbondedParameters, 1, numThreads);
t_nrnb nrnb;
}
forceRec.ntype = numAtomTypes;
- forceRec.nbfp = nonbondedParameters.data();
+ forceRec.nbfp = nonbondedParameters;
snew(forceRec.shift_vec, SHIFTS);
calc_shifts(box, forceRec.shift_vec);
}