tags[${#tags[@]}]=$tag
python3 $SCRIPT --cmake 3.9.6 --gcc 6 --cuda 10.1 --opencl --clfft --mpi openmpi | docker build -t $tag -
+ tag="gromacs/cmake-3.9.6-gcc-7-amdopencl-clfft-openmpi:2020"
+ tags[${#tags[@]}]=$tag
+ python3 $SCRIPT --cmake 3.9.6 --gcc 7 --opencl amd --clfft --mpi openmpi | docker build -t $tag -
+
tag="gromacs/cmake-3.15.7-gcc-8-cuda-10.1-openmpi:2020"
tags[${#tags[@]}]=$tag
python3 $SCRIPT --cmake 3.15.7 --gcc 8 --cuda 10.1 --mpi openmpi | docker build -t $tag -
tags[${#tags[@]}]=$tag
python3 $SCRIPT --cmake 3.15.7 --llvm 8 --opencl intel --mpi openmpi | docker build -t $tag -
-tag="gromacs/cmake-3.9.6-llvm-3.6-amdopencl-openmpi:2020"
+tag="gromacs/cmake-3.9.6-llvm-8-amdopencl-openmpi:2020"
tags[${#tags[@]}]=$tag
-python3 $SCRIPT --ubuntu 16.04 --cmake 3.9.6 --llvm 3.6 --opencl amd --mpi openmpi | docker build -t $tag -
+python3 $SCRIPT --cmake 3.9.6 --llvm 8 --opencl amd --mpi openmpi | docker build -t $tag -
-tag=gromacs/ci-docs-llvm:2020
+tag="gromacs/cmake-3.15.7-gcc-8-cuda-10.1-nvidiaopencl-clfft-openmpi:master"
tags[${#tags[@]}]=$tag
-python3 $SCRIPT --llvm --doxygen | docker build -t $tag -
+python3 $SCRIPT --cmake 3.15.7 --gcc 8 --cuda 10.1 --opencl --clfft --mpi openmpi \
+| docker build -t $tag -
-tag=gromacs/ci-docs-gcc:2020
+tag="gromacs/cmake-3.13.0-gcc-7-amdopencl-clfft-openmpi:master"
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.13.0 --gcc 7 --opencl amd --clfft --mpi openmpi | docker build -t $tag -
+
+tag="gromacs/cmake-3.13.0-llvm-8-tsan:master"
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.13.0 --llvm 8 --tsan | docker build -t $tag -
+
+tag="gromacs/cmake-3.15.7-llvm-8-cuda-10.0-openmpi:master"
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.15.7 --llvm 8 --cuda 10.0 --mpi openmpi | docker build -t $tag -
+
+tag="gromacs/cmake-3.15.7-llvm-8-cuda-10.1-openmpi:master"
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.15.7 --llvm 8 --cuda 10.1 --mpi openmpi | docker build -t $tag -
+
+tag="gromacs/cmake-3.15.7-llvm-9-openmpi:master"
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.15.7 --llvm 9 --mpi openmpi | docker build -t $tag -
+
+tag="gromacs/cmake-3.13.0-llvm-9-intelopencl-openmpi:master"
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.13.0 --llvm 9 --opencl intel --mpi openmpi | docker build -t $tag -
+
+tag="gromacs/cmake-3.13.0-llvm-9-amdopencl-openmpi:master"
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.13.0 --llvm 9 --opencl amd --mpi openmpi | docker build -t $tag -
+
+tag=gromacs/ci-docs-llvm:master
+tags[${#tags[@]}]=$tag
+python3 $SCRIPT --cmake 3.17.2 --llvm --doxygen | docker build -t $tag -
+
+tag=gromacs/ci-docs-gcc:master
tags[${#tags[@]}]=$tag
python3 $SCRIPT --gcc --doxygen | docker build -t $tag -
docker login
for tag in "${tags[@]}"; do
- docker push $tag
+ echo "Pushing $tag"
+ #docker push $tag
done
"""
# TODO: Try using distutils.version.StrictVersion.
-parser.add_argument('--cmake', type=str, default='3.9.6',
- choices=['3.9.6', '3.11.4', '3.15.7'],
+parser.add_argument('--cmake', type=str, default='3.13.0',
+ choices=['3.9.6', '3.11.4', '3.13.0', '3.14.7', '3.15.7', '3.16.6', '3.17.2'],
help='Selection of CMake version to provide to base image')
compiler_group = parser.add_mutually_exclusive_group()
compiler_group.add_argument('--gcc', type=int, nargs='?', const=7, default=7,
choices=[5, 6, 7, 8, 9],
help='Select GNU compiler tool chain. (Default) '
'Some checking is implemented to avoid incompatible combinations')
- compiler_group.add_argument('--llvm', type=int, nargs='?', const=7, default=None,
- choices=[3, 6, 7, 8, 9],
+ compiler_group.add_argument('--llvm', type=str, nargs='?', const='7', default=None,
+ choices=['3.6', '6', '7', '8'],
help='Select LLVM compiler tool chain. '
'Some checking is implemented to avoid incompatible combinations')
compiler_group.add_argument('--icc', type=int, nargs='?', const=19, default=None,
- choices=[19],
+ choices=[19, 20],
help='Select Intel compiler tool chain. '
'Some checking is implemented to avoid incompatible combinations')
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2012,2013,2014,2015, The GROMACS development team.
-# Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+# Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
elseif(GMX_SIMD_ACTIVE STREQUAL "IBM_VSX")
+ # IBM_VSX and gcc > 9 do not work together, so we need to prevent people from
+ # choosing a combination that might fail. Issue #3380.
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "9")
+ message(FATAL_ERROR "IBM_VSX does not work together with gcc > 9. Disable SIMD support (slower), or use an older version of the GNU compiler")
+ endif()
+
+
gmx_find_simd_ibm_vsx_flags(SIMD_IBM_VSX_C_SUPPORTED SIMD_IBM_VSX_CXX_SUPPORTED
SIMD_IBM_VSX_C_FLAGS SIMD_IBM_VSX_CXX_FLAGS)
# is not required for SIMD support on this platform. cmake through
# at least version 3.7 cannot pass this check with the C compiler
# in the latest xlc 13.1.5, but the C++ compiler has different
- # behaviour and is OK. See Redmine #2102.
+ # behaviour and is OK. See Issue #2102.
if(NOT SIMD_IBM_VSX_CXX_SUPPORTED)
gmx_give_fatal_error_when_simd_support_not_found("IBM VSX" "disable SIMD support (slower)" "${SUGGEST_BINUTILS_UPDATE}")
endif()
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
+# Copyright (c) 2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
# GROMACS 2018 3
# GROMACS 2019 4
# GROMACS 2020 5
+# GROMACS 2021 6
# LIBRARY_SOVERSION_MINOR so minor version for the built libraries.
# Should be increased for each release that changes only the implementation.
# In GROMACS, the typical policy is to increase it for each patch version
# The GROMACS convention is that these are the version number of the next
# release that is going to be made from this branch.
-set(GMX_VERSION_MAJOR 2020)
-set(GMX_VERSION_PATCH 3)
+set(GMX_VERSION_MAJOR 2021)
+set(GMX_VERSION_PATCH 0)
# The suffix, on the other hand, is used mainly for betas and release
# candidates, where it signifies the most recent such release from
# this branch; it will be empty before the first such release, as well
# here. The important thing is to minimize the chance of third-party
# code being able to dynamically link with a version of libgromacs
# that might not work.
-set(LIBRARY_SOVERSION_MAJOR 5)
+set(LIBRARY_SOVERSION_MAJOR 6)
set(LIBRARY_SOVERSION_MINOR 0)
set(LIBRARY_VERSION ${LIBRARY_SOVERSION_MAJOR}.${LIBRARY_SOVERSION_MINOR}.0)
endif()
set(REGRESSIONTEST_VERSION "${GMX_VERSION_STRING}")
-set(REGRESSIONTEST_BRANCH "release-2020")
+set(REGRESSIONTEST_BRANCH "master")
# Run the regressiontests packaging job with the correct pakage
# version string, and the release box checked, in order to have it
# build the regressiontests tarball with all the right naming. The
# naming affects the md5sum that has to go here, and if it isn't right
# release workflow will report a failure.
- set(REGRESSIONTEST_MD5SUM "b032e4517195b1f8cd9db87cee1b30df" CACHE INTERNAL "MD5 sum of the regressiontests tarball for this GROMACS version")
+ set(REGRESSIONTEST_MD5SUM "b75c3b1bac0e4114e236f049fc7f1f1c" CACHE INTERNAL "MD5 sum of the regressiontests tarball for this GROMACS version")
math(EXPR GMX_VERSION_NUMERIC
"${GMX_VERSION_MAJOR}*10000 + ${GMX_VERSION_PATCH}")
set(VERSION_INFO_CMAKEIN_FILE ${CMAKE_CURRENT_LIST_DIR}/VersionInfo.cmake.cmakein)
set(VERSION_INFO_CONFIGURE_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/gmxConfigureVersionInfo.cmake)
# A set of directories to scan for calculating the hash of source files.
- set(SET_OF_DIRECTORIES_TO_CHECKSUM "${PROJECT_SOURCE_DIR}/src")
- list(APPEND SET_OF_DIRECTORIES_TO_CHECKSUM "${PROJECT_SOURCE_DIR}/python_packaging")
+ set(SET_OF_DIRECTORIES_TO_CHECKSUM "src")
+ list(APPEND SET_OF_DIRECTORIES_TO_CHECKSUM "python_packaging")
# Due to the limitations for passing a list as arguments, we make the directories a string here
string(REPLACE ";" ":" DIRECTORIES_TO_CHECKSUM_STRING "${SET_OF_DIRECTORIES_TO_CHECKSUM}")
-# Try to find python for the checksumming script
-set(PythonInterp_FIND_QUIETLY ON)
-find_package(PythonInterp 3.5)
# Rules to create the VersionInfo.cmake file.
# For git info, the sequence is:
# not been tampered with.
# Note: The RUN_ALWAYS here is to regenerate the hash file only, it does not
# mean that the target is run in all builds
- if (PYTHON_EXECUTABLE)
+ if (PYTHONINTERP_FOUND)
+        # The directories are stored relative to the project root, so
+        # reconstruct their full paths here before passing them to the script.
+ set(FULL_PATH_DIRECTORIES "")
+ foreach(DIR ${SET_OF_DIRECTORIES_TO_CHECKSUM})
+ list(APPEND FULL_PATH_DIRECTORIES "${PROJECT_SOURCE_DIR}/${DIR}")
+ endforeach()
gmx_add_custom_output_target(reference_checksum RUN_ALWAYS
OUTPUT ${CHECKSUM_FILE}
COMMAND ${PYTHON_EXECUTABLE}
${PROJECT_SOURCE_DIR}/admin/createFileHash.py
- -s ${SET_OF_DIRECTORIES_TO_CHECKSUM}
+ -s ${FULL_PATH_DIRECTORIES}
-o ${CHECKSUM_FILE}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "Generating reference checksum of source files")
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
+# Copyright (c) 2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
# of configure time, because 1) some of the version variables are only
# available during build time, and 2) we don't want to do all the Sphinx setup
# during configuration to save some time when not building the content.
-# All the generated values get put into conf-vars.py (generated from
-# conf-vars.py.cmakein), which in turn is included by the Sphinx configuration
-# file conf.py.
+# All of the generated values get put into conf.py (generated from
+# conf.cmakein.py).
set(SOURCE_MD5SUM "unknown" CACHE STRING
"MD5 sum of the source tarball, normally used only for the pre-release webpage build")
set(SPHINX_SOURCE_FILES
index.rst
download.rst
- conf.py
links.dat
dev-manual/build-system.rst
dev-manual/change-management.rst
dev-manual/commitstyle.rst
+ dev-manual/containers.rst
dev-manual/documentation-generation.rst
dev-manual/contribute.rst
dev-manual/doxygen.rst
dev-manual/error-handling.rst
dev-manual/formatting.rst
+ dev-manual/gitlab.rst
dev-manual/gmxtree.rst
dev-manual/includestyle.rst
dev-manual/index.rst
+ dev-manual/infrastructure.rst
dev-manual/jenkins.rst
dev-manual/known-issues.rst
dev-manual/language-features.rst
how-to/visualize.rst
install-guide/index.rst
release-notes/index.rst
+ release-notes/2021/major/highlights.rst
+ release-notes/2021/major/features.rst
+ release-notes/2021/major/performance.rst
+ release-notes/2021/major/tools.rst
+ release-notes/2021/major/bugs-fixed.rst
+ release-notes/2021/major/removed-functionality.rst
+ release-notes/2021/major/deprecated-functionality.rst
+ release-notes/2021/major/portability.rst
+ release-notes/2021/major/miscellaneous.rst
release-notes/2020/2020.1.rst
release-notes/2020/2020.2.rst
+ release-notes/2020/2020.3.rst
release-notes/2020/major/highlights.rst
release-notes/2020/major/features.rst
release-notes/2020/major/performance.rst
set(IMAGE_CONVERT_STRING "impossible")
endif()
- set(SPHINX_CONFIG_VARS_FILE ${SPHINX_INPUT_DIR}/conf-vars.py)
+ set(SPHINX_CONFIG_FILE ${SPHINX_INPUT_DIR}/conf.py)
if (GMX_PYTHON_PACKAGE)
set(GMXAPI_PYTHON_STAGING_DIR ${CMAKE_BINARY_DIR}/python_packaging/src/gmxapi_staging)
# TODO: Resolve circular reference. We would like to get the CMake build-time directory for
# in this context?
endif ()
- gmx_configure_version_file(conf-vars.py.cmakein ${SPHINX_CONFIG_VARS_FILE}
+ gmx_configure_version_file(
+ conf.cmakein.py ${SPHINX_CONFIG_FILE}
EXTRA_VARS
- SPHINX_EXTENSION_PATH RELENG_PATH
- IMAGE_CONVERT_STRING
+ CMAKE_MINIMUM_REQUIRED_VERSION
EXPECTED_DOXYGEN_VERSION
EXPECTED_SPHINX_VERSION
- CMAKE_MINIMUM_REQUIRED_VERSION REQUIRED_CUDA_VERSION
- REQUIRED_OPENCL_MIN_VERSION
- REQUIRED_CUDA_COMPUTE_CAPABILITY REGRESSIONTEST_VERSION
- SOURCE_MD5SUM REGRESSIONTEST_MD5SUM_STRING
- GMX_TNG_MINIMUM_REQUIRED_VERSION
+ GMX_ADMIN_DIR
GMX_LMFIT_REQUIRED_VERSION
GMX_MANUAL_DOI_STRING
+ GMX_TNG_MINIMUM_REQUIRED_VERSION
GMX_SOURCE_DOI_STRING
GMXAPI_PYTHON_STAGING_DIR
+ IMAGE_CONVERT_STRING
+ REGRESSIONTEST_VERSION
+ REQUIRED_CUDA_COMPUTE_CAPABILITY
+ REQUIRED_CUDA_VERSION
+ REQUIRED_OPENCL_MIN_VERSION
+ REGRESSIONTEST_MD5SUM_STRING
+ RELENG_PATH
+ SOURCE_MD5SUM
+ SPHINX_EXTENSION_PATH
COMMENT "Configuring Sphinx configuration file")
- gmx_add_sphinx_input_file(${SPHINX_CONFIG_VARS_FILE})
+ gmx_add_sphinx_input_file(${SPHINX_CONFIG_FILE})
gmx_add_sphinx_source_files(FILES ${SPHINX_SOURCE_FILES})
if (EXISTS ${RELENG_PATH}/docs/FileList.cmake)
include(${RELENG_PATH}/docs/FileList.cmake)
# Sphinx cache with pickled ReST documents
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
+ set(SPHINX_CONFIG_OVERRIDES "")
+ if (GMX_DEVELOPER_BUILD)
+ set(SPHINX_CONFIG_OVERRIDES "-Dtodo_include_todos=1")
+ endif()
add_custom_target(webpage-sphinx
DEPENDS sphinx-programs
DEPENDS sphinx-input
-q -b html
-w sphinx-html.log
-d "${SPHINX_CACHE_DIR}"
+ ${SPHINX_CONFIG_OVERRIDES}
"${SPHINX_INPUT_DIR}"
"${HTML_OUTPUT_DIR}"
WORKING_DIRECTORY
set(HTML_BUILD_WARNINGS)
# Next, turn it off if any of the preconditions are unsatisified
-if (NOT PythonInterp_FOUND)
+if (NOT Python3_Interpreter_FOUND)
set(HTML_BUILD_IS_POSSIBLE OFF)
set(HTML_BUILD_NOT_POSSIBLE_REASON "Python is required")
elseif (NOT SPHINX_FOUND)
GROMACS 2020.2 release notes
----------------------------
- This version was released on TODO, 2020. These release notes
+ This version was released on April 30th, 2020. These release notes
document the changes that have taken place in GROMACS since the
previous 2020.1 version, to fix known issues. It also incorporates all
fixes made in version 2019.6 and earlier, which you can find described
.. Note to developers!
Please use """"""" to underline the individual entries for fixed issues in the subfolders,
otherwise the formatting on the webpage is messed up.
- Also, please use the syntax :issue:`number` to reference issues on redmine, without the
+   Also, please use the syntax :issue:`number` to reference issues on GitLab, without
a space between the colon and number!
Fixes where mdrun could behave incorrectly
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ Ewald dipole correction incorrect without domain decomposition
+ """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+ Ewald dipole correction (epsilon-surface != 0) is now disabled when not
+ using domain decomposition. With domain decomposition, it only works
+ when each molecule consists of a single update group (e.g. water).
+ This will be fixed in release-2021.
+
+ :issue:`3441`
+
+ Expanded ensemble simulations restarted from checkpoints
+ """"""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+ When restarting expanded ensemble simulations from checkpoints, expanded
+ ensemble would silently refuse to run, and simulations would remain in
+ their original lambda state.
+
+ :issue:`3465`
+
+ Fixed free energy calculations with LJ PME
+ """"""""""""""""""""""""""""""""""""""""""
+
+ Fixed an issue that calculated wrong long-range corrections when using
+ free energy perturbation with ``vdwtype = pme``. This affected forces,
+ energies, lambda derivatives and foreign lambdas.
+
+ :issue:`3470`
+
+ The velocities of the center of mass are now removed correctly in case of -update gpu
+ """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+ When the center of mass motion is removed, the velocities are updated in the CPU memory.
+ In case of GPU update, they should be copied back to the GPU memory after they were updated
+ on the CPU. This affected most runs where the velocity of the center of mass has to be removed,
+ in particular those where this velocity is large at the beginning of the run.
+
+ :issue:`3508`
+
+ Fix checkpoint restart with non-zero initial step
+ """""""""""""""""""""""""""""""""""""""""""""""""
+
+ When restarting from the checkpoint, the init-step mdp parameter was ignored while
+ checking if the simulation is already finished. As a result, this check only worked
+ properly when init-step was 0 or was not specified.
+
+ :issue:`3489`
+
Fixes for ``gmx`` tools
^^^^^^^^^^^^^^^^^^^^^^^
+ Time output unit fixes
+ """"""""""""""""""""""
+
+ When selecting a time unit of microseconds or larger,
+ ``gmx tool -tu`` now produces the correct string in .xvg and
+ particularly .xvgr plots
+
+ Fix do_dssp
+ """""""""""
+
+ The tool would fail with a segmentation fault.
+
+ :issue:`3444`
+
Fixes that affect portability
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ Give clearer message about not detecting IBM_VSX support in gcc > 9
+ """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+ CMake would fail with a confusing error message.
+
+ :issue:`3380`
+
Miscellaneous
^^^^^^^^^^^^^
+ Fixed initial DLB state reporting
+ """""""""""""""""""""""""""""""""
+
+ The initial DLB state was reported incorrectly in the log file when
+ either the "on" or "auto" value was chosen at mdrun startup.
functionality supported, whereas patch releases contain only fixes for
issues identified in the corresponding major releases.
-Two versions of |Gromacs| are under active maintenance, the 2020
-series and the 2019 series. In the latter, only highly conservative
+Two versions of |Gromacs| are under active maintenance, the 2021
+series and the 2020 series. In the latter, only highly conservative
fixes will be made, and only to address issues that affect scientific
correctness. Naturally, some of those releases will be made after the
-year 2019 ends, but we keep 2019 in the name so users understand how
+year 2020 ends, but we keep 2020 in the name so users understand how
up to date their version is. Such fixes will also be incorporated into
-the 2020 release series, as appropriate. Around the time the 2021
-release is made, the 2019 series will no longer be maintained.
+the 2021 release series, as appropriate. Around the time the 2022
+release is made, the 2020 series will no longer be maintained.
Where issue numbers are reported in these release notes, more details
-can be found at https://redmine.gromacs.org at that issue number.
+can be found at https://gitlab.com/gromacs/gromacs/-/issues at that issue number.
+
+|Gromacs| 2021 series
+---------------------
+
+.. todolist::
+
+Major release
+^^^^^^^^^^^^^
+
+.. toctree::
+ :maxdepth: 1
+
+ 2021/major/highlights
+ 2021/major/features
+ 2021/major/performance
+ 2021/major/tools
+ 2021/major/bugs-fixed
+ 2021/major/deprecated-functionality
+ 2021/major/removed-functionality
+ 2021/major/portability
+ 2021/major/miscellaneous
+
|Gromacs| 2020 series
---------------------
2020/2020.1
2020/2020.2
+ 2020/2020.3
Major release
^^^^^^^^^^^^^
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2009,2010,2011,2012,2013 by the GROMACS development team.
+# Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
+# Copyright (c) 2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
get_cuda_compiler_info(CUDA_COMPILER_INFO CUDA_DEVICE_COMPILER_FLAGS CUDA_HOST_COMPILER_FLAGS)
endif()
- string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER)
- configure_file(config.h.cmakein config.h)
- configure_file(gmxpre-config.h.cmakein gmxpre-config.h)
- configure_file(buildinfo.h.cmakein buildinfo.h ESCAPE_QUOTES)
# Make a file with compiler flags used for libgromacs for each
# language and build configuration. The one that corresponds to
# CMAKE_BUILD_TYPE is #included into buildinfo.h and populates the
"-Wno-covered-switch-default" #GCC gives maybe-uninitialized without default label and checks for illegal enum values.
"-Wno-switch-enum" # default statement for enum is OK
- # TODO uncomment the next few ignore lines when we upgrade to test with clang 8 in Jenkins
-
- # The barriers we use for tMPI and Nbnxm are sufficient, but it's
- # not known whether they're excessive. We assume they not
- # excessive.
- # "-Wno-atomic-implicit-seq-cst"
-
# We need to use macros like
# GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR. Those will look strange
# if they don't have a semicolon after them, and might confuse
# tools like IDEs also.
- # "-Wno-extra-semi-stmt"
+ "-Wno-extra-semi-stmt"
#Following ones are undecided/TODO
"-Wno-disabled-macro-expansion"
"-Wno-double-promotion")
string(REPLACE " " ";" IGNORED_CLANG_ALL_WARNINGS "${IGNORED_CLANG_ALL_WARNINGS}")
+option(GMX_CLANG_TIDY "Use clang-tidy" OFF)
if (GMX_CLANG_TIDY)
+ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+ elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithAssert")
+ elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
+ elseif("${CMAKE_BUILD_TYPE}" STREQUAL "ASAN")
+ else()
+ message(FATAL_ERROR "Can only use clang-tidy with build type containing asserts: Debug, RelWithAssert, RelWithDebInfo, ASAN.")
+ endif()
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+ mark_as_advanced(CMAKE_EXPORT_COMPILE_COMMANDS)
set(CLANG_TIDY "clang-tidy" CACHE STRING "Name of clang-tidy executable")
find_program(CLANG_TIDY_EXE NAMES "${CLANG_TIDY}"
DOC "Path to clang-tidy executable")
if(NOT CLANG_TIDY_EXE)
message(FATAL_ERROR "clang-tidy not found.")
endif()
+ mark_as_advanced(CLANG_TIDY)
+ mark_as_advanced(CLANG_TIDY_EXE)
endif()
-#####
add_subdirectory(external)
endif()
add_subdirectory(api)
endif()
+
+ # Configure header files with configuration-specific values. This step
+ # should follow all introspection e.g. looking for headers and
+ # libraries. If not, cmake will need to change the contents of the
+ # file upon subsequent runs of cmake. This can mean that
+ #
+ # cmake $src && make && make test
+ #
+ # requires building all the source files that depend on the changed
+ # header file in both of the make stages. That's slow, and is useless
+ # busy work for ccache, too.
+ string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER)
+ configure_file(config.h.cmakein config.h)
+ configure_file(gmxpre-config.h.cmakein gmxpre-config.h)
+
+ set(CMAKE_BUILD_CONFIGURATION_C_FLAGS ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}})
+ set(CMAKE_BUILD_CONFIGURATION_CXX_FLAGS ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}})
+ configure_file(buildinfo.h.cmakein buildinfo.h ESCAPE_QUOTES)
gmx_target_compile_options(gmxapi)
target_compile_definitions(gmxapi PRIVATE HAVE_CONFIG_H)
target_include_directories(gmxapi SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
+# Should be possible to change this when resolving #3290
+target_include_directories(gmxapi SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
# Define public interface. Make sure targets linking against `gmxapi` in the build
# system don't accidentally have the implementation headers (this directory))
endif()
set_target_properties(gmxapi PROPERTIES
+ OUTPUT_NAME "gmxapi${GMX_LIBS_SUFFIX}"
SOVERSION ${GMXAPI_MAJOR}
VERSION ${GMXAPI_RELEASE}
)
#ifndef GROMACS_TESTINGCONFIGURATION_H
#define GROMACS_TESTINGCONFIGURATION_H
-#include <gtest/gtest.h>
+#include "config.h"
#include <string>
#include <vector>
+ #include "config.h"
+
+#include <gtest/gtest.h>
+
#include "gromacs/gmxpreprocess/grompp.h"
#include "gromacs/math/vec.h"
#include "gromacs/utility/stringutil.h"
#include "gromacs/utility/textwriter.h"
+#include "programs/mdrun/tests/moduletest.h"
#include "testutils/cmdlinetest.h"
#include "testutils/testfilemanager.h"
-#include "programs/mdrun/tests/moduletest.h"
namespace gmxapi
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
/** C compiler used to build */
#define BUILD_C_COMPILER "@BUILD_C_COMPILER@"
+ /** C compiler flags for this build configuration */
+ #define CMAKE_BUILD_CONFIGURATION_C_FLAGS "@CMAKE_BUILD_CONFIGURATION_C_FLAGS@"
+
#include "compilerflaginfo-@CMAKE_BUILD_TYPE@-C.h"
- /** C++ compiler flags used to build, or empty string if no C++ */
+ /** C++ compiler used to build */
#define BUILD_CXX_COMPILER "@BUILD_CXX_COMPILER@"
+ /** C++ compiler flags for this build configuration */
+ #define CMAKE_BUILD_CONFIGURATION_CXX_FLAGS "@CMAKE_BUILD_CONFIGURATION_CXX_FLAGS@"
+
#include "compilerflaginfo-@CMAKE_BUILD_TYPE@-CXX.h"
/** Installation prefix (default location of data files) */
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2010,2011,2012,2013,2014,2015, The GROMACS development team.
-# Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2010,2011,2012,2013,2014 by the GROMACS development team.
+# Copyright (c) 2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
endfunction ()
# TODO Reconsider this, as the CUDA driver API is probably a simpler
-# approach, at least for the build system. See Redmine #2530
+# approach, at least for the build system. See Issue #2530
function (gmx_compile_cpp_as_cuda)
_gmx_add_files_to_property(GMX_LIBGROMACS_GPU_IMPL_SOURCES ${ARGN})
endfunction ()
+# Permit the configuration to disable compiling the many nbnxm kernels
+# and others involved in force calculations. Currently only
+# short-ranged and bonded kernels are disabled this way, but in future
+# others may be appropriate. Thus the cmake option is not specific to
+# nbnxm module.
+option(GMX_USE_SIMD_KERNELS "Whether to compile NBNXM and other SIMD kernels" ON)
+mark_as_advanced(GMX_USE_SIMD_KERNELS)
+
# Add these contents first because linking their tests can take a lot
# of time, so we want lots of parallel work still available after
# linking starts.
# TODO Perhaps generalize this for all headers from src/external
target_include_directories(libgromacs SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/src/external)
- # Temporary fix to allow external access to restraintpotentail
- target_include_directories(libgromacs PUBLIC
- $<INSTALL_INTERFACE:include>
- )
-
if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL"))
# Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file.
# On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions
gmx_manage_lmfit()
target_link_libraries(libgromacs PRIVATE lmfit)
-# Fix everything found by the latest version of clang that we use in
-# Jenkins testing. This should be updated when we update the latest
-# tested version of clang.
-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^7\.0")
+# Make sure we fix "everything" found by more recent versions of clang.
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7")
target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Weverything ${IGNORED_CLANG_ALL_WARNINGS}>)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
"${CLANG_TIDY_EXE};-warnings-as-errors=*")
endif()
+ # clang-3.6 warns about a number of issues that are not reported by more modern compilers
+ # and we know they are not real issues. So we only check that it can compile without error
+ # but ignore all warnings.
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^3\.6")
+ target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-w>)
+ endif()
+
# Only install the library in mdrun-only mode if it is actually necessary
# for the binary
if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS)
install(TARGETS libgromacs
+ COMPONENT libraries
EXPORT libgromacs
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
- INCLUDES DESTINATION include
- COMPONENT libraries)
+ INCLUDES DESTINATION include)
+ target_compile_definitions(libgromacs PUBLIC $<INSTALL_INTERFACE:GMX_DOUBLE=${GMX_DOUBLE_VALUE}>)
+ add_library(Gromacs::libgromacs ALIAS libgromacs)
endif()
if (NOT GMX_BUILD_MDRUN_ONLY)
#include "gromacs/domdec/partition.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/gpu_utils/device_stream_manager.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/hardware/hw_info.h"
#include "gromacs/listed_forces/manage_threading.h"
using gmx::DlbOption;
using gmx::DomdecOptions;
- static const char* edlbs_names[int(DlbState::nr)] = { "off", "auto", "locked", "on", "on" };
+ static const char* edlbs_names[int(DlbState::nr)] = { "off", "off", "auto", "locked", "on", "on" };
/* The size per atom group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2
{ 2, 5, 6 },
{ 3, 5, 7 } };
-
-/*
- #define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
-
- static void index2xyz(ivec nc,int ind,ivec xyz)
- {
- xyz[XX] = ind % nc[XX];
- xyz[YY] = (ind / nc[XX]) % nc[YY];
- xyz[ZZ] = ind / (nc[YY]*nc[XX]);
- }
- */
-
static void ddindex2xyz(const ivec nc, int ind, ivec xyz)
{
xyz[XX] = ind / (nc[YY] * nc[ZZ]);
int ddnodeid = -1;
const CartesianRankSetup& cartSetup = dd->comm->cartesianRankSetup;
- const int ddindex = dd_index(dd->nc, c);
+ const int ddindex = dd_index(dd->numCells, c);
if (cartSetup.bCartesianPP_PME)
{
ddnodeid = cartSetup.ddindex2ddnodeid[ddindex];
ivec coords;
int slab;
- dd = cr->dd;
- /*
- if (dd->comm->bCartesian) {
- gmx_ddindex2xyz(dd->nc,ddindex,coords);
- dd_coords2pmecoords(dd,coords,coords_pme);
- copy_ivec(dd->ntot,nc);
- nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
- coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
-
- slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
- } else {
- slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
- }
- */
+ dd = cr->dd;
coords[XX] = x;
coords[YY] = y;
coords[ZZ] = z;
- slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->nc, coords));
+ slab = ddindex2pmeindex(dd->comm->ddRankSetup, dd_index(dd->numCells, coords));
return slab;
}
}
else
{
- int ddindex = dd_index(cr->dd->nc, coords);
+ int ddindex = dd_index(cr->dd->numCells, coords);
if (cartSetup.bCartesianPP)
{
nodeid = cartSetup.ddindex2simnodeid[ddindex];
ivec coords;
MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
coords[cartSetup.cartpmedim]++;
- if (coords[cartSetup.cartpmedim] < dd->nc[cartSetup.cartpmedim])
+ if (coords[cartSetup.cartpmedim] < dd->numCells[cartSetup.cartpmedim])
{
int rank;
MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
comm = dd->comm;
- snew(*dim_f, dd->nc[dim] + 1);
+ snew(*dim_f, dd->numCells[dim] + 1);
(*dim_f)[0] = 0;
- for (i = 1; i < dd->nc[dim]; i++)
+ for (i = 1; i < dd->numCells[dim]; i++)
{
if (comm->slb_frac[dim])
{
}
else
{
- (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->nc[dim]);
+ (*dim_f)[i] = static_cast<real>(i) / static_cast<real>(dd->numCells[dim]);
}
}
- (*dim_f)[dd->nc[dim]] = 1;
+ (*dim_f)[dd->numCells[dim]] = 1;
}
static void init_ddpme(gmx_domdec_t* dd, gmx_ddpme_t* ddpme, int dimind)
snew(ddpme->pp_max, ddpme->nslab);
for (int slab = 0; slab < ddpme->nslab; slab++)
{
- ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
+ ddpme->pp_min[slab] = dd->numCells[dd->dim[dimind]] - 1;
ddpme->pp_max[slab] = 0;
}
for (int i = 0; i < dd->nnodes; i++)
{
ivec xyz;
- ddindex2xyz(dd->nc, i, xyz);
+ ddindex2xyz(dd->numCells, i, xyz);
/* For y only use our y/z slab.
* This assumes that the PME x grid size matches the DD grid size.
*/
dim = dd->dim[dim_ind];
copy_ivec(loc, loc_c);
- for (i = 0; i < dd->nc[dim]; i++)
+ for (i = 0; i < dd->numCells[dim]; i++)
{
loc_c[dim] = i;
- rank = dd_index(dd->nc, loc_c);
+ rank = dd_index(dd->numCells, loc_c);
if (rank == dd->rank)
{
/* This process is part of the group */
RowMaster& rowMaster = *cellsizes.rowMaster;
rowMaster.cellFrac.resize(ddCellFractionBufferSize(dd, dim_ind));
- rowMaster.oldCellFrac.resize(dd->nc[dim] + 1);
- rowMaster.isCellMin.resize(dd->nc[dim]);
+ rowMaster.oldCellFrac.resize(dd->numCells[dim] + 1);
+ rowMaster.isCellMin.resize(dd->numCells[dim]);
if (dim_ind > 0)
{
- rowMaster.bounds.resize(dd->nc[dim]);
+ rowMaster.bounds.resize(dd->numCells[dim]);
}
- rowMaster.buf_ncd.resize(dd->nc[dim]);
+ rowMaster.buf_ncd.resize(dd->numCells[dim]);
}
else
{
}
if (dd->ci[dim] == dd->master_ci[dim])
{
- snew(dd->comm->load[dim_ind].load, dd->nc[dim] * DD_NLOAD_MAX);
+ snew(dd->comm->load[dim_ind].load, dd->numCells[dim] * DD_NLOAD_MAX);
}
}
}
if (dd->ndim > 1)
{
dim0 = dd->dim[0];
- for (i = 0; i < dd->nc[dim0]; i++)
+ for (i = 0; i < dd->numCells[dim0]; i++)
{
loc[dim0] = i;
make_load_communicator(dd, 1, loc);
if (dd->ndim > 2)
{
dim0 = dd->dim[0];
- for (i = 0; i < dd->nc[dim0]; i++)
+ for (i = 0; i < dd->numCells[dim0]; i++)
{
loc[dim0] = i;
dim1 = dd->dim[1];
- for (j = 0; j < dd->nc[dim1]; j++)
+ for (j = 0; j < dd->numCells[dim1]; j++)
{
loc[dim1] = j;
make_load_communicator(dd, 2, loc);
{
dim = dd->dim[d];
copy_ivec(dd->ci, tmp);
- tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
+ tmp[dim] = (tmp[dim] + 1) % dd->numCells[dim];
dd->neighbor[d][0] = ddcoord2ddnodeid(dd, tmp);
copy_ivec(dd->ci, tmp);
- tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
+ tmp[dim] = (tmp[dim] - 1 + dd->numCells[dim]) % dd->numCells[dim];
dd->neighbor[d][1] = ddcoord2ddnodeid(dd, tmp);
if (debug)
{
s[d] = dd->ci[d] - zones->shift[i][d];
if (s[d] < 0)
{
- s[d] += dd->nc[d];
+ s[d] += dd->numCells[d];
}
- else if (s[d] >= dd->nc[d])
+ else if (s[d] >= dd->numCells[d])
{
- s[d] -= dd->nc[d];
+ s[d] -= dd->numCells[d];
}
}
}
std::min(ddNonbondedZonePairRanges[iZoneIndex][2], nzone));
for (dim = 0; dim < DIM; dim++)
{
- if (dd->nc[dim] == 1)
+ if (dd->numCells[dim] == 1)
{
/* All shifts should be allowed */
iZone.shift0[dim] = -1;
{
/* Set up cartesian communication for the particle-particle part */
GMX_LOG(mdlog.info)
- .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d", dd->nc[XX],
- dd->nc[YY], dd->nc[ZZ]);
+ .appendTextFormatted("Will use a Cartesian communicator: %d x %d x %d",
+ dd->numCells[XX], dd->numCells[YY], dd->numCells[ZZ]);
ivec periods;
for (int i = 0; i < DIM; i++)
periods[i] = TRUE;
}
MPI_Comm comm_cart;
- MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->nc, periods, static_cast<int>(reorder), &comm_cart);
+ MPI_Cart_create(cr->mpi_comm_mygroup, DIM, dd->numCells, periods, static_cast<int>(reorder),
+ &comm_cart);
/* We overwrite the old communicator with the new cartesian one */
cr->mpi_comm_mygroup = comm_cart;
}
* and not the one after split, we need to make an index.
*/
cartSetup.ddindex2ddnodeid.resize(dd->nnodes);
- cartSetup.ddindex2ddnodeid[dd_index(dd->nc, dd->ci)] = dd->rank;
+ cartSetup.ddindex2ddnodeid[dd_index(dd->numCells, dd->ci)] = dd->rank;
gmx_sumi(dd->nnodes, cartSetup.ddindex2ddnodeid.data(), cr);
/* Get the rank of the DD master,
* above we made sure that the master node is a PP node.
std::vector<int> buf(dd->nnodes);
if (thisRankHasDuty(cr, DUTY_PP))
{
- buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
+ buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
}
/* Communicate the ddindex to simulation nodeid index */
MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
{
if (cartSetup.ddindex2simnodeid[i] == 0)
{
- ddindex2xyz(dd->nc, i, dd->master_ci);
+ ddindex2xyz(dd->numCells, i, dd->master_ci);
MPI_Cart_rank(dd->mpi_comm_all, dd->master_ci, &dd->masterrank);
}
}
{
/* No Cartesian communicators */
/* We use the rank in dd->comm->all as DD index */
- ddindex2xyz(dd->nc, dd->rank, dd->ci);
+ ddindex2xyz(dd->numCells, dd->rank, dd->ci);
/* The simulation master nodeid is 0, so the DD master rank is also 0 */
dd->masterrank = 0;
clear_ivec(dd->master_ci);
std::vector<int> buf(dd->nnodes);
if (thisRankHasDuty(cr, DUTY_PP))
{
- buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
+ buf[dd_index(dd->numCells, dd->ci)] = cr->sim_nodeid;
}
/* Communicate the ddindex to simulation nodeid index */
MPI_Allreduce(buf.data(), cartSetup.ddindex2simnodeid.data(), dd->nnodes, MPI_INT, MPI_SUM,
/* We can not use DDMASTER(dd), because dd->masterrank is set later */
if (MASTER(cr))
{
- dd->ma = std::make_unique<AtomDistribution>(dd->nc, numAtomsInSystem, numAtomsInSystem);
+ dd->ma = std::make_unique<AtomDistribution>(dd->numCells, numAtomsInSystem, numAtomsInSystem);
}
}
static void check_dd_restrictions(const gmx_domdec_t* dd, const t_inputrec* ir, const gmx::MDLogger& mdlog)
{
- if (ir->ePBC == epbcSCREW && (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
+ if (ir->pbcType == PbcType::Screw
+ && (dd->numCells[XX] == 1 || dd->numCells[YY] > 1 || dd->numCells[ZZ] > 1))
{
gmx_fatal(FARGS, "With pbc=%s can only do domain decomposition in the x-direction",
- epbc_names[ir->ePBC]);
+ c_pbcTypeNames[ir->pbcType].c_str());
}
if (ir->nstlist == 0)
gmx_fatal(FARGS, "Domain decomposition does not work with nstlist=0");
}
- if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
+ if (ir->comm_mode == ecmANGULAR && ir->pbcType != PbcType::No)
{
GMX_LOG(mdlog.warning)
.appendText(
}
UnitCellInfo::UnitCellInfo(const t_inputrec& ir) :
- npbcdim(ePBC2npbcdim(ir.ePBC)),
+ npbcdim(numPbcDimensions(ir.pbcType)),
numBoundedDimensions(inputrec2nboundeddim(&ir)),
ddBoxIsDynamic(numBoundedDimensions < DIM || inputrecDynamicBox(&ir)),
- haveScrewPBC(ir.ePBC == epbcSCREW)
+ haveScrewPBC(ir.pbcType == PbcType::Screw)
{
}
systemInfo.useUpdateGroups = false;
if (ir.cutoff_scheme == ecutsVERLET)
{
- real cutoffMargin = std::sqrt(max_cutoff2(ir.ePBC, box)) - ir.rlist;
+ real cutoffMargin = std::sqrt(max_cutoff2(ir.pbcType, box)) - ir.rlist;
setupUpdateGroups(mdlog, mtop, ir, cutoffMargin, &systemInfo);
}
dd_bonded_cg_distance(mdlog, &mtop, &ir, as_rvec_array(xGlobal.data()), box,
options.checkBondedInteractions, &r_2b, &r_mb);
}
- gmx_bcast(sizeof(r_2b), &r_2b, cr);
- gmx_bcast(sizeof(r_mb), &r_mb, cr);
+ gmx_bcast(sizeof(r_2b), &r_2b, cr->mpi_comm_mygroup);
+ gmx_bcast(sizeof(r_mb), &r_mb, cr->mpi_comm_mygroup);
/* We use an initial margin of 10% for the minimum cell size,
* except when we are just below the non-bonded cut-off.
}
/* Set the DD setup given by ddGridSetup */
- copy_ivec(ddGridSetup.numDomains, dd->nc);
+ copy_ivec(ddGridSetup.numDomains, dd->numCells);
dd->ndim = ddGridSetup.numDDDimensions;
copy_ivec(ddGridSetup.ddDimensions, dd->dim);
- dd->nnodes = dd->nc[XX] * dd->nc[YY] * dd->nc[ZZ];
+ dd->nnodes = dd->numCells[XX] * dd->numCells[YY] * dd->numCells[ZZ];
snew(comm->slb_frac, DIM);
if (isDlbDisabled(comm))
{
- comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->nc[XX], options.cellSizeX);
- comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->nc[YY], options.cellSizeY);
- comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->nc[ZZ], options.cellSizeZ);
+ comm->slb_frac[XX] = get_slb_frac(mdlog, "x", dd->numCells[XX], options.cellSizeX);
+ comm->slb_frac[YY] = get_slb_frac(mdlog, "y", dd->numCells[YY], options.cellSizeY);
+ comm->slb_frac[ZZ] = get_slb_frac(mdlog, "z", dd->numCells[ZZ], options.cellSizeZ);
}
/* Set the multi-body cut-off and cellsize limit for DLB */
* the minimum and the maximum,
* since the extra communication cost is nearly zero.
*/
- real acs = average_cellsize_min(ddbox, dd->nc);
+ real acs = average_cellsize_min(ddbox, dd->numCells);
comm->cutoff_mbody = 0.5 * (systemInfo.minCutoffForMultiBody + acs);
if (!isDlbDisabled(comm))
{
}
}
-void dd_init_bondeds(FILE* fplog,
- gmx_domdec_t* dd,
- const gmx_mtop_t* mtop,
- const gmx_vsite_t* vsite,
- const t_inputrec* ir,
- gmx_bool bBCheck,
- cginfo_mb_t* cginfo_mb)
+void dd_init_bondeds(FILE* fplog,
+ gmx_domdec_t* dd,
+ const gmx_mtop_t& mtop,
+ const gmx_vsite_t* vsite,
+ const t_inputrec* ir,
+ gmx_bool bBCheck,
+ gmx::ArrayRef<cginfo_mb_t> cginfo_mb)
{
gmx_domdec_comm_t* comm;
- dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
+ dd_make_reverse_top(fplog, dd, &mtop, vsite, ir, bBCheck);
comm = dd->comm;
log->writeString("The allowed shrink of domain decomposition cells is:");
for (d = 0; d < DIM; d++)
{
- if (dd->nc[d] > 1)
+ if (dd->numCells[d] > 1)
{
- if (d >= ddbox->npbcdim && dd->nc[d] == 2)
+ if (d >= ddbox->npbcdim && dd->numCells[d] == 2)
{
shrink = 0;
}
else
{
shrink = comm->cellsize_min_dlb[d]
- / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->nc[d]);
+ / (ddbox->box_size[d] * ddbox->skew_fac[d] / dd->numCells[d]);
}
log->writeStringFormatted(" %c %.2f", dim2char(d), shrink);
}
log->writeString("The initial domain decomposition cell size is:");
for (d = 0; d < DIM; d++)
{
- if (dd->nc[d] > 1)
+ if (dd->numCells[d] > 1)
{
log->writeStringFormatted(" %c %.2f nm", dim2char(d), dd->comm->cellsize_min[d]);
}
else
{
/* There is no cell size limit */
- npulse = std::max(dd->nc[XX] - 1, std::max(dd->nc[YY] - 1, dd->nc[ZZ] - 1));
+ npulse = std::max(dd->numCells[XX] - 1, std::max(dd->numCells[YY] - 1, dd->numCells[ZZ] - 1));
}
if (!bNoCutOff && npulse > 1)
dim = dd->dim[d];
npulse_d = static_cast<int>(
1
- + dd->nc[dim] * comm->systemInfo.cutoff
+ + dd->numCells[dim] * comm->systemInfo.cutoff
/ (ddbox->box_size[dim] * ddbox->skew_fac[dim] * dlb_scale));
npulse_d_max = std::max(npulse_d_max, npulse_d);
}
}
comm->maxpulse = 1;
- comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
+ comm->bVacDLBNoLimit = (ir->pbcType == PbcType::No);
for (d = 0; d < dd->ndim; d++)
{
- if (comm->ddSettings.request1DAnd1Pulse)
- {
- comm->cd[d].np_dlb = 1;
- }
- else
- {
- comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]] - 1);
- comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
- }
- if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]] - 1)
+ comm->cd[d].np_dlb = std::min(npulse, dd->numCells[dd->dim[d]] - 1);
+ comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
+ if (comm->cd[d].np_dlb < dd->numCells[dd->dim[d]] - 1)
{
comm->bVacDLBNoLimit = FALSE;
}
return dd.comm->systemInfo.moleculesAreAlwaysWhole;
}
-gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, int ePBC)
+gmx_bool dd_bonded_molpbc(const gmx_domdec_t* dd, PbcType pbcType)
{
/* If each molecule is a single charge group
* or we use domain decomposition for each periodic dimension,
* we do not need to take pbc into account for the bonded interactions.
*/
- return (ePBC != epbcNONE && dd->comm->systemInfo.haveInterDomainBondeds
- && !(dd->nc[XX] > 1 && dd->nc[YY] > 1 && (dd->nc[ZZ] > 1 || ePBC == epbcXY)));
+ return (pbcType != PbcType::No && dd->comm->systemInfo.haveInterDomainBondeds
+ && !(dd->numCells[XX] > 1 && dd->numCells[YY] > 1
+ && (dd->numCells[ZZ] > 1 || pbcType == PbcType::XY)));
}
/*! \brief Sets grid size limits and PP-PME setup, prints settings to log */
logSettings(mdlog, dd, mtop, ir, dlb_scale, ddbox);
real vol_frac;
- if (ir->ePBC == epbcNONE)
+ if (ir->pbcType == PbcType::No)
{
vol_frac = 1 - 1 / static_cast<double>(dd->nnodes);
}
else
{
- vol_frac = (1 + comm_box_frac(dd->nc, comm->systemInfo.cutoff, *ddbox))
+ vol_frac = (1 + comm_box_frac(dd->numCells, comm->systemInfo.cutoff, *ddbox))
/ static_cast<double>(dd->nnodes);
}
if (debug)
ddSettings.useSendRecv2 = (dd_getenv(mdlog, "GMX_DD_USE_SENDRECV2", 0) != 0);
ddSettings.dlb_scale_lim = dd_getenv(mdlog, "GMX_DLB_MAX_BOX_SCALING", 10);
- ddSettings.request1DAnd1Pulse = bool(dd_getenv(mdlog, "GMX_DD_1D_1PULSE", 0));
+ ddSettings.request1D = bool(dd_getenv(mdlog, "GMX_DD_1D", 0));
ddSettings.useDDOrderZYX = bool(dd_getenv(mdlog, "GMX_DD_ORDER_ZYX", 0));
ddSettings.useCartesianReorder = bool(dd_getenv(mdlog, "GMX_NO_CART_REORDER", 1));
ddSettings.eFlop = dd_getenv(mdlog, "GMX_DLB_BASED_ON_FLOPS", 0);
gmx_domdec_t::gmx_domdec_t(const t_inputrec& ir) : unitCellInfo(ir) {}
-/*! \brief Return whether the simulation described can run a 1D single-pulse DD.
+/*! \brief Return whether the simulation described can run a 1D DD.
*
- * The GPU halo exchange code requires a 1D single-pulse DD. Such a DD
+ * The GPU halo exchange code requires a 1D DD. Such a DD
* generally requires a larger box than other possible decompositions
* with the same rank count, so the calling code might need to decide
* what is the most appropriate way to run the simulation based on
* exchange code path. The number of PME ranks, if any, should be set
* in \c options.numPmeRanks.
*/
-static bool canMake1DAnd1PulseDomainDecomposition(const DDSettings& ddSettingsOriginal,
- const t_commrec* cr,
- const int numRanksRequested,
- const DomdecOptions& options,
- const gmx_mtop_t& mtop,
- const t_inputrec& ir,
- const matrix box,
- gmx::ArrayRef<const gmx::RVec> xGlobal)
+static bool canMake1DDomainDecomposition(const DDSettings& ddSettingsOriginal,
+ const t_commrec* cr,
+ const int numRanksRequested,
+ const DomdecOptions& options,
+ const gmx_mtop_t& mtop,
+ const t_inputrec& ir,
+ const matrix box,
+ gmx::ArrayRef<const gmx::RVec> xGlobal)
{
// Ensure we don't write any output from this checking routine
gmx::MDLogger dummyLogger;
DDSystemInfo systemInfo = getSystemInfo(dummyLogger, cr, options, mtop, ir, box, xGlobal);
- DDSettings ddSettings = ddSettingsOriginal;
- ddSettings.request1DAnd1Pulse = true;
- const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
- dummyLogger, ddSettings.request1DAnd1Pulse, !isDlbDisabled(ddSettings.initialDlbState),
- options.dlbScaling, ir, systemInfo.cellsizeLimit);
+ DDSettings ddSettings = ddSettingsOriginal;
+ ddSettings.request1D = true;
+ const real gridSetupCellsizeLimit =
+ getDDGridSetupCellSizeLimit(dummyLogger, !isDlbDisabled(ddSettings.initialDlbState),
+ options.dlbScaling, ir, systemInfo.cellsizeLimit);
gmx_ddbox_t ddbox = { 0 };
DDGridSetup ddGridSetup =
getDDGridSetup(dummyLogger, cr, numRanksRequested, options, ddSettings, systemInfo,
gridSetupCellsizeLimit, mtop, ir, box, xGlobal, &ddbox);
- const bool canMakeDDWith1DAnd1Pulse = (ddGridSetup.numDomains[XX] != 0);
+ const bool canMake1DDD = (ddGridSetup.numDomains[XX] != 0);
- return canMakeDDWith1DAnd1Pulse;
+ return canMake1DDD;
}
-bool is1DAnd1PulseDD(const gmx_domdec_t& dd)
+bool is1D(const gmx_domdec_t& dd)
{
- const int maxDimensionSize = std::max(dd.nc[XX], std::max(dd.nc[YY], dd.nc[ZZ]));
- const int productOfDimensionSizes = dd.nc[XX] * dd.nc[YY] * dd.nc[ZZ];
+ const int maxDimensionSize = std::max(dd.numCells[XX], std::max(dd.numCells[YY], dd.numCells[ZZ]));
+ const int productOfDimensionSizes = dd.numCells[XX] * dd.numCells[YY] * dd.numCells[ZZ];
const bool decompositionHasOneDimension = (maxDimensionSize == productOfDimensionSizes);
- const bool hasMax1Pulse =
- ((isDlbDisabled(dd.comm) && dd.comm->cellsize_limit >= dd.comm->systemInfo.cutoff)
- || (!isDlbDisabled(dd.comm) && dd.comm->maxpulse == 1));
-
- return decompositionHasOneDimension && hasMax1Pulse;
+ return decompositionHasOneDimension;
}
namespace gmx
t_commrec* cr,
const DomdecOptions& options,
const MdrunOptions& mdrunOptions,
- bool prefer1DAnd1Pulse,
+ bool prefer1D,
const gmx_mtop_t& mtop,
const t_inputrec& ir,
const matrix box,
t_commrec* cr,
const DomdecOptions& options,
const MdrunOptions& mdrunOptions,
- const bool prefer1DAnd1Pulse,
+ const bool prefer1D,
const gmx_mtop_t& mtop,
const t_inputrec& ir,
const matrix box,
ddSettings_ = getDDSettings(mdlog_, options_, mdrunOptions, ir_);
- if (prefer1DAnd1Pulse
- && canMake1DAnd1PulseDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_,
- ir_, box, xGlobal))
+ if (prefer1D
+ && canMake1DDomainDecomposition(ddSettings_, cr_, cr_->nnodes, options_, mtop_, ir_, box, xGlobal))
{
- ddSettings_.request1DAnd1Pulse = true;
+ ddSettings_.request1D = true;
}
if (ddSettings_.eFlop > 1)
// DD grid setup uses a more different cell size limit for
// automated setup than the one in systemInfo_. The latter is used
// in set_dd_limits() to configure DLB, for example.
- const real gridSetupCellsizeLimit = getDDGridSetupCellSizeLimit(
- mdlog_, ddSettings_.request1DAnd1Pulse, !isDlbDisabled(ddSettings_.initialDlbState),
- options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
+ const real gridSetupCellsizeLimit =
+ getDDGridSetupCellSizeLimit(mdlog_, !isDlbDisabled(ddSettings_.initialDlbState),
+ options_.dlbScaling, ir_, systemInfo_.cellsizeLimit);
ddGridSetup_ = getDDGridSetup(mdlog_, cr_, numRanksRequested, options_, ddSettings_, systemInfo_,
gridSetupCellsizeLimit, mtop_, ir_, box, xGlobal, &ddbox_);
checkDDGridSetup(ddGridSetup_, cr_, options_, ddSettings_, systemInfo_, gridSetupCellsizeLimit, ddbox_);
t_commrec* cr,
const DomdecOptions& options,
const MdrunOptions& mdrunOptions,
- const bool prefer1DAnd1Pulse,
+ const bool prefer1D,
const gmx_mtop_t& mtop,
const t_inputrec& ir,
const matrix box,
ArrayRef<const RVec> xGlobal) :
- impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1DAnd1Pulse, mtop, ir, box, xGlobal))
+ impl_(new Impl(mdlog, cr, options, mdrunOptions, prefer1D, mtop, ir, box, xGlobal))
{
}
{
dim = dd->dim[d];
- inv_cell_size = DD_CELL_MARGIN * dd->nc[dim] / ddbox.box_size[dim];
+ inv_cell_size = DD_CELL_MARGIN * dd->numCells[dim] / ddbox.box_size[dim];
if (dd->unitCellInfo.ddBoxIsDynamic)
{
inv_cell_size *= DD_PRES_SCALE_MARGIN;
np = 1 + static_cast<int>(cutoffRequested * inv_cell_size * ddbox.skew_fac[dim]);
- if (dd->comm->ddSettings.request1DAnd1Pulse && np > 1)
- {
- return FALSE;
- }
-
if (!isDlbDisabled(dd->comm) && (dim < ddbox.npbcdim) && (dd->comm->cd[d].np_dlb > 0))
{
if (np > dd->comm->cd[d].np_dlb)
return bCutoffAllowed;
}
+
+/*! \brief Construct GPU halo exchange objects for DD dimension 0, one per
+ *         communication pulse, when more pulses are needed than exist so far.
+ *
+ * \param[in] mdlog                Logger; notes activation of the feature.
+ * \param[in] cr                   Communication record holding the DD state.
+ * \param[in] deviceStreamManager  Manager owning the local and non-local
+ *                                 non-bonded GPU streams used for exchange.
+ */
+void constructGpuHaloExchange(const gmx::MDLogger& mdlog,
+                              const t_commrec& cr,
+                              const gmx::DeviceStreamManager& deviceStreamManager)
+{
+    // Both non-bonded device streams must exist before they can be handed
+    // to the halo-exchange objects created below.
+    GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedLocal),
+                       "Local non-bonded stream should be valid when using "
+                       "GPU halo exchange.");
+    GMX_RELEASE_ASSERT(deviceStreamManager.streamIsValid(gmx::DeviceStreamType::NonBondedNonLocal),
+                       "Non-local non-bonded stream should be valid when using "
+                       "GPU halo exchange.");
+    int gpuHaloExchangeSize = 0;
+    int pulseStart          = 0;
+    if (cr.dd->gpuHaloExchange.empty())
+    {
+        // First call: announce that the env-var-gated feature is active.
+        GMX_LOG(mdlog.warning)
+                .asParagraph()
+                .appendTextFormatted(
+                        "NOTE: Activating the 'GPU halo exchange' feature, enabled "
+                        "by the "
+                        "GMX_GPU_DD_COMMS environment variable.");
+    }
+    else
+    {
+        gpuHaloExchangeSize = static_cast<int>(cr.dd->gpuHaloExchange.size());
+        // NOTE(review): starting at size-1 appends a second object for the
+        // last already-existing pulse (push_back, not replace), shifting the
+        // pulse->index mapping used by the communicate loops — confirm whether
+        // pulseStart = gpuHaloExchangeSize (without -1) is intended.
+        pulseStart = gpuHaloExchangeSize - 1;
+    }
+    if (cr.dd->comm->cd[0].numPulses() > gpuHaloExchangeSize)
+    {
+        // Append one GpuHaloExchange object per missing pulse of dimension 0.
+        for (int pulse = pulseStart; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
+        {
+            cr.dd->gpuHaloExchange.push_back(std::make_unique<gmx::GpuHaloExchange>(
+                    cr.dd, cr.mpi_comm_mysim, deviceStreamManager.context(),
+                    deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedLocal),
+                    deviceStreamManager.stream(gmx::DeviceStreamType::NonBondedNonLocal), pulse));
+        }
+    }
+}
+
+/*! \brief Re-initialize every GPU halo exchange object (one per pulse of
+ *         DD dimension 0) with the current device coordinate/force buffers.
+ */
+void reinitGpuHaloExchange(const t_commrec& cr,
+                           const DeviceBuffer<gmx::RVec> d_coordinatesBuffer,
+                           const DeviceBuffer<gmx::RVec> d_forcesBuffer)
+{
+    for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
+    {
+        cr.dd->gpuHaloExchange[pulse]->reinitHalo(d_coordinatesBuffer, d_forcesBuffer);
+    }
+}
+
+/*! \brief Launch GPU halo exchange of coordinates for every pulse of DD
+ *         dimension 0, in increasing pulse order.
+ */
+void communicateGpuHaloCoordinates(const t_commrec& cr,
+                                   const matrix box,
+                                   GpuEventSynchronizer* coordinatesReadyOnDeviceEvent)
+{
+    for (int pulse = 0; pulse < cr.dd->comm->cd[0].numPulses(); pulse++)
+    {
+        cr.dd->gpuHaloExchange[pulse]->communicateHaloCoordinates(box, coordinatesReadyOnDeviceEvent);
+    }
+}
+
+/*! \brief Launch GPU halo exchange of forces for every pulse of DD
+ *         dimension 0.
+ *
+ * Pulses run in decreasing order — the reverse of the coordinate
+ * communication — so forces flow back one pulse at a time.
+ */
+void communicateGpuHaloForces(const t_commrec& cr, bool accumulateForces)
+{
+    for (int pulse = cr.dd->comm->cd[0].numPulses() - 1; pulse >= 0; pulse--)
+    {
+        cr.dd->gpuHaloExchange[pulse]->communicateHaloForces(accumulateForces);
+    }
+}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
findLabelInLine(lineString, "type"); // discard the returned string
}
- if (!line || strncmp(line, "static", 6) != 0)
+ if (!line_buf || strncmp(line_buf, "static", 6) != 0)
{
gmx_input("Invalid XPixMap");
}
line = line_buf;
}
bSetLine = TRUE;
+ GMX_RELEASE_ASSERT(line, "Need to have valid line to parse");
if (strstr(line, "x-axis"))
{
line = std::strstr(line, "x-axis");
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2012,2013,2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
mat->axis_y.resize(nr);
std::iota(mat->axis_y.begin(), mat->axis_y.end(), 1);
mat->axis_x.resize(0);
- mat->matrix.resize(0, 0);
+ mat->matrix.resize(1, 1);
bFirst = false;
}
mat->axis_x.push_back(t);
for (i = 0; (i < atoms->nr); i++)
{
- if (std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
- {
- *atoms->atomname[i] = OOO;
- }
- else if (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
- {
- *atoms->atomname[i] = OOO;
- }
- else if (std::strcmp(*(atoms->atomname[i]), "OC1") == 0)
+ if ((std::strcmp(*(atoms->atomname[i]), "OXT") == 0)
+ || (std::strcmp(*(atoms->atomname[i]), "O1") == 0)
+ || (std::strcmp(*(atoms->atomname[i]), "OC1") == 0))
{
*atoms->atomname[i] = OOO;
}
const char * fnSCount, *fnArea, *fnTArea, *fnAArea;
const char* leg[] = { "Phobic", "Phylic" };
t_topology top;
- int ePBC;
+ PbcType pbcType;
t_atoms* atoms;
t_matrix mat;
int nres, nr0, naccr, nres_plus_separators;
fnAArea = opt2fn_null("-aa", NFILE, fnm);
bDoAccSurf = ((fnArea != nullptr) || (fnTArea != nullptr) || (fnAArea != nullptr));
- read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xp, nullptr, box, FALSE);
+ read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &pbcType, &xp, nullptr, box, FALSE);
atoms = &(top.atoms);
check_oo(atoms);
bPhbres = bPhobics(atoms);
accr = nullptr;
naccr = 0;
- gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms);
+ gpbc = gmx_rmpbc_init(&top.idef, pbcType, natoms);
do
{
t = output_env_conv_time(oenv, t);
}
gmx_rmpbc(gpbc, natoms, box, x);
tapein = gmx_ffopen(pdbfile, "w");
- write_pdbfile_indexed(tapein, nullptr, atoms, x, ePBC, box, ' ', -1, gnx, index, nullptr, FALSE);
+ write_pdbfile_indexed(tapein, nullptr, atoms, x, pbcType, box, ' ', -1, gnx, index, nullptr, FALSE);
gmx_ffclose(tapein);
/* strip_dssp returns the number of lines found in the dssp file, i.e.
* the number of residues plus the separator lines */
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
t_dih* dd;
gmx_rmpbc_t gpbc = nullptr;
- gpbc = gmx_rmpbc_init(xr->idef, xr->ePBC, xr->natoms);
+ gpbc = gmx_rmpbc_init(xr->idef, xr->pbcType, xr->natoms);
gmx_rmpbc(gpbc, xr->natoms, xr->box, xr->x);
gmx_rmpbc_done(gpbc);
char buf[12];
int i;
- srenew(xr->dih, xr->ndih + 2);
+ srenew(xr->dih, xr->ndih + 2LL);
for (i = 0; (i < 4); i++)
{
xr->dih[xr->ndih].ai[i] = ff[i];
}
xr->ndih += 2;
- srenew(xr->pp, xr->npp + 1);
+ srenew(xr->pp, xr->npp + 1LL);
xr->pp[xr->npp].iphi = xr->ndih - 2;
xr->pp[xr->npp].ipsi = xr->ndih - 1;
xr->pp[xr->npp].bShow = FALSE;
{
for (j = 0; (j < 4); j++)
{
+ MSVC_DIAGNOSTIC_IGNORE(28182) // false positive in 2019 (16.5.4)
ai = xr->dih[i].ai[j];
+ MSVC_DIAGNOSTIC_RESET
if (ai < xr->amin)
{
xr->amin = ai;
}
i += nra + 1;
- ia += nra + 1;
+ ia += nra + 1LL;
}
/* Fill in defaults for values not in the topology */
for (i = 0; (i < xr->ndih); i++)
t_topology* top;
real t;
- top = read_top(topfile, &xr->ePBC);
+ top = read_top(topfile, &xr->pbcType);
/*get_dih2(xr,top->idef.functype,&(top->idef.bondeds),&(top->atoms));*/
get_dih(xr, &(top->atoms));
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "nb_free_energy.h"
+#include "config.h"
+
#include <cmath>
#include <algorithm>
#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
+#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/simd/simd.h"
#include "gromacs/utility/fatalerror.h"
-//! Enum for templating the soft-core treatment in the kernel
-enum class SoftCoreTreatment
-{
- None, //!< No soft-core
- RPower6, //!< Soft-core with r-power = 6
- RPower48 //!< Soft-core with r-power = 48
-};
-
-//! Most treatments are fine with float in mixed-precision mode.
-template<SoftCoreTreatment softCoreTreatment>
-struct SoftCoreReal
+//! Scalar (non-SIMD) data types.
+struct ScalarDataTypes
{
- //! Real type for soft-core calculations
- using Real = real;
+ using RealType = real; //!< The data type to use as real.
+ using IntType = int; //!< The data type to use as int.
+ static constexpr int simdRealWidth = 1; //!< The width of the RealType.
+ static constexpr int simdIntWidth = 1; //!< The width of the IntType.
};
-//! This treatment requires double precision for some computations.
-template<>
-struct SoftCoreReal<SoftCoreTreatment::RPower48>
+#if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS
+//! SIMD data types.
+struct SimdDataTypes
{
- //! Real type for soft-core calculations
- using Real = double;
+ using RealType = gmx::SimdReal; //!< The data type to use as real.
+ using IntType = gmx::SimdInt32; //!< The data type to use as int.
+ static constexpr int simdRealWidth = GMX_SIMD_REAL_WIDTH; //!< The width of the RealType.
+ static constexpr int simdIntWidth = GMX_SIMD_FINT32_WIDTH; //!< The width of the IntType.
};
+#endif
//! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
-template<SoftCoreTreatment softCoreTreatment>
-static inline void pthRoot(const real r, real* pthRoot, real* invPthRoot)
-{
- *invPthRoot = gmx::invsqrt(std::cbrt(r));
- *pthRoot = 1 / (*invPthRoot);
-}
-
-// We need a double version to make the specialization below work
-#if !GMX_DOUBLE
-//! Computes r^(1/p) and 1/r^(1/p) for the standard p=6
-template<SoftCoreTreatment softCoreTreatment>
-static inline void pthRoot(const double r, real* pthRoot, double* invPthRoot)
+template<class RealType>
+static inline void pthRoot(const RealType r, RealType* pthRoot, RealType* invPthRoot)
{
+    // 1/r^(1/6) == invsqrt(cbrt(r)); the reciprocal then yields r^(1/6).
    *invPthRoot = gmx::invsqrt(std::cbrt(r));
    *pthRoot = 1 / (*invPthRoot);
}
-#endif
-//! Computes r^(1/p) and 1/r^(1/p) for p=48
-template<>
-inline void pthRoot<SoftCoreTreatment::RPower48>(const double r, real* pthRoot, double* invPthRoot)
+template<class RealType>
+static inline RealType calculateRinv6(const RealType rinvV)
{
-    *pthRoot = std::pow(r, 1.0 / 48.0);
-    *invPthRoot = 1 / (*pthRoot);
+    // (rinvV^2)^3 == rinvV^6, the inverse sixth power used by the LJ terms.
+    RealType rinv6 = rinvV * rinvV;
+    return (rinv6 * rinv6 * rinv6);
}
-template<SoftCoreTreatment softCoreTreatment>
-static inline real calculateSigmaPow(const real sigma6)
-{
- if (softCoreTreatment == SoftCoreTreatment::RPower6)
- {
- return sigma6;
- }
- else
- {
- real sigmaPow = sigma6 * sigma6; /* sigma^12 */
- sigmaPow = sigmaPow * sigmaPow; /* sigma^24 */
- sigmaPow = sigmaPow * sigmaPow; /* sigma^48 */
- return (sigmaPow);
- }
-}
-
-template<SoftCoreTreatment softCoreTreatment, class SCReal>
-static inline real calculateRinv6(const SCReal rinvV)
-{
- if (softCoreTreatment == SoftCoreTreatment::RPower6)
- {
- return rinvV;
- }
- else
- {
- real rinv6 = rinvV * rinvV;
- return (rinv6 * rinv6 * rinv6);
- }
-}
-
-static inline real calculateVdw6(const real c6, const real rinv6)
+//! Dispersion term of LJ: c6 * r^-6.
+template<class RealType>
+static inline RealType calculateVdw6(const RealType c6, const RealType rinv6)
{
    return (c6 * rinv6);
}
-static inline real calculateVdw12(const real c12, const real rinv6)
+//! Repulsion term of LJ: c12 * r^-12, built by squaring the supplied r^-6.
+template<class RealType>
+static inline RealType calculateVdw12(const RealType c12, const RealType rinv6)
{
    return (c12 * rinv6 * rinv6);
}
/* reaction-field electrostatics */
-template<class SCReal>
-static inline SCReal
-reactionFieldScalarForce(const real qq, const real rinv, const SCReal r, const real krf, const real two)
+//! Reaction-field scalar force contribution: qq * (rinv - 2*krf*r^2).
+template<class RealType>
+static inline RealType reactionFieldScalarForce(const RealType qq,
+                                                const RealType rinv,
+                                                const RealType r,
+                                                const real krf,
+                                                const real two)
{
    return (qq * (rinv - two * krf * r * r));
}
-template<class SCReal>
-static inline real
-reactionFieldPotential(const real qq, const real rinv, const SCReal r, const real krf, const real potentialShift)
+//! Reaction-field potential: qq * (rinv + krf*r^2 - potentialShift).
+template<class RealType>
+static inline RealType reactionFieldPotential(const RealType qq,
+                                              const RealType rinv,
+                                              const RealType r,
+                                              const real krf,
+                                              const real potentialShift)
{
    return (qq * (rinv + krf * r * r - potentialShift));
}
/* Ewald electrostatics */
-static inline real ewaldScalarForce(const real coulomb, const real rinv)
+//! Ewald short-range scalar force contribution: coulomb * rinv.
+template<class RealType>
+static inline RealType ewaldScalarForce(const RealType coulomb, const RealType rinv)
{
    return (coulomb * rinv);
}
-static inline real ewaldPotential(const real coulomb, const real rinv, const real potentialShift)
+//! Ewald short-range potential: coulomb * (rinv - potentialShift).
+template<class RealType>
+static inline RealType ewaldPotential(const RealType coulomb, const RealType rinv, const real potentialShift)
{
    return (coulomb * (rinv - potentialShift));
}
/* cutoff LJ */
-static inline real lennardJonesScalarForce(const real v6, const real v12)
+//! Cut-off LJ scalar force: repulsion minus dispersion (v12 - v6).
+template<class RealType>
+static inline RealType lennardJonesScalarForce(const RealType v6, const RealType v12)
{
    return (v12 - v6);
}
-static inline real lennardJonesPotential(const real v6,
- const real v12,
- const real c6,
- const real c12,
- const real repulsionShift,
- const real dispersionShift,
- const real onesixth,
- const real onetwelfth)
+template<class RealType>
+static inline RealType lennardJonesPotential(const RealType v6,
+                                             const RealType v12,
+                                             const RealType c6,
+                                             const RealType c12,
+                                             const real repulsionShift,
+                                             const real dispersionShift,
+                                             const real onesixth,
+                                             const real onetwelfth)
{
+    // Shift constants are applied per term before the conventional 1/12
+    // (repulsion) and 1/6 (dispersion) prefactors.
    return ((v12 + c12 * repulsionShift) * onetwelfth - (v6 + c6 * dispersionShift) * onesixth);
}
}
/* LJ Potential switch */
-template<class SCReal>
-static inline SCReal potSwitchScalarForceMod(const SCReal fScalarInp,
- const real potential,
- const real sw,
- const SCReal r,
- const real rVdw,
- const real dsw,
- const real zero)
+//! Applies the potential-switch modification to the scalar force for r < rVdw.
+template<class RealType>
+static inline RealType potSwitchScalarForceMod(const RealType fScalarInp,
+                                               const RealType potential,
+                                               const RealType sw,
+                                               const RealType r,
+                                               const RealType rVdw,
+                                               const RealType dsw,
+                                               const real zero)
{
    if (r < rVdw)
    {
-        SCReal fScalar = fScalarInp * sw - r * potential * dsw;
+        // Declare as RealType (not plain real) for consistency with the
+        // RealType operands and the RealType return value of this template.
+        RealType fScalar = fScalarInp * sw - r * potential * dsw;
        return (fScalar);
    }
    return (zero);
}
-template<class SCReal>
-static inline real
-potSwitchPotentialMod(const real potentialInp, const real sw, const SCReal r, const real rVdw, const real zero)
+template<class RealType>
+static inline RealType potSwitchPotentialMod(const RealType potentialInp,
+ const RealType sw,
+ const RealType r,
+ const RealType rVdw,
+ const real zero)
{
if (r < rVdw)
{
//! Templated free-energy non-bonded kernel
-template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
+template<typename DataTypes, bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
static void nb_free_energy_kernel(const t_nblist* gmx_restrict nlist,
rvec* gmx_restrict xx,
gmx::ForceWithShiftForces* forceWithShiftForces,
nb_kernel_data_t* gmx_restrict kernel_data,
t_nrnb* gmx_restrict nrnb)
{
- using SCReal = typename SoftCoreReal<softCoreTreatment>::Real;
-
- constexpr bool useSoftCore = (softCoreTreatment != SoftCoreTreatment::None);
-
#define STATE_A 0
#define STATE_B 1
#define NSTATES 2
+ using RealType = typename DataTypes::RealType;
+ using IntType = typename DataTypes::IntType;
+
+ /* FIXME: How should these be handled with SIMD? */
constexpr real onetwelfth = 1.0 / 12.0;
constexpr real onesixth = 1.0 / 6.0;
constexpr real zero = 0.0;
const int* typeA = mdatoms->typeA;
const int* typeB = mdatoms->typeB;
const int ntype = fr->ntype;
- const real* nbfp = fr->nbfp;
+ const real* nbfp = fr->nbfp.data();
const real* nbfp_grid = fr->ljpme_c6grid;
real* Vv = kernel_data->energygrp_vdw;
const real lambda_coul = kernel_data->lambda[efptCOUL];
real rcutoff_max2 = std::max(ic->rcoulomb, ic->rvdw);
rcutoff_max2 = rcutoff_max2 * rcutoff_max2;
- const real* tab_ewald_F_lj = nullptr;
- const real* tab_ewald_V_lj = nullptr;
- const real* ewtab = nullptr;
- real ewtabscale = 0;
- real ewtabhalfspace = 0;
- real sh_ewald = 0;
+ const real* tab_ewald_F_lj = nullptr;
+ const real* tab_ewald_V_lj = nullptr;
+ const real* ewtab = nullptr;
+ real coulombTableScale = 0;
+ real coulombTableScaleInvHalf = 0;
+ real vdwTableScale = 0;
+ real vdwTableScaleInvHalf = 0;
+ real sh_ewald = 0;
if (elecInteractionTypeIsEwald || vdwInteractionTypeIsEwald)
{
- const auto& tables = *ic->coulombEwaldTables;
- sh_ewald = ic->sh_ewald;
- ewtab = tables.tableFDV0.data();
- ewtabscale = tables.scale;
- ewtabhalfspace = half / ewtabscale;
- tab_ewald_F_lj = tables.tableF.data();
- tab_ewald_V_lj = tables.tableV.data();
+ sh_ewald = ic->sh_ewald;
+ }
+ if (elecInteractionTypeIsEwald)
+ {
+ const auto& coulombTables = *ic->coulombEwaldTables;
+ ewtab = coulombTables.tableFDV0.data();
+ coulombTableScale = coulombTables.scale;
+ coulombTableScaleInvHalf = half / coulombTableScale;
+ }
+ if (vdwInteractionTypeIsEwald)
+ {
+ const auto& vdwTables = *ic->vdwEwaldTables;
+ tab_ewald_F_lj = vdwTables.tableF.data();
+ tab_ewald_V_lj = vdwTables.tableV.data();
+ vdwTableScale = vdwTables.scale;
+ vdwTableScaleInvHalf = half / vdwTableScale;
}
/* For Ewald/PME interactions we cannot easily apply the soft-core component to
GMX_RELEASE_ASSERT(!(vdwInteractionTypeIsEwald && vdwModifierIsPotSwitch),
"Can not apply soft-core to switched Ewald potentials");
- SCReal dvdl_coul = 0; /* Needs double for sc_power==48 */
- SCReal dvdl_vdw = 0; /* Needs double for sc_power==48 */
+ real dvdl_coul = 0;
+ real dvdl_vdw = 0;
/* Lambda factor for state A, 1-lambda*/
real LFC[NSTATES], LFV[NSTATES];
DLF[STATE_B] = 1;
real lfac_coul[NSTATES], dlfac_coul[NSTATES], lfac_vdw[NSTATES], dlfac_vdw[NSTATES];
- constexpr real sc_r_power = (softCoreTreatment == SoftCoreTreatment::RPower48 ? 48.0_real : 6.0_real);
+ constexpr real sc_r_power = 6.0_real;
for (int i = 0; i < NSTATES; i++)
{
lfac_coul[i] = (lam_power == 2 ? (1 - LFC[i]) * (1 - LFC[i]) : (1 - LFC[i]));
for (int k = nj0; k < nj1; k++)
{
- int tj[NSTATES];
- const int jnr = jjnr[k];
- const int j3 = 3 * jnr;
- real c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
- real r, rinv, rp, rpm2;
- real alpha_vdw_eff, alpha_coul_eff, sigma_pow[NSTATES];
- const real dx = ix - x[j3];
- const real dy = iy - x[j3 + 1];
- const real dz = iz - x[j3 + 2];
- const real rsq = dx * dx + dy * dy + dz * dz;
- SCReal FscalC[NSTATES], FscalV[NSTATES]; /* Needs double for sc_power==48 */
-
- if (rsq >= rcutoff_max2)
+ int tj[NSTATES];
+ const int jnr = jjnr[k];
+ const int j3 = 3 * jnr;
+ RealType c6[NSTATES], c12[NSTATES], qq[NSTATES], Vcoul[NSTATES], Vvdw[NSTATES];
+ RealType r, rinv, rp, rpm2;
+ RealType alpha_vdw_eff, alpha_coul_eff, sigma6[NSTATES];
+ const RealType dx = ix - x[j3];
+ const RealType dy = iy - x[j3 + 1];
+ const RealType dz = iz - x[j3 + 2];
+ const RealType rsq = dx * dx + dy * dy + dz * dz;
+ RealType FscalC[NSTATES], FscalV[NSTATES];
+ /* Check if this pair on the exlusions list.*/
+ const bool bPairIncluded = nlist->excl_fep == nullptr || nlist->excl_fep[k];
+
+ if (rsq >= rcutoff_max2 && bPairIncluded)
{
/* We save significant time by skipping all code below.
* Note that with soft-core interactions, the actual cut-off
* check might be different. But since the soft-core distance
* is always larger than r, checking on r here is safe.
+ * Exclusions outside the cutoff can not be skipped as
+ * when using Ewald: the reciprocal-space
+ * Ewald component still needs to be subtracted.
*/
+
continue;
}
npair_within_cutoff++;
r = 0;
}
- if (softCoreTreatment == SoftCoreTreatment::None)
+ if (useSoftCore)
+ {
+ rpm2 = rsq * rsq; /* r4 */
+ rp = rpm2 * rsq; /* r6 */
+ }
+ else
{
/* The soft-core power p will not affect the results
* with not using soft-core, so we use power of 0 which gives
rpm2 = rinv * rinv;
rp = 1;
}
- if (softCoreTreatment == SoftCoreTreatment::RPower6)
- {
- rpm2 = rsq * rsq; /* r4 */
- rp = rpm2 * rsq; /* r6 */
- }
- if (softCoreTreatment == SoftCoreTreatment::RPower48)
- {
- rp = rsq * rsq * rsq; /* r6 */
- rp = rp * rp; /* r12 */
- rp = rp * rp; /* r24 */
- rp = rp * rp; /* r48 */
- rpm2 = rp / rsq; /* r46 */
- }
- real Fscal = 0;
+ RealType Fscal = 0;
qq[STATE_A] = iqA * chargeA[jnr];
qq[STATE_B] = iqB * chargeB[jnr];
tj[STATE_A] = ntiA + 2 * typeA[jnr];
tj[STATE_B] = ntiB + 2 * typeB[jnr];
- if (nlist->excl_fep == nullptr || nlist->excl_fep[k])
+ if (bPairIncluded)
{
c6[STATE_A] = nbfp[tj[STATE_A]];
c6[STATE_B] = nbfp[tj[STATE_B]];
c12[i] = nbfp[tj[i] + 1];
if (useSoftCore)
{
- real sigma6[NSTATES];
if ((c6[i] > 0) && (c12[i] > 0))
{
/* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
{
sigma6[i] = sigma6_def;
}
- sigma_pow[i] = calculateSigmaPow<softCoreTreatment>(sigma6[i]);
}
}
Vcoul[i] = 0;
Vvdw[i] = 0;
- real rinvC, rinvV;
- SCReal rC, rV, rpinvC, rpinvV; /* Needs double for sc_power==48 */
+ RealType rinvC, rinvV, rC, rV, rpinvC, rpinvV;
/* Only spend time on A or B state if it is non-zero */
if ((qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0))
{
- /* this section has to be inside the loop because of the dependence on sigma_pow */
+ /* this section has to be inside the loop because of the dependence on sigma6 */
if (useSoftCore)
{
- rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma_pow[i] + rp);
- pthRoot<softCoreTreatment>(rpinvC, &rinvC, &rC);
+ rpinvC = one / (alpha_coul_eff * lfac_coul[i] * sigma6[i] + rp);
+ pthRoot(rpinvC, &rinvC, &rC);
if (scLambdasOrAlphasDiffer)
{
- rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma_pow[i] + rp);
- pthRoot<softCoreTreatment>(rpinvV, &rinvV, &rV);
+ rpinvV = one / (alpha_vdw_eff * lfac_vdw[i] * sigma6[i] + rp);
+ pthRoot(rpinvV, &rinvV, &rV);
}
else
{
|| (!vdwInteractionTypeIsEwald && rV < rvdw);
if ((c6[i] != 0 || c12[i] != 0) && computeVdwInteraction)
{
- real rinv6;
- if (softCoreTreatment == SoftCoreTreatment::RPower6)
+ RealType rinv6;
+ if (useSoftCore)
{
- rinv6 = calculateRinv6<softCoreTreatment>(rpinvV);
+ rinv6 = rpinvV;
}
else
{
- rinv6 = calculateRinv6<softCoreTreatment>(rinvV);
+ rinv6 = calculateRinv6(rinvV);
}
- real Vvdw6 = calculateVdw6(c6[i], rinv6);
- real Vvdw12 = calculateVdw12(c12[i], rinv6);
+ RealType Vvdw6 = calculateVdw6(c6[i], rinv6);
+ RealType Vvdw12 = calculateVdw12(c12[i], rinv6);
Vvdw[i] = lennardJonesPotential(Vvdw6, Vvdw12, c6[i], c12[i], repulsionShift,
dispersionShift, onesixth, onetwelfth);
if (vdwModifierIsPotSwitch)
{
- real d = rV - ic->rvdw_switch;
- d = (d > zero) ? d : zero;
- const real d2 = d * d;
- const real sw = one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
- const real dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
+ RealType d = rV - ic->rvdw_switch;
+ d = (d > zero) ? d : zero;
+ const RealType d2 = d * d;
+ const RealType sw =
+ one + d2 * d * (vdw_swV3 + d * (vdw_swV4 + d * vdw_swV5));
+ const RealType dsw = d2 * (vdw_swF2 + d * (vdw_swF3 + d * vdw_swF4));
FscalV[i] = potSwitchScalarForceMod(FscalV[i], Vvdw[i], sw, rV,
rvdw, dsw, zero);
FscalC[i] *= rpinvC;
FscalV[i] *= rpinvV;
}
- }
+ } // end for (int i = 0; i < NSTATES; i++)
/* Assemble A and B states */
for (int i = 0; i < NSTATES; i++)
if (useSoftCore)
{
- dvdl_coul +=
- Vcoul[i] * DLF[i]
- + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma_pow[i];
+ dvdl_coul += Vcoul[i] * DLF[i]
+ + LFC[i] * alpha_coul_eff * dlfac_coul[i] * FscalC[i] * sigma6[i];
dvdl_vdw += Vvdw[i] * DLF[i]
- + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma_pow[i];
+ + LFV[i] * alpha_vdw_eff * dlfac_vdw[i] * FscalV[i] * sigma6[i];
}
else
{
dvdl_vdw += Vvdw[i] * DLF[i];
}
}
- }
+ } // end if (bPairIncluded)
else if (icoul == GMX_NBKERNEL_ELEC_REACTIONFIELD)
{
/* For excluded pairs, which are only in this pair list when
* As there is no singularity, there is no need for soft-core.
*/
const real FF = -two * krf;
- real VV = krf * rsq - crf;
+ RealType VV = krf * rsq - crf;
if (ii == jnr)
{
}
}
- if (elecInteractionTypeIsEwald && r < rcoulomb)
+ if (elecInteractionTypeIsEwald && (r < rcoulomb || !bPairIncluded))
{
/* See comment in the preamble. When using Ewald interactions
* (unless we use a switch modifier) we subtract the reciprocal-space
*/
real v_lr, f_lr;
- const RealType ewrt = r * ewtabscale;
- const real ewrt = r * coulombTableScale;
- int ewitab = static_cast<int>(ewrt);
- const real eweps = ewrt - ewitab;
- ewitab = 4 * ewitab;
- f_lr = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
++ const RealType ewrt = r * coulombTableScale;
+ IntType ewitab = static_cast<IntType>(ewrt);
+ const RealType eweps = ewrt - ewitab;
+ ewitab = 4 * ewitab;
+ f_lr = ewtab[ewitab] + eweps * ewtab[ewitab + 1];
- v_lr = (ewtab[ewitab + 2] - ewtabhalfspace * eweps * (ewtab[ewitab] + f_lr));
+ v_lr = (ewtab[ewitab + 2] - coulombTableScaleInvHalf * eweps * (ewtab[ewitab] + f_lr));
f_lr *= rinv;
/* Note that any possible Ewald shift has already been applied in
* r close to 0 for non-interacting pairs.
*/
- const RealType rs = rsq * rinv * ewtabscale;
- const real rs = rsq * rinv * vdwTableScale;
- const int ri = static_cast<int>(rs);
- const real frac = rs - ri;
- const real f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
++ const RealType rs = rsq * rinv * vdwTableScale;
+ const IntType ri = static_cast<IntType>(rs);
+ const RealType frac = rs - ri;
+ const RealType f_lr = (1 - frac) * tab_ewald_F_lj[ri] + frac * tab_ewald_F_lj[ri + 1];
/* TODO: Currently the Ewald LJ table does not contain
* the factor 1/6, we should add this.
*/
- const real FF = f_lr * rinv / six;
- real VV = (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
- / six;
+ const RealType FF = f_lr * rinv / six;
+ RealType VV =
- (tab_ewald_V_lj[ri] - ewtabhalfspace * frac * (tab_ewald_F_lj[ri] + f_lr)) / six;
++ (tab_ewald_V_lj[ri] - vdwTableScaleInvHalf * frac * (tab_ewald_F_lj[ri] + f_lr))
++ / six;
if (ii == jnr)
{
#pragma omp atomic
f[j3 + 2] -= tz;
}
- }
+ } // end for (int k = nj0; k < nj1; k++)
/* The atomics below are expensive with many OpenMP threads.
* Here unperturbed i-particles will usually only have a few
Vv[ggid] += vvtot;
}
}
- }
+ } // end for (int n = 0; n < nri; n++)
#pragma omp atomic
dvdl[efptCOUL] += dvdl_coul;
nb_kernel_data_t* gmx_restrict kernel_data,
t_nrnb* gmx_restrict nrnb);
-template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
-static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch)
+template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald, bool vdwModifierIsPotSwitch>
+static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
+{
+ if (useSimd)
+ {
+#if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
+ /* FIXME: Here SimdDataTypes should be used to enable SIMD. So far, the code in nb_free_energy_kernel is not adapted to SIMD */
+ return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
+ elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
+#else
+ return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
+ elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
+#endif
+ }
+ else
+ {
+ return (nb_free_energy_kernel<ScalarDataTypes, useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
+ elecInteractionTypeIsEwald, vdwModifierIsPotSwitch>);
+ }
+}
+
+template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald, bool elecInteractionTypeIsEwald>
+static KernelFunction dispatchKernelOnVdwModifier(const bool vdwModifierIsPotSwitch, const bool useSimd)
{
if (vdwModifierIsPotSwitch)
{
- return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
- vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, true>);
+ return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
+ elecInteractionTypeIsEwald, true>(useSimd));
}
else
{
- return (nb_free_energy_kernel<softCoreTreatment, scLambdasOrAlphasDiffer,
- vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, false>);
+ return (dispatchKernelOnUseSimd<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
+ elecInteractionTypeIsEwald, false>(useSimd));
}
}
-template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
+template<bool useSoftCore, bool scLambdasOrAlphasDiffer, bool vdwInteractionTypeIsEwald>
static KernelFunction dispatchKernelOnElecInteractionType(const bool elecInteractionTypeIsEwald,
- const bool vdwModifierIsPotSwitch)
+ const bool vdwModifierIsPotSwitch,
+ const bool useSimd)
{
if (elecInteractionTypeIsEwald)
{
- return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
- vdwModifierIsPotSwitch));
+ return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, true>(
+ vdwModifierIsPotSwitch, useSimd));
}
else
{
- return (dispatchKernelOnVdwModifier<softCoreTreatment, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
- vdwModifierIsPotSwitch));
+ return (dispatchKernelOnVdwModifier<useSoftCore, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, false>(
+ vdwModifierIsPotSwitch, useSimd));
}
}
-template<SoftCoreTreatment softCoreTreatment, bool scLambdasOrAlphasDiffer>
+template<bool useSoftCore, bool scLambdasOrAlphasDiffer>
static KernelFunction dispatchKernelOnVdwInteractionType(const bool vdwInteractionTypeIsEwald,
const bool elecInteractionTypeIsEwald,
- const bool vdwModifierIsPotSwitch)
+ const bool vdwModifierIsPotSwitch,
+ const bool useSimd)
{
if (vdwInteractionTypeIsEwald)
{
- return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, true>(
- elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
+ return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, true>(
+ elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
}
else
{
- return (dispatchKernelOnElecInteractionType<softCoreTreatment, scLambdasOrAlphasDiffer, false>(
- elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
+ return (dispatchKernelOnElecInteractionType<useSoftCore, scLambdasOrAlphasDiffer, false>(
+ elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
}
}
-template<SoftCoreTreatment softCoreTreatment>
+template<bool useSoftCore>
static KernelFunction dispatchKernelOnScLambdasOrAlphasDifference(const bool scLambdasOrAlphasDiffer,
const bool vdwInteractionTypeIsEwald,
const bool elecInteractionTypeIsEwald,
- const bool vdwModifierIsPotSwitch)
+ const bool vdwModifierIsPotSwitch,
+ const bool useSimd)
{
if (scLambdasOrAlphasDiffer)
{
- return (dispatchKernelOnVdwInteractionType<softCoreTreatment, true>(
- vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
+ return (dispatchKernelOnVdwInteractionType<useSoftCore, true>(
+ vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
}
else
{
- return (dispatchKernelOnVdwInteractionType<softCoreTreatment, false>(
- vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch));
+ return (dispatchKernelOnVdwInteractionType<useSoftCore, false>(
+ vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd));
}
}
const bool vdwInteractionTypeIsEwald,
const bool elecInteractionTypeIsEwald,
const bool vdwModifierIsPotSwitch,
+ const bool useSimd,
const t_forcerec* fr)
{
if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
{
- return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::None>(
+ return (dispatchKernelOnScLambdasOrAlphasDifference<false>(
scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
- vdwModifierIsPotSwitch));
- }
- else if (fr->sc_r_power == 6.0_real)
- {
- return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower6>(
- scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
- vdwModifierIsPotSwitch));
+ vdwModifierIsPotSwitch, useSimd));
}
else
{
- return (dispatchKernelOnScLambdasOrAlphasDifference<SoftCoreTreatment::RPower48>(
+ return (dispatchKernelOnScLambdasOrAlphasDifference<true>(
scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald,
- vdwModifierIsPotSwitch));
+ vdwModifierIsPotSwitch, useSimd));
}
}
const bool elecInteractionTypeIsEwald = (EEL_PME_EWALD(fr->ic->eeltype));
const bool vdwModifierIsPotSwitch = (fr->ic->vdw_modifier == eintmodPOTSWITCH);
bool scLambdasOrAlphasDiffer = true;
+ const bool useSimd = fr->use_simd_kernels;
if (fr->sc_alphacoul == 0 && fr->sc_alphavdw == 0)
{
scLambdasOrAlphasDiffer = false;
}
- else if (fr->sc_r_power == 6.0_real || fr->sc_r_power == 48.0_real)
+ else if (fr->sc_r_power == 6.0_real)
{
if (kernel_data->lambda[efptCOUL] == kernel_data->lambda[efptVDW] && fr->sc_alphacoul == fr->sc_alphavdw)
{
{
GMX_RELEASE_ASSERT(false, "Unsupported soft-core r-power");
}
- KernelFunction kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
- elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, fr);
+
+ KernelFunction kernelFunc;
+ kernelFunc = dispatchKernel(scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald,
+ elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, useSimd, fr);
kernelFunc(nlist, xx, ff, fr, mdatoms, kernel_data, nrnb);
}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/pbcutil/ishift.h"
-#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pbcutil/pbc_simd.h"
#include "gromacs/simd/simd.h"
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms* md,
/*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
*
- * When \p g==nullptr, \p shiftIndex is used as the periodic shift.
- * When \p g!=nullptr, the graph is used to compute the periodic shift.
+ * \p shiftIndex is used as the periodic shift.
*/
template<BondedKernelFlavor flavor>
-inline void spreadBondForces(const real bondForce,
- const rvec dx,
- const int ai,
- const int aj,
- rvec4* f,
- int shiftIndex,
- const t_graph* g,
- rvec* fshift)
+inline void spreadBondForces(const real bondForce,
+ const rvec dx,
+ const int ai,
+ const int aj,
+ rvec4* f,
+ int shiftIndex,
+ rvec* fshift)
{
- if (computeVirial(flavor) && g)
- {
- ivec dt;
- ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
- shiftIndex = IVEC2IS(dt);
- }
-
for (int m = 0; m < DIM; m++) /* 15 */
{
const real fij = bondForce * dx[m];
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
*dvdlambda += (cbB - cbA) * omtemp * omtemp
- (2 - 2 * omtemp) * omtemp * cb * ((b0B - b0A) * be - (beB - beA) * (dr - b0)); /* 15 */
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 83 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 83 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
vtot += vbond; /* 21 */
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 54 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 54 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
vtot += vbond; /* 35 */
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 58 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 58 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
vtot += vbond; /* 1*/
fbond *= gmx::invsqrt(dr2); /* 6 */
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 59 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 59 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
vtot += vbond; /* 1*/
fbond *= gmx::invsqrt(dr2); /* 6 */
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 59 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 59 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms* md,
vtot += vbond; /* 1*/
fbond *= gmx::invsqrt(dr2); /* 6 */
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 59 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 59 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms* md,
fbond *= gmx::invsqrt(dr2); /* 6 */
vtot += vbond; /* 1*/
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 72 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 72 TOTAL */
return vtot;
}
rvec4 f[],
rvec gmx_unused fshift[],
const t_pbc gmx_unused* pbc,
- const t_graph gmx_unused* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
* three spatial dimensions in the molecular frame.
*/
int i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
- ivec dt;
rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
kdx[ZZ] = kk[ZZ] * dx[ZZ];
vtot += iprod(dx, kdx);
- if (computeVirial(flavor) && g)
- {
- ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
- ki = IVEC2IS(dt);
- }
-
for (m = 0; (m < DIM); m++)
{
/* This is a tensor operation but written out for speed */
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph gmx_unused* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms* md,
return V;
}
-// Avoid gcc 386 -O3 code generation bug in this function (see Redmine
+// Avoid gcc 386 -O3 code generation bug in this function (see Issue
// #3205 for more information)
#if defined(__GNUC__) && defined(__i386__) && defined(__OPTIMIZE__)
# pragma GCC push_options
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
int i, ai, aj, ak, t1, t2, type;
rvec r_ij, r_kj;
real cos_theta, cos_theta2, theta, dVdt, va, vtot;
- ivec jt, dt_ij, dt_kj;
vtot = 0.0;
for (i = 0; i < nbonds;)
}
if (computeVirial(flavor))
{
- if (g != nullptr)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k);
rvec4 f[],
rvec gmx_unused fshift[],
const t_pbc* pbc,
- const t_graph gmx_unused* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
int i, m, ai, aj, ak, t1, t2, type;
rvec f_i, f_j, f_k;
real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
- ivec jt, dt_ij, dt_kj;
rvec r_ij, r_kj, r_ik, dx;
L1 = 1 - lambda;
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k);
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
real cos_theta, cos_theta2, theta;
real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
- ivec jt, dt_ij, dt_kj, dt_ik;
vtot = 0.0;
for (i = 0; (i < nbonds);)
}
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k);
vtot += vbond; /* 1*/
fbond *= gmx::invsqrt(dr2); /* 6 */
- if (computeVirial(flavor) && g)
- {
- ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
- ki = IVEC2IS(dt_ik);
- }
for (m = 0; (m < DIM); m++) /* 15 */
{
fik = fbond * r_ik[m];
rvec4 f[],
rvec gmx_unused fshift[],
const t_pbc* pbc,
- const t_graph gmx_unused* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
int i, j, ai, aj, ak, t1, t2, type;
rvec r_ij, r_kj;
real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
- ivec jt, dt_ij, dt_kj;
vtot = 0.0;
for (i = 0; (i < nbonds);)
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k);
} // namespace
template<BondedKernelFlavor flavor>
-void do_dih_fup(int i,
- int j,
- int k,
- int l,
- real ddphi,
- rvec r_ij,
- rvec r_kj,
- rvec r_kl,
- rvec m,
- rvec n,
- rvec4 f[],
- rvec fshift[],
- const t_pbc* pbc,
- const t_graph* g,
- const rvec x[],
- int t1,
- int t2,
- int t3)
+void do_dih_fup(int i,
+ int j,
+ int k,
+ int l,
+ real ddphi,
+ rvec r_ij,
+ rvec r_kj,
+ rvec r_kl,
+ rvec m,
+ rvec n,
+ rvec4 f[],
+ rvec fshift[],
+ const t_pbc* pbc,
+ const rvec x[],
+ int t1,
+ int t2,
+ int t3)
{
/* 143 FLOPS */
rvec f_i, f_j, f_k, f_l;
rvec uvec, vvec, svec, dx_jl;
real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
real a, b, p, q, toler;
- ivec jt, dt_ij, dt_kj, dt_lj;
iprm = iprod(m, m); /* 5 */
iprn = iprod(n, n); /* 5 */
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, j), jt);
- ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
- ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- t3 = IVEC2IS(dt_lj);
- }
- else if (pbc)
+ if (pbc)
{
t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
} while (i < nbonds && forceatoms[i + 1] == ai && forceatoms[i + 2] == aj
&& forceatoms[i + 3] == ak && forceatoms[i + 4] == al);
- do_dih_fup<flavor>(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x,
- t1, t2, t3); /* 112 */
- } /* 223 TOTAL */
+ do_dih_fup<flavor>(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
+ t2, t3); /* 112 */
+ } /* 223 TOTAL */
return vtot;
}
rvec4 f[],
rvec gmx_unused fshift[],
const t_pbc* pbc,
- const t_graph gmx_unused* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec4 f[],
rvec gmx_unused fshift[],
const t_pbc* pbc,
- const t_graph gmx_unused* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
dvdl_term += 0.5 * (kB - kA) * dp2 - kk * dphi0 * dp;
- do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
+ do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
t2, t3); /* 112 */
/* 218 TOTAL */
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
gmx_bool bZAxis)
rvec r_ij, r_kl, f_i, f_k = { 0, 0, 0 };
real st, sth, nrij2, nrkl2, c, cij, ckl;
- ivec dt;
t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
vtot = 0.0;
if (computeVirial(flavor))
{
- if (g)
- {
- ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
- t1 = IVEC2IS(dt);
- }
rvec_inc(fshift[t1], f_i);
rvec_dec(fshift[CENTRAL], f_i);
if (!bZAxis)
{
- if (g)
- {
- ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
- t2 = IVEC2IS(dt);
- }
rvec_inc(fshift[t2], f_k);
rvec_dec(fshift[CENTRAL], f_k);
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
t_fcdata gmx_unused* fcd,
int gmx_unused* global_atom_index)
{
- return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
- dvdlambda, FALSE);
+ return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, FALSE);
}
template<BondedKernelFlavor flavor>
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
t_fcdata gmx_unused* fcd,
int gmx_unused* global_atom_index)
{
- return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
- dvdlambda, TRUE);
+ return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, TRUE);
}
template<BondedKernelFlavor flavor>
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
{
*dvdlambda += kfac * ddp * ((dphiB - dphiA) - (phi0B - phi0A));
}
- do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x,
- t1, t2, t3); /* 112 */
+ do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
+ t2, t3); /* 112 */
}
}
return vtot;
rvec4 gmx_unused f[],
rvec gmx_unused fshift[],
const t_pbc gmx_unused* pbc,
- const t_graph gmx_unused* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
int i, d, ai, aj, ak, type, m;
int t1, t2;
real v, vtot;
- ivec jt, dt_ij, dt_kj;
rvec f_i, f_j, f_k;
double prefactor, ratio_ante, ratio_post;
rvec delta_ante, delta_post, vec_temp;
real restrangles(int nbonds,
const t_iatom forceatoms[],const t_iparams forceparams[],
const rvec x[],rvec4 f[],rvec fshift[],
- const t_pbc *pbc,const t_graph *g,
+ const t_pbc *pbc,
real gmx_unused lambda,real gmx_unused *dvdlambda,
const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
int gmx_unused *global_atom_index)
int t1, t2;
rvec r_ij,r_kj;
real v, vtot;
- ivec jt,dt_ij,dt_kj;
rvec f_i, f_j, f_k;
real prefactor, ratio_ante, ratio_post;
rvec delta_ante, delta_post, vec_temp;
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
-
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k);
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvlambda,
const t_mdatoms gmx_unused* md,
int i, d, type, ai, aj, ak, al;
rvec f_i, f_j, f_k, f_l;
rvec dx_jl;
- ivec jt, dt_ij, dt_kj, dt_lj;
int t1, t2, t3;
real v, vtot;
rvec delta_ante, delta_crnt, delta_post, vec_temp;
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- t3 = IVEC2IS(dt_lj);
- }
- else if (pbc)
+ if (pbc)
{
t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
real v, vtot;
rvec vec_temp;
rvec f_i, f_j, f_k, f_l;
- ivec jt, dt_ij, dt_kj, dt_lj;
rvec dx_jl;
rvec delta_ante, delta_crnt, delta_post;
rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- t3 = IVEC2IS(dt_lj);
- }
- else if (pbc)
+ if (pbc)
{
t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
ddphi = -ddphi * sin_phi; /* 11 */
- do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
- t2, t3); /* 112 */
+ do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2,
+ t3); /* 112 */
vtot += v;
}
*dvdlambda += dvdl_term;
} // namespace
-real cmap_dihs(int nbonds,
- const t_iatom forceatoms[],
- const t_iparams forceparams[],
- const gmx_cmap_t* cmap_grid,
- const rvec x[],
- rvec4 f[],
- rvec fshift[],
- const struct t_pbc* pbc,
- const struct t_graph* g,
+real cmap_dihs(int nbonds,
+ const t_iatom forceatoms[],
+ const t_iparams forceparams[],
+ const gmx_cmap_t* cmap_grid,
+ const rvec x[],
+ rvec4 f[],
+ rvec fshift[],
+ const struct t_pbc* pbc,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec a1, b1, a2, b2;
rvec f1, g1, h1, f2, g2, h2;
rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
- ivec jt1, dt1_ij, dt1_kj, dt1_lj;
- ivec jt2, dt2_ij, dt2_kj, dt2_lj;
int loop_index[4][4] = { { 0, 4, 8, 12 }, { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 } };
/* Shift forces */
if (fshift != nullptr)
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, a1j), jt1);
- ivec_sub(SHIFT_IVEC(g, a1i), jt1, dt1_ij);
- ivec_sub(SHIFT_IVEC(g, a1k), jt1, dt1_kj);
- ivec_sub(SHIFT_IVEC(g, a1l), jt1, dt1_lj);
- t11 = IVEC2IS(dt1_ij);
- t21 = IVEC2IS(dt1_kj);
- t31 = IVEC2IS(dt1_lj);
-
- copy_ivec(SHIFT_IVEC(g, a2j), jt2);
- ivec_sub(SHIFT_IVEC(g, a2i), jt2, dt2_ij);
- ivec_sub(SHIFT_IVEC(g, a2k), jt2, dt2_kj);
- ivec_sub(SHIFT_IVEC(g, a2l), jt2, dt2_lj);
- t12 = IVEC2IS(dt2_ij);
- t22 = IVEC2IS(dt2_kj);
- t32 = IVEC2IS(dt2_lj);
- }
- else if (pbc)
+ if (pbc)
{
t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
vtot += 0.5 * vbond; /* 1*/
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 44 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 44 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
real cos_theta, dVdt, va, vtot;
real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
rvec f_i, f_j, f_k;
- ivec jt, dt_ij, dt_kj;
vtot = 0.0;
for (i = 0; (i < nbonds);)
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k); /* 9 */
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec r_ij, r_kj;
real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
rvec f_i, f_j, f_k;
- ivec jt, dt_ij, dt_kj;
vtot = 0.0;
for (i = 0; (i < nbonds);)
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k); /* 9 */
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real gmx_unused lambda,
real gmx_unused* dvdlambda,
const t_mdatoms gmx_unused* md,
rvec r_ij, r_kj, r_ik;
real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
rvec f_i, f_j, f_k;
- ivec jt, dt_ij, dt_kj;
vtot = 0.0;
for (i = 0; (i < nbonds);)
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k); /* 9 */
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
vtot += vbond; /* 1*/
fbond *= gmx::invsqrt(dr2); /* 6 */
- spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
- } /* 62 TOTAL */
+ spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
+ } /* 62 TOTAL */
return vtot;
}
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
int i, ai, aj, ak, t1, t2, type, table;
rvec r_ij, r_kj;
real cos_theta, cos_theta2, theta, dVdt, va, vtot;
- ivec jt, dt_ij, dt_kj;
vtot = 0.0;
for (i = 0; (i < nbonds);)
if (computeVirial(flavor))
{
- if (g)
- {
- copy_ivec(SHIFT_IVEC(g, aj), jt);
-
- ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
- ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
- t1 = IVEC2IS(dt_ij);
- t2 = IVEC2IS(dt_kj);
- }
rvec_inc(fshift[t1], f_i);
rvec_inc(fshift[CENTRAL], f_j);
rvec_inc(fshift[t2], f_k);
rvec4 f[],
rvec fshift[],
const t_pbc* pbc,
- const t_graph* g,
real lambda,
real* dvdlambda,
const t_mdatoms gmx_unused* md,
forceparams[type].tab.kB, phi + M_PI, lambda, &vpd, &ddphi);
vtot += vpd;
- do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1,
+ do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
t2, t3); /* 112 */
} /* 227 TOTAL */
int nrnbIndex;
};
+ // Bug in old clang versions prevents constexpr. constexpr is needed for MSVC.
+ #if defined(__clang__) && __clang_major__ < 6
+ # define CONSTEXPR_EXCL_OLD_CLANG const
+ #else
+ # define CONSTEXPR_EXCL_OLD_CLANG constexpr
+ #endif
+
/*! \brief Lookup table of bonded interaction functions
*
* This must have as many entries as interaction_function in ifunc.cpp */
template<BondedKernelFlavor flavor>
- const std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
+ CONSTEXPR_EXCL_OLD_CLANG std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
BondedInteractions{ bonds<flavor>, eNR_BONDS }, // F_BONDS
BondedInteractions{ g96bonds<flavor>, eNR_BONDS }, // F_G96BONDS
BondedInteractions{ morse_bonds<flavor>, eNR_MORSE }, // F_MORSE
};
/*! \brief List of instantiated BondedInteractions list */
- const gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
+ CONSTEXPR_EXCL_OLD_CLANG
+ gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
} // namespace
-real calculateSimpleBond(const int ftype,
- const int numForceatoms,
- const t_iatom forceatoms[],
- const t_iparams forceparams[],
- const rvec x[],
- rvec4 f[],
- rvec fshift[],
- const struct t_pbc* pbc,
- const struct t_graph gmx_unused* g,
- const real lambda,
- real* dvdlambda,
- const t_mdatoms* md,
- t_fcdata* fcd,
+real calculateSimpleBond(const int ftype,
+ const int numForceatoms,
+ const t_iatom forceatoms[],
+ const t_iparams forceparams[],
+ const rvec x[],
+ rvec4 f[],
+ rvec fshift[],
+ const struct t_pbc* pbc,
+ const real lambda,
+ real* dvdlambda,
+ const t_mdatoms* md,
+ t_fcdata* fcd,
int gmx_unused* global_atom_index,
const BondedKernelFlavor bondedKernelFlavor)
{
const BondedInteractions& bonded = c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
- real v = bonded.function(numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, g, lambda,
+ real v = bonded.function(numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, lambda,
dvdlambda, md, fcd, global_atom_index);
return v;
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013-2020, by the GROMACS development team, led by
+ * Copyright (c) 2013-2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/mdlib/forcerec_threading.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/md_support.h"
-#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdlib/rf_util.h"
#include "gromacs/mdlib/wall.h"
+#include "gromacs/mdlib/wholemoleculetransform.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/fcdata.h"
+#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/group.h"
#include "gromacs/mdtypes/iforceprovider.h"
#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/utility/smalloc.h"
#include "gromacs/utility/strconvert.h"
-/*! \brief environment variable to enable GPU P2P communication */
-static const bool c_enableGpuPmePpComms =
- (getenv("GMX_GPU_PME_PP_COMMS") != nullptr) && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
-
-static real* mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM)
+static std::vector<real> mk_nbfp(const gmx_ffparams_t* idef, gmx_bool bBHAM)
{
- real* nbfp;
- int i, j, k, atnr;
+ std::vector<real> nbfp;
+ int atnr;
atnr = idef->atnr;
if (bBHAM)
{
- snew(nbfp, 3 * atnr * atnr);
- for (i = k = 0; (i < atnr); i++)
+ nbfp.resize(3 * atnr * atnr);
+ int k = 0;
+ for (int i = 0; (i < atnr); i++)
{
- for (j = 0; (j < atnr); j++, k++)
+ for (int j = 0; (j < atnr); j++, k++)
{
BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
}
else
{
- snew(nbfp, 2 * atnr * atnr);
- for (i = k = 0; (i < atnr); i++)
+ nbfp.resize(2 * atnr * atnr);
+ int k = 0;
+ for (int i = 0; (i < atnr); i++)
{
- for (j = 0; (j < atnr); j++, k++)
+ for (int j = 0; (j < atnr); j++, k++)
{
/* nbfp now includes the 6.0/12.0 derivative prefactors */
C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6 * 6.0;
acSETTLE
};
-static cginfo_mb_t* init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr, gmx_bool* bFEP_NonBonded)
+static std::vector<cginfo_mb_t> init_cginfo_mb(const gmx_mtop_t* mtop, const t_forcerec* fr)
{
- cginfo_mb_t* cginfo_mb;
- gmx_bool* type_VDW;
- int* cginfo;
- int* a_con;
-
- snew(cginfo_mb, mtop->molblock.size());
+ gmx_bool* type_VDW;
+ int* a_con;
snew(type_VDW, fr->ntype);
for (int ai = 0; ai < fr->ntype; ai++)
}
}
- *bFEP_NonBonded = FALSE;
-
- int a_offset = 0;
+ std::vector<cginfo_mb_t> cginfoPerMolblock;
+ int a_offset = 0;
for (size_t mb = 0; mb < mtop->molblock.size(); mb++)
{
const gmx_molblock_t& molb = mtop->molblock[mb];
const gmx_moltype_t& molt = mtop->moltype[molb.type];
- const t_blocka& excl = molt.excls;
+ const auto& excl = molt.excls;
/* Check if the cginfo is identical for all molecules in this block.
* If so, we only need an array of the size of one molecule.
}
}
- cginfo_mb[mb].cg_start = a_offset;
- cginfo_mb[mb].cg_end = a_offset + molb.nmol * molt.atoms.nr;
- cginfo_mb[mb].cg_mod = (bId ? 1 : molb.nmol) * molt.atoms.nr;
- snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
- cginfo = cginfo_mb[mb].cginfo;
+ cginfo_mb_t cginfo_mb;
+ cginfo_mb.cg_start = a_offset;
+ cginfo_mb.cg_end = a_offset + molb.nmol * molt.atoms.nr;
+ cginfo_mb.cg_mod = (bId ? 1 : molb.nmol) * molt.atoms.nr;
+ cginfo_mb.cginfo.resize(cginfo_mb.cg_mod);
+ gmx::ArrayRef<int> cginfo = cginfo_mb.cginfo;
/* Set constraints flags for constrained atoms */
snew(a_con, molt.atoms.nr);
bool haveExclusions = false;
/* Loop over all the exclusions of atom ai */
- for (int j = excl.index[a]; j < excl.index[a + 1]; j++)
+ for (const int j : excl[a])
{
- if (excl.a[j] != a)
+ if (j != a)
{
haveExclusions = true;
break;
if (fr->efep != efepNO && PERTURBED(atom))
{
SET_CGINFO_FEP(atomInfo);
- *bFEP_NonBonded = TRUE;
}
}
}
sfree(a_con);
+ cginfoPerMolblock.push_back(cginfo_mb);
+
a_offset += molb.nmol * molt.atoms.nr;
}
sfree(type_VDW);
- return cginfo_mb;
+ return cginfoPerMolblock;
}
-static std::vector<int> cginfo_expand(const int nmb, const cginfo_mb_t* cgi_mb)
+static std::vector<int> cginfo_expand(const int nmb, gmx::ArrayRef<const cginfo_mb_t> cgi_mb)
{
const int ncg = cgi_mb[nmb - 1].cg_end;
return cginfo;
}
-static void done_cginfo_mb(cginfo_mb_t* cginfo_mb, int numMolBlocks)
-{
- if (cginfo_mb == nullptr)
- {
- return;
- }
- for (int mb = 0; mb < numMolBlocks; ++mb)
- {
- sfree(cginfo_mb[mb].cginfo);
- }
- sfree(cginfo_mb);
-}
-
/* Sets the sum of charges (squared) and C6 in the system in fr.
* Returns whether the system has a net charge.
*/
fr->natoms_force = natoms_force;
fr->natoms_force_constr = natoms_force_constr;
- if (fr->natoms_force_constr > fr->nalloc_force)
- {
- fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
- }
-
if (fr->haveDirectVirialContributions)
{
fr->forceBufferForDirectVirialContributions.resize(natoms_f_novirsum);
* both accuracy requirements, when relevant.
*/
static void init_ewald_f_table(const interaction_const_t& ic,
+ const real tableExtensionLength,
EwaldCorrectionTables* coulombTables,
EwaldCorrectionTables* vdwTables)
{
*/
const real tableScale = ewald_spline3_table_scale(ic, useCoulombTable, useVdwTable);
- const int tableSize = static_cast<int>(ic.rcoulomb * tableScale) + 2;
+ real tableLen = ic.rcoulomb;
+ if (useCoulombTable && tableExtensionLength > 0.0)
+ {
+ /* TODO: Ideally this should also check if couple-intramol == no, but that isn't
+ * stored in ir. Grompp puts that info into an opts structure that doesn't make it into the tpr.
+ * The alternative is to look through all the exclusions and check if they come from
+ * couple-intramol == no. Meanwhile, always having larger tables should only affect
+ * memory consumption, not speed (barring cache issues).
+ */
+ tableLen = ic.rcoulomb + tableExtensionLength;
+ }
+ const int tableSize = static_cast<int>(tableLen * tableScale) + 2;
if (useCoulombTable)
{
}
}
-void init_interaction_const_tables(FILE* fp, interaction_const_t* ic)
+void init_interaction_const_tables(FILE* fp, interaction_const_t* ic, const real tableExtensionLength)
{
- if (EEL_PME_EWALD(ic->eeltype))
+ if (EEL_PME_EWALD(ic->eeltype) || EVDW_PME(ic->vdwtype))
{
- init_ewald_f_table(*ic, tableExtensionLength, ic->coulombEwaldTables.get(), nullptr);
- init_ewald_f_table(*ic, ic->coulombEwaldTables.get(), ic->vdwEwaldTables.get());
++ init_ewald_f_table(*ic, tableExtensionLength, ic->coulombEwaldTables.get(),
++ ic->vdwEwaldTables.get());
if (fp != nullptr)
{
- fprintf(fp, "Initialized non-bonded Coulomb Ewald tables, spacing: %.2e size: %zu\n\n",
+ fprintf(fp, "Initialized non-bonded Ewald tables, spacing: %.2e size: %zu\n\n",
1 / ic->coulombEwaldTables->scale, ic->coulombEwaldTables->tableF.size());
}
}
ic->cutoff_scheme = ir->cutoff_scheme;
ic->coulombEwaldTables = std::make_unique<EwaldCorrectionTables>();
+ ic->vdwEwaldTables = std::make_unique<EwaldCorrectionTables>();
/* Lennard-Jones */
ic->vdwtype = ir->vdwtype;
*interaction_const = ic;
}
-bool areMoleculesDistributedOverPbc(const t_inputrec& ir, const gmx_mtop_t& mtop, const gmx::MDLogger& mdlog)
-{
- bool areMoleculesDistributedOverPbc = false;
- const bool useEwaldSurfaceCorrection = (EEL_PME_EWALD(ir.coulombtype) && ir.epsilon_surface != 0);
-
- const bool bSHAKE =
- (ir.eConstrAlg == econtSHAKE
- && (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 || gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0));
-
- /* The group cut-off scheme and SHAKE assume charge groups
- * are whole, but not using molpbc is faster in most cases.
- * With intermolecular interactions we need PBC for calculating
- * distances between atoms in different molecules.
- */
- if (bSHAKE && !mtop.bIntermolecularInteractions)
- {
- areMoleculesDistributedOverPbc = ir.bPeriodicMols;
-
- if (areMoleculesDistributedOverPbc)
- {
- gmx_fatal(FARGS, "SHAKE is not supported with periodic molecules");
- }
- }
- else
- {
- /* Not making molecules whole is faster in most cases,
- * but with orientation restraints or non-tinfoil boundary
- * conditions we need whole molecules.
- */
- areMoleculesDistributedOverPbc =
- (gmx_mtop_ftype_count(mtop, F_ORIRES) == 0 && !useEwaldSurfaceCorrection);
-
- if (getenv("GMX_USE_GRAPH") != nullptr)
- {
- areMoleculesDistributedOverPbc = false;
-
- GMX_LOG(mdlog.warning)
- .asParagraph()
- .appendText(
- "GMX_USE_GRAPH is set, using the graph for bonded "
- "interactions");
-
- if (mtop.bIntermolecularInteractions)
- {
- GMX_LOG(mdlog.warning)
- .asParagraph()
- .appendText(
- "WARNING: Molecules linked by intermolecular interactions "
- "have to reside in the same periodic image, otherwise "
- "artifacts will occur!");
- }
- }
-
- GMX_RELEASE_ASSERT(areMoleculesDistributedOverPbc || !mtop.bIntermolecularInteractions,
- "We need to use PBC within molecules with inter-molecular interactions");
-
- if (bSHAKE && areMoleculesDistributedOverPbc)
- {
- gmx_fatal(FARGS,
- "SHAKE is not properly supported with intermolecular interactions. "
- "For short simulations where linked molecules remain in the same "
- "periodic image, the environment variable GMX_USE_GRAPH can be used "
- "to override this check.\n");
- }
- }
-
- return areMoleculesDistributedOverPbc;
-}
-
void init_forcerec(FILE* fp,
const gmx::MDLogger& mdlog,
t_forcerec* fr,
const char* tabfn,
const char* tabpfn,
gmx::ArrayRef<const std::string> tabbfnm,
- const gmx_hw_info_t& hardwareInfo,
- const gmx_device_info_t* deviceInfo,
- const bool useGpuForBonded,
- const bool pmeOnlyRankUsesGpu,
- real print_force,
- gmx_wallcycle* wcycle)
+ real print_force)
{
- real rtab;
- char* env;
- double dbl;
- gmx_bool bFEP_NonBonded;
+ /* The CMake default turns SIMD kernels on, but it might be turned off further down... */
+ fr->use_simd_kernels = GMX_USE_SIMD_KERNELS;
- /* By default we turn SIMD kernels on, but it might be turned off further down... */
- fr->use_simd_kernels = TRUE;
-
- if (check_box(ir->ePBC, box))
+ if (check_box(ir->pbcType, box))
{
- gmx_fatal(FARGS, "%s", check_box(ir->ePBC, box));
+ gmx_fatal(FARGS, "%s", check_box(ir->pbcType, box));
}
/* Test particle insertion ? */
fr->sc_r_power = ir->fepvals->sc_r_power;
fr->sc_sigma6_def = gmx::power6(ir->fepvals->sc_sigma);
- env = getenv("GMX_SCSIGMA_MIN");
+ char* env = getenv("GMX_SCSIGMA_MIN");
if (env != nullptr)
{
- dbl = 0;
+ double dbl = 0;
sscanf(env, "%20lf", &dbl);
fr->sc_sigma6_min = gmx::power6(dbl);
if (fp)
/* Neighbour searching stuff */
fr->cutoff_scheme = ir->cutoff_scheme;
- fr->ePBC = ir->ePBC;
+ fr->pbcType = ir->pbcType;
/* Determine if we will do PBC for distances in bonded interactions */
- if (fr->ePBC == epbcNONE)
+ if (fr->pbcType == PbcType::No)
{
fr->bMolPBC = FALSE;
}
{
const bool useEwaldSurfaceCorrection =
(EEL_PME_EWALD(ir->coulombtype) && ir->epsilon_surface != 0);
+ const bool haveOrientationRestraints = (gmx_mtop_ftype_count(mtop, F_ORIRES) > 0);
if (!DOMAINDECOMP(cr))
{
- fr->bMolPBC = areMoleculesDistributedOverPbc(*ir, *mtop, mdlog);
- // The assert below is equivalent to fcd->orires.nr != gmx_mtop_ftype_count(mtop, F_ORIRES)
- GMX_RELEASE_ASSERT(!fr->bMolPBC || fcd->orires.nr == 0,
- "Molecules broken over PBC exist in a simulation including "
- "orientation restraints. "
- "This likely means that the global topology and the force constant "
- "data have gotten out of sync.");
- if (useEwaldSurfaceCorrection)
+ fr->bMolPBC = true;
+
+ if (useEwaldSurfaceCorrection || haveOrientationRestraints)
{
- gmx_fatal(FARGS,
- "In GROMACS 2020, Ewald dipole correction is disabled when not "
- "using domain decomposition. With domain decomposition, it only works "
- "when each molecule consists of a single update group (e.g. water). "
- "This will be fixed in GROMACS 2021.");
+ fr->wholeMoleculeTransform =
+ std::make_unique<gmx::WholeMoleculeTransform>(*mtop, ir->pbcType);
}
}
else
{
- fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
+ fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->pbcType);
- if (useEwaldSurfaceCorrection && !dd_moleculesAreAlwaysWhole(*cr->dd))
+ /* With Ewald surface correction it is useful to support e.g. running water
+ * in parallel with update groups.
+ * With orientation restraints there is no sensible use case supported with DD.
+ */
+ if ((useEwaldSurfaceCorrection && !dd_moleculesAreAlwaysWhole(*cr->dd)) || haveOrientationRestraints)
{
gmx_fatal(FARGS,
- "You requested dipole correction (epsilon_surface > 0), but molecules "
- "are broken "
+ "You requested Ewald surface correction or orientation restraints, "
+ "but molecules are broken "
"over periodic boundary conditions by the domain decomposition. "
"Run without domain decomposition instead.");
}
if (useEwaldSurfaceCorrection)
{
- GMX_RELEASE_ASSERT((!DOMAINDECOMP(cr) && !fr->bMolPBC)
- || (DOMAINDECOMP(cr) && dd_moleculesAreAlwaysWhole(*cr->dd)),
+ GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || dd_moleculesAreAlwaysWhole(*cr->dd),
"Molecules can not be broken by PBC with epsilon_surface > 0");
}
}
/* fr->ic is used both by verlet and group kernels (to some extent) now */
init_interaction_const(fp, &fr->ic, ir, mtop, systemHasNetCharge);
- init_interaction_const_tables(fp, fr->ic);
+ init_interaction_const_tables(fp, fr->ic, ir->tabext);
const interaction_const_t* ic = fr->ic;
case eelSWITCH:
case eelSHIFT:
case eelUSER:
- case eelENCADSHIFT:
case eelPMESWITCH:
case eelPMEUSER:
case eelPMEUSERSWITCH:
case evdwSWITCH:
case evdwSHIFT:
- case evdwUSER:
- case evdwENCADSHIFT:
- fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
- break;
+ case evdwUSER: fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE; break;
default: gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[ic->vdwtype]);
}
fr->shiftForces.resize(SHIFTS);
- if (fr->nbfp == nullptr)
+ if (fr->nbfp.empty())
{
fr->ntype = mtop->ffparams.atnr;
fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
* in that case grompp should already have checked that we do not need
* normal tables and we only generate tables for 1-4 interactions.
*/
- rtab = ir->rlist + ir->tabext;
+ real rtab = ir->rlist + ir->tabext;
/* We want to use unmodified tables for 1-4 coulombic
* interactions, so we must in general have an extra set of
}
// QM/MM initialization if requested
- fr->bQMMM = ir->bQMMM;
- if (fr->bQMMM)
+ if (ir->bQMMM)
{
- // Initialize QM/MM if supported
- if (GMX_QMMM)
- {
- GMX_LOG(mdlog.info)
- .asParagraph()
- .appendText(
- "Large parts of the QM/MM support is deprecated, and may be removed in "
- "a future "
- "version. Please get in touch with the developers if you find the "
- "support useful, "
- "as help is needed if the functionality is to continue to be "
- "available.");
- fr->qr = mk_QMMMrec();
- init_QMMMrec(cr, mtop, ir, fr);
- }
- else
- {
- gmx_incons(
- "QM/MM was requested, but is only available when GROMACS "
- "is configured with QM/MM support");
- }
+ gmx_incons("QM/MM was requested, but is no longer available in GROMACS");
}
/* Set all the static charge group info */
- fr->cginfo_mb = init_cginfo_mb(mtop, fr, &bFEP_NonBonded);
+ fr->cginfo_mb = init_cginfo_mb(mtop, fr);
if (!DOMAINDECOMP(cr))
{
fr->cginfo = cginfo_expand(mtop->molblock.size(), fr->cginfo_mb);
fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
snew(fr->ewc_t, fr->nthread_ewc);
- if (fr->cutoff_scheme == ecutsVERLET)
- {
- // We checked the cut-offs in grompp, but double-check here.
- // We have PME+LJcutoff kernels for rcoulomb>rvdw.
- if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT)
- {
- GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw,
- "With Verlet lists and PME we should have rcoulomb>=rvdw");
- }
- else
- {
- GMX_RELEASE_ASSERT(
- ir->rcoulomb == ir->rvdw,
- "With Verlet lists and no PME rcoulomb and rvdw should be identical");
- }
-
- fr->nbv = Nbnxm::init_nb_verlet(mdlog, bFEP_NonBonded, ir, fr, cr, hardwareInfo, deviceInfo,
- mtop, box, wcycle);
-
- if (useGpuForBonded)
- {
- auto stream = havePPDomainDecomposition(cr)
- ? Nbnxm::gpu_get_command_stream(
- fr->nbv->gpu_nbv, gmx::InteractionLocality::NonLocal)
- : Nbnxm::gpu_get_command_stream(fr->nbv->gpu_nbv,
- gmx::InteractionLocality::Local);
- // TODO the heap allocation is only needed while
- // t_forcerec lacks a constructor.
- fr->gpuBonded = new gmx::GpuBonded(mtop->ffparams, stream, wcycle);
- }
- }
-
if (ir->eDispCorr != edispcNO)
{
fr->dispersionCorrection = std::make_unique<DispersionCorrection>(
- *mtop, *ir, fr->bBHAM, fr->ntype,
- gmx::arrayRefFromArray(fr->nbfp, fr->ntype * fr->ntype * 2), *fr->ic, tabfn);
+ *mtop, *ir, fr->bBHAM, fr->ntype, fr->nbfp, *fr->ic, tabfn);
fr->dispersionCorrection->print(mdlog);
}
*/
fprintf(fp, "\n");
}
-
- if (pmeOnlyRankUsesGpu && c_enableGpuPmePpComms)
- {
- fr->pmePpCommGpu = std::make_unique<gmx::PmePpCommGpu>(cr->mpi_comm_mysim, cr->dd->pme_nodeid);
- }
}
t_forcerec::t_forcerec() = default;
-t_forcerec::~t_forcerec() = default;
-
-/* Frees GPU memory and sets a tMPI node barrier.
- *
- * Note that this function needs to be called even if GPUs are not used
- * in this run because the PME ranks have no knowledge of whether GPUs
- * are used or not, but all ranks need to enter the barrier below.
- * \todo Remove physical node barrier from this function after making sure
- * that it's not needed anymore (with a shared GPU run).
- */
-void free_gpu_resources(t_forcerec* fr,
- const gmx::PhysicalNodeCommunicator& physicalNodeCommunicator,
- const gmx_gpu_info_t& gpu_info)
-{
- bool isPPrankUsingGPU = (fr != nullptr) && (fr->nbv != nullptr) && fr->nbv->useGpu();
-
- /* stop the GPU profiler (only CUDA) */
- if (gpu_info.n_dev > 0)
- {
- stopGpuProfiler();
- }
-
- if (isPPrankUsingGPU)
- {
- /* Free data in GPU memory and pinned memory before destroying the GPU context */
- fr->nbv.reset();
-
- delete fr->gpuBonded;
- fr->gpuBonded = nullptr;
- }
-
- /* With tMPI we need to wait for all ranks to finish deallocation before
- * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
- * GPU and context.
- *
- * This is not a concern in OpenCL where we use one context per rank which
- * is freed in nbnxn_gpu_free().
- *
- * Note: it is safe to not call the barrier on the ranks which do not use GPU,
- * but it is easier and more futureproof to call it on the whole node.
- */
- if (GMX_THREAD_MPI)
- {
- physicalNodeCommunicator.barrier();
- }
-}
-
-void done_forcerec(t_forcerec* fr, int numMolBlocks)
+t_forcerec::~t_forcerec()
{
- if (fr == nullptr)
- {
- // PME-only ranks don't have a forcerec
- return;
- }
- done_cginfo_mb(fr->cginfo_mb, numMolBlocks);
- sfree(fr->nbfp);
- delete fr->ic;
- sfree(fr->shift_vec);
- sfree(fr->ewc_t);
- tear_down_bonded_threading(fr->bondedThreading);
- GMX_RELEASE_ASSERT(fr->gpuBonded == nullptr, "Should have been deleted earlier, when used");
- fr->bondedThreading = nullptr;
- delete fr;
+ /* Note: This code will disappear when types are converted to C++ */
+ sfree(shift_vec);
+ sfree(ewc_t);
+ tear_down_bonded_threading(bondedThreading);
}
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013-2020, by the GROMACS development team, led by
+ * Copyright (c) 2013-2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
#include "gromacs/ewald/pme.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/ewald/pme_pp_comm_gpu.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
#include "gromacs/mdlib/constr.h"
#include "gromacs/mdlib/enerdata_utils.h"
#include "gromacs/mdlib/force.h"
+#include "gromacs/mdlib/force_flags.h"
#include "gromacs/mdlib/forcerec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
-#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdlib/update.h"
+#include "gromacs/mdlib/vsite.h"
+#include "gromacs/mdlib/wholemoleculetransform.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/enerdata.h"
#include "gromacs/mdtypes/forceoutput.h"
+#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/iforceprovider.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/mdtypes/state_propagator_data_gpu.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
+#include "gromacs/nbnxm/nbnxm_gpu.h"
#include "gromacs/pbcutil/ishift.h"
-#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/pulling/pull_rotation.h"
const rvec x[],
const gmx::ForceWithShiftForces& forceWithShiftForces,
tensor vir_part,
- const t_graph* graph,
const matrix box,
t_nrnb* nrnb,
const t_forcerec* fr,
- int ePBC)
+ PbcType pbcType)
{
/* The short-range virial from surrounding boxes */
const rvec* fshift = as_rvec_array(forceWithShiftForces.shiftForces().data());
- calc_vir(SHIFTS, fr->shift_vec, fshift, vir_part, ePBC == epbcSCREW, box);
+ calc_vir(SHIFTS, fr->shift_vec, fshift, vir_part, pbcType == PbcType::Screw, box);
inc_nrnb(nrnb, eNR_VIRIAL, SHIFTS);
/* Calculate partial virial, for local atoms only, based on short range.
* Total virial is computed in global_stat, called from do_md
*/
const rvec* f = as_rvec_array(forceWithShiftForces.force().data());
- f_calc_vir(start, start + homenr, x, f, vir_part, graph, box);
+ f_calc_vir(start, start + homenr, x, f, vir_part, box);
inc_nrnb(nrnb, eNR_VIRIAL, homenr);
if (debug)
* which is why pull_potential is called close to other communication.
*/
wallcycle_start(wcycle, ewcPULLPOT);
- set_pbc(&pbc, ir->ePBC, box);
+ set_pbc(&pbc, ir->pbcType, box);
dvdl = 0;
enerd->term[F_COM_PULL] += pull_potential(pull_work, mdatoms, &pbc, cr, t, lambda[efptRESTRAINT],
as_rvec_array(x.data()), force, &dvdl);
enerd->dvdl_lin[efptRESTRAINT] += dvdl;
+ for (auto& dhdl : enerd->dhdlLambda)
+ {
+ dhdl += dvdl;
+ }
wallcycle_stop(wcycle, ewcPULLPOT);
}
enerd->dvdl_lin[efptCOUL] += dvdl_q;
enerd->dvdl_lin[efptVDW] += dvdl_lj;
+ for (auto& dhdl : enerd->dhdlLambda)
+ {
+ dhdl += dvdl_q + dvdl_lj;
+ }
+
if (wcycle)
{
dd_cycles_add(cr->dd, cycles_seppme, ddCyclPME);
ForceOutputs* forceOutputs,
tensor vir_force,
const t_mdatoms* mdatoms,
- const t_graph* graph,
const t_forcerec* fr,
const gmx_vsite_t* vsite,
const StepWorkload& stepWork)
*/
matrix virial = { { 0 } };
spread_vsite_f(vsite, x, fDirectVir, nullptr, stepWork.computeVirial, virial, nrnb,
- &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
+ top->idef, fr->pbcType, fr->bMolPBC, box, cr, wcycle);
forceWithVirial.addVirialContribution(virial);
}
if (awh)
{
enerd->term[F_COM_PULL] += awh->applyBiasForcesAndUpdateBias(
- inputrec->ePBC, *mdatoms, box, forceWithVirial, t, step, wcycle, fplog);
+ inputrec->pbcType, *mdatoms, box, forceWithVirial, t, step, wcycle, fplog);
}
}
}
}
-/*! \brief Makes PME flags from StepWorkload data.
- *
- * \param[in] stepWork Step schedule flags
- * \returns PME flags
- */
-static int makePmeFlags(const StepWorkload& stepWork)
-{
- return GMX_PME_SPREAD | GMX_PME_SOLVE | (stepWork.computeVirial ? GMX_PME_CALC_ENER_VIR : 0)
- | (stepWork.computeEnergy ? GMX_PME_CALC_ENER_VIR : 0)
- | (stepWork.computeForces ? GMX_PME_CALC_F : 0);
-}
-
/*! \brief Launch the prepare_step and spread stages of PME GPU.
*
* \param[in] pmedata The PME structure
* \param[in] box The box matrix
* \param[in] stepWork Step schedule flags
- * \param[in] pmeFlags PME flags
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in
* the device memory. \param[in] wcycle The wallcycle structure
*/
static inline void launchPmeGpuSpread(gmx_pme_t* pmedata,
const matrix box,
const StepWorkload& stepWork,
- int pmeFlags,
GpuEventSynchronizer* xReadyOnDevice,
gmx_wallcycle_t wcycle)
{
- pme_gpu_prepare_computation(pmedata, stepWork.haveDynamicBox, box, wcycle, pmeFlags,
- stepWork.useGpuPmeFReduction);
+ pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
pme_gpu_launch_spread(pmedata, xReadyOnDevice, wcycle);
}
*
* \param[in] pmedata The PME structure
* \param[in] wcycle The wallcycle structure
+ * \param[in] stepWork Step schedule flags
*/
-static void launchPmeGpuFftAndGather(gmx_pme_t* pmedata, gmx_wallcycle_t wcycle)
+static void launchPmeGpuFftAndGather(gmx_pme_t* pmedata, gmx_wallcycle_t wcycle, const gmx::StepWorkload& stepWork)
{
- pme_gpu_launch_complex_transforms(pmedata, wcycle);
- pme_gpu_launch_gather(pmedata, wcycle, PmeForceOutputHandling::Set);
+ pme_gpu_launch_complex_transforms(pmedata, wcycle, stepWork);
+ pme_gpu_launch_gather(pmedata, wcycle);
}
/*! \brief
* \param[in,out] forceOutputs Output buffer for the forces and virial
* \param[in,out] enerd Energy data structure results are reduced into
* \param[in] stepWork Step schedule flags
- * \param[in] pmeFlags PME flags
* \param[in] wcycle The wallcycle structure
*/
static void alternatePmeNbGpuWaitReduce(nonbonded_verlet_t* nbv,
gmx::ForceOutputs* forceOutputs,
gmx_enerdata_t* enerd,
const StepWorkload& stepWork,
- int pmeFlags,
gmx_wallcycle_t wcycle)
{
bool isPmeGpuDone = false;
{
GpuTaskCompletion completionType =
(isNbGpuDone) ? GpuTaskCompletion::Wait : GpuTaskCompletion::Check;
- isPmeGpuDone = pme_gpu_try_finish_task(pmedata, pmeFlags, wcycle, &forceWithVirial,
+ isPmeGpuDone = pme_gpu_try_finish_task(pmedata, stepWork, wcycle, &forceWithVirial,
enerd, completionType);
}
/*! \brief Set up flags that have the lifetime of the domain indicating what type of work is there to compute.
*/
-static DomainLifetimeWorkload setupDomainLifetimeWorkload(const t_inputrec& inputrec,
- const t_forcerec& fr,
- const pull_t* pull_work,
- const gmx_edsam* ed,
- const t_idef& idef,
- const t_fcdata& fcd,
- const t_mdatoms& mdatoms,
+static DomainLifetimeWorkload setupDomainLifetimeWorkload(const t_inputrec& inputrec,
+ const t_forcerec& fr,
+ const pull_t* pull_work,
+ const gmx_edsam* ed,
+ const InteractionDefinitions& idef,
+ const t_fcdata& fcd,
+ const t_mdatoms& mdatoms,
const SimulationWorkload& simulationWork,
const StepWorkload& stepWork)
{
/* \brief Launch end-of-step GPU tasks: buffer clearing and rolling pruning.
*
- * TODO: eliminate the \p useGpuNonbonded and \p useGpuNonbonded when these are
+ * TODO: eliminate \p useGpuPmeOnThisRank when this is
* incorporated in DomainLifetimeWorkload.
*/
static void launchGpuEndOfStepTasks(nonbonded_verlet_t* nbv,
gmx_pme_t* pmedata,
gmx_enerdata_t* enerd,
const gmx::MdrunScheduleWorkload& runScheduleWork,
- bool useGpuNonbonded,
- bool useGpuPme,
+ bool useGpuPmeOnThisRank,
int64_t step,
gmx_wallcycle_t wcycle)
{
- if (useGpuNonbonded)
+ if (runScheduleWork.simulationWork.useGpuNonbonded)
{
/* Launch pruning before buffer clearing because the API overhead of the
* clear kernel launches can leave the GPU idle while it could be running
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
- if (useGpuPme)
+ if (useGpuPmeOnThisRank)
{
pme_gpu_reinit_computation(pmedata, wcycle);
}
}
}
+//! \brief Data structure to hold dipole-related data and staging arrays
+struct DipoleData
+{
+ //! Dipole staging for fast summing over MPI
+ gmx::DVec muStaging[2] = { { 0.0, 0.0, 0.0 } };
+ //! Dipole staging for states A and B (index 0 and 1 resp.)
+ gmx::RVec muStateAB[2] = { { 0.0_real, 0.0_real, 0.0_real } };
+};
+
+
+static void reduceAndUpdateMuTot(DipoleData* dipoleData,
+ const t_commrec* cr,
+ const bool haveFreeEnergy,
+ gmx::ArrayRef<const real> lambda,
+ rvec muTotal,
+ const DDBalanceRegionHandler& ddBalanceRegionHandler)
+{
+ if (PAR(cr))
+ {
+ gmx_sumd(2 * DIM, dipoleData->muStaging[0], cr);
+ ddBalanceRegionHandler.reopenRegionCpu();
+ }
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < DIM; j++)
+ {
+ dipoleData->muStateAB[i][j] = dipoleData->muStaging[i][j];
+ }
+ }
+
+ if (!haveFreeEnergy)
+ {
+ copy_rvec(dipoleData->muStateAB[0], muTotal);
+ }
+ else
+ {
+ for (int j = 0; j < DIM; j++)
+ {
+ muTotal[j] = (1.0 - lambda[efptCOUL]) * dipoleData->muStateAB[0][j]
+ + lambda[efptCOUL] * dipoleData->muStateAB[1][j];
+ }
+ }
+}
void do_force(FILE* fplog,
const t_commrec* cr,
gmx_enerdata_t* enerd,
t_fcdata* fcd,
gmx::ArrayRef<real> lambda,
- t_graph* graph,
t_forcerec* fr,
gmx::MdrunScheduleWorkload* runScheduleWork,
const gmx_vsite_t* vsite,
- rvec mu_tot,
+ rvec muTotal,
double t,
gmx_edsam* ed,
int legacyFlags,
const DDBalanceRegionHandler& ddBalanceRegionHandler)
{
- int i, j;
- double mu[2 * DIM];
+ GMX_ASSERT(force.unpaddedArrayRef().ssize() >= fr->natoms_force_constr,
+ "The size of the force buffer should be at least the number of atoms to compute "
+ "forces for");
+
nonbonded_verlet_t* nbv = fr->nbv.get();
interaction_const_t* ic = fr->ic;
gmx::StatePropagatorDataGpu* stateGpu = fr->stateGpu;
- // TODO remove the code below when the legacy flags are not in use anymore
- /* modify force flag if not doing nonbonded */
- if (!fr->bNonbonded)
- {
- legacyFlags &= ~GMX_FORCE_NONBONDED;
- }
-
const SimulationWorkload& simulationWork = runScheduleWork->simulationWork;
const bool useGpuPmeOnThisRank = simulationWork.useGpuPme && thisRankHasDuty(cr, DUTY_PME);
- const int pmeFlags = makePmeFlags(stepWork);
-
- // Switches on whether to use GPU for position and force buffer operations
- // TODO consider all possible combinations of triggers, and how to combine optimally in each case.
- const BufferOpsUseGpu useGpuXBufOps =
- stepWork.useGpuXBufferOps ? BufferOpsUseGpu::True : BufferOpsUseGpu::False;
- // GPU Force buffer ops are disabled on virial steps, because the virial calc is not yet ported to GPU
- const BufferOpsUseGpu useGpuFBufOps =
- stepWork.useGpuFBufferOps ? BufferOpsUseGpu::True : BufferOpsUseGpu::False;
/* At a search step we need to start the first balancing region
* somewhere early inside the step after communication during domain
ddBalanceRegionHandler.openBeforeForceComputationCpu(DdAllowBalanceRegionReopen::yes);
}
- const int start = 0;
- const int homenr = mdatoms->homenr;
-
clear_mat(vir_force);
- if (stepWork.stateChanged)
- {
- if (inputrecNeedMutot(inputrec))
- {
- /* Calculate total (local) dipole moment in a temporary common array.
- * This makes it possible to sum them over nodes faster.
- */
- calc_mu(start, homenr, x.unpaddedArrayRef(), mdatoms->chargeA, mdatoms->chargeB,
- mdatoms->nChargePerturbed, mu, mu + DIM);
- }
- }
-
- if (fr->ePBC != epbcNONE)
+ if (fr->pbcType != PbcType::No)
{
/* Compute shift vectors every step,
* because of pressure coupling or box deformation!
const bool calcCGCM = (fillGrid && !DOMAINDECOMP(cr));
if (calcCGCM)
{
- put_atoms_in_box_omp(fr->ePBC, box, x.unpaddedArrayRef().subArray(0, homenr),
+ put_atoms_in_box_omp(fr->pbcType, box, x.unpaddedArrayRef().subArray(0, mdatoms->homenr),
gmx_omp_nthreads_get(emntDefault));
- inc_nrnb(nrnb, eNR_SHIFTX, homenr);
- }
- else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph)
- {
- unshift_self(graph, box, as_rvec_array(x.unpaddedArrayRef().data()));
+ inc_nrnb(nrnb, eNR_SHIFTX, mdatoms->homenr);
}
}
nbnxn_atomdata_copy_shiftvec(stepWork.haveDynamicBox, fr->shift_vec, nbv->nbat.get());
-#if GMX_MPI
const bool pmeSendCoordinatesFromGpu =
- simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch);
+ GMX_MPI && simulationWork.useGpuPmePpCommunication && !(stepWork.doNeighborSearch);
const bool reinitGpuPmePpComms =
- simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch);
-#endif
+ GMX_MPI && simulationWork.useGpuPmePpCommunication && (stepWork.doNeighborSearch);
- const auto localXReadyOnDevice = (stateGpu != nullptr)
+ const auto localXReadyOnDevice = (useGpuPmeOnThisRank || simulationWork.useGpuBufferOps)
? stateGpu->getCoordinatesReadyOnDeviceEvent(
AtomLocality::Local, simulationWork, stepWork)
: nullptr;
-#if GMX_MPI
// If coordinates are to be sent to PME task from CPU memory, perform that send here.
// Otherwise the send will occur after H2D coordinate transfer.
- if (!thisRankHasDuty(cr, DUTY_PME) && !pmeSendCoordinatesFromGpu)
+ if (GMX_MPI && !thisRankHasDuty(cr, DUTY_PME) && !pmeSendCoordinatesFromGpu)
{
- /* Send particle coordinates to the pme nodes.
- * Since this is only implemented for domain decomposition
- * and domain decomposition does not use the graph,
- * we do not need to worry about shifting.
- */
+ /* Send particle coordinates to the pme nodes */
if (!stepWork.doNeighborSearch && simulationWork.useGpuUpdate)
{
GMX_RELEASE_ASSERT(false,
step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms,
pmeSendCoordinatesFromGpu, localXReadyOnDevice, wcycle);
}
-#endif /* GMX_MPI */
// Coordinates on the device are needed if PME or BufferOps are offloaded.
// The local coordinates can be copied right away.
// NOTE: Consider moving this copy to right after they are updated and constrained,
// if the later is not offloaded.
- if (useGpuPmeOnThisRank || useGpuXBufOps == BufferOpsUseGpu::True)
+ if (useGpuPmeOnThisRank || stepWork.useGpuXBufferOps)
{
if (stepWork.doNeighborSearch)
{
+ // TODO refactor this to do_md, after partitioning.
stateGpu->reinit(mdatoms->homenr,
cr->dd != nullptr ? dd_numAtomsZones(*cr->dd) : mdatoms->homenr);
if (useGpuPmeOnThisRank)
// The conditions for gpuHaloExchange e.g. using GPU buffer
// operations were checked before construction, so here we can
// just use it and assert upon any conditions.
- gmx::GpuHaloExchange* gpuHaloExchange =
- (havePPDomainDecomposition(cr) ? cr->dd->gpuHaloExchange.get() : nullptr);
- const bool ddUsesGpuDirectCommunication = (gpuHaloExchange != nullptr);
- GMX_ASSERT(!ddUsesGpuDirectCommunication || (useGpuXBufOps == BufferOpsUseGpu::True),
+ const bool ddUsesGpuDirectCommunication =
+ ((cr->dd != nullptr) && (!cr->dd->gpuHaloExchange.empty()));
+ GMX_ASSERT(!ddUsesGpuDirectCommunication || stepWork.useGpuXBufferOps,
"Must use coordinate buffer ops with GPU halo exchange");
- const bool useGpuForcesHaloExchange =
- ddUsesGpuDirectCommunication && (useGpuFBufOps == BufferOpsUseGpu::True);
+ const bool useGpuForcesHaloExchange = ddUsesGpuDirectCommunication && stepWork.useGpuFBufferOps;
// Copy coordinate from the GPU if update is on the GPU and there
// are forces to be computed on the CPU, or for the computation of
haveCopiedXFromGpu = true;
}
-#if GMX_MPI
// If coordinates are to be sent to PME task from GPU memory, perform that send here.
// Otherwise the send will occur before the H2D coordinate transfer.
- if (pmeSendCoordinatesFromGpu)
+ if (!thisRankHasDuty(cr, DUTY_PME) && pmeSendCoordinatesFromGpu)
{
- /* Send particle coordinates to the pme nodes.
- * Since this is only implemented for domain decomposition
- * and domain decomposition does not use the graph,
- * we do not need to worry about shifting.
- */
+ /* Send particle coordinates to the pme nodes */
gmx_pme_send_coordinates(fr, cr, box, as_rvec_array(x.unpaddedArrayRef().data()), lambda[efptCOUL],
lambda[efptVDW], (stepWork.computeVirial || stepWork.computeEnergy),
step, simulationWork.useGpuPmePpCommunication, reinitGpuPmePpComms,
pmeSendCoordinatesFromGpu, localXReadyOnDevice, wcycle);
}
-#endif /* GMX_MPI */
if (useGpuPmeOnThisRank)
{
- launchPmeGpuSpread(fr->pmedata, box, stepWork, pmeFlags, localXReadyOnDevice, wcycle);
+ launchPmeGpuSpread(fr->pmedata, box, stepWork, localXReadyOnDevice, wcycle);
}
/* do gridding for pair search */
if (stepWork.doNeighborSearch)
{
- if (graph && stepWork.stateChanged)
+ if (fr->wholeMoleculeTransform && stepWork.stateChanged)
{
- /* Calculate intramolecular shift vectors to make molecules whole */
- mk_mshift(fplog, graph, fr->ePBC, box, as_rvec_array(x.unpaddedArrayRef().data()));
+ fr->wholeMoleculeTransform->updateForAtomPbcJumps(x.unpaddedArrayRef(), box);
}
// TODO
}
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
- }
- if (stepWork.doNeighborSearch)
- {
// Need to run after the GPU-offload bonded interaction lists
// are set up to be able to determine whether there is bonded work.
runScheduleWork->domainWork = setupDomainLifetimeWorkload(
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_LOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbv->constructPairlist(InteractionLocality::Local, &top->excls, step, nrnb);
+ nbv->constructPairlist(InteractionLocality::Local, top->excls, step, nrnb);
nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::Local);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_LOCAL);
wallcycle_stop(wcycle, ewcNS);
- if (useGpuXBufOps == BufferOpsUseGpu::True)
+ if (stepWork.useGpuXBufferOps)
{
nbv->atomdata_init_copy_x_to_nbat_x_gpu();
}
// For force buffer ops, we use the below conditon rather than
- // useGpuFBufOps to ensure that init is performed even if this
+ // useGpuFBufferOps to ensure that init is performed even if this
// NS step is also a virial step (on which f buf ops are deactivated).
if (simulationWork.useGpuBufferOps && simulationWork.useGpuNonbonded && (GMX_GPU == GMX_GPU_CUDA))
{
}
else if (!EI_TPI(inputrec->eI))
{
- if (useGpuXBufOps == BufferOpsUseGpu::True)
+ if (stepWork.useGpuXBufferOps)
{
GMX_ASSERT(stateGpu, "stateGpu should be valid when buffer ops are offloaded");
nbv->convertCoordinatesGpu(AtomLocality::Local, false, stateGpu->getCoordinates(),
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
Nbnxm::gpu_upload_shiftvec(nbv->gpu_nbv, nbv->nbat.get());
- if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
+ if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
{
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::Local);
}
if (domainWork.haveGpuBondedWork && !havePPDomainDecomposition(cr))
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
- fr->gpuBonded->launchKernel(fr, stepWork, box);
+ fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
}
// X copy/transform to allow overlap as well as after the GPU NB
// launch to avoid FFT launch overhead hijacking the CPU and delaying
// the nonbonded kernel.
- launchPmeGpuFftAndGather(fr->pmedata, wcycle);
+ launchPmeGpuFftAndGather(fr->pmedata, wcycle, stepWork);
}
/* Communicate coordinates and sum dipole if necessary +
wallcycle_start_nocount(wcycle, ewcNS);
wallcycle_sub_start(wcycle, ewcsNBS_SEARCH_NONLOCAL);
/* Note that with a GPU the launch overhead of the list transfer is not timed separately */
- nbv->constructPairlist(InteractionLocality::NonLocal, &top->excls, step, nrnb);
+ nbv->constructPairlist(InteractionLocality::NonLocal, top->excls, step, nrnb);
nbv->setupGpuShortRangeWork(fr->gpuBonded, InteractionLocality::NonLocal);
wallcycle_sub_stop(wcycle, ewcsNBS_SEARCH_NONLOCAL);
wallcycle_stop(wcycle, ewcNS);
+ // TODO refactor this GPU halo exchange re-initialisation
+ // to location in do_md where GPU halo exchange is
+ // constructed at partitioning, after above stateGpu
+ // re-initialization has similarly been refactored
if (ddUsesGpuDirectCommunication)
{
- gpuHaloExchange->reinitHalo(stateGpu->getCoordinates(), stateGpu->getForces());
+ reinitGpuHaloExchange(*cr, stateGpu->getCoordinates(), stateGpu->getForces());
}
}
else
{
// The following must be called after local setCoordinates (which records an event
// when the coordinate data has been copied to the device).
- gpuHaloExchange->communicateHaloCoordinates(box, localXReadyOnDevice);
+ communicateGpuHaloCoordinates(*cr, box, localXReadyOnDevice);
if (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork)
{
dd_move_x(cr->dd, box, x.unpaddedArrayRef(), wcycle);
}
- if (useGpuXBufOps == BufferOpsUseGpu::True)
+ if (stepWork.useGpuXBufferOps)
{
- // The condition here was (pme != nullptr && pme_gpu_get_device_x(fr->pmedata) != nullptr)
if (!useGpuPmeOnThisRank && !ddUsesGpuDirectCommunication)
{
stateGpu->copyCoordinatesToGpu(x.unpaddedArrayRef(), AtomLocality::NonLocal);
{
wallcycle_start(wcycle, ewcLAUNCH_GPU);
- if (stepWork.doNeighborSearch || (useGpuXBufOps == BufferOpsUseGpu::False))
+ if (stepWork.doNeighborSearch || !stepWork.useGpuXBufferOps)
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_NONBONDED);
Nbnxm::gpu_copy_xq_to_gpu(nbv->gpu_nbv, nbv->nbat.get(), AtomLocality::NonLocal);
if (domainWork.haveGpuBondedWork)
{
wallcycle_sub_start(wcycle, ewcsLAUNCH_GPU_BONDED);
- fr->gpuBonded->launchKernel(fr, stepWork, box);
+ fr->gpuBonded->setPbcAndlaunchKernel(fr->pbcType, box, fr->bMolPBC, stepWork);
wallcycle_sub_stop(wcycle, ewcsLAUNCH_GPU_BONDED);
}
wallcycle_stop(wcycle, ewcLAUNCH_GPU);
}
- if (stepWork.stateChanged && inputrecNeedMutot(inputrec))
+ gmx::ArrayRef<const gmx::RVec> xWholeMolecules;
+ if (fr->wholeMoleculeTransform)
{
- if (PAR(cr))
- {
- gmx_sumd(2 * DIM, mu, cr);
+ xWholeMolecules = fr->wholeMoleculeTransform->wholeMoleculeCoordinates(x.unpaddedArrayRef(), box);
+ }
- ddBalanceRegionHandler.reopenRegionCpu();
- }
+ DipoleData dipoleData;
- for (i = 0; i < 2; i++)
- {
- for (j = 0; j < DIM; j++)
- {
- fr->mu_tot[i][j] = mu[i * DIM + j];
- }
- }
- }
- if (mdatoms->nChargePerturbed == 0)
- {
- copy_rvec(fr->mu_tot[0], mu_tot);
- }
- else
+ if (simulationWork.computeMuTot)
{
- for (j = 0; j < DIM; j++)
- {
- mu_tot[j] = (1.0 - lambda[efptCOUL]) * fr->mu_tot[0][j] + lambda[efptCOUL] * fr->mu_tot[1][j];
- }
+ const int start = 0;
+
+ /* Calculate total (local) dipole moment in a temporary common array.
+ * This makes it possible to sum them over nodes faster.
+ */
+ gmx::ArrayRef<const gmx::RVec> xRef =
+ (xWholeMolecules.empty() ? x.unpaddedArrayRef() : xWholeMolecules);
+ calc_mu(start, mdatoms->homenr, xRef, mdatoms->chargeA, mdatoms->chargeB,
+ mdatoms->nChargePerturbed, dipoleData.muStaging[0], dipoleData.muStaging[1]);
+
+ reduceAndUpdateMuTot(&dipoleData, cr, (fr->efep != efepNO), lambda, muTotal, ddBalanceRegionHandler);
}
/* Reset energies */
}
}
- /* update QMMMrec, if necessary */
- if (fr->bQMMM)
- {
- update_QMMMrec(cr, fr, as_rvec_array(x.unpaddedArrayRef().data()), mdatoms, box);
- }
-
// TODO Force flags should include haveFreeEnergyWork for this domain
if (ddUsesGpuDirectCommunication && (domainWork.haveCpuBondedWork || domainWork.haveFreeEnergyWork))
{
stateGpu->waitCoordinatesReadyOnHost(AtomLocality::NonLocal);
}
/* Compute the bonded and non-bonded energies and optionally forces */
- do_force_lowlevel(fr, inputrec, &(top->idef), cr, ms, nrnb, wcycle, mdatoms, x, hist, &forceOut, enerd,
- fcd, box, lambda.data(), graph, fr->mu_tot, stepWork, ddBalanceRegionHandler);
+ do_force_lowlevel(fr, inputrec, top->idef, cr, ms, nrnb, wcycle, mdatoms, x, xWholeMolecules,
+ hist, &forceOut, enerd, fcd, box, lambda.data(),
+ as_rvec_array(dipoleData.muStateAB), stepWork, ddBalanceRegionHandler);
wallcycle_stop(wcycle, ewcFORCE);
wallcycle_stop(wcycle, ewcFORCE);
}
- if (useGpuFBufOps == BufferOpsUseGpu::True)
+ if (stepWork.useGpuFBufferOps)
{
gmx::FixedCapacityVector<GpuEventSynchronizer*, 1> dependencyList;
stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(),
AtomLocality::NonLocal);
dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(
- AtomLocality::NonLocal, useGpuFBufOps == BufferOpsUseGpu::True));
+ AtomLocality::NonLocal, stepWork.useGpuFBufferOps));
}
nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::NonLocal, stateGpu->getForces(),
{
stateGpu->copyForcesToGpu(forceOut.forceWithShiftForces().force(), AtomLocality::Local);
}
- gpuHaloExchange->communicateHaloForces(domainWork.haveCpuLocalForceWork);
+ communicateGpuHaloForces(*cr, domainWork.haveCpuLocalForceWork);
}
else
{
- if (useGpuFBufOps == BufferOpsUseGpu::True)
+ if (stepWork.useGpuFBufferOps)
{
stateGpu->waitForcesReadyOnHost(AtomLocality::NonLocal);
}
// With both nonbonded and PME offloaded a GPU on the same rank, we use
// an alternating wait/reduction scheme.
bool alternateGpuWait = (!c_disableAlternatingWait && useGpuPmeOnThisRank && simulationWork.useGpuNonbonded
- && !DOMAINDECOMP(cr) && (useGpuFBufOps == BufferOpsUseGpu::False));
+ && !DOMAINDECOMP(cr) && !stepWork.useGpuFBufferOps);
if (alternateGpuWait)
{
- alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &forceOut, enerd, stepWork, pmeFlags, wcycle);
+ alternatePmeNbGpuWaitReduce(fr->nbv.get(), fr->pmedata, &forceOut, enerd, stepWork, wcycle);
}
if (!alternateGpuWait && useGpuPmeOnThisRank)
{
- pme_gpu_wait_and_reduce(fr->pmedata, pmeFlags, wcycle, &forceOut.forceWithVirial(), enerd);
+ pme_gpu_wait_and_reduce(fr->pmedata, stepWork, wcycle, &forceOut.forceWithVirial(), enerd);
}
/* Wait for local GPU NB outputs on the non-alternating wait path */
gmx::ArrayRef<gmx::RVec> forceWithShift = forceOut.forceWithShiftForces().force();
- if (useGpuFBufOps == BufferOpsUseGpu::True)
+ if (stepWork.useGpuFBufferOps)
{
// Flag to specify whether the CPU force buffer has contributions to
// local atoms. This depends on whether there are CPU-based force tasks
auto locality = havePPDomainDecomposition(cr) ? AtomLocality::Local : AtomLocality::All;
stateGpu->copyForcesToGpu(forceWithShift, locality);
- dependencyList.push_back(stateGpu->getForcesReadyOnDeviceEvent(
- locality, useGpuFBufOps == BufferOpsUseGpu::True));
+ dependencyList.push_back(
+ stateGpu->getForcesReadyOnDeviceEvent(locality, stepWork.useGpuFBufferOps));
}
if (useGpuForcesHaloExchange)
{
- dependencyList.push_back(gpuHaloExchange->getForcesReadyOnDeviceEvent());
+ dependencyList.push_back(cr->dd->gpuHaloExchange[0]->getForcesReadyOnDeviceEvent());
}
nbv->atomdata_add_nbat_f_to_f_gpu(AtomLocality::Local, stateGpu->getForces(), pmeForcePtr,
dependencyList, stepWork.useGpuPmeFReduction,
}
launchGpuEndOfStepTasks(nbv, fr->gpuBonded, fr->pmedata, enerd, *runScheduleWork,
- simulationWork.useGpuNonbonded, useGpuPmeOnThisRank, step, wcycle);
+ useGpuPmeOnThisRank, step, wcycle);
if (DOMAINDECOMP(cr))
{
{
rvec* fshift = as_rvec_array(forceOut.forceWithShiftForces().shiftForces().data());
spread_vsite_f(vsite, as_rvec_array(x.unpaddedArrayRef().data()), f, fshift, FALSE,
- nullptr, nrnb, &top->idef, fr->ePBC, fr->bMolPBC, graph, box, cr, wcycle);
+ nullptr, nrnb, top->idef, fr->pbcType, fr->bMolPBC, box, cr, wcycle);
}
if (stepWork.computeVirial)
{
/* Calculation of the virial must be done after vsites! */
calc_virial(0, mdatoms->homenr, as_rvec_array(x.unpaddedArrayRef().data()),
- forceOut.forceWithShiftForces(), vir_force, graph, box, nrnb, fr, inputrec->ePBC);
+ forceOut.forceWithShiftForces(), vir_force, box, nrnb, fr, inputrec->pbcType);
}
}
if (stepWork.computeForces)
{
post_process_forces(cr, step, nrnb, wcycle, top, box, as_rvec_array(x.unpaddedArrayRef().data()),
- &forceOut, vir_force, mdatoms, graph, fr, vsite, stepWork);
+ &forceOut, vir_force, mdatoms, fr, vsite, stepWork);
}
if (stepWork.computeEnergy)
//! Contains command-line options to mdrun.
const MdrunOptions& mdrunOptions;
//! Whether the simulation will start afresh, or restart with/without appending.
- StartingBehavior startingBehavior;
+ const StartingBehavior startingBehavior;
//! Handles virtual sites.
gmx_vsite_t* vsite;
//! Handles constraints.
//! The coordinate-swapping session.
t_swap* swap;
//! Full system topology.
- gmx_mtop_t* top_global;
+ const gmx_mtop_t* top_global;
//! Helper struct for force calculations.
t_fcdata* fcd;
//! Full simulation state (only non-nullptr on master rank).
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/domdec_struct.h"
+#include "gromacs/domdec/gpuhaloexchange.h"
#include "gromacs/domdec/mdsetup.h"
#include "gromacs/domdec/partition.h"
#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
#include "gromacs/ewald/pme_load_balancing.h"
+#include "gromacs/ewald/pme_pp.h"
#include "gromacs/fileio/trxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/gpu_utils/device_stream_manager.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/imd/imd.h"
#include "gromacs/listed_forces/manage_threading.h"
#include "gromacs/mdlib/tgroup.h"
#include "gromacs/mdlib/trajectory_writing.h"
#include "gromacs/mdlib/update.h"
-#include "gromacs/mdlib/update_constrain_cuda.h"
+#include "gromacs/mdlib/update_constrain_gpu.h"
#include "gromacs/mdlib/vcm.h"
#include "gromacs/mdlib/vsite.h"
#include "gromacs/mdrunutility/handlerestart.h"
#include "gromacs/modularsimulator/energyelement.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
-#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/output.h"
#include "gromacs/pulling/pull.h"
rvec mu_tot;
matrix pressureCouplingMu, M;
gmx_repl_ex_t repl_ex = nullptr;
- gmx_localtop_t top;
PaddedHostVector<gmx::RVec> f{};
gmx_global_stat_t gstat;
- t_graph* graph = nullptr;
gmx_shellfc_t* shellfc;
gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
gmx_bool bTemp, bPres, bTrotter;
int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
bGStatEveryStep = (nstglobalcomm == 1);
- SimulationGroups* groups = &top_global->groups;
+ const SimulationGroups* groups = &top_global->groups;
std::unique_ptr<EssentialDynamics> ed = nullptr;
if (opt2bSet("-ei", nfile, fnm))
t_state* state;
+ gmx_localtop_t top(top_global->ffparams);
+
auto mdatoms = mdAtoms->mdatoms();
- std::unique_ptr<UpdateConstrainCuda> integrator;
+ std::unique_ptr<UpdateConstrainGpu> integrator;
if (DOMAINDECOMP(cr))
{
- dd_init_local_top(*top_global, &top);
-
stateInstance = std::make_unique<t_state>();
state = stateInstance.get();
dd_init_local_state(cr->dd, state_global, state);
else
{
state_change_natoms(state_global, state_global->natoms);
- f.resizeWithPadding(state_global->natoms);
/* Copy the pointer to the global state */
state = state_global;
/* Generate and initialize new topology */
- mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &graph, mdAtoms, constr, vsite, shellfc);
+ mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
upd.setNumAtoms(state->natoms);
}
StatePropagatorDataGpu* stateGpu = fr->stateGpu;
+ // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
if (useGpuForUpdate)
{
GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
"Constraints pulling is not supported with the GPU update.\n");
GMX_RELEASE_ASSERT(fcd->orires.nr == 0,
"Orientation restraints are not supported with the GPU update.\n");
- GMX_RELEASE_ASSERT(ir->efep == efepNO,
- "Free energy perturbations are not supported with the GPU update.");
- GMX_RELEASE_ASSERT(graph == nullptr, "The graph is not supported with GPU update.");
+ GMX_RELEASE_ASSERT(
+ ir->efep == efepNO
+ || (!haveFreeEnergyType(*ir, efptBONDED) && !haveFreeEnergyType(*ir, efptMASS)),
+ "Free energy perturbation of masses and constraints are not supported with the GPU "
+ "update.");
if (constr != nullptr && constr->numConstraintsTotal() > 0)
{
{
GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
}
- integrator = std::make_unique<UpdateConstrainCuda>(
- *ir, *top_global, stateGpu->getUpdateStream(), stateGpu->xUpdatedOnDevice());
-
- t_pbc pbc;
- set_pbc(&pbc, epbcXYZ, state->box);
- integrator->setPbc(&pbc);
+ GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
+ "Device stream manager should be initialized in order to use GPU "
+ "update-constraints.");
+ GMX_RELEASE_ASSERT(
+ fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
+ "Update stream should be initialized in order to use GPU "
+ "update-constraints.");
+ integrator = std::make_unique<UpdateConstrainGpu>(
+ *ir, *top_global, fr->deviceStreamManager->context(),
+ fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
+ stateGpu->xUpdatedOnDevice());
+
+ integrator->setPbc(PbcType::Xyz, state->box);
}
if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
{
/* Construct the virtual sites for the initial configuration */
construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, top.idef.iparams,
- top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
+ top.idef.il, fr->pbcType, fr->bMolPBC, cr, state->box);
}
}
bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
if (PAR(cr))
{
- gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr);
+ gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
}
if (hasReadEkinState)
{
cglo_flags_iteration |= CGLO_STOPCM;
cglo_flags_iteration &= ~CGLO_TEMPERATURE;
}
- compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
- state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
- force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
- state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
+ compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
+ nrnb, &vcm, nullptr, enerd, force_vir, shake_vir, total_vir, pres, constr,
+ &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
cglo_flags_iteration
| (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
: 0));
/* At initialization, do not pass x with acceleration-correction mode
* to avoid (incorrect) correction of the initial coordinates.
*/
- rvec* xPtr = nullptr;
- if (vcm.mode != ecmLINEAR_ACCELERATION_CORRECTION)
- {
- xPtr = state->x.rvec_array();
- }
- process_and_stopcm_grp(fplog, &vcm, *mdatoms, xPtr, state->v.rvec_array());
+ auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
+ : makeArrayRef(state->x);
+ process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
}
}
checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
- state->x.rvec_array(), state->box,
+ makeConstArrayRef(state->x), state->box,
&shouldCheckNumberOfBondedInteractions);
if (ir->eI == eiVVAK)
{
kinetic energy calculation. This minimized excess variables, but
perhaps loses some logic?*/
- compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
- state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, nullptr, enerd,
- force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
- state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
+ compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
+ nrnb, &vcm, nullptr, enerd, force_vir, shake_vir, total_vir, pres, constr,
+ &nullSignaller, state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
}
/* Calculate the initial half step temperature, and save the ekinh_old */
"The initial step is not consistent across multi simulations which "
"share the state");
}
- gmx_barrier(cr);
+ gmx_barrier(cr->mpi_comm_mygroup);
}
else
{
bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep));
bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
- && (step > 0) && (startingBehavior == StartingBehavior::NewSimulation));
+ && (!bFirstStep));
}
bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
/* Correct the new box if it is too skewed */
if (inputrecDynamicBox(ir))
{
- if (correct_box(fplog, step, state->box, graph))
+ if (correct_box(fplog, step, state->box))
{
bMasterState = TRUE;
// If update is offloaded, it should be informed about the box size change
if (useGpuForUpdate)
{
- t_pbc pbc;
- set_pbc(&pbc, epbcXYZ, state->box);
- integrator->setPbc(&pbc);
+ integrator->setPbc(PbcType::Xyz, state->box);
}
}
}
fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting);
shouldCheckNumberOfBondedInteractions = true;
upd.setNumAtoms(state->natoms);
+
+ // Allocate or re-size GPU halo exchange object, if necessary
+ if (havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange
+ && useGpuForNonbonded && is1D(*cr->dd))
+ {
+ GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
+ "GPU device manager has to be initialized to use GPU "
+ "version of halo exchange.");
+ // TODO remove need to pass local stream into GPU halo exchange - Issue #3093
+ constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager);
+ }
}
}
if (MASTER(cr) && do_log)
{
- energyOutput.printHeader(fplog, step, t); /* can we improve the information printed here? */
+ gmx::EnergyOutput::printHeader(fplog, step,
+ t); /* can we improve the information printed here? */
}
if (ir->efep != efepNO)
/* We need the kinetic energy at minus the half step for determining
* the full step kinetic energy and possibly for T-coupling.*/
/* This may not be quite working correctly yet . . . . */
- compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
- state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
- nullptr, nullptr, nullptr, nullptr, mu_tot, constr, &nullSignaller,
- state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
+ compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
+ nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr, nullptr, constr,
+ &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
- &top, state->x.rvec_array(), state->box,
+ &top, makeConstArrayRef(state->x), state->box,
&shouldCheckNumberOfBondedInteractions);
}
clear_mat(force_vir);
imdSession, pull_work, bNS, force_flags, &top, constr, enerd, fcd,
state->natoms, state->x.arrayRefWithPadding(),
state->v.arrayRefWithPadding(), state->box, state->lambda, &state->hist,
- f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, graph,
- shellfc, fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
+ f.arrayRefWithPadding(), force_vir, mdatoms, nrnb, wcycle, shellfc,
+ fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
}
else
{
*/
do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
- f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, graph,
- fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
+ f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, state->lambda, fr,
+ runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
(bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
}
if (bGStat || do_per_step(step - 1, nstglobalcomm))
{
wallcycle_stop(wcycle, ewcUPDATE);
- compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
- state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
- force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller,
- state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
- (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
- | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
- | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0)
- | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
- : 0)
- | CGLO_SCALEEKIN);
+ compute_globals(
+ gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
+ nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
+ &nullSignaller, state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
+ (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
+ | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
+ | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0)
+ | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
+ : 0)
+ | CGLO_SCALEEKIN);
/* explanation of above:
a) We compute Ekin at the full time step
if 1) we are using the AveVel Ekin, and it's not the
b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
EkinAveVel because it's needed for the pressure */
checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
- top_global, &top, state->x.rvec_array(), state->box,
- &shouldCheckNumberOfBondedInteractions);
+ top_global, &top, makeConstArrayRef(state->x),
+ state->box, &shouldCheckNumberOfBondedInteractions);
if (bStopCM)
{
- process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
- state->v.rvec_array());
+ process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
+ makeArrayRef(state->v));
inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
}
wallcycle_start(wcycle, ewcUPDATE);
/* We need the kinetic energy at minus the half step for determining
* the full step kinetic energy and possibly for T-coupling.*/
/* This may not be quite working correctly yet . . . . */
- compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(),
- state->v.rvec_array(), state->box, state->lambda[efptVDW],
+ compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v), state->box, state->lambda[efptVDW],
mdatoms, nrnb, &vcm, wcycle, enerd, nullptr, nullptr, nullptr,
- nullptr, mu_tot, constr, &nullSignaller, state->box, nullptr,
+ nullptr, constr, &nullSignaller, state->box, nullptr,
&bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
wallcycle_start(wcycle, ewcUPDATE);
}
bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
/* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
- if (startingBehavior != StartingBehavior::NewSimulation
+ if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
&& (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
{
copy_mat(state->svir_prev, shake_vir);
update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, cr, nrnb, wcycle, &upd,
constr, do_log, do_ene);
- finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, constr);
+ finish_update(ir, mdatoms, state, wcycle, &upd, constr);
}
if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
{
/* erase F_EKIN and F_TEMP here? */
/* just compute the kinetic energy at the half step to perform a trotter step */
- compute_globals(gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
- state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
- force_vir, shake_vir, total_vir, pres, mu_tot, constr, &nullSignaller, lastbox,
- nullptr, &bSumEkinhOld, (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
+ compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v), state->box, state->lambda[efptVDW],
+ mdatoms, nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir,
+ pres, constr, &nullSignaller, lastbox, nullptr, &bSumEkinhOld,
+ (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
wallcycle_start(wcycle, ewcUPDATE);
trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
/* now we know the scaling, we can compute the positions again */
* to numerical errors, or are they important
* physically? I'm thinking they are just errors, but not completely sure.
* For now, will call without actually constraining, constr=NULL*/
- finish_update(ir, mdatoms, state, graph, nrnb, wcycle, &upd, nullptr);
+ finish_update(ir, mdatoms, state, wcycle, &upd, nullptr);
}
if (EI_VV(ir->eI))
{
/* this factor or 2 correction is necessary
because half of the constraint force is removed
in the vv step, so we have to double it. See
- the Redmine issue #1255. It is not yet clear
+ the Issue #1255. It is not yet clear
if the factor of 2 is exact, or just a very
good approximation, and this will be
investigated. The next step is to see if this
if (vsite != nullptr)
{
wallcycle_start(wcycle, ewcVSITECONSTR);
- if (graph != nullptr)
- {
- shift_self(graph, state->box, state->x.rvec_array());
- }
construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(),
- top.idef.iparams, top.idef.il, fr->ePBC, fr->bMolPBC, cr, state->box);
-
- if (graph != nullptr)
- {
- unshift_self(graph, state->box, state->x.rvec_array());
- }
+ top.idef.iparams, top.idef.il, fr->pbcType, fr->bMolPBC, cr, state->box);
wallcycle_stop(wcycle, ewcVSITECONSTR);
}
SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
compute_globals(
- gstat, cr, ir, fr, ekind, state->x.rvec_array(), state->v.rvec_array(),
- state->box, state->lambda[efptVDW], mdatoms, nrnb, &vcm, wcycle, enerd,
- force_vir, shake_vir, total_vir, pres, mu_tot, constr, &signaller, lastbox,
- &totalNumberOfBondedInteractions, &bSumEkinhOld,
+ gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
+ makeConstArrayRef(state->v), state->box, state->lambda[efptVDW], mdatoms,
+ nrnb, &vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
+ &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld,
(bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
| (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
| (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
| (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
: 0));
checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
- top_global, &top, state->x.rvec_array(), state->box,
- &shouldCheckNumberOfBondedInteractions);
+ top_global, &top, makeConstArrayRef(state->x),
+ state->box, &shouldCheckNumberOfBondedInteractions);
if (!EI_VV(ir->eI) && bStopCM)
{
- process_and_stopcm_grp(fplog, &vcm, *mdatoms, state->x.rvec_array(),
- state->v.rvec_array());
+ process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
+ makeArrayRef(state->v));
inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
// TODO: The special case of removing CM motion should be dealt more gracefully
// force kernels that use the coordinates on the next steps is not implemented
// (not because of a race on state->x being modified on the CPU while H2D is in progress).
stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
+ // If the COM removal changed the velocities on the CPU, this has to be accounted for.
+ if (vcm.mode != ecmNO)
+ {
+ stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
+ }
}
}
}
if (useGpuForUpdate && (doBerendsenPressureCoupling || doParrinelloRahman))
{
integrator->scaleCoordinates(pressureCouplingMu);
- t_pbc pbc;
- set_pbc(&pbc, epbcXYZ, state->box);
- integrator->setPbc(&pbc);
+ integrator->setPbc(PbcType::Xyz, state->box);
}
/* ################# END UPDATE STEP 2 ################# */
if (doSimulatedAnnealing)
{
- energyOutput.printAnnealingTemperatures(do_log ? fplog : nullptr, groups, &(ir->opts));
+ gmx::EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups,
+ &(ir->opts));
}
if (do_log || do_ene || do_dr || do_or)
{
{
if (ir->nstcalcenergy > 0)
{
- energyOutput.printAnnealingTemperatures(fplog, groups, &(ir->opts));
+ gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
energyOutput.printAverages(fplog, groups);
}
}
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
// Coulomb Ewald correction table
std::unique_ptr<EwaldCorrectionTables> coulombEwaldTables;
- /* Note that a Van der Waals Ewald correction table
- * of type EwaldCorrectionTables can be added here if wanted.
- */
+ // Van der Waals Ewald correction table
+ std::unique_ptr<EwaldCorrectionTables> vdwEwaldTables;
};
#endif
//! Returns a pointer for SIMD storing of a Corner object
float* ptr() { return &x; }
- float x; //!< x coordinate
- float y; //!< y coordinate
- float z; //!< z coordinate
- float padding; //!< padding, unused, but should be set to avoid operations on unitialized data
+ //! x coordinate
+ float x;
+ //! y coordinate
+ float y;
+ //! z coordinate
+ float z;
+ //! padding, unused, but should be set to avoid operations on unitialized data
+ float padding;
};
- Corner lower; //!< lower, along x and y and z, corner
- Corner upper; //!< upper, along x and y and z, corner
+ //! lower, along x and y and z, corner
+ Corner lower;
+ //! upper, along x and y and z, corner
+ Corner upper;
};
/*! \internal
*/
struct BoundingBox1D
{
- float lower; //!< lower bound
- float upper; //!< upper bound
+ //! lower bound
+ float lower;
+ //! upper bound
+ float upper;
};
} // namespace Nbnxm
//! Constructs the cluster/cell geometry given the type of pairlist
Geometry(PairlistType pairlistType);
- bool isSimple; //!< Is this grid simple (CPU) or hierarchical (GPU)
- int numAtomsICluster; //!< Number of atoms per cluster
- int numAtomsJCluster; //!< Number of atoms for list j-clusters
- int numAtomsPerCell; //!< Number of atoms per cell
- int numAtomsICluster2Log; //!< 2log of na_c
+ //! Is this grid simple (CPU) or hierarchical (GPU)
+ bool isSimple;
+ //! Number of atoms per cluster
+ int numAtomsICluster;
+ //! Number of atoms for list j-clusters
+ int numAtomsJCluster;
+ //! Number of atoms per cell
+ int numAtomsPerCell;
+ //! 2log of na_c
+ int numAtomsICluster2Log;
};
- // The physical dimensions of a grid
+ //! The physical dimensions of a grid \internal
struct Dimensions
{
//! The lower corner of the (local) grid
//! Returns the number of cells in the column
int numCellsInColumn(int columnIndex) const
{
- return cxy_ind_[columnIndex + 1] - cxy_ind_[columnIndex];
+ return cxy_ind_[columnIndex + 1LL] - cxy_ind_[columnIndex];
}
//! Returns the index of the first atom in the column
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/interaction_const.h"
+#include "gromacs/nbnxm/atomdata.h"
#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/nbnxm/nbnxm.h"
#include "gromacs/nbnxm/pairlist_tuning.h"
#include "gromacs/simd/simd.h"
+#include "gromacs/topology/mtop_util.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/logger.h"
-#include "atomdata.h"
-#include "gpu_types.h"
#include "grid.h"
#include "nbnxm_geometry.h"
#include "nbnxm_simd.h"
* On AMD Zen, tabulated Ewald kernels are faster on all 4 combinations
* of single or double precision and 128 or 256-bit AVX2.
*/
+ MSVC_DIAGNOSTIC_IGNORE(6285) // Always zero because compile time constant
if (
#if GMX_SIMD
(GMX_SIMD_REAL_WIDTH >= 8 || (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)) &&
{
kernelSetup.ewaldExclusionType = EwaldExclusionType::Analytical;
}
- else
- {
- kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
- }
+ MSVC_DIAGNOSTIC_RESET
+ else { kernelSetup.ewaldExclusionType = EwaldExclusionType::Table; }
if (getenv("GMX_NBNXN_EWALD_TABLE") != nullptr)
{
kernelSetup.ewaldExclusionType = EwaldExclusionType::Table;
{
/*! \brief Gets and returns the minimum i-list count for balacing based on the GPU used or env.var. when set */
-static int getMinimumIlistCountForGpuBalancing(gmx_nbnxn_gpu_t* nbnxmGpu)
+static int getMinimumIlistCountForGpuBalancing(NbnxmGpu* nbnxmGpu)
{
int minimumIlistCount;
return minimumIlistCount;
}
-std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger& mdlog,
- gmx_bool bFEP_NonBonded,
- const t_inputrec* ir,
- const t_forcerec* fr,
- const t_commrec* cr,
- const gmx_hw_info_t& hardwareInfo,
- const gmx_device_info_t* deviceInfo,
- const gmx_mtop_t* mtop,
- matrix box,
- gmx_wallcycle* wcycle)
+std::unique_ptr<nonbonded_verlet_t> init_nb_verlet(const gmx::MDLogger& mdlog,
+ const t_inputrec* ir,
+ const t_forcerec* fr,
+ const t_commrec* cr,
+ const gmx_hw_info_t& hardwareInfo,
+ const bool useGpuForNonbonded,
+ const gmx::DeviceStreamManager* deviceStreamManager,
+ const gmx_mtop_t* mtop,
+ matrix box,
+ gmx_wallcycle* wcycle)
{
const bool emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
- const bool useGpu = deviceInfo != nullptr;
- GMX_RELEASE_ASSERT(!(emulateGpu && useGpu),
+ GMX_RELEASE_ASSERT(!(emulateGpu && useGpuForNonbonded),
"When GPU emulation is active, there cannot be a GPU assignment");
NonbondedResource nonbondedResource;
- if (useGpu)
+ if (useGpuForNonbonded)
{
nonbondedResource = NonbondedResource::Gpu;
}
Nbnxm::KernelSetup kernelSetup = pick_nbnxn_kernel(mdlog, fr->use_simd_kernels, hardwareInfo,
nonbondedResource, ir, fr->bNonbonded);
- const bool haveMultipleDomains = (DOMAINDECOMP(cr) && cr->dd->nnodes > 1);
+ const bool haveMultipleDomains = havePPDomainDecomposition(cr);
- PairlistParams pairlistParams(kernelSetup.kernelType, bFEP_NonBonded, ir->rlist,
- havePPDomainDecomposition(cr));
+ bool bFEP_NonBonded = (fr->efep != efepNO) && haveFepPerturbedNBInteractions(mtop);
+ PairlistParams pairlistParams(kernelSetup.kernelType, bFEP_NonBonded, ir->rlist, haveMultipleDomains);
setupDynamicPairlistPruning(mdlog, ir, mtop, box, fr->ic, &pairlistParams);
enbnxninitcombrule = enbnxninitcombruleNONE;
}
- auto pinPolicy = (useGpu ? gmx::PinningPolicy::PinnedIfSupported : gmx::PinningPolicy::CannotBePinned);
+ auto pinPolicy = (useGpuForNonbonded ? gmx::PinningPolicy::PinnedIfSupported
+ : gmx::PinningPolicy::CannotBePinned);
auto nbat = std::make_unique<nbnxn_atomdata_t>(pinPolicy);
}
nbnxn_atomdata_init(mdlog, nbat.get(), kernelSetup.kernelType, enbnxninitcombrule, fr->ntype,
fr->nbfp, mimimumNumEnergyGroupNonbonded,
- (useGpu || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded));
+ (useGpuForNonbonded || emulateGpu) ? 1 : gmx_omp_nthreads_get(emntNonbonded));
- gmx_nbnxn_gpu_t* gpu_nbv = nullptr;
- int minimumIlistCountForGpuBalancing = 0;
- if (useGpu)
+ NbnxmGpu* gpu_nbv = nullptr;
+ int minimumIlistCountForGpuBalancing = 0;
+ if (useGpuForNonbonded)
{
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
- gpu_nbv = gpu_init(deviceInfo, fr->ic, pairlistParams, nbat.get(), cr->nodeid, haveMultipleDomains);
+ GMX_RELEASE_ASSERT(
+ (deviceStreamManager != nullptr),
+ "Device stream manager should be initialized in order to use GPU for non-bonded.");
+ gpu_nbv = gpu_init(*deviceStreamManager, fr->ic, pairlistParams, nbat.get(), haveMultipleDomains);
minimumIlistCountForGpuBalancing = getMinimumIlistCountForGpuBalancing(gpu_nbv);
}
minimumIlistCountForGpuBalancing);
auto pairSearch = std::make_unique<PairSearch>(
- ir->ePBC, EI_TPI(ir->eI), DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
+ ir->pbcType, EI_TPI(ir->eI), DOMAINDECOMP(cr) ? &cr->dd->numCells : nullptr,
DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr, pairlistParams.pairlistType,
bFEP_NonBonded, gmx_omp_nthreads_get(emntPairsearch), pinPolicy);
std::unique_ptr<PairSearch> pairSearch,
std::unique_ptr<nbnxn_atomdata_t> nbat_in,
const Nbnxm::KernelSetup& kernelSetup,
- gmx_nbnxn_gpu_t* gpu_nbv_ptr,
+ NbnxmGpu* gpu_nbv_ptr,
gmx_wallcycle* wcycle) :
pairlistSets_(std::move(pairlistSets)),
pairSearch_(std::move(pairSearch)),
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014,2015, The GROMACS development team.
- * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
+ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/real.h"
-// This file with constants is separate from this file to be able
-// to include it during OpenCL jitting without including config.h
-#include "constants.h"
#include "pairlistparams.h"
struct NbnxnPairlistCpuWork;
struct NbnxnPairlistGpuWork;
-/* Convenience type for vector with aligned memory */
+//! Convenience type for vector with aligned memory
template<typename T>
using AlignedVector = std::vector<T, gmx::AlignedAllocator<T>>;
-/* Convenience type for vector that avoids initialization at resize() */
+//! Convenience type for vector that avoids initialization at resize()
template<typename T>
using FastVector = std::vector<T, gmx::DefaultInitializationAllocator<T>>;
-/* A buffer data structure of 64 bytes
+/*! \brief Cache-line protection buffer
+ *
+ * A buffer data structure of 64 bytes
* to be placed at the beginning and end of structs
* to avoid cache invalidation of the real contents
* of the struct by writes to neighboring memory.
*/
typedef struct
{
+ //! Unused field used to create space to protect cache lines that are in use
int dummy[16];
} gmx_cache_protect_t;
-/* This is the actual cluster-pair list j-entry.
+/*! \brief This is the actual cluster-pair list j-entry.
+ *
* cj is the j-cluster.
* The interaction bits in excl are indexed i-major, j-minor.
* The cj entries are sorted such that ones with exclusions come first.
*/
struct nbnxn_cj_t
{
- int cj; /* The j-cluster */
- unsigned int excl; /* The exclusion (interaction) bits */
+ //! The j-cluster
+ int cj;
+ //! The exclusion (interaction) bits
+ unsigned int excl;
};
-/* In nbnxn_ci_t the integer shift contains the shift in the lower 7 bits.
+/*! \brief Constants for interpreting interaction flags
+ *
+ * In nbnxn_ci_t the integer shift contains the shift in the lower 7 bits.
* The upper bits contain information for non-bonded kernel optimization.
* Simply calculating LJ and Coulomb for all pairs in a cluster pair is fine.
* But three flags can be used to skip interactions, currently only for subc=0
* shift & NBNXN_CI_HALF_LJ(subc) => we can skip LJ for the second half of i
* !(shift & NBNXN_CI_DO_COUL(subc)) => we can skip Coulomb for all pairs
*/
+//! \{
#define NBNXN_CI_SHIFT 127
#define NBNXN_CI_DO_LJ(subc) (1 << (7 + 3 * (subc)))
#define NBNXN_CI_HALF_LJ(subc) (1 << (8 + 3 * (subc)))
#define NBNXN_CI_DO_COUL(subc) (1 << (9 + 3 * (subc)))
+//! \}
-/* Cluster-pair Interaction masks
+/*! \brief Cluster-pair Interaction masks
+ *
* Bit i*j-cluster-size + j tells if atom i and j interact.
*/
+//! \{
// TODO: Rename according to convention when moving into Nbnxn namespace
-/* All interaction mask is the same for all kernels */
+//! All interaction mask is the same for all kernels
constexpr unsigned int NBNXN_INTERACTION_MASK_ALL = 0xffffffffU;
-/* 4x4 kernel diagonal mask */
+//! 4x4 kernel diagonal mask
constexpr unsigned int NBNXN_INTERACTION_MASK_DIAG = 0x08ceU;
-/* 4x2 kernel diagonal masks */
+//! 4x2 kernel diagonal masks
+//! \{
constexpr unsigned int NBNXN_INTERACTION_MASK_DIAG_J2_0 = 0x0002U;
constexpr unsigned int NBNXN_INTERACTION_MASK_DIAG_J2_1 = 0x002fU;
-/* 4x8 kernel diagonal masks */
+//! \}
+//! 4x8 kernel diagonal masks
+//! \{
constexpr unsigned int NBNXN_INTERACTION_MASK_DIAG_J8_0 = 0xf0f8fcfeU;
constexpr unsigned int NBNXN_INTERACTION_MASK_DIAG_J8_1 = 0x0080c0e0U;
+//! \}
+//! \}
+
+/*! \brief Lower limit for square interaction distances in nonbonded kernels.
+ *
+ * For smaller values we will overflow when calculating r^-1 or r^-12, but
+ * to keep it simple we always apply the limit from the tougher r^-12 condition.
+ */
+#if GMX_DOUBLE
+// Some double precision SIMD architectures use single precision in the first
+// step, so although the double precision criterion would allow smaller rsq,
+// we need to stay in single precision with some margin for the N-R iterations.
+constexpr double c_nbnxnMinDistanceSquared = 1.0e-36;
+#else
+// The worst intermediate value we might evaluate is r^-12, which
+// means we should ensure r^2 stays above pow(GMX_FLOAT_MAX,-1.0/6.0)*1.01 (some margin)
+constexpr float c_nbnxnMinDistanceSquared = 3.82e-07F; // r > 6.2e-4
+#endif
-/* Simple pair-list i-unit */
+
+//! The number of clusters in a super-cluster, used for GPU
+constexpr int c_nbnxnGpuNumClusterPerSupercluster = 8;
+
+/*! \brief With GPU kernels we group cluster pairs in 4 to optimize memory usage
+ * of integers containing 32 bits.
+ */
+constexpr int c_nbnxnGpuJgroupSize = (32 / c_nbnxnGpuNumClusterPerSupercluster);
+
+/*! \internal
+ * \brief Simple pair-list i-unit
+ */
struct nbnxn_ci_t
{
- int ci; /* i-cluster */
- int shift; /* Shift vector index plus possible flags, see above */
- int cj_ind_start; /* Start index into cj */
- int cj_ind_end; /* End index into cj */
+ //! i-cluster
+ int ci;
+ //! Shift vector index plus possible flags, see above
+ int shift;
+ //! Start index into cj
+ int cj_ind_start;
+ //! End index into cj
+ int cj_ind_end;
};
-/* Grouped pair-list i-unit */
+//! Grouped pair-list i-unit
typedef struct
{
- /* Returns the number of j-cluster groups in this entry */
+ //! Returns the number of j-cluster groups in this entry
int numJClusterGroups() const { return cj4_ind_end - cj4_ind_start; }
- int sci; /* i-super-cluster */
- int shift; /* Shift vector index plus possible flags */
- int cj4_ind_start; /* Start index into cj4 */
- int cj4_ind_end; /* End index into cj4 */
+ //! i-super-cluster
+ int sci;
+ //! Shift vector index plus possible flags
+ int shift;
+ //! Start index into cj4
+ int cj4_ind_start;
+ //! End index into cj4
+ int cj4_ind_end;
} nbnxn_sci_t;
-/* Interaction data for a j-group for one warp */
+//! Interaction data for a j-group for one warp
struct nbnxn_im_ei_t
{
- // The i-cluster interactions mask for 1 warp
+ //! The i-cluster interactions mask for 1 warp
unsigned int imask = 0U;
- // Index into the exclusion array for 1 warp, default index 0 which means no exclusions
+ //! Index into the exclusion array for 1 warp, default index 0 which means no exclusions
int excl_ind = 0;
};
+//! Four-way j-cluster lists
typedef struct
{
- int cj[c_nbnxnGpuJgroupSize]; /* The 4 j-clusters */
- nbnxn_im_ei_t imei[c_nbnxnGpuClusterpairSplit]; /* The i-cluster mask data for 2 warps */
+ //! The 4 j-clusters
+ int cj[c_nbnxnGpuJgroupSize];
+ //! The i-cluster mask data for 2 warps
+ nbnxn_im_ei_t imei[c_nbnxnGpuClusterpairSplit];
} nbnxn_cj4_t;
-/* Struct for storing the atom-pair interaction bits for a cluster pair in a GPU pairlist */
+//! Struct for storing the atom-pair interaction bits for a cluster pair in a GPU pairlist
struct nbnxn_excl_t
{
- /* Constructor, sets no exclusions, so all atom pairs interacting */
+ //! Constructor, sets no exclusions, so all atom pairs interacting
+ MSVC_DIAGNOSTIC_IGNORE(26495) // pair is not being initialized!
nbnxn_excl_t()
{
for (unsigned int& pairEntry : pair)
pairEntry = NBNXN_INTERACTION_MASK_ALL;
}
}
+ MSVC_DIAGNOSTIC_RESET
- /* Topology exclusion interaction bits per warp */
+ //! Topology exclusion interaction bits per warp
unsigned int pair[c_nbnxnGpuExclSize];
};
-/* Cluster pairlist type for use on CPUs */
+//! Cluster pairlist type for use on CPUs
struct NbnxnPairlistCpu
{
NbnxnPairlistCpu();
+ //! Cache protection
gmx_cache_protect_t cp0;
- int na_ci; /* The number of atoms per i-cluster */
- int na_cj; /* The number of atoms per j-cluster */
- real rlist; /* The radius for constructing the list */
- FastVector<nbnxn_ci_t> ci; /* The i-cluster list */
- FastVector<nbnxn_ci_t> ciOuter; /* The outer, unpruned i-cluster list */
-
- FastVector<nbnxn_cj_t> cj; /* The j-cluster list, size ncj */
- FastVector<nbnxn_cj_t> cjOuter; /* The outer, unpruned j-cluster list */
- int ncjInUse; /* The number of j-clusters that are used by ci entries in this list, will be <= cj.size() */
-
- int nci_tot; /* The total number of i clusters */
+ //! The number of atoms per i-cluster
+ int na_ci;
+ //! The number of atoms per j-cluster
+ int na_cj;
+ //! The radius for constructing the list
+ real rlist;
+ //! The i-cluster list
+ FastVector<nbnxn_ci_t> ci;
+ //! The outer, unpruned i-cluster list
+ FastVector<nbnxn_ci_t> ciOuter;
+
+ //! The j-cluster list, size ncj
+ FastVector<nbnxn_cj_t> cj;
+ //! The outer, unpruned j-cluster list
+ FastVector<nbnxn_cj_t> cjOuter;
+ //! The number of j-clusters that are used by ci entries in this list, will be <= cj.size()
+ int ncjInUse;
+
+ //! The total number of i clusters
+ int nci_tot;
- /* Working data storage for list construction */
+ //! Working data storage for list construction
std::unique_ptr<NbnxnPairlistCpuWork> work;
+ //! Cache protection
gmx_cache_protect_t cp1;
};
*/
struct NbnxnPairlistGpu
{
- /* Constructor
+ /*! \brief Constructor
*
* \param[in] pinningPolicy Sets the pinning policy for all buffers used on the GPU
*/
NbnxnPairlistGpu(gmx::PinningPolicy pinningPolicy);
+ //! Cache protection
gmx_cache_protect_t cp0;
- int na_ci; /* The number of atoms per i-cluster */
- int na_cj; /* The number of atoms per j-cluster */
- int na_sc; /* The number of atoms per super cluster */
- real rlist; /* The radius for constructing the list */
+ //! The number of atoms per i-cluster
+ int na_ci;
+ //! The number of atoms per j-cluster
+ int na_cj;
+ //! The number of atoms per super cluster
+ int na_sc;
+ //! The radius for constructing the list
+ real rlist;
// The i-super-cluster list, indexes into cj4;
gmx::HostVector<nbnxn_sci_t> sci;
// The list of 4*j-cluster groups
// The total number of i-clusters
int nci_tot;
- /* Working data storage for list construction */
+ //! Working data storage for list construction
std::unique_ptr<NbnxnPairlistGpuWork> work;
+ //! Cache protection
gmx_cache_protect_t cp1;
};
store_->reserve(setValues_.size());
// For bool the loop variable isn't a reference (it's its special reference type)
// clang-format off
- CLANG_DIAGNOSTIC_IGNORE(-Wrange-loop-analysis)
+ CLANG_DIAGNOSTIC_IGNORE(-Wrange-loop-analysis);
// clang-format on
for (const auto& value : setValues_)
{
store_->append(value);
}
- DIAGNOSTIC_RESET;
+ CLANG_DIAGNOSTIC_RESET;
clearSet();
}
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2010,2012,2013,2014,2015, The GROMACS development team.
+# Copyright (c) 2010,2012,2013,2014,2015 by the GROMACS development team.
# Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
gmx_target_warning_suppression(scanner -Wno-unused HAS_NO_UNUSED)
gmx_target_warning_suppression(scanner -Wno-unused-parameter HAS_NO_UNUSED_PARAMETER)
gmx_target_warning_suppression(scanner -Wno-missing-declarations HAS_NO_MISSING_DECLARATIONS)
+ gmx_target_warning_suppression(scanner -Wno-null-conversion HAS_NO_NULL_CONVERSIONS)
gmx_target_warning_suppression(scanner -wd1419 HAS_DECL_IN_SOURCE)
endif()
list(APPEND libgromacs_object_library_dependencies scanner)
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
class SimdDouble
{
public:
+ MSVC_DIAGNOSTIC_IGNORE(26495) // simdInternal_ is not being initialized!
SimdDouble() {}
-
+ MSVC_DIAGNOSTIC_RESET
SimdDouble(double d) : simdInternal_(_mm256_set1_pd(d)) {}
// Internal utility constructor to simplify return statements
class SimdDInt32
{
public:
+ MSVC_DIAGNOSTIC_IGNORE(26495) // simdInternal_ is not being initialized!
SimdDInt32() {}
-
+ MSVC_DIAGNOSTIC_RESET
SimdDInt32(std::int32_t i) : simdInternal_(_mm_set1_epi32(i)) {}
// Internal utility constructor to simplify return statements
class SimdDBool
{
public:
+ MSVC_DIAGNOSTIC_IGNORE(26495) // simdInternal_ is not being initialized!
SimdDBool() {}
-
+ MSVC_DIAGNOSTIC_RESET
SimdDBool(bool b) : simdInternal_(_mm256_castsi256_pd(_mm256_set1_epi32(b ? 0xFFFFFFFF : 0))) {}
// Internal utility constructor to simplify return statements
class SimdDIBool
{
public:
+ MSVC_DIAGNOSTIC_IGNORE(26495) // simdInternal_ is not being initialized!
SimdDIBool() {}
-
+ MSVC_DIAGNOSTIC_RESET
SimdDIBool(bool b) : simdInternal_(_mm_set1_epi32(b ? 0xFFFFFFFF : 0)) {}
// Internal utility constructor to simplify return statements
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
+ * Copyright (c) 2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
class SimdFloat
{
public:
+ MSVC_DIAGNOSTIC_IGNORE(26495) // simdInternal_ is not being initialized!
SimdFloat() {}
-
+ MSVC_DIAGNOSTIC_RESET
SimdFloat(float f) : simdInternal_(_mm256_set1_ps(f)) {}
// Internal utility constructor to simplify return statements
class SimdFInt32
{
public:
+ MSVC_DIAGNOSTIC_IGNORE(26495) // simdInternal_ is not being initialized!
SimdFInt32() {}
-
+ MSVC_DIAGNOSTIC_RESET
SimdFInt32(std::int32_t i) : simdInternal_(_mm256_set1_epi32(i)) {}
// Internal utility constructor to simplify return statements
class SimdFBool
{
public:
+ MSVC_DIAGNOSTIC_IGNORE(26495) // simdInternal_ is not being initialized!
SimdFBool() {}
-
+ MSVC_DIAGNOSTIC_RESET
SimdFBool(bool b) : simdInternal_(_mm256_castsi256_ps(_mm256_set1_epi32(b ? 0xFFFFFFFF : 0))) {}
// Internal utility constructor to simplify return statements
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#include <array>
#include <type_traits>
+ #include "gromacs/utility/basedefinitions.h"
#include "gromacs/utility/classhelpers.h"
#include "gromacs/utility/real.h"
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2010,2014,2015,2018,2019, by the GROMACS development team, led by
+ * Copyright (c) 2010,2014,2015,2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
namespace gmx
{
+template<typename>
+class ListOfLists;
+
/*! \brief Division of a range of indices into consecutive blocks
*
* A range of consecutive indices 0 to full.range.end() is divided
int numBlocks() const { return static_cast<int>(index_.size()) - 1; }
/*! \brief Returns the size of the block with index \p blockIndex */
- Block block(int blockIndex) const { return Block(index_[blockIndex], index_[blockIndex + 1]); }
+ Block block(int blockIndex) const
+ {
+ return Block(index_[blockIndex], index_[blockIndex + 1LL]);
+ }
/*! \brief Returns the full range */
Block fullRange() const { return Block(index_.front(), index_.back()); }
void reduceNumBlocks(int newNumBlocks)
{
GMX_ASSERT(newNumBlocks <= numBlocks(), "Can only shrink to fewer blocks");
- index_.resize(newNumBlocks + 1);
+ index_.resize(newNumBlocks + 1LL);
}
/*! \brief Sets the partitioning to \p numBlocks blocks each of size 1 */
void pr_block(FILE* fp, int indent, const char* title, const t_block* block, gmx_bool bShowNumbers);
void pr_blocka(FILE* fp, int indent, const char* title, const t_blocka* block, gmx_bool bShowNumbers);
+void pr_listoflists(FILE* fp, int indent, const char* title, const gmx::ListOfLists<int>* block, gmx_bool bShowNumbers);
#endif
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
# define DO_PRAGMA(x) _Pragma(# x)
# define CLANG_DIAGNOSTIC_IGNORE(warning) \
_Pragma("clang diagnostic push") DO_PRAGMA(clang diagnostic ignored #warning)
- # define DIAGNOSTIC_RESET _Pragma("clang diagnostic pop")
+ # define CLANG_DIAGNOSTIC_RESET _Pragma("clang diagnostic pop")
#else
- //! Ignore specified clang warning until DIAGNOSTIC_RESET
+ //! Ignore specified clang warning until CLANG_DIAGNOSTIC_RESET
# define CLANG_DIAGNOSTIC_IGNORE(warning)
//! Reset all diagnostics to default
- # define DIAGNOSTIC_RESET
+ # define CLANG_DIAGNOSTIC_RESET
+ #endif
+
+ #ifdef _MSC_VER
+ # define MSVC_DIAGNOSTIC_IGNORE(id) __pragma(warning(push)) __pragma(warning(disable : id))
+ # define MSVC_DIAGNOSTIC_RESET __pragma(warning(pop))
+ #else
+ //! Ignore specified MSVC warning until MSVC_DIAGNOSTIC_RESET
+ # define MSVC_DIAGNOSTIC_IGNORE(warning)
+ //! Reset all diagnostics to default
+ # define MSVC_DIAGNOSTIC_RESET
#endif
namespace gmx
*
* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
* Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
+ * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* them. Can wait for later, as the master branch has ready code to do all
* that. */
writer->writeLine(formatString("C compiler: %s", BUILD_C_COMPILER));
- writer->writeLine(formatString("C compiler flags: %s", BUILD_CFLAGS));
+ writer->writeLine(formatString("C compiler flags: %s %s", BUILD_CFLAGS,
+ CMAKE_BUILD_CONFIGURATION_C_FLAGS));
writer->writeLine(formatString("C++ compiler: %s", BUILD_CXX_COMPILER));
- writer->writeLine(formatString("C++ compiler flags: %s", BUILD_CXXFLAGS));
+ writer->writeLine(formatString("C++ compiler flags: %s %s", BUILD_CXXFLAGS,
+ CMAKE_BUILD_CONFIGURATION_CXX_FLAGS));
#ifdef HAVE_LIBMKL
/* MKL might be used for LAPACK/BLAS even if FFTs use FFTW, so keep it separate */
writer->writeLine(formatString("Linked with Intel MKL version %d.%d.%d.", __INTEL_MKL__,
#endif
#if GMX_GPU == GMX_GPU_CUDA
writer->writeLine(formatString("CUDA compiler: %s", CUDA_COMPILER_INFO));
- writer->writeLine(formatString("CUDA compiler flags:%s", CUDA_COMPILER_FLAGS));
+ writer->writeLine(formatString("CUDA compiler flags:%s %s", CUDA_COMPILER_FLAGS,
+ CMAKE_BUILD_CONFIGURATION_CXX_FLAGS));
writer->writeLine("CUDA driver: " + gmx::getCudaDriverVersionString());
writer->writeLine("CUDA runtime: " + gmx::getCudaRuntimeVersionString());
#endif
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2011,2012,2013,2014,2015, The GROMACS development team.
+# Copyright (c) 2011,2012,2013,2014,2015 by the GROMACS development team.
# Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
if(HAVE_TINYXML2)
include_directories(SYSTEM ${TinyXML2_INCLUDE_DIR})
- target_link_libraries(testutils ${TinyXML2_LIBRARIES})
+ target_link_libraries(testutils PRIVATE ${TinyXML2_LIBRARIES})
else()
include_directories(BEFORE SYSTEM "../external/tinyxml2")
endif()
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
+# Copyright (c) 2011,2012,2013,2014,2015 by the GROMACS development team.
+# Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
# the research papers on the package. Check out http://www.gromacs.org.
include(CMakeParseArguments)
+include(gmxClangCudaUtils)
function (gmx_add_unit_test_library NAME)
if (GMX_BUILD_UNITTESTS AND BUILD_TESTING)
target_compile_definitions(${NAME} PRIVATE HAVE_CONFIG_H)
target_include_directories(${NAME} SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
target_link_libraries(${NAME} PRIVATE testutils gmock)
- # clang-3.6 warns about a number of issues that are not reported by more modern compilers
- # and we know they are not real issues. So we only check that it can compile without error
- # but ignore all warnings.
- if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^3\.6")
- target_compile_options(${NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-w>)
+ if(GMX_CLANG_TIDY)
+ set_target_properties(${NAME} PROPERTIES CXX_CLANG_TIDY
+ "${CLANG_TIDY_EXE};-warnings-as-errors=*;-header-filter=.*")
+ endif()
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7")
+ target_compile_options(${NAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Weverything ${IGNORED_CLANG_ALL_WARNINGS} -Wno-gnu-zero-variadic-macro-arguments -Wno-zero-as-null-pointer-constant -Wno-missing-variable-declarations>)
endif()
endif()
endfunction ()
+# This function creates a GoogleTest test executable for a module. It
+# hides all the complexity of how to treat different source files
+# under different configuration conditions. It should be extended
+# if we ever support another GPU compilation approach.
+#
+# It can be called with extra options and arguments:
+# MPI
+# To trigger the ctest runner to run this test with multiple ranks
+# HARDWARE_DETECTION
+# To trigger the test executable setup code to run hardware detection
+# CPP_SOURCE_FILES file1.cpp file2.cpp ...
+# All the normal C++ .cpp source files
+# GPU_CPP_SOURCE_FILES file1.cpp file2.cpp ...
+# All the C++ .cpp source files that are always needed, but must be
+# compiled in the way that suits GMX_GPU.
+# CUDA_CU_SOURCE_FILES file1.cu file2.cu ...
+# All the normal CUDA .cu source files
+# CUDA_CPP_SOURCE_FILES file1.cpp file2.cpp ...
+# All the other .cpp source files to be compiled as CUDA
+# OPENCL_CPP_SOURCE_FILES file1.cpp file2.cpp ...
+# All the other C++ .cpp source files needed only with OpenCL
+# NON_GPU_CPP_SOURCE_FILES file1.cpp file2.cpp ...
+# All the other C++ .cpp source files needed only with neither OpenCL nor CUDA
function (gmx_add_gtest_executable EXENAME)
if (GMX_BUILD_UNITTESTS AND BUILD_TESTING)
set(_options MPI HARDWARE_DETECTION)
- cmake_parse_arguments(ARG "${_options}" "" "" ${ARGN})
- set(_source_files ${ARG_UNPARSED_ARGUMENTS})
+ set(_multi_value_keywords
+ CPP_SOURCE_FILES
+ CUDA_CU_SOURCE_FILES
+ GPU_CPP_SOURCE_FILES
+ CUDA_CPP_SOURCE_FILES
+ OPENCL_CPP_SOURCE_FILES
+ NON_GPU_CPP_SOURCE_FILES
+ )
+ cmake_parse_arguments(ARG "${_options}" "" "${_multi_value_keywords}" ${ARGN})
file(RELATIVE_PATH _input_files_path ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
set(_temporary_files_path "${CMAKE_CURRENT_BINARY_DIR}/Testing/Temporary")
TEST_USES_HARDWARE_DETECTION=true)
endif()
- add_executable(${EXENAME} ${UNITTEST_TARGET_OPTIONS}
- ${_source_files} ${TESTUTILS_DIR}/unittest_main.cpp)
+ if (GMX_USE_CUDA AND NOT GMX_CLANG_CUDA)
+ # Work around FindCUDA that prevents using target_link_libraries()
+ # with keywords otherwise...
+ set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
+ cuda_add_executable(${EXENAME} ${UNITTEST_TARGET_OPTIONS}
+ ${ARG_CPP_SOURCE_FILES}
+ ${ARG_CUDA_CU_SOURCE_FILES}
+ ${ARG_CUDA_CPP_SOURCE_FILES}
+ ${ARG_GPU_CPP_SOURCE_FILES}
+ ${TESTUTILS_DIR}/unittest_main.cpp)
+ else()
+ add_executable(${EXENAME} ${UNITTEST_TARGET_OPTIONS}
+ ${ARG_CPP_SOURCE_FILES}
+ ${TESTUTILS_DIR}/unittest_main.cpp)
+ endif()
+
+ if (GMX_USE_CUDA)
+ if (GMX_CLANG_CUDA)
+ target_sources(${EXENAME} PRIVATE
+ ${ARG_CUDA_CU_SOURCE_FILES}
+ ${ARG_CUDA_CPP_SOURCE_FILES}
+ ${ARG_GPU_CPP_SOURCE_FILES})
+ set_source_files_properties(${ARG_CUDA_CPP_SOURCE_FILES} ${ARG_GPU_CPP_SOURCE_FILES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+ gmx_compile_cuda_file_with_clang(${ARG_CUDA_CU_SOURCE_FILES})
+ if(ARG_CUDA_CPP_SOURCE_FILES OR ARG_CUDA_CU_SOURCE_FILES OR ARG_GPU_CPP_SOURCE_FILES)
+ target_link_libraries(${EXENAME} PRIVATE ${GMX_EXTRA_LIBRARIES})
+ endif()
+ endif()
+ elseif (GMX_USE_OPENCL)
+ target_sources(${EXENAME} PRIVATE ${ARG_OPENCL_CPP_SOURCE_FILES} ${ARG_GPU_CPP_SOURCE_FILES})
+ if(ARG_OPENCL_CPP_SOURCE_FILES OR ARG_GPU_CPP_SOURCE_FILES)
+ target_link_libraries(${EXENAME} PRIVATE ${OpenCL_LIBRARIES})
+ endif()
+ else()
+ target_sources(${EXENAME} PRIVATE ${ARG_NON_GPU_CPP_SOURCE_FILES} ${ARG_GPU_CPP_SOURCE_FILES})
+ endif()
+
gmx_target_compile_options(${EXENAME})
target_compile_definitions(${EXENAME} PRIVATE HAVE_CONFIG_H ${EXTRA_COMPILE_DEFINITIONS})
target_include_directories(${EXENAME} SYSTEM BEFORE PRIVATE ${PROJECT_SOURCE_DIR}/src/external/thread_mpi/include)
set_target_properties(${EXENAME} PROPERTIES CXX_CLANG_TIDY
"${CLANG_TIDY_EXE};-warnings-as-errors=*;-header-filter=.*")
endif()
- if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^6\.0")
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "7")
target_compile_options(${EXENAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Weverything ${IGNORED_CLANG_ALL_WARNINGS} -Wno-gnu-zero-variadic-macro-arguments -Wno-zero-as-null-pointer-constant -Wno-missing-variable-declarations>)
endif()
+ # clang-3.6 warns about a number of issues that are not reported by more modern compilers
+ # and we know they are not real issues. So we only check that it can compile without error
+ # but ignore all warnings.
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^3\.6")
+ target_compile_options(${EXENAME} PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-w>)
+ endif()
endif()
endfunction()
# INTEGRATION_TEST requires the use of the IntegrationTest label in CTest
# SLOW_TEST requires the use of the SlowTest label in CTest, and
# increase the length of the ctest timeout.
+# IGNORE_LEAKS skips some memory safety checks.
#
# TODO When a test case needs it, generalize the MPI_RANKS mechanism so
# that ctest can run the test binary over a range of numbers of MPI
# ranks.
function (gmx_register_gtest_test NAME EXENAME)
if (GMX_BUILD_UNITTESTS AND BUILD_TESTING)
- set(_options INTEGRATION_TEST SLOW_TEST)
+ set(_options INTEGRATION_TEST SLOW_TEST IGNORE_LEAKS)
set(_one_value_args MPI_RANKS OPENMP_THREADS)
cmake_parse_arguments(ARG "${_options}" "${_one_value_args}" "" ${ARGN})
set(_xml_path ${CMAKE_BINARY_DIR}/Testing/Temporary/${NAME}.xml)
else()
set(_timeout 120)
endif()
- gmx_get_test_prefix_cmd(_prefix_cmd IGNORE_LEAKS)
elseif (ARG_SLOW_TEST)
list(APPEND _labels SlowTest)
set(_timeout 480)
- gmx_get_test_prefix_cmd(_prefix_cmd IGNORE_LEAKS)
else()
list(APPEND _labels UnitTest)
gmx_get_test_prefix_cmd(_prefix_cmd)
endif()
+ if (ARG_IGNORE_LEAKS)
+ gmx_get_test_prefix_cmd(_prefix_cmd IGNORE_LEAKS)
+ endif ()
set(_cmd ${_prefix_cmd} $<TARGET_FILE:${EXENAME}>)
if (ARG_OPENMP_THREADS)
if (GMX_OPENMP)