set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-# PROJECT_VERSION should have the following structure:
-# VERSION-dev[-SUFFIX] where the VERSION should have the for: vMajor.vMinor.vPatch
-#
-# The "-dev" suffix is important to keep because it makes possible to distinguish
-# between a build from official release and a build from git release branch on a
-# machine with no git.
-#
-# NOTE: when releasing the "-dev" suffix needs to be stripped off!
-# REGRESSIONTEST_VERSION and REGRESSIONTEST_BRANCH should always be
-# defined.
-set(PROJECT_VERSION "5.0.3-dev")
-# If this is a released tarball, "-dev" will not be present in
-# PROJECT_VERSION, and REGRESSIONTEST_VERSION specifies the version
-# number of the regressiontest tarball against which the code tarball
-# can be tested. This will be the version of the last patch release.
-set(REGRESSIONTEST_VERSION "5.0.3-dev")
-# The MD5 checksum of the regressiontest tarball. Only used if "-dev"
-# is not present in the PROJECT_VERSION
-set(REGRESSIONTEST_MD5SUM "004f2377adf252e2caa9c241390d4b9c")
-# If this is not a released tarball, "-dev" will be present in
-# PROJECT_VERSION, and REGRESSIONTEST_BRANCH specifies the name of the
-# gerrit.gromacs.org branch whose HEAD can test this code, *if* this
-# code contains all recent fixes from the corresponding code branch.
-set(REGRESSIONTEST_BRANCH "refs/heads/release-5-0")
-
-set(CUSTOM_VERSION_STRING ""
- CACHE STRING "Custom version string (if empty, use hard-coded default)")
-mark_as_advanced(CUSTOM_VERSION_STRING)
-if (CUSTOM_VERSION_STRING)
- set(PROJECT_VERSION ${CUSTOM_VERSION_STRING})
-endif()
-set(LIBRARY_SOVERSION 0)
-set(LIBRARY_VERSION ${LIBRARY_SOVERSION}.0.0)
-# It is a bit irritating, but this has to be set separately for now!
-SET(CPACK_PACKAGE_VERSION_MAJOR "5")
-SET(CPACK_PACKAGE_VERSION_MINOR "0")
-#SET(CPACK_PACKAGE_VERSION_PATCH "0")
-
-# The numerical gromacs version. It is 40600 for 4.6.0.
-# The #define GMX_VERSION in gromacs/version.h is set to this value.
-math(EXPR NUM_VERSION
- "${CPACK_PACKAGE_VERSION_MAJOR}*10000 + ${CPACK_PACKAGE_VERSION_MINOR}*100")
-if(CPACK_PACKAGE_VERSION_PATCH)
- math(EXPR NUM_VERSION
- "${NUM_VERSION} + ${CPACK_PACKAGE_VERSION_PATCH}")
-endif()
-
-# The API version tracks the numerical Gromacs version (for now).
-# It is potentially different from the Gromacs version in the future, if
-# the programs/libraries diverge from the presumably more stable API.
-# The #define GMX_API_VERSION in version.h is set to this value to
-# provide backward compatibility of software written against the Gromacs API.
-set(API_VERSION ${NUM_VERSION})
+# Set up common version variables, as well as general information about
+# the build tree (whether the build is from a source package or from a git
+# repository). Also declares a few functions that will be used for generating
+# version info files later.
+include(gmxVersionInfo)
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND UNIX)
set(CMAKE_INSTALL_PREFIX "/usr/local/gromacs" CACHE STRING "Installation prefix (installation will need write permissions here)" FORCE)
endif()
include(gmxBuildTypeReference)
+include(gmxBuildTypeProfile)
include(gmxBuildTypeTSAN)
include(gmxBuildTypeASAN)
include(gmxBuildTypeReleaseWithAssert)
if(NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert." FORCE)
+ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert Profile." FORCE)
# There's no need to offer a user the choice of ThreadSanitizer
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
- "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert")
+ "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert" "Profile")
endif()
if(CMAKE_CONFIGURATION_TYPES)
# Add appropriate GROMACS-specific build types for the Visual
"List of configuration types"
FORCE)
endif()
- set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE)
-set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBINFO RELWITHASSERT MINSIZEREL)
++set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBINFO RELWITHASSERT MINSIZEREL PROFILE)
-enable_language(C)
-enable_language(CXX)
set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
-set(CPACK_PACKAGE_NAME "gromacs")
-set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION})
-set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}")
-set(CPACK_PACKAGE_VENDOR "gromacs.org")
-set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Gromacs - a toolkit for high-performance molecular simulation")
-set(CPACK_RESOURCE_FILE_WELCOME "${CMAKE_SOURCE_DIR}/admin/InstallWelcome.txt")
-# Its GPL/LGPL, so they do not have to agree to a license for mere usage, but some installers require this...
-set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING")
-set(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/admin/InstallInfo.txt")
-set(CPACK_SOURCE_IGNORE_FILES "\\\\.isreposource$;\\\\.git/;\\\\.gitignore$;\\\\.gitattributes;")
-set(CPACK_PROJECT_CONFIG_FILE "${CMAKE_SOURCE_DIR}/CPackInit.cmake")
-# CPack source archives include only the directories we list here.
-# This variable is a list of pairs of names of source and destination
-# directories. Most of these are used for content GROMACS generates as
-# part of the configuration or build.
-set(CPACK_SOURCE_INSTALLED_DIRECTORIES "${CMAKE_SOURCE_DIR};/;${CMAKE_BINARY_DIR}/src/programs/completion;src/programs/completion;${CMAKE_BINARY_DIR}/docs/man/man1;docs/man/man1;${CMAKE_BINARY_DIR}/docs/man/man7;docs/man/man7;${CMAKE_BINARY_DIR}/docs/old-html/final;docs/old-html/final;${CMAKE_BINARY_DIR}/docs/install-guide/final;/")
-set(CPACK_PACKAGE_CONTACT "gmx-users@gromacs.org")
-set(CPACK_GMX_BUILD_HELP "${GMX_BUILD_HELP}") #Works even though GMX_BUILD_HELP is defined later because it is off by default.
-
-#must come after all cpack settings!
-include(CPack)
-
# Set a default valgrind suppression file.
# This unfortunately needs to duplicate information from CTest to work as
# expected...
"File that contains suppressions for the memory checker")
include(CTest)
-set(SOURCE_IS_GIT_REPOSITORY OFF)
-set(SOURCE_IS_SOURCE_DISTRIBUTION OFF)
-if(EXISTS "${CMAKE_SOURCE_DIR}/.git")
- set(SOURCE_IS_GIT_REPOSITORY ON)
-endif()
-if(NOT EXISTS "${CMAKE_SOURCE_DIR}/admin/.isreposource")
- set(SOURCE_IS_SOURCE_DISTRIBUTION ON)
-endif()
+include(gmxCPackUtilities)
+gmx_cpack_init()
+
+# Variables that accumulate stuff influencing the installed headers
+set(INSTALLED_HEADER_INCLUDE_DIRS "")
+set(INSTALLED_HEADER_DEFINITIONS "")
########################################################################
# Check and warn if cache generated on a different host is being reused
GMX_SIMD
"SIMD instruction set for CPU kernels and compiler optimization"
"${GMX_SUGGESTED_SIMD}"
- None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 IBM_QPX Sparc64_HPC_ACE Reference)
+ None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference)
gmx_option_multichoice(
GMX_FFT_LIBRARY
option(GMX_USE_TNG "Use the TNG library for trajectory I/O" ON)
-option(GMX_GIT_VERSION_INFO "Generate git version information" ${SOURCE_IS_GIT_REPOSITORY})
-mark_as_advanced(GMX_GIT_VERSION_INFO)
-
if(UNIX)
option(GMX_SYMLINK_OLD_BINARY_NAMES "Create symbolic links for pre-5.0 binary names" ON)
endif()
endif()
-
-set(PKG_CFLAGS "")
if(GMX_DOUBLE)
add_definitions(-DGMX_DOUBLE)
- set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_DOUBLE")
+ list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_DOUBLE")
if(GMX_RELAXED_DOUBLE_PRECISION)
add_definitions(-DGMX_RELAXED_DOUBLE_PRECISION)
endif()
endif()
if(GMX_SOFTWARE_INVSQRT)
- set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_SOFTWARE_INVSQRT")
+ list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_SOFTWARE_INVSQRT")
endif()
if(WIN32 AND NOT CYGWIN)
add_definitions(-DNOMINMAX)
endif()
-# only bother with finding git and using version.h if the source is a git repo
-if(GMX_GIT_VERSION_INFO)
- if (NOT SOURCE_IS_GIT_REPOSITORY)
- message(FATAL_ERROR
- "Cannot generate git version information from source tree not under git. "
- "Set GMX_GIT_VERSION_INFO=OFF to proceed.")
- endif()
- # We need at least git v1.5.3 be able to parse git's date output. If not
- # found or the version is too small, we can't generate version information.
- find_package(Git)
-
- if(NOT GIT_FOUND OR GIT_VERSION_STRING VERSION_LESS "1.5.3")
- message(FATAL_ERROR
- "No compatible git version found (>= 1.5.3 required). "
- "Won't be able to generate development version information. "
- "Set GMX_GIT_VERSION_INFO=OFF to proceed.")
- endif()
-endif()
-
# Detect boost unless GMX_EXTERNAL_BOOST is explicitly OFF
# Used for default if GMX_EXTERNAL_BOOST is not defined (first CMake pass)
if(NOT DEFINED GMX_EXTERNAL_BOOST OR GMX_EXTERNAL_BOOST)
gmx_test_zlib(HAVE_ZLIB)
endif()
-########################################################################
-# Generate development version info for cache
-########################################################################
-# set(GEN_VERSION_INFO_INTERNAL "ON")
-# include(gmxGenerateVersionString)
-
########################################################################
# Our own GROMACS tests
########################################################################
include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/include)
# Required for config.h, maybe should only be set in src/CMakeLists.txt
include_directories(BEFORE ${CMAKE_BINARY_DIR}/src)
-# Required for gmx_header_config_gen.h to be found before installation
-include_directories(BEFORE ${CMAKE_BINARY_DIR}/src/gromacs/utility)
-# Required for now to make old code compile
-include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders)
include(gmxTestInlineASM)
gmx_test_inline_asm_gcc_x86(GMX_X86_GCC_INLINE_ASM)
include_directories(${COREWRAP_INCLUDE_DIR})
endif()
-option(GMX_BUILD_HELP "Build man pages, HTML help, and completions automatically (requires that compiled binaries can be executed on the build host)" OFF)
+# Value of AUTO tries to generate things, but will only produce warnings if
+# that fails.
+set(build_help_default AUTO)
+if (SOURCE_IS_SOURCE_DISTRIBUTION)
+ set(build_help_default OFF)
+endif()
+gmx_option_trivalue(GMX_BUILD_HELP "Build man pages, HTML help, and completions automatically (requires that compiled binaries can be executed on the build host)" ${build_help_default})
mark_as_advanced(GMX_BUILD_HELP)
-if (GMX_BUILD_HELP AND SOURCE_IS_SOURCE_DISTRIBUTION AND
- "${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
-
+if (GMX_BUILD_HELP AND SOURCE_IS_SOURCE_DISTRIBUTION AND BUILD_IS_INSOURCE)
message(FATAL_ERROR
- "Rebuilding HTML and man pages is not supported for in-source "
- "builds from a source distribution. "
+ "Rebuilding HTML and man pages or shell completions is not supported "
+ "for in-source builds from a source distribution. "
"Set GMX_BUILD_HELP=OFF or do an out-of-source build to proceed.")
endif()
set(GMX_EXE_LINKER_FLAGS ${GMX_EXE_LINKER_FLAGS} ${OpenMP_LINKER_FLAGS})
set(GMX_SHARED_LINKER_FLAGS ${GMX_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS})
endif()
-set(PKG_CFLAGS "${PKG_CFLAGS} ${OpenMP_C_FLAGS}")
########################################################################
# Specify install locations
# These variables are used internally to provide a central location for
# customizing the install locations.
-set(LIB_INSTALL_DIR ${GMX_LIB_INSTALL_DIR})
-set(BIN_INSTALL_DIR bin)
-set(DATA_INSTALL_DIR share/${GMX_DATA_INSTALL_DIR})
-set(MAN_INSTALL_DIR share/man)
-set(INCL_INSTALL_DIR include)
+set(LIB_INSTALL_DIR ${GMX_LIB_INSTALL_DIR})
+set(BIN_INSTALL_DIR bin)
+set(DATA_INSTALL_DIR share/${GMX_DATA_INSTALL_DIR})
+set(MAN_INSTALL_DIR share/man)
+# If the nesting level wrt. the installation root is changed,
+# gromacs-config.cmake.cmakein needs to be adapted.
+set(CMAKE_INSTALL_DIR share/cmake)
+# TODO: Make GMXRC adapt if this is changed
+set(PKGCONFIG_INSTALL_DIR ${LIB_INSTALL_DIR}/pkgconfig)
+set(INCL_INSTALL_DIR include)
# These variables get written into config.h for use in finding the data
# directories.
set(GMXLIB_SEARCH_DIR share/${GMX_DATA_INSTALL_DIR}/top)
set(GMXLIB_FALLBACK ${CMAKE_INSTALL_PREFIX}/${DATA_INSTALL_DIR}/top)
+list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${INCL_INSTALL_DIR})
+
# Binary and library suffix options
include(gmxManageSuffixes)
if(GMX_EXTERNAL_BOOST)
include_directories(${Boost_INCLUDE_DIRS})
- set(PKG_CFLAGS "${PKG_CFLAGS} -I${Boost_INCLUDE_DIRS}")
+ list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${Boost_INCLUDE_DIRS})
else()
include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/boost)
+ list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${INCL_INSTALL_DIR}/gromacs/external/boost)
+ list(APPEND INSTALLED_HEADED_DEFINITIONS "-DBOOST_NO_TYPEID")
# typeid not supported for minimal internal version
# (would add significant amount of code)
add_definitions(-DBOOST_NO_TYPEID)
- # TODO: Propagate the above settings to the installed CMakeFiles.txt template
- # (from share/template/)
- set(PKG_CFLAGS "${PKG_CFLAGS} -DBOOST_NO_TYPEID -I${CMAKE_INSTALL_PREFIX}/${INCL_INSTALL_DIR}/gromacs/external/boost")
if (NOT GMX_BUILD_MDRUN_ONLY)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/src/external/boost/boost
DESTINATION ${INCL_INSTALL_DIR}/gromacs/external/boost
add_subdirectory(tests)
endif()
+gmx_cpack_write_config()
+
# Issue a warning if NVIDIA GPUs were detected, but CUDA was not found.
# Don't bother the user after the first configure pass.
if ((CUDA_NOTFOUND_AUTO AND GMX_DETECT_GPU_AVAILABLE) AND NOT GMX_GPU_DETECTION_DONE)
# be set up elsewhere and passed to this function, but it is
# inconvenient in CMake to pass more than one list, and such a
# list is only used here.
- foreach(build_type RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE)
- foreach(build_type RELWITHDEBINFO RELWITHASSERT MINSIZEREL)
++ foreach(build_type RELWITHDEBINFO RELWITHASSERT MINSIZEREL PROFILE)
set(GMXC_${language}FLAGS_${build_type} "${GMXC_${language}FLAGS_RELEASE}")
endforeach()
# Copy the flags that are only used by the real Release build
endif()
# Append to the variables for the given build type for
- # each language, in the parent scope.
+ # each language, in the parent scope. We add our new variables at the end, so
+ # compiler-specific choices are more likely to override default CMake choices.
+ # This is for instance useful for RelWithDebInfo builds, where we want to use the full
+ # set of our optimization flags detected in this file, rather than having -O2 override them.
set(CMAKE_${language}_FLAGS${punctuation}${build_type}
- "${GMXC_${language}FLAGS${punctuation}${build_type}} ${CMAKE_${language}_FLAGS${punctuation}${build_type}}"
+ "${CMAKE_${language}_FLAGS${punctuation}${build_type}} ${GMXC_${language}FLAGS${punctuation}${build_type}}"
PARENT_SCOPE)
endforeach()
endforeach()
# Since 4.8 on by default. For previous version disabling is a no-op. Only disabling for Release because with assert
# the warnings are OK.
GMX_TEST_CFLAG(CFLAGS_WARN_REL "-Wno-array-bounds" GMXC_CFLAGS_RELEASE_ONLY)
- # Since gcc 4.8 strict - false postives with old gmx_fatal. TODO: Remove in master
- GMX_TEST_CFLAG(CFLAGS_WARN_UNINIT "-Wno-maybe-uninitialized" GMXC_CFLAGS)
if(CYGWIN)
GMX_TEST_CFLAG(CFLAGS_WARN_SUBSCRIPT "-Wno-char-subscripts" GMXC_CFLAGS)
endif()
# new in gcc 4.5
GMX_TEST_CFLAG(CFLAGS_EXCESS_PREC "-fexcess-precision=fast" GMXC_CFLAGS_RELEASE)
- GMX_TEST_CFLAG(CFLAGS_COPT "-fomit-frame-pointer -funroll-all-loops"
+ GMX_TEST_CFLAG(CFLAGS_COPT "-funroll-all-loops"
GMXC_CFLAGS_RELEASE)
GMX_TEST_CFLAG(CFLAGS_NOINLINE "-fno-inline" GMXC_CFLAGS_DEBUG)
endif()
GMX_TEST_CFLAG(CXXFLAGS_WARN_REL "-Wno-array-bounds" GMXC_CXXFLAGS_RELEASE_ONLY)
# new in gcc 4.5
GMX_TEST_CXXFLAG(CXXFLAGS_EXCESS_PREC "-fexcess-precision=fast" GMXC_CXXFLAGS_RELEASE)
- GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-fomit-frame-pointer -funroll-all-loops"
+ GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-funroll-all-loops"
GMXC_CXXFLAGS_RELEASE)
GMX_TEST_CXXFLAG(CXXFLAGS_NOINLINE "-fno-inline" GMXC_CXXFLAGS_DEBUG)
endif()
GMX_TEST_CFLAG(CFLAGS_PRAGMA "-wd161" GMXC_CFLAGS)
endif()
endif()
-# 111: statement is unreachable
-# 177: function ".." was declared but never referenced
-# 181: argument of type ".." is incompatible with format "..", expecting argument of type ".."
+# 177: function/variable ".." was declared but never referenced
# 193: zero used for undefined preprocessing identifier ".."
# 271: trailing comma is nonstandard
# 304: access control not specified ("public" by default)
#3346: dynamic exception specifications are deprecated
#11074: Inlining inhibited by limit max-size(/max-total-size)
#11076: To get full report use -opt-report=3 -opt-report-phase ipo (shown for previous remark)
- GMX_TEST_CFLAG(CFLAGS_WARN "-w3 -wd111 -wd177 -wd181 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076" GMXC_CFLAGS)
+ GMX_TEST_CFLAG(CFLAGS_WARN "-w3 -wd177 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076" GMXC_CFLAGS)
GMX_TEST_CFLAG(CFLAGS_STDGNU "-std=gnu99" GMXC_CFLAGS)
GMX_TEST_CFLAG(CFLAGS_OPT "-ip -funroll-all-loops -alias-const -ansi-alias" GMXC_CFLAGS_RELEASE)
else()
GMX_TEST_CFLAG(CFLAGS_PRAGMA "/wd161" GMXC_CFLAGS)
endif()
endif()
- GMX_TEST_CFLAG(CFLAGS_WARN "/W3 /wd111 /wd177 /wd181 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346" GMXC_CFLAGS)
+ GMX_TEST_CFLAG(CFLAGS_WARN "/W3 /wd177 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346" GMXC_CFLAGS)
GMX_TEST_CFLAG(CFLAGS_OPT "/Qip" GMXC_CFLAGS_RELEASE)
endif()
endif()
#All but the following warnings are identical for the C-compiler (see above)
#1782: #pragma once is obsolete
#2282: unrecognized GCC pragma
- GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-w3 -wd111 -wd177 -wd181 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076 -wd1782 -wd2282" GMXC_CXXFLAGS)
+ GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-w3 -wd177 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076 -wd1782 -wd2282" GMXC_CXXFLAGS)
GMX_TEST_CXXFLAG(CXXFLAGS_OPT "-ip -funroll-all-loops -alias-const -ansi-alias" GMXC_CXXFLAGS_RELEASE)
else()
if(NOT GMX_OPENMP)
GMX_TEST_CXXFLAG(CXXFLAGS_PRAGMA "/wd161" GMXC_CXXFLAGS)
endif()
endif()
- GMX_TEST_CXXFLAG(CXXFLAGS_WARN "/W3 /wd111 /wd177 /wd181 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346 /wd1782 /wd2282" GMXC_CXXFLAGS)
+ GMX_TEST_CXXFLAG(CXXFLAGS_WARN "/W3 /wd177 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346 /wd1782 /wd2282" GMXC_CXXFLAGS)
GMX_TEST_CXXFLAG(CXXFLAGS_OPT "/Qip" GMXC_CXXFLAGS_RELEASE)
endif()
endif()
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "config.h"
+#include <stdlib.h>
#include <time.h>
+
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "gromacs/commandline/pargs.h"
-#include "typedefs.h"
-#include "types/commrec.h"
-#include "gromacs/utility/smalloc.h"
-#include "vec.h"
-#include "copyrite.h"
#include "gromacs/fileio/tpxio.h"
-#include "gromacs/utility/cstringutil.h"
-#include "readinp.h"
-#include "calcgrid.h"
-#include "checkpoint.h"
-#include "macros.h"
-#include "gmx_ana.h"
-#include "names.h"
-#include "perf_est.h"
-#include "inputrec.h"
-#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/gmxana/gmx_ana.h"
+#include "gromacs/legacyheaders/calcgrid.h"
+#include "gromacs/legacyheaders/checkpoint.h"
+#include "gromacs/legacyheaders/inputrec.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/perf_est.h"
+#include "gromacs/legacyheaders/readinp.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/utilities.h"
-
-#include "gmx_fatal.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/utility/baseversion.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
/* Enum for situations that can occur during log file parsing, the
* corresponding string entries can be found in do_the_tests() in
}
-static gmx_bool is_equal(real a, real b)
-{
- real diff, eps = 1.0e-7;
-
-
- diff = a - b;
-
- if (diff < 0.0)
- {
- diff = -diff;
- }
-
- if (diff < eps)
- {
- return TRUE;
- }
- else
- {
- return FALSE;
- }
-}
-
-
static void remove_if_exists(const char *fn)
{
if (gmx_fexist(fn))
fprintf(fp, "\n");
/* Only mention settings if they were modified: */
- bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]);
- bRefinedVdW = !is_equal(info->rvdw[k_win], info->rvdw[0] );
+ bRefinedCoul = !gmx_within_tol(info->rcoulomb[k_win], info->rcoulomb[0], GMX_REAL_EPS);
+ bRefinedVdW = !gmx_within_tol(info->rvdw[k_win], info->rvdw[0], GMX_REAL_EPS);
bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] &&
info->nky[k_win] == info->nky[0] &&
info->nkz[k_win] == info->nkz[0]);
{
if ( (cp = getenv("MPIRUN")) != NULL)
{
- *cmd_mpirun = strdup(cp);
+ *cmd_mpirun = gmx_strdup(cp);
}
else
{
- *cmd_mpirun = strdup(def_mpirun);
+ *cmd_mpirun = gmx_strdup(def_mpirun);
}
}
else
{
- *cmd_mpirun = strdup(empty_mpirun);
+ *cmd_mpirun = gmx_strdup(empty_mpirun);
}
if ( (cp = getenv("MDRUN" )) != NULL)
{
- *cmd_mdrun = strdup(cp);
+ *cmd_mdrun = gmx_strdup(cp);
}
else
{
- *cmd_mdrun = strdup(def_mdrun);
+ *cmd_mdrun = gmx_strdup(def_mdrun);
}
}
{
/* Determine which Coulomb radii rc to use in the benchmarks */
add = (rmax-rmin)/(*ntprs-1);
- if (is_equal(rmin, info->rcoulomb[0]))
+ if (gmx_within_tol(rmin, info->rcoulomb[0], GMX_REAL_EPS))
{
ir->rcoulomb = rmin + j*add;
}
- else if (is_equal(rmax, info->rcoulomb[0]))
+ else if (gmx_within_tol(rmax, info->rcoulomb[0], GMX_REAL_EPS))
{
ir->rcoulomb = rmin + (j-1)*add;
}
fprintf(fp, " %-14s\n", fn_bench_tprs[j]);
/* Make it clear to the user that some additional settings were modified */
- if (!is_equal(ir->rvdw, info->rvdw[0])
- || !is_equal(ir->rlistlong, info->rlistlong[0]) )
+ if (!gmx_within_tol(ir->rvdw, info->rvdw[0], GMX_REAL_EPS)
+ || !gmx_within_tol(ir->rlistlong, info->rlistlong[0], GMX_REAL_EPS) )
{
bNote = TRUE;
}
{
/* To prevent confusion, do not again issue a gmx_fatal here since we already
* get the error message from mdrun itself */
- sprintf(msg, "Cannot run the benchmark simulations! Please check the error message of\n"
+ sprintf(msg,
+ "Cannot run the first benchmark simulation! Please check the error message of\n"
"mdrun for the source of the problem. Did you provide a command line\n"
- "argument that neither g_tune_pme nor mdrun understands? Offending command:\n"
+ "argument that neither gmx tune_pme nor mdrun understands? If you're\n"
+ "sure your command line should work, you can bypass this check with \n"
+ "gmx tune_pme -nocheck. The failing command was:\n"
"\n%s\n\n", command);
fprintf(stderr, "%s", msg);
int npme_fixed, /* If >= -1, test fixed number of PME
* nodes only */
const char *npmevalues_opt, /* Which -npme values should be tested */
- t_perf **perfdata, /* Here the performace data is stored */
+ t_perf **perfdata, /* Here the performance data is stored */
int *pmeentries, /* Entries in the nPMEnodes list */
int repeats, /* Repeat each test this often */
int nnodes, /* Total number of nodes = nPP + nPME */
const t_filenm *fnm, /* List of filenames from command line */
int nfile, /* Number of files specified on the cmdl. */
int presteps, /* DLB equilibration steps, is checked */
- gmx_int64_t cpt_steps) /* Time step counter in the checkpoint */
+ gmx_int64_t cpt_steps, /* Time step counter in the checkpoint */
+ gmx_bool bCheck) /* Check whether benchmark mdrun works */
{
int i, nr, k, ret, count = 0, totaltests;
int *nPMEnodes = NULL;
cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);
/* To prevent that all benchmarks fail due to a show-stopper argument
- * on the mdrun command line, we make a quick check first */
- if (bFirst)
+ * on the mdrun command line, we make a quick check first.
+ * This check can be turned off in cases where the automatically chosen
+ * number of PME-only ranks leads to a number of PP ranks for which no
+ * decomposition can be found (e.g. for large prime numbers) */
+ if (bFirst && bCheck)
{
make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp, fnm, nfile);
}
/* Add test scenarios if rmin or rmax were set */
if (*ntprs <= 2)
{
- if (!is_equal(*rmin, rcoulomb) && (*ntprs == 1) )
+ if (!gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) && (*ntprs == 1) )
{
(*ntprs)++;
fprintf(stderr, "NOTE: Setting -rmin to %g changed -ntpr to %d\n",
*rmin, *ntprs);
}
- if (!is_equal(*rmax, rcoulomb) && (*ntprs == 1) )
+ if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) && (*ntprs == 1) )
{
(*ntprs)++;
fprintf(stderr, "NOTE: Setting -rmax to %g changed -ntpr to %d\n",
}
old = *ntprs;
/* If one of rmin, rmax is set, we need 2 tpr files at minimum */
- if (!is_equal(*rmax, rcoulomb) || !is_equal(*rmin, rcoulomb) )
+ if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) || !gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) )
{
*ntprs = max(*ntprs, 2);
}
/* If both rmin, rmax are set, we need 3 tpr files at minimum */
- if (!is_equal(*rmax, rcoulomb) && !is_equal(*rmin, rcoulomb) )
+ if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) && !gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) )
{
*ntprs = max(*ntprs, 3);
}
if (*ntprs > 1)
{
- if (is_equal(*rmin, rcoulomb) && is_equal(rcoulomb, *rmax)) /* We have just a single rc */
+ if (gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) && gmx_within_tol(rcoulomb, *rmax, GMX_REAL_EPS)) /* We have just a single rc */
{
fprintf(stderr, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n"
"Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n"
"need to provide a machine- or hostfile. This can also be passed",
"via the MPIRUN variable, e.g.[PAR]",
"[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]",
+ "Before doing the actual benchmark runs, [THISMODULE] will do a quick",
+ "check whether mdrun works as expected with the provided parallel settings",
+ "if the [TT]-check[tt] option is activated (the default).",
"Please call [THISMODULE] with the normal options you would pass to",
"[gmx-mdrun] and add [TT]-np[tt] for the number of ranks to perform the",
"tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]",
"written with enlarged cutoffs and smaller Fourier grids respectively.",
"Typically, the first test (number 0) will be with the settings from the input",
"[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff",
- "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ",
+ "specified by [TT]-rmax[tt] with a somewhat smaller PME grid at the same time. ",
"In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ",
"The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier "
"spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1",
"MD systems. The dynamic load balancing needs about 100 time steps",
"to adapt to local load imbalances, therefore the time step counters",
"are by default reset after 100 steps. For large systems (>1M atoms), as well as ",
- "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
+ "for a higher accuracy of the measurements, you should set [TT]-resetstep[tt] to a higher value.",
"From the 'DD' load imbalance entries in the md.log output file you",
"can tell after how many steps the load is sufficiently balanced. Example call:[PAR]"
"[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]",
/* g_tune_pme */
{ efOUT, "-p", "perf", ffWRITE },
{ efLOG, "-err", "bencherr", ffWRITE },
- { efTPX, "-so", "tuned", ffWRITE },
+ { efTPR, "-so", "tuned", ffWRITE },
/* mdrun: */
- { efTPX, NULL, NULL, ffREAD },
+ { efTPR, NULL, NULL, ffREAD },
{ efTRN, "-o", NULL, ffWRITE },
{ efCOMPRESSED, "-x", NULL, ffOPTWR },
{ efCPT, "-cpi", NULL, ffOPTRD },
gmx_bool bKeepAndNumCPT = FALSE;
gmx_bool bResetCountersHalfWay = FALSE;
gmx_bool bBenchmark = TRUE;
+ gmx_bool bCheck = TRUE;
output_env_t oenv = NULL;
"Launch the real simulation after optimization" },
{ "-bench", FALSE, etBOOL, {&bBenchmark},
"Run the benchmarks or just create the input [TT].tpr[tt] files?" },
+ { "-check", FALSE, etBOOL, {&bCheck},
+ "Before the benchmark runs, check whether mdrun works in parallel" },
/******************/
/* mdrun options: */
/******************/
sep_line(fp);
fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n");
sep_line(fp);
- fprintf(fp, "%s for Gromacs %s\n", ShortProgram(), GromacsVersion());
+ fprintf(fp, "%s for Gromacs %s\n", output_env_get_program_display_name(oenv),
+ gmx_version());
if (!bThreads)
{
fprintf(fp, "Number of ranks : %d\n", nnodes);
{
do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries,
repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun,
- cmd_args_bench, fnm, NFILE, presteps, cpt_steps);
+ cmd_args_bench, fnm, NFILE, presteps, cpt_steps, bCheck);
fprintf(fp, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds)/60.0);
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "gromacs/legacyheaders/calcgrid.h"
#include <math.h>
-#include "typedefs.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
-#include "gmx_fatal.h"
-#include "calcgrid.h"
/* The grid sizes below are based on timing of a 3D cubic grid in fftw
* compiled with SSE using 4 threads in fft5d.c.
/* Determine how many pre-factors of 2 we need */
fac2 = 1;
i = g_baseNR - 1;
- while (fac2*grid_base[i-1] < nmin)
+ while (fac2*grid_base[i] < nmin)
{
fac2 *= 2;
}
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+#include "toppush.h"
+
+#include <assert.h>
#include <ctype.h>
#include <math.h>
-#include <assert.h>
+#include <stdlib.h>
-#include "sysstuff.h"
-#include "gromacs/utility/smalloc.h"
-#include "macros.h"
+#include "gromacs/gmxpreprocess/gpp_atomtype.h"
+#include "gromacs/gmxpreprocess/gpp_bond_atomtype.h"
+#include "gromacs/gmxpreprocess/readir.h"
+#include "gromacs/gmxpreprocess/topdirs.h"
+#include "gromacs/gmxpreprocess/toputil.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/warninp.h"
+#include "gromacs/topology/symtab.h"
#include "gromacs/utility/cstringutil.h"
-#include "names.h"
-#include "toputil.h"
-#include "toppush.h"
-#include "topdirs.h"
-#include "readir.h"
-#include "symtab.h"
-#include "gmx_fatal.h"
-#include "warninp.h"
-#include "gpp_atomtype.h"
-#include "gpp_bond_atomtype.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
void generate_nbparams(int comb, int ftype, t_params *plist, gpp_atomtype_t atype,
warninp_t wi)
{
sprintf(errbuf, "Overriding %s parameters.%s",
interaction_function[ftype].longname,
- (ftype == F_PDIHS) ? "\nUse dihedraltype 4 to allow several multiplicity terms." : "");
+ (ftype == F_PDIHS) ?
+ "\nUse dihedraltype 9 to allow several multiplicity terms. Only consecutive lines are combined. Non-consective lines overwrite each other."
+ : "");
warning(wi, errbuf);
- fprintf(stderr, " old:");
+ fprintf(stderr, " old: ");
for (j = 0; (j < nrfp); j++)
{
fprintf(stderr, " %g", bt->param[i].c[j]);
#ifndef _domdec_h
#define _domdec_h
-#include "typedefs.h"
-#include "vsite.h"
-#include "genborn.h"
+#include "gromacs/legacyheaders/genborn.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/vsite.h"
+#include "gromacs/timing/wallcycle.h"
#ifdef __cplusplus
extern "C" {
* possible after subsequently setting a shorter cut-off with change_dd_cutoff.
*/
+ gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd);
+ /* Return if the DLB lock is set */
+
+ void dd_dlb_set_lock(gmx_domdec_t *dd, gmx_bool bValue);
+ /* Set a lock such that with DLB=auto DLB can (not) get turned on */
+
void dd_setup_dlb_resource_sharing(t_commrec *cr,
const gmx_hw_info_t *hwinfo,
const gmx_hw_opt_t *hw_opt);
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+/*! \internal \file
+ *
+ * \brief This file defines functions necessary for mdrun and tools to
+ * compute energies and forces for bonded interactions.
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ *
+ * \ingroup module_listed-forces
+ */
+#include "gmxpre.h"
+
+#include "bonded.h"
+
+#include "config.h"
-#include <math.h>
#include <assert.h>
-#include "physics.h"
-#include "vec.h"
-#include "gromacs/math/utilities.h"
-#include "txtdump.h"
-#include "bondf.h"
-#include "gromacs/utility/smalloc.h"
-#include "pbc.h"
-#include "ns.h"
-#include "macros.h"
-#include "names.h"
-#include "gmx_fatal.h"
-#include "mshift.h"
-#include "main.h"
-#include "disre.h"
-#include "orires.h"
-#include "force.h"
-#include "nonbonded.h"
-#include "restcbt.h"
+#include <cmath>
+
+#include <algorithm>
+
+#include "gromacs/legacyheaders/disre.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/nonbonded.h"
+#include "gromacs/legacyheaders/ns.h"
+#include "gromacs/legacyheaders/orires.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/math/units.h"
+#include "gromacs/math/utilities.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/mshift.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/simd/simd.h"
#include "gromacs/simd/simd_math.h"
#include "gromacs/simd/vector_operations.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
-/* Find a better place for this? */
+#include "restcbt.h"
+
+/*! \brief Mysterious CMAP coefficient matrix */
const int cmap_coeff_matrix[] = {
1, 0, -3, 2, 0, 0, 0, 0, -3, 0, 9, -6, 2, 0, -6, 4,
0, 0, 0, 0, 0, 0, 0, 0, 3, 0, -9, 6, -2, 0, 6, -4,
};
-
+/* TODO This function should go and live in nonbonded.c where it is
+ really needed. Here, it only supports giving a fatal error message
+ with FENE_bonds */
int glatnr(int *global_atom_index, int i)
{
int atnr;
return atnr;
}
+/*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
+ *
+ * \todo This kind of code appears in many places. Consolidate it */
static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
{
if (pbc)
gmx_simd_real_t bxx;
} pbc_simd_t;
-/* Set the SIMD pbc data from a normal t_pbc struct */
+/*! \brief Set the SIMD pbc data from a normal t_pbc struct */
static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd)
{
rvec inv_bdiag;
}
}
-/* Correct distance vector *dx,*dy,*dz for PBC using SIMD */
+/*! \brief Correct distance vector *dx,*dy,*dz for PBC using SIMD */
static gmx_inline void
pbc_dx_simd(gmx_simd_real_t *dx, gmx_simd_real_t *dy, gmx_simd_real_t *dz,
const pbc_simd_t *pbc)
#endif /* GMX_SIMD_HAVE_REAL */
-/*
- * Morse potential bond by Frank Everdij
+/*! \brief Morse potential bond
*
- * Three parameters needed:
+ * By Frank Everdij. Three parameters needed:
*
* b0 = equilibrium distance in nm
* be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
* Note: the potential is referenced to be +cb at infinite separation
* and zero at the equilibrium distance!
*/
-
real morse_bonds(int nbonds,
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
return vtot;
}
+//! \cond
real cubic_bonds(int nbonds,
const t_iatom forceatoms[], const t_iparams forceparams[],
const rvec x[], rvec f[], rvec fshift[],
const real half = 0.5;
const real one = 1.0;
real bm, kb;
- real dr, dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
+ real dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
rvec dx;
int i, m, ki, type, ai, aj;
ivec dt;
kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
r_HH = 1.0/forceparams[type0].wpol.rHH;
- r_OD = 1.0/forceparams[type0].wpol.rOD;
if (debug)
{
fprintf(debug, "WPOL: qS = %10.5f aS = %5d\n", qS, aS);
rvec fshift[], real afac)
{
rvec r12;
- real r12sq, r12_1, r12n, r12bar, v0, v1, fscal, ebar, fff;
+ real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
int m, t;
t = pbc_rvec_sub(pbc, xi, xj, r12); /* 3 */
int gmx_unused *global_atom_index)
{
/* Interaction between two pairs of particles with opposite charge */
- int i, type, a1, da1, a2, da2;
- real q1, q2, qq, a, al1, al2, afac;
- real V = 0;
+ int i, type, a1, da1, a2, da2;
+ real q1, q2, qq, a, al1, al2, afac;
+ real V = 0;
+ const real minusOneOnSix = -1.0/6.0;
for (i = 0; (i < nbonds); )
{
al1 = forceparams[type].thole.alpha1;
al2 = forceparams[type].thole.alpha2;
qq = q1*q2;
- afac = a*pow(al1*al2, -1.0/6.0);
+ afac = a*pow(al1*al2, minusOneOnSix);
V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
V += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
V += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
rvec m, rvec n, rvec f[])
{
rvec f_i, f_j, f_k, f_l;
- rvec uvec, vvec, svec, dx_jl;
+ rvec uvec, vvec, svec;
real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
real a, b, p, q, toler;
- ivec jt, dt_ij, dt_kj, dt_lj;
iprm = iprod(m, m); /* 5 */
iprn = iprod(n, n); /* 5 */
const int nfa1 = 5;
int i, iu, s;
int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
- real ddphi;
real dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
real buf_array[7*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
- real *cp, *phi0, *mult, *phi, *p, *q, *sf_i, *msf_l;
+ real *cp, *phi0, *mult, *p, *q;
gmx_simd_real_t phi0_S, phi_S;
gmx_simd_real_t mx_S, my_S, mz_S;
gmx_simd_real_t nx_S, ny_S, nz_S;
mult = buf + 2*GMX_SIMD_REAL_WIDTH;
p = buf + 3*GMX_SIMD_REAL_WIDTH;
q = buf + 4*GMX_SIMD_REAL_WIDTH;
- sf_i = buf + 5*GMX_SIMD_REAL_WIDTH;
- msf_l = buf + 6*GMX_SIMD_REAL_WIDTH;
set_pbc_simd(pbc, &pbc_simd);
}
}
- real ddphi;
+ /* This is mostly a copy of pdihs_noener_simd above, but with using
+ * the RB potential instead of a harmonic potential.
+ * This function can replace rbdihs() when no energy and virial are needed.
+ */
+ static void
+ rbdihs_noener_simd(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const rvec x[], rvec f[],
+ const t_pbc *pbc, const t_graph gmx_unused *g,
+ real gmx_unused lambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
+ {
+ const int nfa1 = 5;
+ int i, iu, s, j;
+ int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
- real *parm, *phi, *p, *q, *sf_i, *msf_l;
+ real dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
+ real buf_array[(NR_RBDIHS + 4)*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
- sf_i = buf + (NR_RBDIHS + 2)*GMX_SIMD_REAL_WIDTH;
- msf_l = buf + (NR_RBDIHS + 3)*GMX_SIMD_REAL_WIDTH;
++ real *parm, *p, *q;
+
+ gmx_simd_real_t phi_S;
+ gmx_simd_real_t ddphi_S, cosfac_S;
+ gmx_simd_real_t mx_S, my_S, mz_S;
+ gmx_simd_real_t nx_S, ny_S, nz_S;
+ gmx_simd_real_t nrkj_m2_S, nrkj_n2_S;
+ gmx_simd_real_t parm_S, c_S;
+ gmx_simd_real_t sin_S, cos_S;
+ gmx_simd_real_t sf_i_S, msf_l_S;
+ pbc_simd_t pbc_simd;
+
+ gmx_simd_real_t pi_S = gmx_simd_set1_r(M_PI);
+ gmx_simd_real_t one_S = gmx_simd_set1_r(1.0);
+
+ /* Ensure SIMD register alignment */
+ dr = gmx_simd_align_r(dr_array);
+ buf = gmx_simd_align_r(buf_array);
+
+ /* Extract aligned pointer for parameters and variables */
+ parm = buf;
+ p = buf + (NR_RBDIHS + 0)*GMX_SIMD_REAL_WIDTH;
+ q = buf + (NR_RBDIHS + 1)*GMX_SIMD_REAL_WIDTH;
+
+ set_pbc_simd(pbc, &pbc_simd);
+
+ /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
+ for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
+ {
+ /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
+ * iu indexes into forceatoms, we should not let iu go beyond nbonds.
+ */
+ iu = i;
+ for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
+ {
+ type = forceatoms[iu];
+ ai[s] = forceatoms[iu+1];
+ aj[s] = forceatoms[iu+2];
+ ak[s] = forceatoms[iu+3];
+ al[s] = forceatoms[iu+4];
+
+ /* We don't need the first parameter, since that's a constant
+ * which only affects the energies, not the forces.
+ */
+ for (j = 1; j < NR_RBDIHS; j++)
+ {
+ parm[j*GMX_SIMD_REAL_WIDTH + s] =
+ forceparams[type].rbdihs.rbcA[j];
+ }
+
+ /* At the end fill the arrays with identical entries */
+ if (iu + nfa1 < nbonds)
+ {
+ iu += nfa1;
+ }
+ }
+
+ /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
+ dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
+ dr,
+ &phi_S,
+ &mx_S, &my_S, &mz_S,
+ &nx_S, &ny_S, &nz_S,
+ &nrkj_m2_S,
+ &nrkj_n2_S,
+ p, q);
+
+ /* Change to polymer convention */
+ phi_S = gmx_simd_sub_r(phi_S, pi_S);
+
+ gmx_simd_sincos_r(phi_S, &sin_S, &cos_S);
+
+ ddphi_S = gmx_simd_setzero_r();
+ c_S = one_S;
+ cosfac_S = one_S;
+ for (j = 1; j < NR_RBDIHS; j++)
+ {
+ parm_S = gmx_simd_load_r(parm + j*GMX_SIMD_REAL_WIDTH);
+ ddphi_S = gmx_simd_fmadd_r(gmx_simd_mul_r(c_S, parm_S), cosfac_S, ddphi_S);
+ cosfac_S = gmx_simd_mul_r(cosfac_S, cos_S);
+ c_S = gmx_simd_add_r(c_S, one_S);
+ }
+
+ /* Note that here we do not use the minus sign which is present
+ * in the normal RB code. This is corrected for through (m)sf below.
+ */
+ ddphi_S = gmx_simd_mul_r(ddphi_S, sin_S);
+
+ sf_i_S = gmx_simd_mul_r(ddphi_S, nrkj_m2_S);
+ msf_l_S = gmx_simd_mul_r(ddphi_S, nrkj_n2_S);
+
+ /* After this m?_S will contain f[i] */
+ mx_S = gmx_simd_mul_r(sf_i_S, mx_S);
+ my_S = gmx_simd_mul_r(sf_i_S, my_S);
+ mz_S = gmx_simd_mul_r(sf_i_S, mz_S);
+
+ /* After this m?_S will contain -f[l] */
+ nx_S = gmx_simd_mul_r(msf_l_S, nx_S);
+ ny_S = gmx_simd_mul_r(msf_l_S, ny_S);
+ nz_S = gmx_simd_mul_r(msf_l_S, nz_S);
+
+ gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S);
+ gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S);
+ gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S);
+ gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S);
+ gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S);
+ gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S);
+
+ iu = i;
+ s = 0;
+ do
+ {
+ do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
+ p[s], q[s],
+ dr[ XX *GMX_SIMD_REAL_WIDTH+s],
+ dr[ YY *GMX_SIMD_REAL_WIDTH+s],
+ dr[ ZZ *GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s],
+ dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s],
+ f);
+ s++;
+ iu += nfa1;
+ }
+ while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
+ }
+ }
+
#endif /* GMX_SIMD_HAVE_REAL */
dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
- do_dih_fup(ai, aj, ak, al, (real)(-ddphi), r_ij, r_kj, r_kl, m, n,
+ do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
/* 218 TOTAL */
#ifdef DEBUG
}
}
+/*! \brief Computes forces and potential for flat-bottom cylindrical restraints.
+ * Returns the flat-bottom potential. */
+static real do_fbposres_cylinder(int fbdim, rvec fm, rvec dx, real rfb, real kk, gmx_bool bInvert)
+{
+ int d;
+ real dr, dr2, invdr, v, rfb2;
+
+ dr2 = 0.0;
+ rfb2 = sqr(rfb);
+ v = 0.0;
+
+ for (d = 0; d < DIM; d++)
+ {
+ if (d != fbdim)
+ {
+ dr2 += sqr(dx[d]);
+ }
+ }
+
+ if (dr2 > 0.0 &&
+ ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
+ )
+ {
+ dr = sqrt(dr2);
+ invdr = 1./dr;
+ v = 0.5*kk*sqr(dr - rfb);
+ for (d = 0; d < DIM; d++)
+ {
+ if (d != fbdim)
+ {
+ fm[d] = -kk*(dr-rfb)*dx[d]*invdr; /* Force pointing to the center */
+ }
+ }
+ }
+
+ return v;
+}
+
/*! \brief Adds forces of flat-bottomed positions restraints to f[]
* and fixes vir_diag. Returns the flat-bottomed potential. */
real fbposres(int nbonds,
int i, ai, m, d, type, npbcdim = 0, fbdim;
const t_iparams *pr;
real vtot, kk, v;
- real ref = 0, dr, dr2, rpot, rfb, rfb2, fact, invdr;
- rvec com_sc, rdist, pos, dx, dpdl, fm;
+ real dr, dr2, rfb, rfb2, fact;
+ rvec com_sc, rdist, dx, dpdl, fm;
gmx_bool bInvert;
npbcdim = ePBC2npbcdim(ePBC);
svmul(fact, dx, fm);
}
break;
+ case efbposresCYLINDERX:
+ /* cylindrical flat-bottom posres in y-z plane. fm[XX] = 0. */
+ fbdim = XX;
+ v = do_fbposres_cylinder(fbdim, fm, dx, rfb, kk, bInvert);
+ break;
+ case efbposresCYLINDERY:
+ /* cylindrical flat-bottom posres in x-z plane. fm[YY] = 0. */
+ fbdim = YY;
+ v = do_fbposres_cylinder(fbdim, fm, dx, rfb, kk, bInvert);
+ break;
case efbposresCYLINDER:
- /* cylidrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
- dr2 = sqr(dx[XX])+sqr(dx[YY]);
- if (dr2 > 0.0 &&
- ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
- )
- {
- dr = sqrt(dr2);
- invdr = 1./dr;
- v = 0.5*kk*sqr(dr - rfb);
- fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr; /* Force pointing to the center */
- fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
- }
+ /* equivalent to efbposresCYLINDERZ for backwards compatibility */
+ case efbposresCYLINDERZ:
+ /* cylindrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
+ fbdim = ZZ;
+ v = do_fbposres_cylinder(fbdim, fm, dx, rfb, kk, bInvert);
break;
case efbposresX: /* fbdim=XX */
case efbposresY: /* fbdim=YY */
real lambda, real *dvdlambda,
int refcoord_scaling, int ePBC, rvec comA, rvec comB)
{
- int i, ai, m, d, type, ki, npbcdim = 0;
+ int i, ai, m, d, type, npbcdim = 0;
const t_iparams *pr;
real L1;
real vtot, kk, fm;
- real posA, posB, ref = 0;
- rvec comA_sc, comB_sc, rdist, dpdl, pos, dx;
+ rvec comA_sc, comB_sc, rdist, dpdl, dx;
gmx_bool bForceValid = TRUE;
if ((f == NULL) || (vir_diag == NULL)) /* should both be null together! */
real vtot = 0;
int ai, aj, ak, al, i, k, type, t1, t2, t3;
real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
- real phi, ddphi, ddp, ddp2, dp, sign, d2r, fc, L1;
+ real phi, ddphi, ddp, ddp2, dp, sign, d2r, L1;
rvec r_ij, r_kj, r_kl, m, n;
L1 = 1.0-lambda;
{
int i, d, ai, aj, ak, type, m;
int t1, t2;
- rvec r_ij, r_kj;
real v, vtot;
ivec jt, dt_ij, dt_kj;
rvec f_i, f_j, f_k;
t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
- t3 = pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
+ pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
/* This function computes factors needed for restricted angle potential.
pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
- t3 = pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
+ pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
/* \brief Compute factors for CBT potential
return vtot;
}
-int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
+//! \endcond
+
+/*! \brief Mysterious undocumented function */
+static int
+cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
{
int im1, ip1, ip2;
}
-real cmap_dihs(int nbonds,
- const t_iatom forceatoms[], const t_iparams forceparams[],
- const gmx_cmap_t *cmap_grid,
- const rvec x[], rvec f[], rvec fshift[],
- const t_pbc *pbc, const t_graph *g,
- real gmx_unused lambda, real gmx_unused *dvdlambda,
- const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
- int gmx_unused *global_atom_index)
+/*! \brief Compute CMAP dihedral energies and forces */
+static real
+cmap_dihs(int nbonds,
+ const t_iatom forceatoms[], const t_iparams forceparams[],
+ const gmx_cmap_t *cmap_grid,
+ const rvec x[], rvec f[], rvec fshift[],
+ const t_pbc *pbc, const t_graph *g,
+ real gmx_unused lambda, real gmx_unused *dvdlambda,
+ const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+ int gmx_unused *global_atom_index)
{
int i, j, k, n, idx;
int ai, aj, ak, al, am;
int t11, t21, t31, t12, t22, t32;
int iphi1, ip1m1, ip1p1, ip1p2;
int iphi2, ip2m1, ip2p1, ip2p2;
- int l1, l2, l3, l4;
- int pos1, pos2, pos3, pos4, tmp;
+ int l1, l2, l3;
+ int pos1, pos2, pos3, pos4;
real ty[4], ty1[4], ty2[4], ty12[4], tc[16], tx[16];
- real phi1, psi1, cos_phi1, sin_phi1, sign1, xphi1;
- real phi2, psi2, cos_phi2, sin_phi2, sign2, xphi2;
- real dx, xx, tt, tu, e, df1, df2, ddf1, ddf2, ddf12, vtot;
+ real phi1, cos_phi1, sin_phi1, sign1, xphi1;
+ real phi2, cos_phi2, sin_phi2, sign2, xphi2;
+ real dx, xx, tt, tu, e, df1, df2, vtot;
real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
real fg1, hg1, fga1, hgb1, gaa1, gbb1;
b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
- tmp = pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
+ pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
ra21 = iprod(a1, a1); /* 5 */
rb21 = iprod(b1, b1); /* 5 */
b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
- tmp = pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
+ pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
ra22 = iprod(a2, a2); /* 5 */
rb22 = iprod(b2, b2); /* 5 */
dx = 2*M_PI / cmap_grid->grid_spacing;
/* Where on the grid are we */
- iphi1 = (int)(xphi1/dx);
- iphi2 = (int)(xphi2/dx);
+ iphi1 = static_cast<int>(xphi1/dx);
+ iphi2 = static_cast<int>(xphi2/dx);
iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
e = 0;
df1 = 0;
df2 = 0;
- ddf1 = 0;
- ddf2 = 0;
- ddf12 = 0;
for (i = 3; i >= 0; i--)
{
e = tt * e + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
df1 = tu * df1 + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
df2 = tt * df2 + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
- ddf1 = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
- ddf2 = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
}
- ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
- 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
-
fac = RAD2DEG/dx;
df1 = df1 * fac;
df2 = df2 * fac;
- ddf1 = ddf1 * fac * fac;
- ddf2 = ddf2 * fac * fac;
- ddf12 = ddf12 * fac * fac;
/* CMAP energy */
vtot += e;
}
-
+//! \cond
/***********************************************************
*
* G R O M O S 9 6 F U N C T I O N S
/* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
* pp. 842-847
*/
- int i, ai, aj, ak, type, m, t1, t2, t3;
+ int i, ai, aj, ak, type, m, t1, t2;
rvec r_ij, r_kj, r_ik;
real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
rvec f_i, f_j, f_k;
/* Compute distance vectors ... */
t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
- t3 = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
+ pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
/* ... and their lengths */
r1 = norm(r_ij);
{
real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
int n0, nnn;
- real v, f, dvdlambda;
+ real dvdlambda;
k = (1.0 - lambda)*kA + lambda*kB;
VFtab = table->data;
rt = r*tabscale;
- n0 = rt;
+ n0 = static_cast<int>(rt);
if (n0 >= table->n)
{
gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
if (cos_theta2 < 1)
{
int m;
- real snt, st, sth;
+ real st, sth;
real cik, cii, ckk;
real nrkj2, nrij2;
rvec f_i, f_j, f_k;
return vtot;
}
-/* Return if this is a potential calculated in bondfree.c,
- * i.e. an interaction that actually calculates a potential and
- * works on multiple atoms (not e.g. a connection or a position restraint).
- */
-static gmx_inline gmx_bool ftype_is_bonded_potential(int ftype)
+//! \endcond
+
+gmx_bool
+ftype_is_bonded_potential(int ftype)
{
return
(interaction_function[ftype].flags & IF_BOND) &&
(ftype < F_GB12 || ftype > F_GB14);
}
-static void divide_bondeds_over_threads(t_idef *idef, int nthreads)
-{
- int ftype;
- int nat1;
- int t;
- int il_nr_thread;
-
- idef->nthreads = nthreads;
-
- if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc)
- {
- idef->il_thread_division_nalloc = F_NRE*(nthreads+1);
- snew(idef->il_thread_division, idef->il_thread_division_nalloc);
- }
-
- for (ftype = 0; ftype < F_NRE; ftype++)
- {
- if (ftype_is_bonded_potential(ftype))
- {
- nat1 = interaction_function[ftype].nratoms + 1;
-
- for (t = 0; t <= nthreads; t++)
- {
- /* Divide the interactions equally over the threads.
- * When the different types of bonded interactions
- * are distributed roughly equally over the threads,
- * this should lead to well localized output into
- * the force buffer on each thread.
- * If this is not the case, a more advanced scheme
- * (not implemented yet) will do better.
- */
- il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1;
-
- /* Ensure that distance restraint pairs with the same label
- * end up on the same thread.
- * This is slighlty tricky code, since the next for iteration
- * may have an initial il_nr_thread lower than the final value
- * in the previous iteration, but this will anyhow be increased
- * to the approriate value again by this while loop.
- */
- while (ftype == F_DISRES &&
- il_nr_thread > 0 &&
- il_nr_thread < idef->il[ftype].nr &&
- idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label ==
- idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label)
- {
- il_nr_thread += nat1;
- }
-
- idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread;
- }
- }
- }
-}
-
-static unsigned
-calc_bonded_reduction_mask(const t_idef *idef,
- int shift,
- int t, int nt)
-{
- unsigned mask;
- int ftype, nb, nat1, nb0, nb1, i, a;
-
- mask = 0;
-
- for (ftype = 0; ftype < F_NRE; ftype++)
- {
- if (ftype_is_bonded_potential(ftype))
- {
- nb = idef->il[ftype].nr;
- if (nb > 0)
- {
- nat1 = interaction_function[ftype].nratoms + 1;
-
- /* Divide this interaction equally over the threads.
- * This is not stored: should match division in calc_bonds.
- */
- nb0 = idef->il_thread_division[ftype*(nt+1)+t];
- nb1 = idef->il_thread_division[ftype*(nt+1)+t+1];
-
- for (i = nb0; i < nb1; i += nat1)
- {
- for (a = 1; a < nat1; a++)
- {
- mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
- }
- }
- }
- }
- }
-
- return mask;
-}
-
-void setup_bonded_threading(t_forcerec *fr, t_idef *idef)
-{
-#define MAX_BLOCK_BITS 32
- int t;
- int ctot, c, b;
-
- assert(fr->nthreads >= 1);
-
- /* Divide the bonded interaction over the threads */
- divide_bondeds_over_threads(idef, fr->nthreads);
-
- if (fr->nthreads == 1)
- {
- fr->red_nblock = 0;
-
- return;
- }
-
- /* We divide the force array in a maximum of 32 blocks.
- * Minimum force block reduction size is 2^6=64.
- */
- fr->red_ashift = 6;
- while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
- {
- fr->red_ashift++;
- }
- if (debug)
- {
- fprintf(debug, "bonded force buffer block atom shift %d bits\n",
- fr->red_ashift);
- }
-
- /* Determine to which blocks each thread's bonded force calculation
- * contributes. Store this is a mask for each thread.
- */
-#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
- for (t = 1; t < fr->nthreads; t++)
- {
- fr->f_t[t].red_mask =
- calc_bonded_reduction_mask(idef, fr->red_ashift, t, fr->nthreads);
- }
-
- /* Determine the maximum number of blocks we need to reduce over */
- fr->red_nblock = 0;
- ctot = 0;
- for (t = 0; t < fr->nthreads; t++)
- {
- c = 0;
- for (b = 0; b < MAX_BLOCK_BITS; b++)
- {
- if (fr->f_t[t].red_mask & (1U<<b))
- {
- fr->red_nblock = max(fr->red_nblock, b+1);
- c++;
- }
- }
- if (debug)
- {
- fprintf(debug, "thread %d flags %x count %d\n",
- t, fr->f_t[t].red_mask, c);
- }
- ctot += c;
- }
- if (debug)
- {
- fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
- fr->red_nblock, 1<<fr->red_ashift);
- fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
- ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
- ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
- }
-}
-
+/*! \brief Zero thread-local force-output buffers */
static void zero_thread_forces(f_thread_t *f_t, int n,
int nblock, int blocksize)
{
if (f_t->red_mask && (1U<<b))
{
a0 = b*blocksize;
- a1 = min((b+1)*blocksize, n);
+ a1 = std::min((b+1)*blocksize, n);
for (a = a0; a < a1; a++)
{
clear_rvec(f_t->f[a]);
}
}
+/*! \brief The max thread number is arbitrary, we used a fixed number
+ * to avoid memory management. Using more than 16 threads is probably
+ * never useful performance wise. */
+#define MAX_BONDED_THREADS 256
+
+/*! \brief Reduce thread-local force buffers */
static void reduce_thread_force_buffer(int n, rvec *f,
int nthreads, f_thread_t *f_t,
int nblock, int block_size)
{
- /* The max thread number is arbitrary,
- * we used a fixed number to avoid memory management.
- * Using more than 16 threads is probably never useful performance wise.
- */
-#define MAX_BONDED_THREADS 256
int b;
if (nthreads > MAX_BONDED_THREADS)
/* Reduce force buffers for threads that contribute */
a0 = b *block_size;
a1 = (b+1)*block_size;
- a1 = min(a1, n);
+ a1 = std::min(a1, n);
for (a = a0; a < a1; a++)
{
for (fb = 0; fb < nfb; fb++)
}
}
+/*! \brief Reduce thread-local forces */
static void reduce_thread_forces(int n, rvec *f, rvec *fshift,
real *ener, gmx_grppairener_t *grpp, real *dvdl,
int nthreads, f_thread_t *f_t,
}
}
-static real calc_one_bond(FILE *fplog, int thread,
+/*! \brief Calculate one element of the list of bonded interactions
+ for this thread */
+static real calc_one_bond(int thread,
int ftype, const t_idef *idef,
- rvec x[], rvec f[], rvec fshift[],
+ const rvec x[], rvec f[], rvec fshift[],
t_forcerec *fr,
const t_pbc *pbc, const t_graph *g,
gmx_grppairener_t *grpp,
real *lambda, real *dvdl,
const t_mdatoms *md, t_fcdata *fcd,
gmx_bool bCalcEnerVir,
- int *global_atom_index, gmx_bool bPrintSepPot)
+ int *global_atom_index)
{
int nat1, nbonds, efptFTYPE;
real v = 0;
{
v = cmap_dihs(nbn, iatoms+nb0,
idef->iparams, &idef->cmap_grid,
- (const rvec*)x, f, fshift,
+ x, f, fshift,
pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
md, fcd, global_atom_index);
}
/* No energies, shift forces, dvdl */
angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
idef->iparams,
- (const rvec*)x, f,
+ x, f,
pbc, g, lambda[efptFTYPE], md, fcd,
global_atom_index);
v = 0;
#endif
(nbn, idef->il[ftype].iatoms+nb0,
idef->iparams,
- (const rvec*)x, f,
+ x, f,
pbc, g, lambda[efptFTYPE], md, fcd,
global_atom_index);
v = 0;
}
+ #ifdef GMX_SIMD_HAVE_REAL
+ else if (ftype == F_RBDIHS &&
+ !bCalcEnerVir && fr->efep == efepNO)
+ {
+ /* No energies, shift forces, dvdl */
+ rbdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
+ idef->iparams,
+ (const rvec*)x, f,
+ pbc, g, lambda[efptFTYPE], md, fcd,
+ global_atom_index);
+ v = 0;
+ }
+ #endif
else
{
v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
idef->iparams,
- (const rvec*)x, f, fshift,
+ x, f, fshift,
pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
md, fcd, global_atom_index);
}
- if (bPrintSepPot)
- {
- fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n",
- interaction_function[ftype].longname,
- nbonds, v, lambda[efptFTYPE]);
- }
}
else
{
- v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
+ v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift,
pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
-
- if (bPrintSepPot)
- {
- fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_LJ14].longname, nbonds, dvdl[efptVDW]);
- fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_COUL14].longname, nbonds, dvdl[efptCOUL]);
- }
}
if (thread == 0)
return v;
}
-void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
+void calc_bonds(const gmx_multisim_t *ms,
const t_idef *idef,
- rvec x[], history_t *hist,
+ const rvec x[], history_t *hist,
rvec f[], t_forcerec *fr,
- const t_pbc *pbc, const t_graph *g,
+ const struct t_pbc *pbc, const struct t_graph *g,
gmx_enerdata_t *enerd, t_nrnb *nrnb,
real *lambda,
const t_mdatoms *md,
t_fcdata *fcd, int *global_atom_index,
- t_atomtypes gmx_unused *atype, gmx_genborn_t gmx_unused *born,
- int force_flags,
- gmx_bool bPrintSepPot, gmx_int64_t step)
+ int force_flags)
{
gmx_bool bCalcEnerVir;
int i;
- real v, dvdl[efptNR], dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
+ real dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
of lambda, which will be thrown away in the end*/
const t_pbc *pbc_null;
- char buf[22];
int thread;
assert(fr->nthreads == idef->nthreads);
{
pbc_null = NULL;
}
- if (bPrintSepPot)
- {
- fprintf(fplog, "Step %s: bonded V and dVdl for this rank\n",
- gmx_step_str(step, buf));
- }
#ifdef DEBUG
if (g && debug)
enerd->term[F_ORIRESDEV] =
calc_orires_dev(ms, idef->il[F_ORIRES].nr,
idef->il[F_ORIRES].iatoms,
- idef->iparams, md, (const rvec*)x,
+ idef->iparams, md, x,
pbc_null, fcd, hist);
}
if (idef->il[F_DISRES].nr)
{
calc_disres_R_6(idef->il[F_DISRES].nr,
idef->il[F_DISRES].iatoms,
- idef->iparams, (const rvec*)x, pbc_null,
+ idef->iparams, x, pbc_null,
fcd, hist);
#ifdef GMX_MPI
if (fcd->disres.nsystems > 1)
{
if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
{
- v = calc_one_bond(fplog, thread, ftype, idef, x,
+ v = calc_one_bond(thread, ftype, idef, x,
ft, fshift, fr, pbc_null, g, grpp,
nrnb, lambda, dvdlt,
md, fcd, bCalcEnerVir,
- global_atom_index, bPrintSepPot);
+ global_atom_index);
epot[ftype] += v;
}
}
}
}
-void calc_bonds_lambda(FILE *fplog,
- const t_idef *idef,
- rvec x[],
+void calc_bonds_lambda(const t_idef *idef,
+ const rvec x[],
t_forcerec *fr,
- const t_pbc *pbc, const t_graph *g,
+ const struct t_pbc *pbc, const struct t_graph *g,
gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
real *lambda,
const t_mdatoms *md,
t_fcdata *fcd,
int *global_atom_index)
{
- int i, ftype, nr_nonperturbed, nr;
+ int ftype, nr_nonperturbed, nr;
real v;
real dvdl_dum[efptNR];
rvec *f, *fshift;
if (nr - nr_nonperturbed > 0)
{
- v = calc_one_bond(fplog, 0, ftype, &idef_fe,
+ v = calc_one_bond(0, ftype, &idef_fe,
x, f, fshift, fr, pbc_null, g,
grpp, nrnb, lambda, dvdl_dum,
md, fcd, TRUE,
- global_atom_index, FALSE);
+ global_atom_index);
epot[ftype] += v;
}
}
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include <stdio.h>
-#include <time.h>
+#include "gromacs/legacyheaders/domdec.h"
+
+#include "config.h"
+
+#include <assert.h>
#include <math.h>
-#include <string.h>
+#include <stdio.h>
#include <stdlib.h>
-#include <assert.h>
+#include <string.h>
+#include <time.h>
+
+#include <algorithm>
-#include "typedefs.h"
-#include "gromacs/utility/smalloc.h"
-#include "gmx_fatal.h"
-#include "gmx_fatal_collective.h"
-#include "vec.h"
-#include "domdec.h"
-#include "domdec_network.h"
-#include "nrnb.h"
-#include "pbc.h"
-#include "chargegroup.h"
-#include "constr.h"
-#include "mdatoms.h"
-#include "names.h"
-#include "force.h"
-#include "pme.h"
-#include "mdrun.h"
-#include "nsgrid.h"
-#include "shellfc.h"
-#include "mtop_util.h"
-#include "gmx_ga2la.h"
-#include "macros.h"
-#include "nbnxn_search.h"
-#include "bondf.h"
-#include "gmx_omp_nthreads.h"
-#include "gpu_utils.h"
-
-#include "gromacs/fileio/futil.h"
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/pdbio.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/legacyheaders/bonded-threading.h"
+#include "gromacs/legacyheaders/chargegroup.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/domdec_network.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_ga2la.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/gpu_utils.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/nsgrid.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/shellfc.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/listed-forces/bonded.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/pulling/pull_rotation.h"
+#include "gromacs/swap/swapcoords.h"
#include "gromacs/timing/wallcycle.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/basenetwork.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/swap/swapcoords.h"
#include "gromacs/utility/qsort_threadsafe.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/imd/imd.h"
+#include "gromacs/utility/smalloc.h"
#define DDRANK(dd, rank) (rank)
#define DDMASTERRANK(dd) (dd->masterrank)
/* The DLB option */
int eDLB;
+ /* Is eDLB=edlbAUTO locked such that we currently can't turn it on? */
+ gmx_bool bDLB_locked;
/* Are we actually using DLB? */
gmx_bool bDynLoadBal;
int eFlop;
double flop;
int flop_n;
- /* Have often have did we have load measurements */
+ /* How many times have did we have load measurements */
int n_load_have;
- /* Have often have we collected the load measurements */
+ /* How many times have we collected the load measurements */
int n_load_collect;
/* Statistics */
rvec *buf, *sbuf;
ivec vis;
int is;
- gmx_bool bPBC, bScrew;
+ gmx_bool bShiftForcesNeedPbc, bScrew;
comm = dd->comm;
buf = comm->vbuf.v;
- n = 0;
nzone = comm->zones.n/2;
nat_tot = dd->nat_tot;
for (d = dd->ndim-1; d >= 0; d--)
{
- bPBC = (dd->ci[dd->dim[d]] == 0);
- bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
+ /* Only forces in domains near the PBC boundaries need to
+ consider PBC in the treatment of fshift */
+ bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0);
+ bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
if (fshift == NULL && !bScrew)
{
- bPBC = FALSE;
+ bShiftForcesNeedPbc = FALSE;
}
/* Determine which shift vector we need */
clear_ivec(vis);
index = ind->index;
/* Add the received forces */
n = 0;
- if (!bPBC)
+ if (!bShiftForcesNeedPbc)
{
for (i = 0; i < ind->nsend[nzone]; i++)
{
}
else if (!bScrew)
{
+ /* fshift should always be defined if this function is
+ * called when bShiftForcesNeedPbc is true */
+ assert(NULL != fshift);
for (i = 0; i < ind->nsend[nzone]; i++)
{
at0 = cgindex[index[i]];
buf = &comm->vbuf.v[0][0];
- n = 0;
nzone = comm->zones.n/2;
nat_tot = dd->nat_tot;
for (d = dd->ndim-1; d >= 0; d--)
static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
rvec cell_ns_x0, rvec cell_ns_x1)
{
- int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
+ int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
gmx_ddzone_t *zp;
gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
if (bPBC)
{
/* Take the minimum to avoid double communication */
- npulse_min = min(npulse, dd->nc[dim]-1-npulse);
+ npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
}
else
{
{
for (d1 = d; d1 < dd->ndim-1; d1++)
{
- extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
- extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
- extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
+ extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
+ extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
+ extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
}
}
}
{
if (bUse)
{
- buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
- buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
- buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
+ buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
+ buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
+ buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
}
if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
}
if (bUse && dh[d1] >= 0)
{
- buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
- buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
+ buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
+ buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
}
}
/* Copy the received buffer to the send buffer,
for (d1 = d; d1 < dd->ndim-1; d1++)
{
- extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
- extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
- extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
+ extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
+ extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
+ extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
pos++;
}
{
print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
}
- cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
- cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
+ cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
+ cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
}
}
if (dd->ndim >= 3)
{
print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
}
- cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
- cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
+ cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
+ cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
}
}
}
void dd_collect_vec(gmx_domdec_t *dd,
t_state *state_local, rvec *lv, rvec *v)
{
- gmx_domdec_master_t *ma;
- int n, i, c, a, nalloc = 0;
- rvec *buf = NULL;
-
dd_collect_cg(dd, state_local);
if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
{
gmx_domdec_master_t *ma;
int *scounts = NULL, *disps = NULL;
- int n, i, c, a, nalloc = 0;
+ int n, i, c, a;
rvec *buf = NULL;
if (DDMASTER(dd))
snew(grid_r, 2*dd->nnodes);
}
- dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
+ dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
if (DDMASTER(dd))
{
r = comm->cellsize_min[dd->dim[0]];
for (di = 1; di < dd->ndim; di++)
{
- r = min(r, comm->cellsize_min[dd->dim[di]]);
+ r = std::min(r, comm->cellsize_min[dd->dim[di]]);
}
if (comm->bBondComm)
{
- r = max(r, comm->cutoff_mbody);
+ r = std::max(r, comm->cutoff_mbody);
}
else
{
- r = min(r, comm->cutoff);
+ r = std::min(r, comm->cutoff);
}
}
}
r_mb = dd_cutoff_mbody(dd);
- return max(dd->comm->cutoff, r_mb);
+ return std::max(dd->comm->cutoff, r_mb);
}
static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
{
gmx_domdec_t *dd;
- ivec coords, coords_pme, nc;
+ ivec coords;
int slab;
dd = cr->dd;
{
gmx_domdec_t *dd;
gmx_domdec_comm_t *comm;
- ivec coord, coord_pme;
int i;
int pmenode = -1;
if (comm->bCartesianPP_PME)
{
#ifdef GMX_MPI
+ ivec coord, coord_pme;
MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
{
static gmx_bool receive_vir_ener(t_commrec *cr)
{
gmx_domdec_comm_t *comm;
- int pmenode, coords[DIM], rank;
+ int pmenode;
gmx_bool bReceive;
bReceive = TRUE;
{
pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
#ifdef GMX_MPI
+ ivec coords;
MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
coords[comm->cartpmedim]++;
if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
{
+ int rank;
MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
if (dd_simnode2pmenode(cr, rank) == pmenode)
{
{
int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
int *zone2cg, *zone_ncg1, *index_gl, *gatindex;
- gmx_ga2la_t *ga2la;
- char *bLocalCG;
gmx_bool bCGs;
- bLocalCG = dd->comm->bLocalCG;
-
if (dd->nat_tot > dd->gatindex_nalloc)
{
dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
const char *where)
{
- int ncg, i, ngl, nerr;
+ int i, ngl, nerr;
nerr = 0;
if (bLocalCG == NULL)
/* The cut-off might have changed, e.g. by PME load balacning,
* from the value used to set comm->cellsize_min, so check it.
*/
- cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
+ cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
if (comm->bPMELoadBalDLBLimits)
{
/* Check for the cut-off limit set by the PME load balancing */
- cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
+ cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
}
}
{
if (comm->bPMELoadBalDLBLimits)
{
- cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
+ cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
}
- grid_jump_limit = max(grid_jump_limit,
- cutoff/comm->cd[dim_ind].np);
+ grid_jump_limit = std::max(grid_jump_limit,
+ cutoff/comm->cd[dim_ind].np);
}
return grid_jump_limit;
{
slab = pmeindex % ddpme->nslab;
}
- ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
- ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
+ ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
+ ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
}
}
{
npulse[d]++;
}
- cellsize_min[d] = min(cellsize_min[d], cellsize);
+ cellsize_min[d] = std::min(cellsize_min[d], cellsize);
}
if (setmode == setcellsizeslbLOCAL)
{
gmx_bool bUniform, gmx_int64_t step)
{
gmx_domdec_comm_t *comm;
- int ncd, d1, i, j, pos;
+ int ncd, d1, i, pos;
real *cell_size;
real load_aver, load_i, imbalance, change, change_max, sc;
real cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
cell_size[i] = 1.0/ncd;
}
}
- else if (dd_load_count(comm))
+ else if (dd_load_count(comm) > 0)
{
load_aver = comm->load[d].sum_m/ncd;
change_max = 0;
imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
/* Determine the change of the cell size using underrelaxation */
change = -relax*imbalance;
- change_max = max(change_max, max(change, -change));
+ change_max = std::max(change_max, std::max(change, -change));
}
/* Limit the amount of scaling.
* We need to use the same rescaling for all cells in one row,
gmx_ddbox_t *ddbox)
{
gmx_domdec_comm_t *comm;
- int d1, dim1, pos;
+ int d1, pos;
comm = dd->comm;
{
gmx_domdec_master_t *ma;
int **tmp_ind = NULL, *tmp_nalloc = NULL;
- int i, icg, j, k, k0, k1, d, npbcdim;
+ int i, icg, j, k, k0, k1, d;
matrix tcm;
- rvec box_size, cg_cm;
+ rvec cg_cm;
ivec ind;
real nrcg, inv_ncg, pos_d;
atom_id *cgindex;
- gmx_bool bUnbounded, bScrew;
+ gmx_bool bScrew;
ma = dd->ma;
}
dd_scatterv(dd,
- DDMASTER(dd) ? ma->ibuf : NULL,
- DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
- DDMASTER(dd) ? ma->cg : NULL,
+ bMaster ? ma->ibuf : NULL,
+ bMaster ? ma->ibuf+dd->nnodes : NULL,
+ bMaster ? ma->cg : NULL,
dd->ncg_home*sizeof(int), dd->index_gl);
/* Determine the home charge group sizes */
static void print_cg_move(FILE *fplog,
gmx_domdec_t *dd,
gmx_int64_t step, int cg, int dim, int dir,
- gmx_bool bHaveLimitdAndCMOld, real limitd,
+ gmx_bool bHaveCgcmOld, real limitd,
rvec cm_old, rvec cm_new, real pos_d)
{
gmx_domdec_comm_t *comm;
comm = dd->comm;
fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
- if (bHaveLimitdAndCMOld)
+ if (limitd > 0)
{
- fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
+ fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
+ dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim));
}
else
{
- fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
+ /* We don't have a limiting distance available: don't print it */
+ fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
+ dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
ddglatnr(dd, dd->cgindex[cg]), dim2char(dim));
}
fprintf(fplog, "distance out of cell %f\n",
dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
- if (bHaveLimitdAndCMOld)
+ if (bHaveCgcmOld)
{
fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
cm_old[XX], cm_old[YY], cm_old[ZZ]);
static void cg_move_error(FILE *fplog,
gmx_domdec_t *dd,
gmx_int64_t step, int cg, int dim, int dir,
- gmx_bool bHaveLimitdAndCMOld, real limitd,
+ gmx_bool bHaveCgcmOld, real limitd,
rvec cm_old, rvec cm_new, real pos_d)
{
if (fplog)
{
print_cg_move(fplog, dd, step, cg, dim, dir,
- bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
+ bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
}
print_cg_move(stderr, dd, step, cg, dim, dir,
- bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d);
+ bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
gmx_fatal(FARGS,
- "A charge group moved too far between two domain decomposition steps\n"
- "This usually means that your system is not well equilibrated");
+ "%s moved too far between two domain decomposition steps\n"
+ "This usually means that your system is not well equilibrated",
+ dd->comm->bCGs ? "A charge group" : "An atom");
}
static void rotate_state_atom(t_state *state, int a)
int *move)
{
int npbcdim;
- int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
- int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
+ int cg, k, k0, k1, d, dim, d2;
+ int mc, nrcg;
int flag;
gmx_bool bScrew;
ivec dev;
{
if (pos_d >= limit1[d])
{
- cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d],
+ cg_move_error(fplog, dd, step, cg, d, 1,
+ cg_cm != state->x, limitd[d],
cg_cm[cg], cm_new, pos_d);
}
dev[d] = 1;
{
if (pos_d < limit0[d])
{
- cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d],
+ cg_move_error(fplog, dd, step, cg, d, -1,
+ cg_cm != state->x, limitd[d],
cg_cm[cg], cm_new, pos_d);
}
dev[d] = -1;
int *move;
int npbcdim;
int ncg[DIM*2], nat[DIM*2];
- int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
- int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
+ int c, i, cg, k, d, dim, dim2, dir, d2, d3;
+ int mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
int sbuf[2], rbuf[2];
int home_pos_cg, home_pos_at, buf_pos;
int flag;
gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE;
- gmx_bool bScrew;
- ivec dev;
- real inv_ncg, pos_d;
+ real pos_d;
matrix tcm;
- rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
+ rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1;
atom_id *cgindex;
cginfo_mb_t *cginfo_mb;
gmx_domdec_comm_t *comm;
{
dim = dd->dim[d];
ncg_recv = 0;
- nat_recv = 0;
nvr = 0;
for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
{
comm->cgcm_state[cdd], nvs,
comm->vbuf.v+nvr, i);
ncg_recv += rbuf[0];
- nat_recv += rbuf[1];
nvr += i;
}
{
cg_move_error(fplog, dd, step, cg, dim,
(flag & DD_FLAG_FW(d)) ? 1 : 0,
- FALSE, 0,
+ fr->cutoff_scheme == ecutsGROUP, 0,
comm->vbuf.v[buf_pos],
comm->vbuf.v[buf_pos],
comm->vbuf.v[buf_pos][dim]);
gmx_domdec_comm_t *comm;
gmx_domdec_load_t *load;
gmx_domdec_root_t *root = NULL;
- int d, dim, cid, i, pos;
+ int d, dim, i, pos;
float cell_frac = 0, sbuf[DD_NLOAD_MAX];
gmx_bool bSepPME;
for (i = 0; i < dd->nc[dim]; i++)
{
load->sum += load->load[pos++];
- load->max = max(load->max, load->load[pos]);
+ load->max = std::max(load->max, load->load[pos]);
pos++;
if (dd->bGridJump)
{
/* This direction could not be load balanced properly,
* therefore we need to use the maximum iso the average load.
*/
- load->sum_m = max(load->sum_m, load->load[pos]);
+ load->sum_m = std::max(load->sum_m, load->load[pos]);
}
else
{
load->sum_m += load->load[pos];
}
pos++;
- load->cvol_min = min(load->cvol_min, load->load[pos]);
+ load->cvol_min = std::min(load->cvol_min, load->load[pos]);
pos++;
if (d < dd->ndim-1)
{
}
if (bSepPME)
{
- load->mdf = max(load->mdf, load->load[pos]);
+ load->mdf = std::max(load->mdf, load->load[pos]);
pos++;
- load->pme = max(load->pme, load->load[pos]);
+ load->pme = std::max(load->pme, load->load[pos]);
pos++;
}
}
void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
{
- gmx_bool bZYX;
int d, dim, i, j, m;
ivec tmp, s;
int nzone, nzonep;
static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
{
gmx_domdec_t *dd;
+ dd = cr->dd;
+
+#ifdef GMX_MPI
gmx_domdec_comm_t *comm;
- int i, rank, *buf;
+ int rank, *buf;
ivec periods;
-#ifdef GMX_MPI
MPI_Comm comm_cart;
-#endif
- dd = cr->dd;
comm = dd->comm;
-#ifdef GMX_MPI
if (comm->bCartesianPP)
{
/* Set up cartesian communication for the particle-particle part */
dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
}
- for (i = 0; i < DIM; i++)
+ for (int i = 0; i < DIM; i++)
{
periods[i] = TRUE;
}
/* Determine the master coordinates and rank.
* The DD master should be the same node as the master of this sim.
*/
- for (i = 0; i < dd->nnodes; i++)
+ for (int i = 0; i < dd->nnodes; i++)
{
if (comm->ddindex2simnodeid[i] == 0)
{
}
}
-static void receive_ddindex2simnodeid(t_commrec *cr)
+static void receive_ddindex2simnodeid(t_commrec gmx_unused *cr)
{
+#ifdef GMX_MPI
gmx_domdec_t *dd;
-
gmx_domdec_comm_t *comm;
- int *buf;
dd = cr->dd;
comm = dd->comm;
-#ifdef GMX_MPI
if (!comm->bCartesianPP_PME && comm->bCartesianPP)
{
+ int *buf;
snew(comm->ddindex2simnodeid, dd->nnodes);
snew(buf, dd->nnodes);
if (cr->duty & DUTY_PP)
{
buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
}
-#ifdef GMX_MPI
/* Communicate the ddindex to simulation nodeid index */
MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
cr->mpi_comm_mysim);
-#endif
sfree(buf);
}
#endif
{
gmx_domdec_t *dd;
gmx_domdec_comm_t *comm;
- int i, rank;
+ int i;
gmx_bool bDiv[DIM];
- ivec periods;
#ifdef GMX_MPI
MPI_Comm comm_cart;
#endif
#ifdef GMX_MPI
if (comm->bCartesianPP_PME)
{
+ int rank;
+ ivec periods;
+
if (fplog)
{
fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
}
MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
&comm_cart);
-
MPI_Comm_rank(comm_cart, &rank);
if (MASTERNODE(cr) && rank != 0)
{
for (i = 0; i < nc; i++)
{
dbl = 0;
- sscanf(size_string, "%lf%n", &dbl, &n);
+ sscanf(size_string, "%20lf%n", &dbl, &n);
if (dbl == 0)
{
gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
val = getenv(env_var);
if (val)
{
- if (sscanf(val, "%d", &nst) <= 0)
+ if (sscanf(val, "%20d", &nst) <= 0)
{
nst = 1;
}
{
d = dd->dim[di];
/* Check using the initial average cell size */
- r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
+ r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
}
return r;
const char *dlb_opt, gmx_bool bRecordLoad,
unsigned long Flags, t_inputrec *ir)
{
- gmx_domdec_t *dd;
int eDLB = -1;
char buf[STRLEN];
gmx_domdec_t *dd;
gmx_domdec_comm_t *comm;
int recload;
- int d, i, j;
real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
gmx_bool bC;
char buf[STRLEN];
+ const real tenPercentMargin = 1.1;
if (fplog)
{
/* Initialize to GPU share count to 0, might change later */
comm->nrank_gpu_shared = 0;
- comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
+ comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir);
+ comm->bDLB_locked = FALSE;
comm->bDynLoadBal = (comm->eDLB == edlbYES);
if (fplog)
comm->cellsize_limit = 0;
comm->bBondComm = FALSE;
- comm->cellsize_limit = max(comm->cellsize_limit,
- ir->rlistlong - max(ir->rvdw, ir->rcoulomb));
+ /* Atoms should be able to move by up to half the list buffer size (if > 0)
+ * within nstlist steps. Since boundaries are allowed to displace by half
+ * a cell size, DD cells should be at least the size of the list buffer.
+ */
++ comm->cellsize_limit = std::max(comm->cellsize_limit,
++ ir->rlistlong - std::max(ir->rvdw, ir->rcoulomb));
+
if (comm->bInterCGBondeds)
{
if (comm_distance_min > 0)
}
else
{
- comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
+ comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
}
r_bonded_limit = comm->cutoff_mbody;
}
*/
if (Flags & MD_DDBONDCOMM)
{
- if (max(r_2b, r_mb) > comm->cutoff)
+ if (std::max(r_2b, r_mb) > comm->cutoff)
{
- r_bonded = max(r_2b, r_mb);
- r_bonded_limit = 1.1*r_bonded;
+ r_bonded = std::max(r_2b, r_mb);
+ r_bonded_limit = tenPercentMargin*r_bonded;
comm->bBondComm = TRUE;
}
else
{
r_bonded = r_mb;
- r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
+ r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
}
/* We determine cutoff_mbody later */
}
/* No special bonded communication,
* simply increase the DD cut-off.
*/
- r_bonded_limit = 1.1*max(r_2b, r_mb);
+ r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb);
comm->cutoff_mbody = r_bonded_limit;
- comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
+ comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
}
}
- comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
+ comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
if (fplog)
{
fprintf(fplog,
"User supplied maximum distance required for P-LINCS: %.3f nm\n",
rconstr);
}
- comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
+ comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
comm->cgs_gl = gmx_mtop_global_cgs(mtop);
if (comm->eDLB != edlbNO)
{
/* Check if this does not limit the scaling */
- comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
+ comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
}
if (!comm->bBondComm)
{
/* Without bBondComm do not go beyond the n.b. cut-off */
- comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
+ comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
if (comm->cellsize_limit >= comm->cutoff)
{
/* We don't loose a lot of efficieny
}
}
/* Check if we did not end up below our original limit */
- comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
+ comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
if (comm->cutoff_mbody > comm->cellsize_limit)
{
cellsize_min = comm->cellsize_min[dd->dim[0]];
for (d = 1; d < dd->ndim; d++)
{
- cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
+ cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
}
if (cellsize_min < comm->cellsize_limit*1.05)
t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
{
gmx_domdec_comm_t *comm;
- gmx_bool bBondComm;
- int d;
dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
limit = dd->comm->cellsize_min[XX];
for (d = 1; d < DIM; d++)
{
- limit = min(limit, dd->comm->cellsize_min[d]);
+ limit = std::min(limit, dd->comm->cellsize_min[d]);
}
}
{
fprintf(fplog, "%40s %-7s %6.3f nm\n",
"two-body bonded interactions", "(-rdd)",
- max(comm->cutoff, comm->cutoff_mbody));
+ std::max(comm->cutoff, comm->cutoff_mbody));
fprintf(fplog, "%40s %-7s %6.3f nm\n",
"multi-body bonded interactions", "(-rdd)",
- (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
+ (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
}
if (dd->vsite_comm)
{
/* Determine the maximum number of comm. pulses in one dimension */
- comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
+ comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
/* Determine the maximum required number of grid pulses */
if (comm->cellsize_limit >= comm->cutoff)
else
{
/* There is no cell size limit */
- npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
+ npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
}
if (!bNoCutOff && npulse > 1)
dim = dd->dim[d];
npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
/(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
- npulse_d_max = max(npulse_d_max, npulse_d);
+ npulse_d_max = std::max(npulse_d_max, npulse_d);
}
- npulse = min(npulse, npulse_d_max);
+ npulse = std::min(npulse, npulse_d_max);
}
/* This env var can override npulse */
comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
for (d = 0; d < dd->ndim; d++)
{
- comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1);
+ comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1);
comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
- comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
+ comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
{
comm->bVacDLBNoLimit = FALSE;
/* cellsize_limit is set for LINCS in init_domain_decomposition */
if (!comm->bVacDLBNoLimit)
{
- comm->cellsize_limit = max(comm->cellsize_limit,
- comm->cutoff/comm->maxpulse);
+ comm->cellsize_limit = std::max(comm->cellsize_limit,
+ comm->cutoff/comm->maxpulse);
}
- comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
+ comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
/* Set the minimum cell size for each DD dimension */
for (d = 0; d < dd->ndim; d++)
{
}
if (comm->cutoff_mbody <= 0)
{
- comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
+ comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
}
if (comm->bDynLoadBal)
{
}
natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
- dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
+ dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
}
static gmx_bool test_dd_cutoff(t_commrec *cr,
comm->PMELoadBal_max_cutoff = comm->cutoff;
}
+ gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd)
+ {
+ return dd->comm->bDLB_locked;
+ }
+
+ void dd_dlb_set_lock(gmx_domdec_t *dd, gmx_bool bValue)
+ {
+ /* We can only lock the DLB when it is set to auto, otherwise don't lock */
+ if (dd->comm->eDLB == edlbAUTO)
+ {
+ dd->comm->bDLB_locked = bValue;
+ }
+ }
+
static void merge_cg_buffers(int ncell,
gmx_domdec_comm_dim_t *cd, int pulse,
int *ncg_cell,
c->c[1][1] = comm->cell_x0[dim1];
if (dd->bGridJump)
{
- c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
+ c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
if (bDistMB)
{
/* For the multi-body distance we need the maximum */
- c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
+ c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
}
}
/* Set the upper-right corner for rounding */
if (j >= 4)
{
c->c[2][j-4] =
- max(c->c[2][j-4],
- comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
+ std::max(c->c[2][j-4],
+ comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
}
}
}
{
for (j = 0; j < 2; j++)
{
- c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
+ c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
}
}
}
c->cr1[3] = comm->cell_x1[dim1];
if (dd->bGridJump)
{
- c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
+ c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
if (bDistMB)
{
/* For the multi-body distance we need the maximum */
- c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
+ c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
}
}
}
{
int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
int nzone, nzone_send, zone, zonei, cg0, cg1;
- int c, i, j, cg, cg_gl, nrcg;
+ int c, i, cg, cg_gl, nrcg;
int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
gmx_domdec_comm_t *comm;
gmx_domdec_zones_t *zones;
gmx_domdec_ind_t *ind;
cginfo_mb_t *cginfo_mb;
gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded;
- real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
+ real r_comm2, r_bcomm2;
dd_corners_t corners;
ivec tric_dist;
rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
{
- dim = dd->dim[dim_ind];
-
/* Check if we need to use triclinic distances */
tric_dist[dim_ind] = 0;
for (i = 0; i <= dim_ind; i++)
/* The rvec buffer is also required for atom buffers
* of size nrecv in dd_move_x and dd_move_f.
*/
- i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
+ i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
vec_rvec_check_alloc(&comm->vbuf2, i);
}
}
gmx_domdec_comm_t *comm;
gmx_domdec_zones_t *zones;
gmx_bool bDistMB;
- int z, zi, zj0, zj1, d, dim;
+ int z, zi, d, dim;
real rcs, rcmbs;
int i, j;
- real size_j, add_tric;
real vol;
comm = dd->comm;
* With multiple pulses this will lead
* to a larger zone then strictly necessary.
*/
- zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
- zones->size[zi].x1[dim]+rcmbs);
+ zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
+ zones->size[zi].x1[dim]+rcmbs);
}
}
}
{
if (zones->shift[z][dim] > 0)
{
- zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
- zones->size[zi].x1[dim]+rcs);
+ zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
+ zones->size[zi].x1[dim]+rcs);
}
}
}
{
for (i = 0; i < DIM; i++)
{
- corner_min[i] = min(corner_min[i], corner[i]);
- corner_max[i] = max(corner_max[i], corner[i]);
+ corner_min[i] = std::min(corner_min[i], corner[i]);
+ corner_max[i] = std::max(corner_max[i], corner[i]);
}
}
}
{
gmx_domdec_sort_t *sort;
gmx_cgsort_t *cgsort, *sort_i;
- int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
- int sort_last, sort_skip;
+ int ncg_new, nsort2, nsort_new, i, *a, moved;
sort = dd->comm->sort;
int ncg_home_old)
{
gmx_domdec_sort_t *sort;
- gmx_cgsort_t *cgsort, *sort_i;
+ gmx_cgsort_t *cgsort;
int *cgindex;
int ncg_new, i, *ibuf, cgsize;
rvec *vbuf;
t_block *cgs_gl;
gmx_int64_t step_pcoupl;
rvec cell_ns_x0, cell_ns_x1;
- int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
+ int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
gmx_bool bRedist, bSortCG, bResortAll;
ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
}
/* Check if we have recorded loads on the nodes */
- if (comm->bRecordLoad && dd_load_count(comm))
+ if (comm->bRecordLoad && dd_load_count(comm) > 0)
{
- if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
+ if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal && !dd_dlb_is_locked(dd))
{
/* Check if we should use DLB at the second partitioning
* and every 100 partitionings,
* so the extra communication cost is negligible.
*/
- n = std::max(100, nstglobalcomm);
+ const int nddp_chk_dlb = 100;
-
bCheckDLB = (comm->n_load_collect == 0 ||
- comm->n_load_have % n == n-1);
+ comm->n_load_have % nddp_chk_dlb == nddp_chk_dlb - 1);
}
else
{
/* Since the timings are node dependent, the master decides */
if (DDMASTER(dd))
{
- bTurnOnDLB =
- (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
+ /* Here we check if the max PME rank load is more than 0.98
+ * the max PP force load. If so, PP DLB will not help,
+ * since we are (almost) limited by PME. Furthermore,
+ * DLB will cause a significant extra x/f redistribution
+ * cost on the PME ranks, which will then surely result
+ * in lower total performance.
+ * This check might be fragile, since one measurement
+ * below 0.98 (although only done once every 100 DD part.)
+ * could turn on DLB for the rest of the run.
+ */
+ if (cr->npmenodes > 0 &&
+ dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON)
+ {
+ bTurnOnDLB = FALSE;
+ }
+ else
+ {
+ bTurnOnDLB =
+ (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON);
+ }
if (debug)
{
fprintf(debug, "step %s, imb loss %f\n",
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "gromacs/legacyheaders/force.h"
+
+#include "config.h"
+#include <assert.h>
#include <math.h>
#include <string.h>
-#include <assert.h>
-#include "sysstuff.h"
-#include "typedefs.h"
-#include "macros.h"
-#include "gromacs/utility/smalloc.h"
-#include "macros.h"
-#include "physics.h"
-#include "force.h"
-#include "nonbonded.h"
-#include "names.h"
-#include "network.h"
-#include "pbc.h"
-#include "ns.h"
-#include "nrnb.h"
-#include "bondf.h"
-#include "mshift.h"
-#include "txtdump.h"
-#include "coulomb.h"
-#include "pme.h"
-#include "mdrun.h"
-#include "domdec.h"
-#include "qmmm.h"
-#include "gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/coulomb.h"
+#include "gromacs/legacyheaders/domdec.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nonbonded.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/ns.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/qmmm.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/listed-forces/bonded.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/mshift.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
-#include "gmx_fatal.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
void ns(FILE *fp,
t_forcerec *fr,
int nthreads, f_thread_t *f_t)
{
int t, i;
+ int nthreads_loop gmx_unused;
/* This reduction can run over any number of threads */
- #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
+ nthreads_loop = gmx_omp_nthreads_get(emntBonded);
+ #pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
for (i = 0; i < n; i++)
{
for (t = 1; t < nthreads; t++)
}
}
-void gmx_print_sepdvdl(FILE *fplog, const char *s, real v, real dvdlambda)
-{
- fprintf(fplog, " %-30s V %12.5e dVdl %12.5e\n", s, v, dvdlambda);
-}
-
-void do_force_lowlevel(FILE *fplog, gmx_int64_t step,
- t_forcerec *fr, t_inputrec *ir,
+void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir,
t_idef *idef, t_commrec *cr,
t_nrnb *nrnb, gmx_wallcycle_t wcycle,
t_mdatoms *md,
t_fcdata *fcd,
gmx_localtop_t *top,
gmx_genborn_t *born,
- t_atomtypes *atype,
gmx_bool bBornRadii,
matrix box,
t_lambda *fepvals,
{
int i, j;
int donb_flags;
- gmx_bool bDoEpot, bSepDVDL, bSB;
+ gmx_bool bDoEpot, bSB;
int pme_flags;
matrix boxs;
rvec box_size;
double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
#endif
-#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) { gmx_print_sepdvdl(fplog, s, v, dvdlambda); }
-
set_pbc(&pbc, fr->ePBC, box);
/* reset free energy components */
box_size[i] = box[i][i];
}
- bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog));
debug_gmx();
/* do QMMM first if requested */
enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
}
- if (bSepDVDL)
- {
- fprintf(fplog, "Step %s: non-bonded V and dVdl for rank %d:\n",
- gmx_step_str(step, buf), cr->nodeid);
- }
-
/* Call the short range functions all in one go. */
#ifdef GMX_MPI
/* foreign lambda component for walls */
real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
enerd->grpp.ener[egLJSR], nrnb);
- PRINT_SEPDVDL("Walls", 0.0, dvdl_walls);
enerd->dvdl_lin[efptVDW] += dvdl_walls;
}
/* MRS: Eventually, many need to include free energy contribution here! */
if (ir->implicit_solvent)
{
- wallcycle_sub_start(wcycle, ewcsBONDED);
+ wallcycle_sub_start(wcycle, ewcsLISTED);
calc_gb_forces(cr, md, born, top, x, f, fr, idef,
ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
- wallcycle_sub_stop(wcycle, ewcsBONDED);
+ wallcycle_sub_stop(wcycle, ewcsLISTED);
}
#ifdef GMX_MPI
enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
}
- if (bSepDVDL)
- {
- real V_short_range = 0;
- real dvdl_short_range = 0;
-
- for (i = 0; i < enerd->grpp.nener; i++)
- {
- V_short_range +=
- (fr->bBHAM ?
- enerd->grpp.ener[egBHAMSR][i] :
- enerd->grpp.ener[egLJSR][i])
- + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
- }
- dvdl_short_range = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
- PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",
- V_short_range,
- dvdl_short_range);
- }
debug_gmx();
pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
}
- /* Shift the coordinates. Must be done before bonded forces and PPPM,
+ /* Shift the coordinates. Must be done before listed forces and PPPM,
* but is also necessary for SHAKE and update, therefore it can NOT
- * go when no bonded forces have to be evaluated.
+ * go when no listed forces have to be evaluated.
*/
/* Here sometimes we would not need to shift with NBFonly,
inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
}
}
- /* Check whether we need to do bondeds or correct for exclusions */
+ /* Check whether we need to do listed interactions or correct for exclusions */
if (fr->bMolPBC &&
- ((flags & GMX_FORCE_BONDED)
+ ((flags & GMX_FORCE_LISTED)
|| EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
{
/* Since all atoms are in the rectangular or triclinic unit-cell,
}
debug_gmx();
- if (flags & GMX_FORCE_BONDED)
+ if (flags & GMX_FORCE_LISTED)
{
- wallcycle_sub_start(wcycle, ewcsBONDED);
- calc_bonds(fplog, cr->ms,
- idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
- DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
- flags,
- fr->bSepDVDL && do_per_step(step, ir->nstlog), step);
+ wallcycle_sub_start(wcycle, ewcsLISTED);
+ calc_bonds(cr->ms,
+ idef, (const rvec *) x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
+ DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
+ flags);
/* Check if we have to determine energy differences
* at foreign lambda's.
{
lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
}
- calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
+ calc_bonds_lambda(idef, (const rvec *) x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
}
debug_gmx();
- wallcycle_sub_stop(wcycle, ewcsBONDED);
+ wallcycle_sub_stop(wcycle, ewcsLISTED);
}
where();
*cycles_pme = 0;
+ clear_mat(fr->vir_el_recip);
+ clear_mat(fr->vir_lj_recip);
+
+ /* Do long-range electrostatics and/or LJ-PME, including related short-range
+ * corrections.
+ */
if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
{
- real Vlr = 0, Vcorr = 0;
- real dvdl_long_range = 0;
- int status = 0;
+ real Vlr = 0, Vcorr = 0;
+ real dvdl_long_range = 0;
+ int status = 0;
+ real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
+ real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;
bSB = (ir->nwall == 2);
if (bSB)
svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
box_size[ZZ] *= ir->wall_ewald_zfac;
}
- }
-
- /* Do long-range electrostatics and/or LJ-PME, including related short-range
- * corrections.
- */
-
- clear_mat(fr->vir_el_recip);
- clear_mat(fr->vir_lj_recip);
-
- if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
- {
- real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
- real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;
- int status = 0;
if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
{
fr->vir_el_recip);
}
- PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr_q, dvdl_long_range_correction_q);
- PRINT_SEPDVDL("Ewald excl. corr. LJ", Vcorr_lj, dvdl_long_range_correction_lj);
enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj;
- }
- if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)))
- {
- if (cr->duty & DUTY_PME)
+ if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
{
/* Do reciprocal PME for Coulomb and/or LJ. */
assert(fr->n_tpi >= 0);
md->chargeA + md->homenr - fr->n_tpi,
&Vlr_q);
}
- PRINT_SEPDVDL("PME mesh", Vlr_q + Vlr_lj, dvdl_long_range_q+dvdl_long_range_lj);
}
}
box_size, cr, md->homenr,
fr->vir_el_recip, fr->ewaldcoeff_q,
lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
- PRINT_SEPDVDL("Ewald long-range", Vlr_q, dvdl_long_range_q);
}
/* Note that with separate PME nodes we get the real energies later */
fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
- PRINT_SEPDVDL("RF exclusion correction",
- enerd->term[F_RF_EXCL], dvdl_rf_excl);
}
}
}
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
#include <math.h>
-#include "typedefs.h"
-#include "mdatoms.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/qmmm.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/topology/mtop_util.h"
#include "gromacs/utility/smalloc.h"
-#include "main.h"
-#include "qmmm.h"
-#include "mtop_util.h"
-#include "gmx_omp_nthreads.h"
#define ALMOST_ZERO 1e-30
t_grpopts *opts;
gmx_groups_t *groups;
gmx_molblock_t *molblock;
+ int nthreads gmx_unused;
bLJPME = EVDW_PME(ir->vdwtype);
alook = gmx_mtop_atomlookup_init(mtop);
- #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntDefault)) schedule(static)
+ nthreads = gmx_omp_nthreads_get(emntDefault);
+ #pragma omp parallel for num_threads(nthreads) schedule(static)
for (i = 0; i < md->nr; i++)
{
int g, ag, molb;
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "config.h"
+#include <math.h>
#include <string.h>
#include <time.h>
-#include <math.h>
-#include "sysstuff.h"
-#include "gromacs/utility/cstringutil.h"
-#include "network.h"
-#include "gromacs/utility/smalloc.h"
-#include "nrnb.h"
-#include "main.h"
-#include "force.h"
-#include "macros.h"
-#include "names.h"
-#include "gmx_fatal.h"
-#include "txtdump.h"
-#include "typedefs.h"
-#include "update.h"
-#include "constr.h"
-#include "vec.h"
-#include "tgroup.h"
-#include "mdebin.h"
-#include "vsite.h"
-#include "force.h"
-#include "mdrun.h"
-#include "md_support.h"
-#include "sim_util.h"
-#include "domdec.h"
-#include "mdatoms.h"
-#include "ns.h"
-#include "mtop_util.h"
-#include "pme.h"
-#include "bondf.h"
-#include "gmx_omp_nthreads.h"
-#include "md_logging.h"
#include "gromacs/fileio/confio.h"
+#include "gromacs/fileio/mtxio.h"
#include "gromacs/fileio/trajectory_writing.h"
-#include "gromacs/linearalgebra/mtxio.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/legacyheaders/bonded-threading.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/domdec.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/md_support.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/mdebin.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/ns.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/sim_util.h"
+#include "gromacs/legacyheaders/tgroup.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/update.h"
+#include "gromacs/legacyheaders/vsite.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/linearalgebra/sparsematrix.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/pbcutil/mshift.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/imd/imd.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
typedef struct {
t_state s;
int start, end;
rvec *x1, *x2;
real dvdl_constr;
+ int nthreads gmx_unused;
s1 = &ems1->s;
s2 = &ems2->s;
x1 = s1->x;
x2 = s2->x;
- #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
+ nthreads = gmx_omp_nthreads_get(emntUpdate);
+ #pragma omp parallel num_threads(nthreads)
{
int gf, i, m;
}
/* Calculate long range corrections to pressure and energy */
- calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
+ calc_dispcorr(inputrec, fr, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
enerd->term[F_DISPCORR] = enercorr;
enerd->term[F_EPOT] += enercorr;
ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
ems->s.lambda[efptBONDED], &dvdl_constr,
NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0);
- if (fr->bSepDVDL && fplog)
- {
- gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr);
- }
enerd->term[F_DVDL_CONSTR] += dvdl_constr;
m_add(force_vir, shake_vir, vir);
wallcycle_stop(wcycle, ewcCONSTR);
}
stepsize = 1.0/fnorm;
- converged = FALSE;
/* Start the loop over BFGS steps.
* Each successful step is counted, and we continue until
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "nbnxn_cuda.h"
+
+#include "config.h"
-#include <stdlib.h>
#include <assert.h>
+#include <stdlib.h>
#if defined(_MSVC)
#include <limits>
#include <cuda.h>
-#include "types/simple.h"
-#include "types/nbnxn_pairlist.h"
-#include "types/nb_verlet.h"
-#include "types/ishift.h"
-#include "types/force_flags.h"
-#include "../nbnxn_consts.h"
-
#ifdef TMPI_ATOMICS
#include "thread_mpi/atomic.h"
#endif
+#include "gromacs/gmxlib/cuda_tools/cudautils.cuh"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/legacyheaders/types/simple.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/utility/cstringutil.h"
+
#include "nbnxn_cuda_types.h"
-#include "../../gmxlib/cuda_tools/cudautils.cuh"
-#include "nbnxn_cuda.h"
-#include "nbnxn_cuda_data_mgmt.h"
#if defined TEXOBJ_SUPPORTED && __CUDA_ARCH__ >= 300
#define USE_TEXOBJ
#define CL_SIZE (NBNXN_GPU_CLUSTER_SIZE)
/***** The kernels come here *****/
-#include "nbnxn_cuda_kernel_utils.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh"
/* Top-level kernel generation: will generate through multiple inclusion the
* following flavors for all kernels:
* - force and energy output with pair list pruning.
*/
/** Force only **/
-#include "nbnxn_cuda_kernels.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh"
/** Force & energy **/
#define CALC_ENERGIES
-#include "nbnxn_cuda_kernels.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh"
#undef CALC_ENERGIES
/*** Pair-list pruning kernels ***/
/** Force only **/
#define PRUNE_NBL
-#include "nbnxn_cuda_kernels.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh"
/** Force & energy **/
#define CALC_ENERGIES
-#include "nbnxn_cuda_kernels.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh"
#undef CALC_ENERGIES
#undef PRUNE_NBL
nbnxn_cu_kfunc_ptr_t res;
assert(eeltype < eelCuNR);
- assert(evdwtype < eelCuNR);
+ assert(evdwtype < evdwCuNR);
if (bDoEne)
{
if (devinfo->prop.major >= 3)
{
/* Default kernel on sm 3.x 48/16 kB Shared/L1 */
- stat = cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferShared);
- stat = cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferShared);
- stat = cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferShared);
+ cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferShared);
+ cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferShared);
+ cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferShared);
stat = cudaFuncSetCacheConfig(nb_kfunc_noener_noprune_ptr[i][j], cudaFuncCachePreferShared);
}
else
{
/* On Fermi prefer L1 gives 2% higher performance */
/* Default kernel on sm_2.x 16/48 kB Shared/L1 */
- stat = cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferL1);
- stat = cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferL1);
- stat = cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferL1);
+ cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferL1);
+ cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferL1);
+ cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferL1);
stat = cudaFuncSetCacheConfig(nb_kfunc_noener_noprune_ptr[i][j], cudaFuncCachePreferL1);
}
CU_RET_ERR(stat, "cudaFuncSetCacheConfig failed");
/* cut-off + V shift LJ */
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJ ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJEwCombGeom ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_GEOM
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w LB combination rules */
#define LJ_EWALD_COMB_LB
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJEwCombLB ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_LB
#undef NB_KERNEL_FUNC_NAME
/* F switch LJ */
#define LJ_FORCE_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJFsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_FORCE_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* V switch LJ */
#define LJ_POT_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJPsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_POT_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* cut-off + V shift LJ */
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJ ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJEwCombGeom ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_GEOM
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w LB combination rules */
#define LJ_EWALD_COMB_LB
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJEwCombLB ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_LB
#undef NB_KERNEL_FUNC_NAME
/* F switch LJ */
#define LJ_FORCE_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJFsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_FORCE_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* V switch LJ */
#define LJ_POT_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJPsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_POT_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* cut-off + V shift LJ */
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJ ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJEwCombGeom ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_GEOM
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w LB combination rules */
#define LJ_EWALD_COMB_LB
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJEwCombLB ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_LB
#undef NB_KERNEL_FUNC_NAME
/* F switch LJ */
#define LJ_FORCE_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJFsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_FORCE_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* V switch LJ */
#define LJ_POT_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJPsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_POT_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* Analytical Ewald interaction kernels with twin-range cut-off
*/
#define EL_EWALD_ANA
- #define LJ_CUTOFF_CHECK
+ #define VDW_CUTOFF_CHECK
/* cut-off + V shift LJ */
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJ ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJEwCombGeom ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_GEOM
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w LB combination rules */
#define LJ_EWALD_COMB_LB
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJEwCombLB ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_LB
#undef NB_KERNEL_FUNC_NAME
/* F switch LJ */
#define LJ_FORCE_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJFsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_FORCE_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* V switch LJ */
#define LJ_POT_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJPsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_POT_SWITCH
#undef NB_KERNEL_FUNC_NAME
#undef EL_EWALD_ANA
- #undef LJ_CUTOFF_CHECK
+ #undef VDW_CUTOFF_CHECK
/* Tabulated Ewald interaction kernels */
/* cut-off + V shift LJ */
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJ ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJEwCombGeom ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_GEOM
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w LB combination rules */
#define LJ_EWALD_COMB_LB
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJEwCombLB ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_LB
#undef NB_KERNEL_FUNC_NAME
/* F switch LJ */
#define LJ_FORCE_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJFsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_FORCE_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* V switch LJ */
#define LJ_POT_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJPsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_POT_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* Tabulated Ewald interaction kernels with twin-range cut-off */
#define EL_EWALD_TAB
- #define LJ_CUTOFF_CHECK
+ #define VDW_CUTOFF_CHECK
/* cut-off + V shift LJ */
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJ ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w geometric combination rules */
#define LJ_EWALD_COMB_GEOM
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJEwCombGeom ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_GEOM
#undef NB_KERNEL_FUNC_NAME
/* LJ-Ewald w LB combination rules */
#define LJ_EWALD_COMB_LB
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJEwCombLB ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_EWALD_COMB_LB
#undef NB_KERNEL_FUNC_NAME
/* F switch LJ */
#define LJ_FORCE_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJFsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_FORCE_SWITCH
#undef NB_KERNEL_FUNC_NAME
/* V switch LJ */
#define LJ_POT_SWITCH
#define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJPsw ## __VA_ARGS__
-#include "nbnxn_cuda_kernel.cuh"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh"
#undef LJ_POT_SWITCH
#undef NB_KERNEL_FUNC_NAME
#undef EL_EWALD_TAB
- #undef LJ_CUTOFF_CHECK
+ #undef VDW_CUTOFF_CHECK
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include "typedefs.h"
+#include "config.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_simd.h"
#ifdef {0}
{2}
#define GMX_SIMD_J_UNROLL_SIZE {3}
#include "{4}"
-#include "../nbnxn_kernel_common.h"
-#include "gmx_omp_nthreads.h"
-#include "types/force_flags.h"
-#include "gmx_fatal.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/utility/fatalerror.h"
/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
*/
#else /* {0} */
-#include "gmx_fatal.h"
+#include "gromacs/utility/fatalerror.h"
#endif /* {0} */
nbnxn_pairlist_t **nbl;
int coulkt, vdwkt = 0;
int nb;
+ int nthreads gmx_unused;
nnbl = nbl_list->nnbl;
nbl = nbl_list->nbl;
gmx_incons("Unsupported VdW interaction type");
}}
- #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+ nthreads = gmx_omp_nthreads_get(emntNonbonded);
+ #pragma omp parallel for schedule(static) num_threads(nthreads)
for (nb = 0; nb < nnbl; nb++)
{{
nbnxn_atomdata_output_t *out;
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "nbnxn_kernel_ref.h"
+
+#include "config.h"
-#include <math.h>
#include <assert.h>
+#include <math.h>
-#include "typedefs.h"
-#include "vec.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/smalloc.h"
-#include "force.h"
-#include "gmx_omp_nthreads.h"
-#include "nbnxn_kernel_ref.h"
-#include "../nbnxn_consts.h"
-#include "nbnxn_kernel_common.h"
/*! \brief Typedefs for declaring lookup tables of kernel functions.
*/
/* Analytical reaction-field kernels */
#define CALC_COUL_RF
#define LJ_CUT
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_CUT
#define LJ_FORCE_SWITCH
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_FORCE_SWITCH
#define LJ_POT_SWITCH
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_POT_SWITCH
#define LJ_EWALD
#define LJ_CUT
#define LJ_EWALD_COMB_GEOM
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_EWALD_COMB_GEOM
#define LJ_EWALD_COMB_LB
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_EWALD_COMB_LB
#undef LJ_CUT
#undef LJ_EWALD
/* Tabulated exclusion interaction electrostatics kernels */
#define CALC_COUL_TAB
#define LJ_CUT
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_CUT
#define LJ_FORCE_SWITCH
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_FORCE_SWITCH
#define LJ_POT_SWITCH
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_POT_SWITCH
#define LJ_EWALD
#define LJ_CUT
#define LJ_EWALD_COMB_GEOM
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_EWALD_COMB_GEOM
#define LJ_EWALD_COMB_LB
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_EWALD_COMB_LB
#undef LJ_CUT
#undef LJ_EWALD
/* Twin-range cut-off kernels */
#define VDW_CUTOFF_CHECK
#define LJ_CUT
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_CUT
#define LJ_FORCE_SWITCH
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_FORCE_SWITCH
#define LJ_POT_SWITCH
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_POT_SWITCH
#define LJ_EWALD
#define LJ_CUT
#define LJ_EWALD_COMB_GEOM
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_EWALD_COMB_GEOM
#define LJ_EWALD_COMB_LB
-#include "nbnxn_kernel_ref_includes.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h"
#undef LJ_EWALD_COMB_LB
#undef LJ_CUT
#undef LJ_EWALD
int coult;
int vdwt;
int nb;
+ int nthreads gmx_unused;
nnbl = nbl_list->nnbl;
nbl = nbl_list->nbl;
gmx_incons("Unsupported vdwtype in nbnxn reference kernel");
}
- #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+ nthreads = gmx_omp_nthreads_get(emntNonbonded);
+ #pragma omp parallel for schedule(static) num_threads(nthreads)
for (nb = 0; nb < nnbl; nb++)
{
nbnxn_atomdata_output_t *out;
* kernel type 2xnn.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include "typedefs.h"
+#include "config.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_simd.h"
#ifdef GMX_NBNXN_SIMD_2XNN
#define GMX_SIMD_J_UNROLL_SIZE 2
#include "nbnxn_kernel_simd_2xnn.h"
-#include "../nbnxn_kernel_common.h"
-#include "gmx_omp_nthreads.h"
-#include "types/force_flags.h"
-#include "gmx_fatal.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/utility/fatalerror.h"
/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
*/
#else /* GMX_NBNXN_SIMD_2XNN */
-#include "gmx_fatal.h"
+#include "gromacs/utility/fatalerror.h"
#endif /* GMX_NBNXN_SIMD_2XNN */
nbnxn_pairlist_t **nbl;
int coulkt, vdwkt = 0;
int nb;
+ int nthreads gmx_unused;
nnbl = nbl_list->nnbl;
nbl = nbl_list->nbl;
gmx_incons("Unsupported VdW interaction type");
}
- #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+ nthreads = gmx_omp_nthreads_get(emntNonbonded);
+ #pragma omp parallel for schedule(static) num_threads(nthreads)
for (nb = 0; nb < nnbl; nb++)
{
nbnxn_atomdata_output_t *out;
* kernel type 4xn.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include "typedefs.h"
+#include "config.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/mdlib/nb_verlet.h"
#include "gromacs/mdlib/nbnxn_simd.h"
#ifdef GMX_NBNXN_SIMD_4XN
#define GMX_SIMD_J_UNROLL_SIZE 1
#include "nbnxn_kernel_simd_4xn.h"
-#include "../nbnxn_kernel_common.h"
-#include "gmx_omp_nthreads.h"
-#include "types/force_flags.h"
-#include "gmx_fatal.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/types/force_flags.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/utility/fatalerror.h"
/*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
*/
#else /* GMX_NBNXN_SIMD_4XN */
-#include "gmx_fatal.h"
+#include "gromacs/utility/fatalerror.h"
#endif /* GMX_NBNXN_SIMD_4XN */
nbnxn_pairlist_t **nbl;
int coulkt, vdwkt = 0;
int nb;
+ int nthreads gmx_unused;
nnbl = nbl_list->nnbl;
nbl = nbl_list->nbl;
gmx_incons("Unsupported VdW interaction type");
}
- #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
+ nthreads = gmx_omp_nthreads_get(emntNonbonded);
+ #pragma omp parallel for schedule(static) num_threads(nthreads)
for (nb = 0; nb < nnbl; nb++)
{
nbnxn_atomdata_output_t *out;
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "nbnxn_search.h"
+
+#include "config.h"
+#include <assert.h>
#include <math.h>
#include <string.h>
-#include <assert.h>
-#include "sysstuff.h"
-#include "gromacs/utility/smalloc.h"
-#include "types/commrec.h"
-#include "macros.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/ns.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/utilities.h"
-#include "vec.h"
-#include "pbc.h"
-#include "nbnxn_consts.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_atomdata.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/utility/smalloc.h"
+
/* nbnxn_internal.h included gromacs/simd/macros.h */
-#include "nbnxn_internal.h"
-#ifdef GMX_NBNXN_SIMD
+#include "gromacs/mdlib/nbnxn_internal.h"
+#ifdef GMX_SIMD
#include "gromacs/simd/vector_operations.h"
#endif
-#include "nbnxn_atomdata.h"
-#include "nbnxn_search.h"
-#include "gmx_omp_nthreads.h"
-#include "nrnb.h"
-#include "ns.h"
-
-#include "gromacs/fileio/gmxfio.h"
#ifdef NBNXN_SEARCH_BB_SIMD4
/* Always use 4-wide SIMD for bounding box calculations */
int cxy_start, int cxy_end,
int *sort_work)
{
- int cxy;
- int cx, cy, cz = -1, c = -1, ncz;
- int na, ash, na_c, ind, a;
- int subdiv_z, sub_z, na_z, ash_z;
- int subdiv_y, sub_y, na_y, ash_y;
- int subdiv_x, sub_x, na_x, ash_x;
+ int cxy;
+ int cx, cy, cz = -1, c = -1, ncz;
+ int na, ash, na_c, ind, a;
+ int subdiv_z, sub_z, na_z, ash_z;
+ int subdiv_y, sub_y, na_y, ash_y;
+ int subdiv_x, sub_x, na_x, ash_x;
- /* cppcheck-suppress unassignedVariable */
nbnxn_bb_t bb_work_array[2], *bb_work_aligned;
bb_work_aligned = (nbnxn_bb_t *)(((size_t)(bb_work_array+1)) & (~((size_t)15)));
float *bbcz;
nbnxn_bb_t *bb;
int ncd, sc;
+ int nthreads gmx_unused;
grid = &nbs->grid[0];
bbcz = grid->bbcz_simple;
bb = grid->bb_simple;
- #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static)
+ nthreads = gmx_omp_nthreads_get(emntPairsearch);
+ #pragma omp parallel for num_threads(nthreads) schedule(static)
for (sc = 0; sc < grid->nc; sc++)
{
int c, tx, na;
}
#ifdef GMX_NBNXN_SIMD_4XN
-#include "nbnxn_search_simd_4xn.h"
+#include "gromacs/mdlib/nbnxn_search_simd_4xn.h"
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
-#include "nbnxn_search_simd_2xnn.h"
+#include "gromacs/mdlib/nbnxn_search_simd_2xnn.h"
#endif
/* Plain C or SIMD4 code for making a pair list of super-cell sci vs scj.
{
int nsci, ncj4, nexcl;
int n, i;
+ int nthreads gmx_unused;
if (nblc->bSimple)
{
/* Each thread should copy its own data to the combined arrays,
* as otherwise data will go back and forth between different caches.
*/
- #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static)
+ nthreads = gmx_omp_nthreads_get(emntPairsearch);
+ #pragma omp parallel for num_threads(nthreads) schedule(static)
for (n = 0; n < nnbl; n++)
{
int sci_offset;
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "gromacs/legacyheaders/ns.h"
#include <math.h>
+#include <stdlib.h>
#include <string.h>
-#include "sysstuff.h"
-#include "gromacs/utility/smalloc.h"
-#include "macros.h"
+
+#include "gromacs/legacyheaders/domdec.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nonbonded.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/nsgrid.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/math/utilities.h"
-#include "vec.h"
-#include "types/commrec.h"
-#include "network.h"
-#include "nsgrid.h"
-#include "force.h"
-#include "nonbonded.h"
-#include "ns.h"
-#include "pbc.h"
-#include "names.h"
-#include "gmx_fatal.h"
-#include "nrnb.h"
-#include "txtdump.h"
-#include "mtop_util.h"
-
-#include "domdec.h"
-#include "adress.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
+#include "adress.h"
/*
* E X C L U S I O N H A N D L I N G
/* Did we get the solvent loops so we can use optimized water kernels? */
if (nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf == NULL
|| nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf == NULL
-#ifndef DISABLE_WATERWATER_NLIST
|| nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf == NULL
- || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf == NULL
-#endif
- )
+ || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf == NULL)
{
fr->solvent_opt = esolNO;
if (log != NULL)
nlist->jindex[nri+1] = nlist->nrj;
len = nlist->nrj - nlist->jindex[nri];
+ /* If there are no j-particles we have to reduce the
+ * number of i-particles again, to prevent errors in the
+ * kernel functions.
+ */
+ if ((len == 0) && (nlist->nri > 0))
+ {
+ nlist->nri--;
+ }
}
}
if (iwater != esolNO)
{
- vdwc = &nlist[eNL_VDWQQ_WATER];
- vdw = &nlist[eNL_VDW];
- coul = &nlist[eNL_QQ_WATER];
-#ifndef DISABLE_WATERWATER_NLIST
+ vdwc = &nlist[eNL_VDWQQ_WATER];
+ vdw = &nlist[eNL_VDW];
+ coul = &nlist[eNL_QQ_WATER];
vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER];
coul_ww = &nlist[eNL_QQ_WATERWATER];
-#endif
}
else
{
if (bDoCoul && bDoVdW)
{
new_i_nblist(vdwc, i_atom, shift, gid);
-#ifndef DISABLE_WATERWATER_NLIST
new_i_nblist(vdwc_ww, i_atom, shift, gid);
-#endif
}
if (bDoVdW)
{
if (bDoCoul)
{
new_i_nblist(coul, i_atom, shift, gid);
-#ifndef DISABLE_WATERWATER_NLIST
new_i_nblist(coul_ww, i_atom, shift, gid);
-#endif
}
/* Loop over the j charge groups */
for (j = 0; (j < nj); j++)
}
else
{
-#ifdef DISABLE_WATERWATER_NLIST
- /* Add entries for the three atoms - only do VdW if we need to */
- if (!bDoVdW)
- {
- add_j_to_nblist(coul, jj0, bLR);
- }
- else
- {
- add_j_to_nblist(vdwc, jj0, bLR);
- }
- add_j_to_nblist(coul, jj0+1, bLR);
- add_j_to_nblist(coul, jj0+2, bLR);
-#else
/* One entry for the entire water-water interaction */
if (!bDoVdW)
{
{
add_j_to_nblist(vdwc_ww, jj0, bLR);
}
-#endif
}
}
else if (iwater == esolTIP4P && jwater == esolTIP4P)
}
else
{
-#ifdef DISABLE_WATERWATER_NLIST
- /* Add entries for the four atoms - only do VdW if we need to */
- if (bDoVdW)
- {
- add_j_to_nblist(vdw, jj0, bLR);
- }
- add_j_to_nblist(coul, jj0+1, bLR);
- add_j_to_nblist(coul, jj0+2, bLR);
- add_j_to_nblist(coul, jj0+3, bLR);
-#else
/* One entry for the entire water-water interaction */
if (!bDoVdW)
{
{
add_j_to_nblist(vdwc_ww, jj0, bLR);
}
-#endif
}
}
else
close_i_nblist(vdw);
close_i_nblist(coul);
close_i_nblist(vdwc);
-#ifndef DISABLE_WATERWATER_NLIST
close_i_nblist(coul_ww);
close_i_nblist(vdwc_ww);
-#endif
}
else
{
gmx_ns_t *ns;
atom_id **nl_lr_ljc, **nl_lr_one, **nl_sr;
int *nlr_ljc, *nlr_one, *nsr;
- gmx_domdec_t *dd = NULL;
+ gmx_domdec_t *dd;
t_block *cgs = &(top->cgs);
int *cginfo = fr->cginfo;
/* atom_id *i_atoms,*cgsindex=cgs->index; */
ns = &fr->ns;
bDomDec = DOMAINDECOMP(cr);
- if (bDomDec)
- {
- dd = cr->dd;
- }
+ dd = cr->dd;
bTriclinicX = ((YY < grid->npbcdim &&
(!bDomDec || dd->nc[YY] == 1) && box[YY][XX] != 0) ||
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "gromacs/legacyheaders/sim_util.h"
+
+#include "config.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
+
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
-#include "typedefs.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/smalloc.h"
-#include "names.h"
-#include "txtdump.h"
-#include "pbc.h"
-#include "chargegroup.h"
-#include "vec.h"
-#include "nrnb.h"
-#include "mshift.h"
-#include "mdrun.h"
-#include "sim_util.h"
-#include "update.h"
-#include "physics.h"
-#include "main.h"
-#include "mdatoms.h"
-#include "force.h"
-#include "bondf.h"
-#include "pme.h"
-#include "disre.h"
-#include "orires.h"
-#include "network.h"
-#include "calcmu.h"
-#include "constr.h"
-#include "xvgr.h"
-#include "copyrite.h"
-#include "domdec.h"
-#include "genborn.h"
-#include "nbnxn_atomdata.h"
-#include "nbnxn_search.h"
-#include "nbnxn_kernels/nbnxn_kernel_ref.h"
-#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
-#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
-#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
-#include "nonbonded.h"
-#include "../gmxlib/nonbonded/nb_kernel.h"
-#include "../gmxlib/nonbonded/nb_free_energy.h"
-
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/utility/gmxmpi.h"
#include "gromacs/essentialdynamics/edsam.h"
+#include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
+#include "gromacs/gmxlib/nonbonded/nb_kernel.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/legacyheaders/calcmu.h"
+#include "gromacs/legacyheaders/chargegroup.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/copyrite.h"
+#include "gromacs/legacyheaders/disre.h"
+#include "gromacs/legacyheaders/domdec.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/genborn.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nonbonded.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/orires.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/qmmm.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/update.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/listed-forces/bonded.h"
+#include "gromacs/math/units.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_atomdata.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
+#include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
+#include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/mshift.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/imd/imd.h"
-#include "adress.h"
-#include "qmmm.h"
-
-#include "gmx_omp_nthreads.h"
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
-#include "nbnxn_cuda_data_mgmt.h"
-#include "nbnxn_cuda/nbnxn_cuda.h"
+#include "adress.h"
void print_time(FILE *out,
gmx_walltime_accounting_t walltime_accounting,
}
}
-static void posres_wrapper(FILE *fplog,
- int flags,
- gmx_bool bSepDVDL,
+static void posres_wrapper(int flags,
t_inputrec *ir,
t_nrnb *nrnb,
gmx_localtop_t *top,
ir->ePBC == epbcNONE ? NULL : &pbc,
lambda[efptRESTRAINT], &dvdl,
fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB);
- if (bSepDVDL)
- {
- gmx_print_sepdvdl(fplog, interaction_function[F_POSRES].longname, v, dvdl);
- }
enerd->term[F_POSRES] += v;
/* If just the force constant changes, the FEP term is linear,
* but if k changes, it is not.
inc_nrnb(nrnb, eNR_FBPOSRES, top->idef.il[F_FBPOSRES].nr/2);
}
-static void pull_potential_wrapper(FILE *fplog,
- gmx_bool bSepDVDL,
- t_commrec *cr,
+static void pull_potential_wrapper(t_commrec *cr,
t_inputrec *ir,
matrix box, rvec x[],
rvec f[],
enerd->term[F_COM_PULL] +=
pull_potential(ir->ePull, ir->pull, mdatoms, &pbc,
cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl);
- if (bSepDVDL)
- {
- gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl);
- }
enerd->dvdl_lin[efptRESTRAINT] += dvdl;
wallcycle_stop(wcycle, ewcPULLPOT);
}
-static void pme_receive_force_ener(FILE *fplog,
- gmx_bool bSepDVDL,
- t_commrec *cr,
+static void pme_receive_force_ener(t_commrec *cr,
gmx_wallcycle_t wcycle,
gmx_enerdata_t *enerd,
t_forcerec *fr)
gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q,
fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj,
&cycles_seppme);
- if (bSepDVDL)
- {
- gmx_print_sepdvdl(fplog, "Electrostatic PME mesh", e_q, dvdl_q);
- gmx_print_sepdvdl(fplog, "Lennard-Jones PME mesh", e_lj, dvdl_lj);
- }
enerd->term[F_COUL_RECIP] += e_q;
enerd->term[F_LJ_RECIP] += e_lj;
enerd->dvdl_lin[efptCOUL] += dvdl_q;
wallcycle_sub_stop(wcycle, ewcsNONBONDED);
}
+gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
+{
+ return nbv != NULL && nbv->bUseGPU;
+}
+
void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
t_inputrec *inputrec,
gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
int start, homenr;
int nb_kernel_type;
double mu[2*DIM];
- gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
+ gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
gmx_bool bDoLongRange, bDoForces, bSepLRF, bUseGPU, bUseOrEmulGPU;
gmx_bool bDiffKernels = FALSE;
matrix boxs;
start = 0;
homenr = mdatoms->homenr;
- bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
-
clear_mat(vir_force);
cg0 = 0;
}
/* We calculate the non-bonded forces, when done on the CPU, here.
- * We do this before calling do_force_lowlevel, as in there bondeds
- * forces are calculated before PME, which does communication.
- * With this order, non-bonded and bonded force calculation imbalance
- * can be balanced out by the domain decomposition load balancing.
+ * We do this before calling do_force_lowlevel, because in that
+ * function, the listed forces are calculated before PME, which
+ * does communication. With this order, non-bonded and listed
+ * force calculation imbalance can be balanced out by the domain
+ * decomposition load balancing.
*/
if (!bUseOrEmulGPU)
update_QMMMrec(cr, fr, x, mdatoms, box, top);
}
- if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
+ if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_POSRES].nr > 0)
{
- posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
+ posres_wrapper(flags, inputrec, nrnb, top, box, x,
enerd, lambda, fr);
}
- if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
+ if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_FBPOSRES].nr > 0)
{
fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
}
/* Compute the bonded and non-bonded energies and optionally forces */
- do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
+ do_force_lowlevel(fr, inputrec, &(top->idef),
cr, nrnb, wcycle, mdatoms,
x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
- &(top->atomtypes), bBornRadii, box,
+ bBornRadii, box,
inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot,
flags, &cycles_pme);
if (bDoForces && DOMAINDECOMP(cr))
{
+ if (bUseGPU)
+ {
+ /* We are done with the CPU compute, but the GPU local non-bonded
+ * kernel can still be running while we communicate the forces.
+ * We start a counter here, so we can, hopefully, time the rest
+ * of the GPU kernel execution and data transfer.
+ */
+ wallcycle_start(wcycle, ewcWAIT_GPU_NB_L_EST);
+ }
+
/* Communicate the forces */
wallcycle_start(wcycle, ewcMOVEF);
dd_move_f(cr->dd, f, fr->fshift);
/* wait for local forces (or calculate in emulation mode) */
if (bUseGPU)
{
+ float cycles_tmp, cycles_wait_est;
+ const float cuda_api_overhead_margin = 50000.0f; /* cycles */
+
wallcycle_start(wcycle, ewcWAIT_GPU_NB_L);
nbnxn_cuda_wait_gpu(nbv->cu_nbv,
nbv->grp[eintLocal].nbat,
flags, eatLocal,
enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
fr->fshift);
- cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
+ cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L);
+
+ if (bDoForces && DOMAINDECOMP(cr))
+ {
+ cycles_wait_est = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L_EST);
+
+ if (cycles_tmp < cuda_api_overhead_margin)
+ {
+ /* We measured few cycles, it could be that the kernel
+ * and transfer finished earlier and there was no actual
+ * wait time, only API call overhead.
+ * Then the actual time could be anywhere between 0 and
+ * cycles_wait_est. As a compromise, we use half the time.
+ */
+ cycles_wait_est *= 0.5f;
+ }
+ }
+ else
+ {
+ /* No force communication so we actually timed the wait */
+ cycles_wait_est = cycles_tmp;
+ }
+ /* Even though this is after dd_move_f, the actual task we are
+ * waiting for runs asynchronously with dd_move_f and we usually
+ * have nothing to balance it with, so we can and should add
+ * the time to the force time for load balancing.
+ */
+ cycles_force += cycles_wait_est;
+ cycles_wait_gpu += cycles_wait_est;
/* now clear the GPU outputs while we finish the step on the CPU */
/* Since the COM pulling is always done mass-weighted, no forces are
* applied to vsites and this call can be done after vsite spreading.
*/
- pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
+ pull_potential_wrapper(cr, inputrec, box, x,
f, vir_force, mdatoms, enerd, lambda, t,
wcycle);
}
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
- pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
+ pme_receive_force_ener(cr, wcycle, enerd, fr);
}
if (bDoForces)
int cg0, cg1, i, j;
int start, homenr;
double mu[2*DIM];
- gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
+ gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS;
gmx_bool bDoLongRangeNS, bDoForces, bDoPotential, bSepLRF;
gmx_bool bDoAdressWF;
matrix boxs;
start = 0;
homenr = mdatoms->homenr;
- bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog));
-
clear_mat(vir_force);
cg0 = 0;
update_QMMMrec(cr, fr, x, mdatoms, box, top);
}
- if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
+ if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_POSRES].nr > 0)
{
- posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x,
+ posres_wrapper(flags, inputrec, nrnb, top, box, x,
enerd, lambda, fr);
}
- if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
+ if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_FBPOSRES].nr > 0)
{
fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr);
}
/* Compute the bonded and non-bonded energies and optionally forces */
- do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef),
+ do_force_lowlevel(fr, inputrec, &(top->idef),
cr, nrnb, wcycle, mdatoms,
x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born,
- &(top->atomtypes), bBornRadii, box,
+ bBornRadii, box,
inputrec->fepvals, lambda,
graph, &(top->excls), fr->mu_tot,
flags,
if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
{
- pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x,
+ pull_potential_wrapper(cr, inputrec, box, x,
f, vir_force, mdatoms, enerd, lambda, t,
wcycle);
}
/* In case of node-splitting, the PP nodes receive the long-range
* forces, virial and energy from the PME nodes here.
*/
- pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr);
+ pme_receive_force_ener(cr, wcycle, enerd, fr);
}
if (bDoForces)
}
}
-void calc_dispcorr(FILE *fplog, t_inputrec *ir, t_forcerec *fr,
- gmx_int64_t step, int natoms,
+void calc_dispcorr(t_inputrec *ir, t_forcerec *fr,
+ int natoms,
matrix box, real lambda, tensor pres, tensor virial,
real *prescorr, real *enercorr, real *dvdlcorr)
{
}
}
- if (fr->bSepDVDL && do_per_step(step, ir->nstlog))
- {
- gmx_print_sepdvdl(fplog, "Dispersion correction", *enercorr, dvdlambda);
- }
if (fr->efep != efepNO)
{
*dvdlcorr += dvdlambda;
t_inputrec *inputrec,
t_nrnb nrnb[], gmx_wallcycle_t wcycle,
gmx_walltime_accounting_t walltime_accounting,
- wallclock_gpu_t *gputimes,
+ nonbonded_verlet_t *nbv,
gmx_bool bWriteStat)
{
int i, j;
if (SIMMASTER(cr))
{
+ wallclock_gpu_t* gputimes = use_GPU(nbv) ?
+ nbnxn_cuda_get_timings(nbv->cu_nbv) : NULL;
wallcycle_print(fplog, cr->nnodes, cr->npmenodes,
elapsed_time_over_all_ranks,
wcycle, gputimes);
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+#include "gromacs/legacyheaders/update.h"
-#include <stdio.h>
#include <math.h>
-
-#include "types/commrec.h"
-#include "sysstuff.h"
-#include "gromacs/utility/smalloc.h"
-#include "typedefs.h"
-#include "nrnb.h"
-#include "physics.h"
-#include "macros.h"
-#include "vec.h"
-#include "main.h"
-#include "update.h"
-#include "gromacs/random/random.h"
-#include "mshift.h"
-#include "tgroup.h"
-#include "force.h"
-#include "names.h"
-#include "txtdump.h"
-#include "mdrun.h"
-#include "constr.h"
-#include "disre.h"
-#include "orires.h"
-#include "gmx_omp_nthreads.h"
+#include <stdio.h>
#include "gromacs/fileio/confio.h"
-#include "gromacs/fileio/futil.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/disre.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/orires.h"
+#include "gromacs/legacyheaders/tgroup.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/math/units.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/pbcutil/mshift.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/pulling/pull.h"
+#include "gromacs/random/random.h"
#include "gromacs/timing/wallcycle.h"
+#include "gromacs/utility/futil.h"
#include "gromacs/utility/gmxomp.h"
-#include "gromacs/pulling/pull.h"
+#include "gromacs/utility/smalloc.h"
/*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */
/*#define STARTFROMDT2*/
}
else
{
- #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntUpdate)) schedule(static)
+ nth = gmx_omp_nthreads_get(emntUpdate);
+
+ #pragma omp parallel for num_threads(nth) schedule(static)
for (i = start; i < nrend; i++)
{
copy_rvec(upd->xp[i], state->x[i]);
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#include "gromacs/timing/wallcycle.h"
+#include "gmxpre.h"
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "wallcycle.h"
-#include <string.h>
+#include "config.h"
-#include "gromacs/utility/smalloc.h"
-#include "gmx_fatal.h"
-#include "md_logging.h"
-#include "gromacs/utility/cstringutil.h"
+#include <stdlib.h>
+#include <string.h>
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/types/commrec.h"
#include "gromacs/timing/cyclecounter.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
/* DEBUG_WCYCLE adds consistency checking for the counters.
* It checks if you stop a counter different from the last
"DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
"Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
"PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec",
- "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
+ "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "Wait GPU loc. est.", "NB X/F buffer ops.",
"Vsite spread", "COM pull force",
"Write traj.", "Update", "Constraints", "Comm. energies",
"Enforced rotation", "Add rot. forces", "Coordinate swapping", "IMD", "Test"
"DD redist.", "DD NS grid + sort", "DD setup comm.",
"DD make top.", "DD make constr.", "DD top. other",
"NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
- "Bonded F", "Nonbonded F", "Ewald F correction",
+ "Listed F", "Nonbonded F", "Ewald F correction",
"NB X buffer ops.", "NB F buffer ops."
};
wcc = wc->wcc;
+ /* The GPU wait estimate counter is used for load balancing only
+ * and will mess up the total due to double counting: clear it.
+ */
+ wcc[ewcWAIT_GPU_NB_L_EST].n = 0;
+ wcc[ewcWAIT_GPU_NB_L_EST].c = 0;
+
for (i = 0; i < ewcNR; i++)
{
if (is_pme_counter(i) || (i == ewcRUN && cr->duty == DUTY_PME))
#define GMX_TIMING_WALLCYCLE_H
#include <stdio.h>
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/commrec.h"
+
+#include "gromacs/legacyheaders/types/commrec_fwd.h"
+#include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
+#include "gromacs/utility/basedefinitions.h"
#ifdef __cplusplus
extern "C" {
#endif
+typedef struct gmx_wallcycle *gmx_wallcycle_t;
+
enum {
ewcRUN, ewcSTEP, ewcPPDURINGPME, ewcDOMDEC, ewcDDCOMMLOAD,
ewcDDCOMMBOUND, ewcVSITECONSTR, ewcPP_PMESENDX, ewcNS, ewcLAUNCH_GPU_NB,
ewcMOVEX, ewcGB, ewcFORCE, ewcMOVEF, ewcPMEMESH,
ewcPME_REDISTXF, ewcPME_SPREADGATHER, ewcPME_FFT, ewcPME_FFTCOMM, ewcLJPME, ewcPME_SOLVE,
- ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF, ewcWAIT_GPU_NB_NL, ewcWAIT_GPU_NB_L, ewcNB_XF_BUF_OPS,
+ ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF, ewcWAIT_GPU_NB_NL, ewcWAIT_GPU_NB_L, ewcWAIT_GPU_NB_L_EST, ewcNB_XF_BUF_OPS,
ewcVSITESPREAD, ewcPULLPOT,
ewcTRAJ, ewcUPDATE, ewcCONSTR, ewcMoveE, ewcROT, ewcROTadd, ewcSWAP, ewcIMD,
ewcTEST, ewcNR
ewcsDD_MAKETOP, ewcsDD_MAKECONSTR, ewcsDD_TOPOTHER,
ewcsNBS_GRID_LOCAL, ewcsNBS_GRID_NONLOCAL,
ewcsNBS_SEARCH_LOCAL, ewcsNBS_SEARCH_NONLOCAL,
- ewcsBONDED, ewcsNONBONDED, ewcsEWALD_CORRECTION,
+ ewcsLISTED, ewcsNONBONDED, ewcsEWALD_CORRECTION,
ewcsNB_X_BUF_OPS, ewcsNB_F_BUF_OPS,
ewcsNR
};
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include "typedefs.h"
-#include "gromacs/utility/smalloc.h"
-#include "sysstuff.h"
-#include "vec.h"
-#include "vcm.h"
-#include "mdebin.h"
-#include "nrnb.h"
-#include "calcmu.h"
-#include "index.h"
-#include "vsite.h"
-#include "update.h"
-#include "ns.h"
-#include "mdrun.h"
-#include "md_support.h"
-#include "md_logging.h"
-#include "network.h"
-#include "xvgr.h"
-#include "physics.h"
-#include "names.h"
-#include "force.h"
-#include "disre.h"
-#include "orires.h"
-#include "pme.h"
-#include "mdatoms.h"
-#include "repl_ex.h"
-#include "deform.h"
-#include "qmmm.h"
-#include "domdec.h"
-#include "domdec_network.h"
-#include "gromacs/gmxlib/topsort.h"
-#include "coulomb.h"
-#include "constr.h"
-#include "shellfc.h"
-#include "gromacs/gmxpreprocess/compute_io.h"
-#include "checkpoint.h"
-#include "mtop_util.h"
-#include "sighandler.h"
-#include "txtdump.h"
-#include "gromacs/utility/cstringutil.h"
-#include "pme_loadbal.h"
-#include "bondf.h"
-#include "membed.h"
-#include "types/nlistheuristics.h"
-#include "types/iteratedconstraints.h"
-#include "nbnxn_cuda_data_mgmt.h"
+#include "config.h"
+
+#include <stdlib.h>
-#include "gromacs/utility/gmxmpi.h"
#include "gromacs/fileio/confio.h"
+#include "gromacs/fileio/mdoutf.h"
#include "gromacs/fileio/trajectory_writing.h"
#include "gromacs/fileio/trnio.h"
#include "gromacs/fileio/trxio.h"
#include "gromacs/fileio/xtcio.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/gmxpreprocess/compute_io.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/legacyheaders/bonded-threading.h"
+#include "gromacs/legacyheaders/calcmu.h"
+#include "gromacs/legacyheaders/checkpoint.h"
+#include "gromacs/legacyheaders/constr.h"
+#include "gromacs/legacyheaders/coulomb.h"
+#include "gromacs/legacyheaders/disre.h"
+#include "gromacs/legacyheaders/domdec.h"
+#include "gromacs/legacyheaders/domdec_network.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/md_support.h"
+#include "gromacs/legacyheaders/mdatoms.h"
+#include "gromacs/legacyheaders/mdebin.h"
+#include "gromacs/legacyheaders/mdrun.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nrnb.h"
+#include "gromacs/legacyheaders/ns.h"
+#include "gromacs/legacyheaders/orires.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/qmmm.h"
+#include "gromacs/legacyheaders/shellfc.h"
+#include "gromacs/legacyheaders/sighandler.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/update.h"
+#include "gromacs/legacyheaders/vcm.h"
+#include "gromacs/legacyheaders/vsite.h"
+#include "gromacs/legacyheaders/types/iteratedconstraints.h"
+#include "gromacs/legacyheaders/types/nlistheuristics.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
+#include "gromacs/pbcutil/mshift.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/pulling/pull.h"
#include "gromacs/swap/swapcoords.h"
-#include "gromacs/imd/imd.h"
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
+
+#include "deform.h"
+#include "membed.h"
+#include "pme_loadbal.h"
+#include "repl_ex.h"
#ifdef GMX_FAHCORE
#include "corewrap.h"
gmx_int64_t *step_rel, t_inputrec *ir,
gmx_wallcycle_t wcycle, t_nrnb *nrnb,
gmx_walltime_accounting_t walltime_accounting,
- nbnxn_cuda_ptr_t cu_nbv)
+ struct nonbonded_verlet_t *nbv)
{
char sbuf[STEPSTRSIZE];
md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
gmx_step_str(step, sbuf));
- if (cu_nbv)
- {
- nbnxn_cuda_reset_timings(cu_nbv);
- }
+ nbnxn_cuda_reset_timings(nbv);
wallcycle_stop(wcycle, ewcRUN);
wallcycle_reset_all(wcycle);
rvec mu_tot;
t_vcm *vcm;
t_state *bufstate = NULL;
- matrix *scale_tot, pcoupl_mu, M, ebox;
+ matrix pcoupl_mu, M;
gmx_nlheur_t nlh;
t_trxframe rerun_fr;
gmx_repl_ex_t repl_ex = NULL;
gmx_ekindata_t *ekind, *ekind_save;
gmx_shellfc_t shellfc;
int count, nconverged = 0;
- real timestep = 0;
- double tcount = 0;
- gmx_bool bConverged = TRUE, bOK, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
- gmx_bool bAppend;
+ double tcount = 0;
+ gmx_bool bConverged = TRUE, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
gmx_bool bResetCountersHalfMaxH = FALSE;
gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter;
gmx_bool bUpdateDoLR;
double cycles;
real saved_conserved_quantity = 0;
real last_ekin = 0;
- int iter_i;
t_extmass MassQ;
int **trotter_seq;
char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
/* Check for special mdrun options */
bRerunMD = (Flags & MD_RERUN);
- bAppend = (Flags & MD_APPENDFILES);
if (Flags & MD_RESETCOUNTERSHALFWAY)
{
if (ir->nsteps > 0)
*/
if ((Flags & MD_TUNEPME) &&
EEL_PME(fr->eeltype) &&
- ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
+ ( use_GPU(fr->nbv) || !(cr->duty & DUTY_PME)) &&
!bRerunMD)
{
pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata);
bStateFromTPX = !bStateFromCP;
bInitStep = bFirstStep && (bStateFromTPX || bVV);
bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
- bLastStep = FALSE;
bSumEkinhOld = FALSE;
- bDoReplEx = FALSE;
bExchanged = FALSE;
bNeedRepartition = FALSE;
step = ir->init_step;
step_rel = 0;
- if (ir->nstlist == -1)
- {
- init_nlistheuristics(&nlh, bGStatEveryStep, step);
- }
+ init_nlistheuristics(&nlh, bGStatEveryStep, step);
if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
{
}
}
- bOK = TRUE;
if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */
{
wallcycle_stop(wcycle, ewcUPDATE);
/* Correct the virial for multiple time stepping */
m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
}
-
- if (!bOK)
- {
- gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
- }
-
}
else if (graph)
{
*/
copy_mat(state->box, lastbox);
- bOK = TRUE;
dvdl_constr = 0;
if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
FALSE, bCalcVir,
state->veta);
}
- if (!bOK)
- {
- gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains");
- }
- if (fr->bSepDVDL && fplog && do_log)
- {
- gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr);
- }
if (bVV)
{
/* this factor or 2 correction is necessary
}
dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
- fr->nbv->bUseGPU && DOMAINDECOMP(cr) &&
+ if (bPMETuneRunning &&
++ use_GPU(fr->nbv) && DOMAINDECOMP(cr) &&
+ !(cr->duty & DUTY_PME))
+ {
+ /* Lock DLB=auto to off (does nothing when DLB=yes/no).
+ * With GPUs + separate PME ranks, we don't want DLB.
+ * This could happen when we scan coarse grids and
+ * it would then never be turned off again.
+ * This would hurt performance at the final, optimal
+ * grid spacing, where DLB almost never helps.
+ * Also, DLB can limit the cut-off for PME tuning.
+ */
+ dd_dlb_set_lock(cr->dd, TRUE);
+ }
+
if (bPMETuneRunning || step_rel > ir->nstlist*50)
{
bPMETuneTry = FALSE;
{
calc_enervirdiff(NULL, ir->eDispCorr, fr);
}
+
+ if (!bPMETuneRunning &&
+ DOMAINDECOMP(cr) &&
+ dd_dlb_is_locked(cr->dd))
+ {
+ /* Unlock the DLB=auto, DLB is allowed to activate
+ * (but we don't expect it to activate in most cases).
+ */
+ dd_dlb_set_lock(cr->dd, FALSE);
+ }
}
cycles_pmes = 0;
}
{
/* Reset all the counters related to performance over the run */
reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting,
- fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
+ use_GPU(fr->nbv) ? fr->nbv : NULL);
wcycle_set_reset_counters(wcycle, -1);
if (!(cr->duty & DUTY_PME))
{
if (pme_loadbal != NULL)
{
pme_loadbal_done(pme_loadbal, cr, fplog,
- fr->nbv != NULL && fr->nbv->bUseGPU);
+ use_GPU(fr->nbv));
}
if (shellfc && fplog)
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include "gromacs/utility/smalloc.h"
-#include "types/commrec.h"
-#include "network.h"
-#include "calcgrid.h"
-#include "pme.h"
-#include "vec.h"
-#include "domdec.h"
-#include "nbnxn_cuda_data_mgmt.h"
-#include "force.h"
-#include "macros.h"
-#include "md_logging.h"
#include "pme_loadbal.h"
+#include "config.h"
+
+#include "gromacs/legacyheaders/calcgrid.h"
+#include "gromacs/legacyheaders/domdec.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/pme.h"
+#include "gromacs/legacyheaders/sim_util.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/smalloc.h"
+
/* Parameters and setting for one PP-PME setup */
typedef struct {
real rcut_coulomb; /* Coulomb cut-off */
while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing || !grid_ok);
set->rcut_coulomb = pme_lb->cut_spacing*sp;
+ if (set->rcut_coulomb < pme_lb->rcut_coulomb_start)
+ {
+ /* This is unlikely, but can happen when e.g. continuing from
+ * a checkpoint after equilibration where the box shrank a lot.
+ * We want to avoid rcoulomb getting smaller than rvdw
+ * and there might be more issues with decreasing rcoulomb.
+ */
+ set->rcut_coulomb = pme_lb->rcut_coulomb_start;
+ }
if (pme_lb->cutoff_scheme == ecutsVERLET)
{
pme_lb->cur = pme_lb->start - 1;
}
-gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
- t_commrec *cr,
- FILE *fp_err,
- FILE *fp_log,
- t_inputrec *ir,
- t_state *state,
- double cycles,
- interaction_const_t *ic,
- nonbonded_verlet_t *nbv,
- gmx_pme_t *pmedata,
- gmx_int64_t step)
+gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
+ t_commrec *cr,
+ FILE *fp_err,
+ FILE *fp_log,
+ t_inputrec *ir,
+ t_state *state,
+ double cycles,
+ interaction_const_t *ic,
+ struct nonbonded_verlet_t *nbv,
+ gmx_pme_t *pmedata,
+ gmx_int64_t step)
{
gmx_bool OK;
pme_setup_t *set;
}
bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
- if (pme_lb->cutoff_scheme == ecutsVERLET &&
- nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
- {
- nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic);
-
- /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
- * also sharing texture references. To keep the code simple, we don't
- * treat texture references as shared resources, but this means that
- * the coulomb_tab texture ref will get updated by multiple threads.
- * Hence, to ensure that the non-bonded kernels don't start before all
- * texture binding operations are finished, we need to wait for all ranks
- * to arrive here before continuing.
- *
- * Note that we could omit this barrier if GPUs are not shared (or
- * texture objects are used), but as this is initialization code, there
- * is not point in complicating things.
- */
+ nbnxn_cuda_pme_loadbal_update_param(nbv, ic);
+
+ /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
+ * also sharing texture references. To keep the code simple, we don't
+ * treat texture references as shared resources, but this means that
+ * the coulomb_tab texture ref will get updated by multiple threads.
+ * Hence, to ensure that the non-bonded kernels don't start before all
+ * texture binding operations are finished, we need to wait for all ranks
+ * to arrive here before continuing.
+ *
+ * Note that we could omit this barrier if GPUs are not shared (or
+ * texture objects are used), but as this is initialization code, there
+ * is not point in complicating things.
+ */
#ifdef GMX_THREAD_MPI
- if (PAR(cr))
- {
- gmx_barrier(cr);
- }
-#endif /* GMX_THREAD_MPI */
+ if (PAR(cr) && use_GPU(nbv))
+ {
+ gmx_barrier(cr);
}
+#endif /* GMX_THREAD_MPI */
/* Usually we won't need the simple tables with GPUs.
* But we do with hybrid acceleration and with free energy.