From: Mark Abraham Date: Wed, 1 Oct 2014 20:02:02 +0000 (+0200) Subject: Merge release-5-0 into master X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=19d3c2e5d0c401eb59010960d11a18b6ba2c54c6;hp=-c;p=alexxy%2Fgromacs.git Merge release-5-0 into master Conflicts: CMakeLists.txt Version numbers not bumped; fixed to use the right name for RelWithDebInfo. cmake/gmxCFlags.cmake Fixed to use the right name for RelWithDebInfo. src/gromacs/listed-forces/bonded.cpp New RB SIMD function in bonded.cpp had unused variables, now eliminated src/gromacs/mdlib/domdec.cpp Bug fixes from release-5-0 incorporated. std::max now used in code newly arrived from release-5-0. md.cpp had no conflict, but fr->nbv->bUseGPU had to be replaced by use_GPU(fr->nbv) to work in master branch. Change-Id: I65326b691745111fbdaa9435be6c92fa1acf6e7d --- 19d3c2e5d0c401eb59010960d11a18b6ba2c54c6 diff --combined CMakeLists.txt index 70185949c4,53498532af..7fbe947522 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@@ -48,28 -48,75 +48,28 @@@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CM set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) -# PROJECT_VERSION should have the following structure: -# VERSION-dev[-SUFFIX] where the VERSION should have the for: vMajor.vMinor.vPatch -# -# The "-dev" suffix is important to keep because it makes possible to distinguish -# between a build from official release and a build from git release branch on a -# machine with no git. -# -# NOTE: when releasing the "-dev" suffix needs to be stripped off! -# REGRESSIONTEST_VERSION and REGRESSIONTEST_BRANCH should always be -# defined. -set(PROJECT_VERSION "5.0.3-dev") -# If this is a released tarball, "-dev" will not be present in -# PROJECT_VERSION, and REGRESSIONTEST_VERSION specifies the version -# number of the regressiontest tarball against which the code tarball -# can be tested. This will be the version of the last patch release. -set(REGRESSIONTEST_VERSION "5.0.3-dev") -# The MD5 checksum of the regressiontest tarball. Only used if "-dev" -# is not present in the PROJECT_VERSION -set(REGRESSIONTEST_MD5SUM "004f2377adf252e2caa9c241390d4b9c") -# If this is not a released tarball, "-dev" will be present in -# PROJECT_VERSION, and REGRESSIONTEST_BRANCH specifies the name of the -# gerrit.gromacs.org branch whose HEAD can test this code, *if* this -# code contains all recent fixes from the corresponding code branch. -set(REGRESSIONTEST_BRANCH "refs/heads/release-5-0") - -set(CUSTOM_VERSION_STRING "" - CACHE STRING "Custom version string (if empty, use hard-coded default)") -mark_as_advanced(CUSTOM_VERSION_STRING) -if (CUSTOM_VERSION_STRING) - set(PROJECT_VERSION ${CUSTOM_VERSION_STRING}) -endif() -set(LIBRARY_SOVERSION 0) -set(LIBRARY_VERSION ${LIBRARY_SOVERSION}.0.0) -# It is a bit irritating, but this has to be set separately for now! -SET(CPACK_PACKAGE_VERSION_MAJOR "5") -SET(CPACK_PACKAGE_VERSION_MINOR "0") -#SET(CPACK_PACKAGE_VERSION_PATCH "0") - -# The numerical gromacs version. It is 40600 for 4.6.0. -# The #define GMX_VERSION in gromacs/version.h is set to this value. -math(EXPR NUM_VERSION - "${CPACK_PACKAGE_VERSION_MAJOR}*10000 + ${CPACK_PACKAGE_VERSION_MINOR}*100") -if(CPACK_PACKAGE_VERSION_PATCH) - math(EXPR NUM_VERSION - "${NUM_VERSION} + ${CPACK_PACKAGE_VERSION_PATCH}") -endif() - -# The API version tracks the numerical Gromacs version (for now). -# It is potentially different from the Gromacs version in the future, if -# the programs/libraries diverge from the presumably more stable API. -# The #define GMX_API_VERSION in version.h is set to this value to -# provide backward compatibility of software written against the Gromacs API. -set(API_VERSION ${NUM_VERSION}) +# Set up common version variables, as well as general information about +# the build tree (whether the build is from a source package or from a git +# repository). Also declares a few functions that will be used for generating +# version info files later. +include(gmxVersionInfo) if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND UNIX) set(CMAKE_INSTALL_PREFIX "/usr/local/gromacs" CACHE STRING "Installation prefix (installation will need write permissions here)" FORCE) endif() include(gmxBuildTypeReference) +include(gmxBuildTypeProfile) include(gmxBuildTypeTSAN) include(gmxBuildTypeASAN) include(gmxBuildTypeReleaseWithAssert) if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert." FORCE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel Reference RelWithAssert Profile." FORCE) # There's no need to offer a user the choice of ThreadSanitizer # Set the possible values of build type for cmake-gui set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" - "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert") + "MinSizeRel" "RelWithDebInfo" "Reference" "RelWithAssert" "Profile") endif() if(CMAKE_CONFIGURATION_TYPES) # Add appropriate GROMACS-specific build types for the Visual @@@ -81,10 -128,34 +81,10 @@@ "List of configuration types" FORCE) endif() - set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE) -set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBINFO RELWITHASSERT MINSIZEREL) ++set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBINFO RELWITHASSERT MINSIZEREL PROFILE) -enable_language(C) -enable_language(CXX) set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON) -set(CPACK_PACKAGE_NAME "gromacs") -set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION}) -set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}") -set(CPACK_PACKAGE_VENDOR "gromacs.org") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Gromacs - a toolkit for high-performance molecular simulation") -set(CPACK_RESOURCE_FILE_WELCOME "${CMAKE_SOURCE_DIR}/admin/InstallWelcome.txt") -# Its GPL/LGPL, so they do not have to agree to a license for mere usage, but some installers require this... -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING") -set(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/admin/InstallInfo.txt") -set(CPACK_SOURCE_IGNORE_FILES "\\\\.isreposource$;\\\\.git/;\\\\.gitignore$;\\\\.gitattributes;") -set(CPACK_PROJECT_CONFIG_FILE "${CMAKE_SOURCE_DIR}/CPackInit.cmake") -# CPack source archives include only the directories we list here. -# This variable is a list of pairs of names of source and destination -# directories. Most of these are used for content GROMACS generates as -# part of the configuration or build. -set(CPACK_SOURCE_INSTALLED_DIRECTORIES "${CMAKE_SOURCE_DIR};/;${CMAKE_BINARY_DIR}/src/programs/completion;src/programs/completion;${CMAKE_BINARY_DIR}/docs/man/man1;docs/man/man1;${CMAKE_BINARY_DIR}/docs/man/man7;docs/man/man7;${CMAKE_BINARY_DIR}/docs/old-html/final;docs/old-html/final;${CMAKE_BINARY_DIR}/docs/install-guide/final;/") -set(CPACK_PACKAGE_CONTACT "gmx-users@gromacs.org") -set(CPACK_GMX_BUILD_HELP "${GMX_BUILD_HELP}") #Works even though GMX_BUILD_HELP is defined later because it is off by default. - -#must come after all cpack settings! -include(CPack) - # Set a default valgrind suppression file. # This unfortunately needs to duplicate information from CTest to work as # expected... @@@ -94,12 -165,14 +94,12 @@@ set(MEMORYCHECK_SUPPRESSIONS_FIL "File that contains suppressions for the memory checker") include(CTest) -set(SOURCE_IS_GIT_REPOSITORY OFF) -set(SOURCE_IS_SOURCE_DISTRIBUTION OFF) -if(EXISTS "${CMAKE_SOURCE_DIR}/.git") - set(SOURCE_IS_GIT_REPOSITORY ON) -endif() -if(NOT EXISTS "${CMAKE_SOURCE_DIR}/admin/.isreposource") - set(SOURCE_IS_SOURCE_DISTRIBUTION ON) -endif() +include(gmxCPackUtilities) +gmx_cpack_init() + +# Variables that accumulate stuff influencing the installed headers +set(INSTALLED_HEADER_INCLUDE_DIRS "") +set(INSTALLED_HEADER_DEFINITIONS "") ######################################################################## # Check and warn if cache generated on a different host is being reused @@@ -190,7 -263,7 +190,7 @@@ gmx_option_multichoice GMX_SIMD "SIMD instruction set for CPU kernels and compiler optimization" "${GMX_SUGGESTED_SIMD}" - None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 IBM_QPX Sparc64_HPC_ACE Reference) + None SSE2 SSE4.1 AVX_128_FMA AVX_256 AVX2_256 ARM_NEON ARM_NEON_ASIMD IBM_QPX IBM_VMX IBM_VSX Sparc64_HPC_ACE Reference) gmx_option_multichoice( GMX_FFT_LIBRARY @@@ -231,6 -304,9 +231,6 @@@ option(GMX_OPENMP "Enable OpenMP-based option(GMX_USE_TNG "Use the TNG library for trajectory I/O" ON) -option(GMX_GIT_VERSION_INFO "Generate git version information" ${SOURCE_IS_GIT_REPOSITORY}) -mark_as_advanced(GMX_GIT_VERSION_INFO) - if(UNIX) option(GMX_SYMLINK_OLD_BINARY_NAMES "Create symbolic links for pre-5.0 binary names" ON) endif() @@@ -304,15 -380,17 +304,15 @@@ if(GMX_SIMD STREQUAL "AVX_256 endif() - -set(PKG_CFLAGS "") if(GMX_DOUBLE) add_definitions(-DGMX_DOUBLE) - set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_DOUBLE") + list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_DOUBLE") if(GMX_RELAXED_DOUBLE_PRECISION) add_definitions(-DGMX_RELAXED_DOUBLE_PRECISION) endif() endif() if(GMX_SOFTWARE_INVSQRT) - set(PKG_CFLAGS "${PKG_CFLAGS} -DGMX_SOFTWARE_INVSQRT") + list(APPEND INSTALLED_HEADER_DEFINITIONS "-DGMX_SOFTWARE_INVSQRT") endif() if(WIN32 AND NOT CYGWIN) @@@ -477,6 -555,25 +477,6 @@@ if(WIN32 AND NOT CYGWIN add_definitions(-DNOMINMAX) endif() -# only bother with finding git and using version.h if the source is a git repo -if(GMX_GIT_VERSION_INFO) - if (NOT SOURCE_IS_GIT_REPOSITORY) - message(FATAL_ERROR - "Cannot generate git version information from source tree not under git. " - "Set GMX_GIT_VERSION_INFO=OFF to proceed.") - endif() - # We need at least git v1.5.3 be able to parse git's date output. If not - # found or the version is too small, we can't generate version information. - find_package(Git) - - if(NOT GIT_FOUND OR GIT_VERSION_STRING VERSION_LESS "1.5.3") - message(FATAL_ERROR - "No compatible git version found (>= 1.5.3 required). " - "Won't be able to generate development version information. " - "Set GMX_GIT_VERSION_INFO=OFF to proceed.") - endif() -endif() - # Detect boost unless GMX_EXTERNAL_BOOST is explicitly OFF # Used for default if GMX_EXTERNAL_BOOST is not defined (first CMake pass) if(NOT DEFINED GMX_EXTERNAL_BOOST OR GMX_EXTERNAL_BOOST) @@@ -519,6 -616,12 +519,6 @@@ if(GMX_USE_TNG AND NOT GMX_EXTERNAL_TNG gmx_test_zlib(HAVE_ZLIB) endif() -######################################################################## -# Generate development version info for cache -######################################################################## -# set(GEN_VERSION_INFO_INTERNAL "ON") -# include(gmxGenerateVersionString) - ######################################################################## # Our own GROMACS tests ######################################################################## @@@ -528,6 -631,10 +528,6 @@@ include_directories(BEFORE ${CMAKE_SOUR include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/include) # Required for config.h, maybe should only be set in src/CMakeLists.txt include_directories(BEFORE ${CMAKE_BINARY_DIR}/src) -# Required for gmx_header_config_gen.h to be found before installation -include_directories(BEFORE ${CMAKE_BINARY_DIR}/src/gromacs/utility) -# Required for now to make old code compile -include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders) include(gmxTestInlineASM) gmx_test_inline_asm_gcc_x86(GMX_X86_GCC_INLINE_ASM) @@@ -682,18 -789,14 +682,18 @@@ if(GMX_FAHCORE include_directories(${COREWRAP_INCLUDE_DIR}) endif() -option(GMX_BUILD_HELP "Build man pages, HTML help, and completions automatically (requires that compiled binaries can be executed on the build host)" OFF) +# Value of AUTO tries to generate things, but will only produce warnings if +# that fails. +set(build_help_default AUTO) +if (SOURCE_IS_SOURCE_DISTRIBUTION) + set(build_help_default OFF) +endif() +gmx_option_trivalue(GMX_BUILD_HELP "Build man pages, HTML help, and completions automatically (requires that compiled binaries can be executed on the build host)" ${build_help_default}) mark_as_advanced(GMX_BUILD_HELP) -if (GMX_BUILD_HELP AND SOURCE_IS_SOURCE_DISTRIBUTION AND - "${CMAKE_CURRENT_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}") - +if (GMX_BUILD_HELP AND SOURCE_IS_SOURCE_DISTRIBUTION AND BUILD_IS_INSOURCE) message(FATAL_ERROR - "Rebuilding HTML and man pages is not supported for in-source " - "builds from a source distribution. " + "Rebuilding HTML and man pages or shell completions is not supported " + "for in-source builds from a source distribution. " "Set GMX_BUILD_HELP=OFF or do an out-of-source build to proceed.") endif() @@@ -725,6 -828,7 +725,6 @@@ else( set(GMX_EXE_LINKER_FLAGS ${GMX_EXE_LINKER_FLAGS} ${OpenMP_LINKER_FLAGS}) set(GMX_SHARED_LINKER_FLAGS ${GMX_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}) endif() -set(PKG_CFLAGS "${PKG_CFLAGS} ${OpenMP_C_FLAGS}") ######################################################################## # Specify install locations @@@ -741,24 -845,17 +741,24 @@@ mark_as_advanced(GMX_LIB_INSTALL_DIR GM # These variables are used internally to provide a central location for # customizing the install locations. -set(LIB_INSTALL_DIR ${GMX_LIB_INSTALL_DIR}) -set(BIN_INSTALL_DIR bin) -set(DATA_INSTALL_DIR share/${GMX_DATA_INSTALL_DIR}) -set(MAN_INSTALL_DIR share/man) -set(INCL_INSTALL_DIR include) +set(LIB_INSTALL_DIR ${GMX_LIB_INSTALL_DIR}) +set(BIN_INSTALL_DIR bin) +set(DATA_INSTALL_DIR share/${GMX_DATA_INSTALL_DIR}) +set(MAN_INSTALL_DIR share/man) +# If the nesting level wrt. the installation root is changed, +# gromacs-config.cmake.cmakein needs to be adapted. +set(CMAKE_INSTALL_DIR share/cmake) +# TODO: Make GMXRC adapt if this is changed +set(PKGCONFIG_INSTALL_DIR ${LIB_INSTALL_DIR}/pkgconfig) +set(INCL_INSTALL_DIR include) # These variables get written into config.h for use in finding the data # directories. set(GMXLIB_SEARCH_DIR share/${GMX_DATA_INSTALL_DIR}/top) set(GMXLIB_FALLBACK ${CMAKE_INSTALL_PREFIX}/${DATA_INSTALL_DIR}/top) +list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${INCL_INSTALL_DIR}) + # Binary and library suffix options include(gmxManageSuffixes) @@@ -804,14 -901,15 +804,14 @@@ install(FILES COPYING DESTINATION ${DAT if(GMX_EXTERNAL_BOOST) include_directories(${Boost_INCLUDE_DIRS}) - set(PKG_CFLAGS "${PKG_CFLAGS} -I${Boost_INCLUDE_DIRS}") + list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${Boost_INCLUDE_DIRS}) else() include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/boost) + list(APPEND INSTALLED_HEADER_INCLUDE_DIRS ${INCL_INSTALL_DIR}/gromacs/external/boost) + list(APPEND INSTALLED_HEADED_DEFINITIONS "-DBOOST_NO_TYPEID") # typeid not supported for minimal internal version # (would add significant amount of code) add_definitions(-DBOOST_NO_TYPEID) - # TODO: Propagate the above settings to the installed CMakeFiles.txt template - # (from share/template/) - set(PKG_CFLAGS "${PKG_CFLAGS} -DBOOST_NO_TYPEID -I${CMAKE_INSTALL_PREFIX}/${INCL_INSTALL_DIR}/gromacs/external/boost") if (NOT GMX_BUILD_MDRUN_ONLY) install(DIRECTORY ${CMAKE_SOURCE_DIR}/src/external/boost/boost DESTINATION ${INCL_INSTALL_DIR}/gromacs/external/boost @@@ -854,8 -952,6 +854,8 @@@ if (BUILD_TESTING add_subdirectory(tests) endif() +gmx_cpack_write_config() + # Issue a warning if NVIDIA GPUs were detected, but CUDA was not found. # Don't bother the user after the first configure pass. if ((CUDA_NOTFOUND_AUTO AND GMX_DETECT_GPU_AVAILABLE) AND NOT GMX_GPU_DETECTION_DONE) diff --combined cmake/gmxCFlags.cmake index 011cada36e,124107b65c..3a0e3bbe15 --- a/cmake/gmxCFlags.cmake +++ b/cmake/gmxCFlags.cmake @@@ -64,7 -64,7 +64,7 @@@ function(gmx_set_cmake_compiler_flags # be set up elsewhere and passed to this function, but it is # inconvenient in CMake to pass more than one list, and such a # list is only used here. - foreach(build_type RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE) - foreach(build_type RELWITHDEBINFO RELWITHASSERT MINSIZEREL) ++ foreach(build_type RELWITHDEBINFO RELWITHASSERT MINSIZEREL PROFILE) set(GMXC_${language}FLAGS_${build_type} "${GMXC_${language}FLAGS_RELEASE}") endforeach() # Copy the flags that are only used by the real Release build @@@ -85,9 -85,12 +85,12 @@@ endif() # Append to the variables for the given build type for - # each language, in the parent scope. + # each language, in the parent scope. We add our new variables at the end, so + # compiler-specific choices are more likely to override default CMake choices. + # This is for instance useful for RelWithDebInfo builds, where we want to use the full + # set of our optimization flags detected in this file, rather than having -O2 override them. set(CMAKE_${language}_FLAGS${punctuation}${build_type} - "${GMXC_${language}FLAGS${punctuation}${build_type}} ${CMAKE_${language}_FLAGS${punctuation}${build_type}}" + "${CMAKE_${language}_FLAGS${punctuation}${build_type}} ${GMXC_${language}FLAGS${punctuation}${build_type}}" PARENT_SCOPE) endforeach() endforeach() @@@ -110,12 -113,14 +113,12 @@@ MACRO(gmx_c_flags # Since 4.8 on by default. For previous version disabling is a no-op. Only disabling for Release because with assert # the warnings are OK. GMX_TEST_CFLAG(CFLAGS_WARN_REL "-Wno-array-bounds" GMXC_CFLAGS_RELEASE_ONLY) - # Since gcc 4.8 strict - false postives with old gmx_fatal. TODO: Remove in master - GMX_TEST_CFLAG(CFLAGS_WARN_UNINIT "-Wno-maybe-uninitialized" GMXC_CFLAGS) if(CYGWIN) GMX_TEST_CFLAG(CFLAGS_WARN_SUBSCRIPT "-Wno-char-subscripts" GMXC_CFLAGS) endif() # new in gcc 4.5 GMX_TEST_CFLAG(CFLAGS_EXCESS_PREC "-fexcess-precision=fast" GMXC_CFLAGS_RELEASE) - GMX_TEST_CFLAG(CFLAGS_COPT "-fomit-frame-pointer -funroll-all-loops" + GMX_TEST_CFLAG(CFLAGS_COPT "-funroll-all-loops" GMXC_CFLAGS_RELEASE) GMX_TEST_CFLAG(CFLAGS_NOINLINE "-fno-inline" GMXC_CFLAGS_DEBUG) endif() @@@ -131,7 -136,7 +134,7 @@@ GMX_TEST_CFLAG(CXXFLAGS_WARN_REL "-Wno-array-bounds" GMXC_CXXFLAGS_RELEASE_ONLY) # new in gcc 4.5 GMX_TEST_CXXFLAG(CXXFLAGS_EXCESS_PREC "-fexcess-precision=fast" GMXC_CXXFLAGS_RELEASE) - GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-fomit-frame-pointer -funroll-all-loops" + GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-funroll-all-loops" GMXC_CXXFLAGS_RELEASE) GMX_TEST_CXXFLAG(CXXFLAGS_NOINLINE "-fno-inline" GMXC_CXXFLAGS_DEBUG) endif() @@@ -148,7 -153,9 +151,7 @@@ GMX_TEST_CFLAG(CFLAGS_PRAGMA "-wd161" GMXC_CFLAGS) endif() endif() -# 111: statement is unreachable -# 177: function ".." was declared but never referenced -# 181: argument of type ".." is incompatible with format "..", expecting argument of type ".." +# 177: function/variable ".." was declared but never referenced # 193: zero used for undefined preprocessing identifier ".." # 271: trailing comma is nonstandard # 304: access control not specified ("public" by default) @@@ -171,7 -178,7 +174,7 @@@ #3346: dynamic exception specifications are deprecated #11074: Inlining inhibited by limit max-size(/max-total-size) #11076: To get full report use -opt-report=3 -opt-report-phase ipo (shown for previous remark) - GMX_TEST_CFLAG(CFLAGS_WARN "-w3 -wd111 -wd177 -wd181 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076" GMXC_CFLAGS) + GMX_TEST_CFLAG(CFLAGS_WARN "-w3 -wd177 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076" GMXC_CFLAGS) GMX_TEST_CFLAG(CFLAGS_STDGNU "-std=gnu99" GMXC_CFLAGS) GMX_TEST_CFLAG(CFLAGS_OPT "-ip -funroll-all-loops -alias-const -ansi-alias" GMXC_CFLAGS_RELEASE) else() @@@ -182,7 -189,7 +185,7 @@@ GMX_TEST_CFLAG(CFLAGS_PRAGMA "/wd161" GMXC_CFLAGS) endif() endif() - GMX_TEST_CFLAG(CFLAGS_WARN "/W3 /wd111 /wd177 /wd181 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346" GMXC_CFLAGS) + GMX_TEST_CFLAG(CFLAGS_WARN "/W3 /wd177 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346" GMXC_CFLAGS) GMX_TEST_CFLAG(CFLAGS_OPT "/Qip" GMXC_CFLAGS_RELEASE) endif() endif() @@@ -199,7 -206,7 +202,7 @@@ #All but the following warnings are identical for the C-compiler (see above) #1782: #pragma once is obsolete #2282: unrecognized GCC pragma - GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-w3 -wd111 -wd177 -wd181 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076 -wd1782 -wd2282" GMXC_CXXFLAGS) + GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-w3 -wd177 -wd193 -wd271 -wd304 -wd383 -wd424 -wd444 -wd522 -wd593 -wd869 -wd981 -wd1418 -wd1419 -wd1572 -wd1599 -wd2259 -wd2415 -wd2547 -wd2557 -wd3280 -wd3346 -wd11074 -wd11076 -wd1782 -wd2282" GMXC_CXXFLAGS) GMX_TEST_CXXFLAG(CXXFLAGS_OPT "-ip -funroll-all-loops -alias-const -ansi-alias" GMXC_CXXFLAGS_RELEASE) else() if(NOT GMX_OPENMP) @@@ -209,7 -216,7 +212,7 @@@ GMX_TEST_CXXFLAG(CXXFLAGS_PRAGMA "/wd161" GMXC_CXXFLAGS) endif() endif() - GMX_TEST_CXXFLAG(CXXFLAGS_WARN "/W3 /wd111 /wd177 /wd181 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346 /wd1782 /wd2282" GMXC_CXXFLAGS) + GMX_TEST_CXXFLAG(CXXFLAGS_WARN "/W3 /wd177 /wd193 /wd271 /wd304 /wd383 /wd424 /wd444 /wd522 /wd593 /wd869 /wd981 /wd1418 /wd1419 /wd1572 /wd1599 /wd2259 /wd2415 /wd2547 /wd2557 /wd3280 /wd3346 /wd1782 /wd2282" GMXC_CXXFLAGS) GMX_TEST_CXXFLAG(CXXFLAGS_OPT "/Qip" GMXC_CXXFLAGS_RELEASE) endif() endif() diff --combined src/gromacs/gmxana/gmx_tune_pme.c index b9c2af3e71,a437803932..c4a7e6e021 --- a/src/gromacs/gmxana/gmx_tune_pme.c +++ b/src/gromacs/gmxana/gmx_tune_pme.c @@@ -32,36 -32,35 +32,36 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "config.h" +#include #include + #ifdef HAVE_SYS_TIME_H #include #endif #include "gromacs/commandline/pargs.h" -#include "typedefs.h" -#include "types/commrec.h" -#include "gromacs/utility/smalloc.h" -#include "vec.h" -#include "copyrite.h" #include "gromacs/fileio/tpxio.h" -#include "gromacs/utility/cstringutil.h" -#include "readinp.h" -#include "calcgrid.h" -#include "checkpoint.h" -#include "macros.h" -#include "gmx_ana.h" -#include "names.h" -#include "perf_est.h" -#include "inputrec.h" -#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/gmxana/gmx_ana.h" +#include "gromacs/legacyheaders/calcgrid.h" +#include "gromacs/legacyheaders/checkpoint.h" +#include "gromacs/legacyheaders/inputrec.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/perf_est.h" +#include "gromacs/legacyheaders/readinp.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/math/utilities.h" - -#include "gmx_fatal.h" +#include "gromacs/math/vec.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/utility/baseversion.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" /* Enum for situations that can occur during log file parsing, the * corresponding string entries can be found in do_the_tests() in @@@ -144,6 -143,29 +144,6 @@@ static void cleandata(t_perf *perfdata } -static gmx_bool is_equal(real a, real b) -{ - real diff, eps = 1.0e-7; - - - diff = a - b; - - if (diff < 0.0) - { - diff = -diff; - } - - if (diff < eps) - { - return TRUE; - } - else - { - return FALSE; - } -} - - static void remove_if_exists(const char *fn) { if (gmx_fexist(fn)) @@@ -531,8 -553,8 +531,8 @@@ static gmx_bool analyze_data fprintf(fp, "\n"); /* Only mention settings if they were modified: */ - bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]); - bRefinedVdW = !is_equal(info->rvdw[k_win], info->rvdw[0] ); + bRefinedCoul = !gmx_within_tol(info->rcoulomb[k_win], info->rcoulomb[0], GMX_REAL_EPS); + bRefinedVdW = !gmx_within_tol(info->rvdw[k_win], info->rvdw[0], GMX_REAL_EPS); bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] && info->nky[k_win] == info->nky[0] && info->nkz[k_win] == info->nkz[0]); @@@ -594,25 -616,25 +594,25 @@@ static void get_program_paths(gmx_bool { if ( (cp = getenv("MPIRUN")) != NULL) { - *cmd_mpirun = strdup(cp); + *cmd_mpirun = gmx_strdup(cp); } else { - *cmd_mpirun = strdup(def_mpirun); + *cmd_mpirun = gmx_strdup(def_mpirun); } } else { - *cmd_mpirun = strdup(empty_mpirun); + *cmd_mpirun = gmx_strdup(empty_mpirun); } if ( (cp = getenv("MDRUN" )) != NULL) { - *cmd_mdrun = strdup(cp); + *cmd_mdrun = gmx_strdup(cp); } else { - *cmd_mdrun = strdup(def_mdrun); + *cmd_mdrun = gmx_strdup(def_mdrun); } } @@@ -964,11 -986,11 +964,11 @@@ static void make_benchmark_tprs { /* Determine which Coulomb radii rc to use in the benchmarks */ add = (rmax-rmin)/(*ntprs-1); - if (is_equal(rmin, info->rcoulomb[0])) + if (gmx_within_tol(rmin, info->rcoulomb[0], GMX_REAL_EPS)) { ir->rcoulomb = rmin + j*add; } - else if (is_equal(rmax, info->rcoulomb[0])) + else if (gmx_within_tol(rmax, info->rcoulomb[0], GMX_REAL_EPS)) { ir->rcoulomb = rmin + (j-1)*add; } @@@ -1068,8 -1090,8 +1068,8 @@@ fprintf(fp, " %-14s\n", fn_bench_tprs[j]); /* Make it clear to the user that some additional settings were modified */ - if (!is_equal(ir->rvdw, info->rvdw[0]) - || !is_equal(ir->rlistlong, info->rlistlong[0]) ) + if (!gmx_within_tol(ir->rvdw, info->rvdw[0], GMX_REAL_EPS) + || !gmx_within_tol(ir->rlistlong, info->rlistlong[0], GMX_REAL_EPS) ) { bNote = TRUE; } @@@ -1308,9 -1330,12 +1308,12 @@@ static void make_sure_it_runs(char *mdr { /* To prevent confusion, do not again issue a gmx_fatal here since we already * get the error message from mdrun itself */ - sprintf(msg, "Cannot run the benchmark simulations! Please check the error message of\n" + sprintf(msg, + "Cannot run the first benchmark simulation! Please check the error message of\n" "mdrun for the source of the problem. Did you provide a command line\n" - "argument that neither g_tune_pme nor mdrun understands? Offending command:\n" + "argument that neither gmx tune_pme nor mdrun understands? If you're\n" + "sure your command line should work, you can bypass this check with \n" + "gmx tune_pme -nocheck. The failing command was:\n" "\n%s\n\n", command); fprintf(stderr, "%s", msg); @@@ -1341,7 -1366,7 +1344,7 @@@ static void do_the_tests int npme_fixed, /* If >= -1, test fixed number of PME * nodes only */ const char *npmevalues_opt, /* Which -npme values should be tested */ - t_perf **perfdata, /* Here the performace data is stored */ + t_perf **perfdata, /* Here the performance data is stored */ int *pmeentries, /* Entries in the nPMEnodes list */ int repeats, /* Repeat each test this often */ int nnodes, /* Total number of nodes = nPP + nPME */ @@@ -1354,7 -1379,8 +1357,8 @@@ const t_filenm *fnm, /* List of filenames from command line */ int nfile, /* Number of files specified on the cmdl. */ int presteps, /* DLB equilibration steps, is checked */ - gmx_int64_t cpt_steps) /* Time step counter in the checkpoint */ + gmx_int64_t cpt_steps, /* Time step counter in the checkpoint */ + gmx_bool bCheck) /* Check whether benchmark mdrun works */ { int i, nr, k, ret, count = 0, totaltests; int *nPMEnodes = NULL; @@@ -1454,8 -1480,11 +1458,11 @@@ cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench); /* To prevent that all benchmarks fail due to a show-stopper argument - * on the mdrun command line, we make a quick check first */ - if (bFirst) + * on the mdrun command line, we make a quick check first. + * This check can be turned off in cases where the automatically chosen + * number of PME-only ranks leads to a number of PP ranks for which no + * decomposition can be found (e.g. for large prime numbers) */ + if (bFirst && bCheck) { make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp, fnm, nfile); } @@@ -1631,13 -1660,13 +1638,13 @@@ static void check_input /* Add test scenarios if rmin or rmax were set */ if (*ntprs <= 2) { - if (!is_equal(*rmin, rcoulomb) && (*ntprs == 1) ) + if (!gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) && (*ntprs == 1) ) { (*ntprs)++; fprintf(stderr, "NOTE: Setting -rmin to %g changed -ntpr to %d\n", *rmin, *ntprs); } - if (!is_equal(*rmax, rcoulomb) && (*ntprs == 1) ) + if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) && (*ntprs == 1) ) { (*ntprs)++; fprintf(stderr, "NOTE: Setting -rmax to %g changed -ntpr to %d\n", @@@ -1646,13 -1675,13 +1653,13 @@@ } old = *ntprs; /* If one of rmin, rmax is set, we need 2 tpr files at minimum */ - if (!is_equal(*rmax, rcoulomb) || !is_equal(*rmin, rcoulomb) ) + if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) || !gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) ) { *ntprs = max(*ntprs, 2); } /* If both rmin, rmax are set, we need 3 tpr files at minimum */ - if (!is_equal(*rmax, rcoulomb) && !is_equal(*rmin, rcoulomb) ) + if (!gmx_within_tol(*rmax, rcoulomb, GMX_REAL_EPS) && !gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) ) { *ntprs = max(*ntprs, 3); } @@@ -1664,7 -1693,7 +1671,7 @@@ if (*ntprs > 1) { - if (is_equal(*rmin, rcoulomb) && is_equal(rcoulomb, *rmax)) /* We have just a single rc */ + if (gmx_within_tol(*rmin, rcoulomb, GMX_REAL_EPS) && gmx_within_tol(rcoulomb, *rmax, GMX_REAL_EPS)) /* We have just a single rc */ { fprintf(stderr, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n" "Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n" @@@ -2016,6 -2045,9 +2023,9 @@@ int gmx_tune_pme(int argc, char *argv[] "need to provide a machine- or hostfile. This can also be passed", "via the MPIRUN variable, e.g.[PAR]", "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]", + "Before doing the actual benchmark runs, [THISMODULE] will do a quick", + "check whether mdrun works as expected with the provided parallel settings", + "if the [TT]-check[tt] option is activated (the default).", "Please call [THISMODULE] with the normal options you would pass to", "[gmx-mdrun] and add [TT]-np[tt] for the number of ranks to perform the", "tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]", @@@ -2025,7 -2057,7 +2035,7 @@@ "written with enlarged cutoffs and smaller Fourier grids respectively.", "Typically, the first test (number 0) will be with the settings from the input", "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff", - "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ", + "specified by [TT]-rmax[tt] with a somewhat smaller PME grid at the same time. ", "In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ", "The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier " "spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1", @@@ -2035,7 -2067,7 +2045,7 @@@ "MD systems. The dynamic load balancing needs about 100 time steps", "to adapt to local load imbalances, therefore the time step counters", "are by default reset after 100 steps. For large systems (>1M atoms), as well as ", - "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.", + "for a higher accuracy of the measurements, you should set [TT]-resetstep[tt] to a higher value.", "From the 'DD' load imbalance entries in the md.log output file you", "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]" "[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]", @@@ -2092,9 -2124,9 +2102,9 @@@ /* g_tune_pme */ { efOUT, "-p", "perf", ffWRITE }, { efLOG, "-err", "bencherr", ffWRITE }, - { efTPX, "-so", "tuned", ffWRITE }, + { efTPR, "-so", "tuned", ffWRITE }, /* mdrun: */ - { efTPX, NULL, NULL, ffREAD }, + { efTPR, NULL, NULL, ffREAD }, { efTRN, "-o", NULL, ffWRITE }, { efCOMPRESSED, "-x", NULL, ffOPTWR }, { efCPT, "-cpi", NULL, ffOPTRD }, @@@ -2162,6 -2194,7 +2172,7 @@@ gmx_bool bKeepAndNumCPT = FALSE; gmx_bool bResetCountersHalfWay = FALSE; gmx_bool bBenchmark = TRUE; + gmx_bool bCheck = TRUE; output_env_t oenv = NULL; @@@ -2205,6 -2238,8 +2216,8 @@@ "Launch the real simulation after optimization" }, { "-bench", FALSE, etBOOL, {&bBenchmark}, "Run the benchmarks or just create the input [TT].tpr[tt] files?" }, + { "-check", FALSE, etBOOL, {&bCheck}, + "Before the benchmark runs, check whether mdrun works in parallel" }, /******************/ /* mdrun options: */ /******************/ @@@ -2364,8 -2399,7 +2377,8 @@@ sep_line(fp); fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n"); sep_line(fp); - fprintf(fp, "%s for Gromacs %s\n", ShortProgram(), GromacsVersion()); + fprintf(fp, "%s for Gromacs %s\n", output_env_get_program_display_name(oenv), + gmx_version()); if (!bThreads) { fprintf(fp, "Number of ranks : %d\n", nnodes); @@@ -2457,7 -2491,7 +2470,7 @@@ { do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries, repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun, - cmd_args_bench, fnm, NFILE, presteps, cpt_steps); + cmd_args_bench, fnm, NFILE, presteps, cpt_steps, bCheck); fprintf(fp, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds)/60.0); diff --combined src/gromacs/gmxlib/calcgrid.c index 1de1f57efe,81856d489c..7bbee76f12 --- a/src/gromacs/gmxlib/calcgrid.c +++ b/src/gromacs/gmxlib/calcgrid.c @@@ -34,15 -34,16 +34,15 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "gromacs/legacyheaders/calcgrid.h" #include -#include "typedefs.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" -#include "gmx_fatal.h" -#include "calcgrid.h" /* The grid sizes below are based on timing of a 3D cubic grid in fftw * compiled with SSE using 4 threads in fft5d.c. @@@ -136,7 -137,7 +136,7 @@@ real calc_grid(FILE *fp, matrix box, re /* Determine how many pre-factors of 2 we need */ fac2 = 1; i = g_baseNR - 1; - while (fac2*grid_base[i-1] < nmin) + while (fac2*grid_base[i] < nmin) { fac2 *= 2; } diff --combined src/gromacs/gmxpreprocess/toppush.c index 161940b72e,fab21dda51..8272b7787e --- a/src/gromacs/gmxpreprocess/toppush.c +++ b/src/gromacs/gmxpreprocess/toppush.c @@@ -34,27 -34,28 +34,27 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" +#include "toppush.h" + +#include #include #include -#include +#include -#include "sysstuff.h" -#include "gromacs/utility/smalloc.h" -#include "macros.h" +#include "gromacs/gmxpreprocess/gpp_atomtype.h" +#include "gromacs/gmxpreprocess/gpp_bond_atomtype.h" +#include "gromacs/gmxpreprocess/readir.h" +#include "gromacs/gmxpreprocess/topdirs.h" +#include "gromacs/gmxpreprocess/toputil.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/warninp.h" +#include "gromacs/topology/symtab.h" #include "gromacs/utility/cstringutil.h" -#include "names.h" -#include "toputil.h" -#include "toppush.h" -#include "topdirs.h" -#include "readir.h" -#include "symtab.h" -#include "gmx_fatal.h" -#include "warninp.h" -#include "gpp_atomtype.h" -#include "gpp_bond_atomtype.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" void generate_nbparams(int comb, int ftype, t_params *plist, gpp_atomtype_t atype, warninp_t wi) @@@ -599,9 -600,11 +599,11 @@@ static void push_bondtype(t_params { sprintf(errbuf, "Overriding %s parameters.%s", interaction_function[ftype].longname, - (ftype == F_PDIHS) ? "\nUse dihedraltype 4 to allow several multiplicity terms." : ""); + (ftype == F_PDIHS) ? + "\nUse dihedraltype 9 to allow several multiplicity terms. Only consecutive lines are combined. Non-consective lines overwrite each other." + : ""); warning(wi, errbuf); - fprintf(stderr, " old:"); + fprintf(stderr, " old: "); for (j = 0; (j < nrfp); j++) { fprintf(stderr, " %g", bt->param[i].c[j]); diff --combined src/gromacs/legacyheaders/domdec.h index 13b55711e8,240a7901cc..c44c7bf840 --- a/src/gromacs/legacyheaders/domdec.h +++ b/src/gromacs/legacyheaders/domdec.h @@@ -36,10 -36,9 +36,10 @@@ #ifndef _domdec_h #define _domdec_h -#include "typedefs.h" -#include "vsite.h" -#include "genborn.h" +#include "gromacs/legacyheaders/genborn.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/legacyheaders/vsite.h" +#include "gromacs/timing/wallcycle.h" #ifdef __cplusplus extern "C" { @@@ -139,6 -138,12 +139,12 @@@ void change_dd_dlb_cutoff_limit(t_commr * possible after subsequently setting a shorter cut-off with change_dd_cutoff. */ + gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd); + /* Return if the DLB lock is set */ + + void dd_dlb_set_lock(gmx_domdec_t *dd, gmx_bool bValue); + /* Set a lock such that with DLB=auto DLB can (not) get turned on */ + void dd_setup_dlb_resource_sharing(t_commrec *cr, const gmx_hw_info_t *hwinfo, const gmx_hw_opt_t *hw_opt); diff --combined src/gromacs/listed-forces/bonded.cpp index b1ded380a4,0eb8d55791..dfb846d9e7 --- a/src/gromacs/listed-forces/bonded.cpp +++ b/src/gromacs/listed-forces/bonded.cpp @@@ -34,50 -34,36 +34,50 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +/*! \internal \file + * + * \brief This file defines functions necessary for mdrun and tools to + * compute energies and forces for bonded interactions. + * + * \author Mark Abraham + * + * \ingroup module_listed-forces + */ +#include "gmxpre.h" + +#include "bonded.h" + +#include "config.h" -#include #include -#include "physics.h" -#include "vec.h" -#include "gromacs/math/utilities.h" -#include "txtdump.h" -#include "bondf.h" -#include "gromacs/utility/smalloc.h" -#include "pbc.h" -#include "ns.h" -#include "macros.h" -#include "names.h" -#include "gmx_fatal.h" -#include "mshift.h" -#include "main.h" -#include "disre.h" -#include "orires.h" -#include "force.h" -#include "nonbonded.h" -#include "restcbt.h" +#include + +#include + +#include "gromacs/legacyheaders/disre.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/nonbonded.h" +#include "gromacs/legacyheaders/ns.h" +#include "gromacs/legacyheaders/orires.h" +#include "gromacs/legacyheaders/txtdump.h" +#include "gromacs/math/units.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" #include "gromacs/simd/simd.h" #include "gromacs/simd/simd_math.h" #include "gromacs/simd/vector_operations.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" -/* Find a better place for this? */ +#include "restcbt.h" + +/*! \brief Mysterious CMAP coefficient matrix */ const int cmap_coeff_matrix[] = { 1, 0, -3, 2, 0, 0, 0, 0, -3, 0, 9, -6, 2, 0, -6, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, -9, 6, -2, 0, 6, -4, @@@ -98,9 -84,7 +98,9 @@@ }; - +/* TODO This function should go and live in nonbonded.c where it is + really needed. Here, it only supports giving a fatal error message + with FENE_bonds */ int glatnr(int *global_atom_index, int i) { int atnr; @@@ -117,9 -101,6 +117,9 @@@ return atnr; } +/*! \brief Compute dx = xi - xj, modulo PBC if non-NULL + * + * \todo This kind of code appears in many places. Consolidate it */ static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx) { if (pbc) @@@ -148,7 -129,7 +148,7 @@@ typedef struct gmx_simd_real_t bxx; } pbc_simd_t; -/* Set the SIMD pbc data from a normal t_pbc struct */ +/*! \brief Set the SIMD pbc data from a normal t_pbc struct */ static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd) { rvec inv_bdiag; @@@ -188,7 -169,7 +188,7 @@@ } } -/* Correct distance vector *dx,*dy,*dz for PBC using SIMD */ +/*! \brief Correct distance vector *dx,*dy,*dz for PBC using SIMD */ static gmx_inline void pbc_dx_simd(gmx_simd_real_t *dx, gmx_simd_real_t *dy, gmx_simd_real_t *dz, const pbc_simd_t *pbc) @@@ -210,9 -191,10 +210,9 @@@ #endif /* GMX_SIMD_HAVE_REAL */ -/* - * Morse potential bond by Frank Everdij +/*! \brief Morse potential bond * - * Three parameters needed: + * By Frank Everdij. Three parameters needed: * * b0 = equilibrium distance in nm * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e)) @@@ -221,6 -203,7 +221,6 @@@ * Note: the potential is referenced to be +cb at infinite separation * and zero at the equilibrium distance! */ - real morse_bonds(int nbonds, const t_iatom forceatoms[], const t_iparams forceparams[], const rvec x[], rvec f[], rvec fshift[], @@@ -295,7 -278,6 +295,7 @@@ return vtot; } +//! \cond real cubic_bonds(int nbonds, const t_iatom forceatoms[], const t_iparams forceparams[], const rvec x[], rvec f[], rvec fshift[], @@@ -369,7 -351,7 +369,7 @@@ real FENE_bonds(int nbonds const real half = 0.5; const real one = 1.0; real bm, kb; - real dr, dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot; + real dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot; rvec dx; int i, m, ki, type, ai, aj; ivec dt; @@@ -769,6 -751,7 +769,6 @@@ real water_pol(int nbonds kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y; kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z; r_HH = 1.0/forceparams[type0].wpol.rHH; - r_OD = 1.0/forceparams[type0].wpol.rOD; if (debug) { fprintf(debug, "WPOL: qS = %10.5f aS = %5d\n", qS, aS); @@@ -892,7 -875,7 +892,7 @@@ static real do_1_thole(const rvec xi, c rvec fshift[], real afac) { rvec r12; - real r12sq, r12_1, r12n, r12bar, v0, v1, fscal, ebar, fff; + real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff; int m, t; t = pbc_rvec_sub(pbc, xi, xj, r12); /* 3 */ @@@ -931,10 -914,9 +931,10 @@@ real thole_pol(int nbonds int gmx_unused *global_atom_index) { /* Interaction between two pairs of particles with opposite charge */ - int i, type, a1, da1, a2, da2; - real q1, q2, qq, a, al1, al2, afac; - real V = 0; + int i, type, a1, da1, a2, da2; + real q1, q2, qq, a, al1, al2, afac; + real V = 0; + const real minusOneOnSix = -1.0/6.0; for (i = 0; (i < nbonds); ) { @@@ -949,7 -931,7 +949,7 @@@ al1 = forceparams[type].thole.alpha1; al2 = forceparams[type].thole.alpha2; qq = q1*q2; - afac = a*pow(al1*al2, -1.0/6.0); + afac = a*pow(al1*al2, minusOneOnSix); V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac); V += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac); V += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac); @@@ -1734,9 -1716,10 +1734,9 @@@ do_dih_fup_noshiftf(int i, int j, int k rvec m, rvec n, rvec f[]) { rvec f_i, f_j, f_k, f_l; - rvec uvec, vvec, svec, dx_jl; + rvec uvec, vvec, svec; real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2; real a, b, p, q, toler; - ivec jt, dt_ij, dt_kj, dt_lj; iprm = iprod(m, m); /* 5 */ iprn = iprod(n, n); /* 5 */ @@@ -2006,9 -1989,10 +2006,9 @@@ pdihs_noener_simd(int nbonds const int nfa1 = 5; int i, iu, s; int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH]; - real ddphi; real dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr; real buf_array[7*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf; - real *cp, *phi0, *mult, *phi, *p, *q, *sf_i, *msf_l; + real *cp, *phi0, *mult, *p, *q; gmx_simd_real_t phi0_S, phi_S; gmx_simd_real_t mx_S, my_S, mz_S; gmx_simd_real_t nx_S, ny_S, nz_S; @@@ -2029,6 -2013,8 +2029,6 @@@ mult = buf + 2*GMX_SIMD_REAL_WIDTH; p = buf + 3*GMX_SIMD_REAL_WIDTH; q = buf + 4*GMX_SIMD_REAL_WIDTH; - sf_i = buf + 5*GMX_SIMD_REAL_WIDTH; - msf_l = buf + 6*GMX_SIMD_REAL_WIDTH; set_pbc_simd(pbc, &pbc_simd); @@@ -2117,6 -2103,155 +2117,152 @@@ } } + /* This is mostly a copy of pdihs_noener_simd above, but with using + * the RB potential instead of a harmonic potential. + * This function can replace rbdihs() when no energy and virial are needed. + */ + static void + rbdihs_noener_simd(int nbonds, + const t_iatom forceatoms[], const t_iparams forceparams[], + const rvec x[], rvec f[], + const t_pbc *pbc, const t_graph gmx_unused *g, + real gmx_unused lambda, + const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd, + int gmx_unused *global_atom_index) + { + const int nfa1 = 5; + int i, iu, s, j; + int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH]; - real ddphi; + real dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr; + real buf_array[(NR_RBDIHS + 4)*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf; - real *parm, *phi, *p, *q, *sf_i, *msf_l; ++ real *parm, *p, *q; + + gmx_simd_real_t phi_S; + gmx_simd_real_t ddphi_S, cosfac_S; + gmx_simd_real_t mx_S, my_S, mz_S; + gmx_simd_real_t nx_S, ny_S, nz_S; + gmx_simd_real_t nrkj_m2_S, nrkj_n2_S; + gmx_simd_real_t parm_S, c_S; + gmx_simd_real_t sin_S, cos_S; + gmx_simd_real_t sf_i_S, msf_l_S; + pbc_simd_t pbc_simd; + + gmx_simd_real_t pi_S = gmx_simd_set1_r(M_PI); + gmx_simd_real_t one_S = gmx_simd_set1_r(1.0); + + /* Ensure SIMD register alignment */ + dr = gmx_simd_align_r(dr_array); + buf = gmx_simd_align_r(buf_array); + + /* Extract aligned pointer for parameters and variables */ + parm = buf; + p = buf + (NR_RBDIHS + 0)*GMX_SIMD_REAL_WIDTH; + q = buf + (NR_RBDIHS + 1)*GMX_SIMD_REAL_WIDTH; - sf_i = buf + (NR_RBDIHS + 2)*GMX_SIMD_REAL_WIDTH; - msf_l = buf + (NR_RBDIHS + 3)*GMX_SIMD_REAL_WIDTH; + + set_pbc_simd(pbc, &pbc_simd); + + /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */ + for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1) + { + /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals. + * iu indexes into forceatoms, we should not let iu go beyond nbonds. + */ + iu = i; + for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++) + { + type = forceatoms[iu]; + ai[s] = forceatoms[iu+1]; + aj[s] = forceatoms[iu+2]; + ak[s] = forceatoms[iu+3]; + al[s] = forceatoms[iu+4]; + + /* We don't need the first parameter, since that's a constant + * which only affects the energies, not the forces. + */ + for (j = 1; j < NR_RBDIHS; j++) + { + parm[j*GMX_SIMD_REAL_WIDTH + s] = + forceparams[type].rbdihs.rbcA[j]; + } + + /* At the end fill the arrays with identical entries */ + if (iu + nfa1 < nbonds) + { + iu += nfa1; + } + } + + /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */ + dih_angle_simd(x, ai, aj, ak, al, &pbc_simd, + dr, + &phi_S, + &mx_S, &my_S, &mz_S, + &nx_S, &ny_S, &nz_S, + &nrkj_m2_S, + &nrkj_n2_S, + p, q); + + /* Change to polymer convention */ + phi_S = gmx_simd_sub_r(phi_S, pi_S); + + gmx_simd_sincos_r(phi_S, &sin_S, &cos_S); + + ddphi_S = gmx_simd_setzero_r(); + c_S = one_S; + cosfac_S = one_S; + for (j = 1; j < NR_RBDIHS; j++) + { + parm_S = gmx_simd_load_r(parm + j*GMX_SIMD_REAL_WIDTH); + ddphi_S = gmx_simd_fmadd_r(gmx_simd_mul_r(c_S, parm_S), cosfac_S, ddphi_S); + cosfac_S = gmx_simd_mul_r(cosfac_S, cos_S); + c_S = gmx_simd_add_r(c_S, one_S); + } + + /* Note that here we do not use the minus sign which is present + * in the normal RB code. This is corrected for through (m)sf below. + */ + ddphi_S = gmx_simd_mul_r(ddphi_S, sin_S); + + sf_i_S = gmx_simd_mul_r(ddphi_S, nrkj_m2_S); + msf_l_S = gmx_simd_mul_r(ddphi_S, nrkj_n2_S); + + /* After this m?_S will contain f[i] */ + mx_S = gmx_simd_mul_r(sf_i_S, mx_S); + my_S = gmx_simd_mul_r(sf_i_S, my_S); + mz_S = gmx_simd_mul_r(sf_i_S, mz_S); + + /* After this m?_S will contain -f[l] */ + nx_S = gmx_simd_mul_r(msf_l_S, nx_S); + ny_S = gmx_simd_mul_r(msf_l_S, ny_S); + nz_S = gmx_simd_mul_r(msf_l_S, nz_S); + + gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S); + gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S); + gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S); + gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S); + gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S); + gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S); + + iu = i; + s = 0; + do + { + do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s], + p[s], q[s], + dr[ XX *GMX_SIMD_REAL_WIDTH+s], + dr[ YY *GMX_SIMD_REAL_WIDTH+s], + dr[ ZZ *GMX_SIMD_REAL_WIDTH+s], + dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s], + dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s], + dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s], + f); + s++; + iu += nfa1; + } + while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds); + } + } + #endif /* GMX_SIMD_HAVE_REAL */ @@@ -2175,7 -2310,7 +2321,7 @@@ real idihs(int nbonds dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp; - do_dih_fup(ai, aj, ak, al, (real)(-ddphi), r_ij, r_kj, r_kl, m, n, + do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, g, x, t1, t2, t3); /* 112 */ /* 218 TOTAL */ #ifdef DEBUG @@@ -2265,44 -2400,6 +2411,44 @@@ static void posres_dx(const rvec x, con } } +/*! \brief Computes forces and potential for flat-bottom cylindrical restraints. + * Returns the flat-bottom potential. */ +static real do_fbposres_cylinder(int fbdim, rvec fm, rvec dx, real rfb, real kk, gmx_bool bInvert) +{ + int d; + real dr, dr2, invdr, v, rfb2; + + dr2 = 0.0; + rfb2 = sqr(rfb); + v = 0.0; + + for (d = 0; d < DIM; d++) + { + if (d != fbdim) + { + dr2 += sqr(dx[d]); + } + } + + if (dr2 > 0.0 && + ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) ) + ) + { + dr = sqrt(dr2); + invdr = 1./dr; + v = 0.5*kk*sqr(dr - rfb); + for (d = 0; d < DIM; d++) + { + if (d != fbdim) + { + fm[d] = -kk*(dr-rfb)*dx[d]*invdr; /* Force pointing to the center */ + } + } + } + + return v; +} + /*! \brief Adds forces of flat-bottomed positions restraints to f[] * and fixes vir_diag. Returns the flat-bottomed potential. */ real fbposres(int nbonds, @@@ -2315,8 -2412,8 +2461,8 @@@ int i, ai, m, d, type, npbcdim = 0, fbdim; const t_iparams *pr; real vtot, kk, v; - real ref = 0, dr, dr2, rpot, rfb, rfb2, fact, invdr; - rvec com_sc, rdist, pos, dx, dpdl, fm; + real dr, dr2, rfb, rfb2, fact; + rvec com_sc, rdist, dx, dpdl, fm; gmx_bool bInvert; npbcdim = ePBC2npbcdim(ePBC); @@@ -2377,22 -2474,19 +2523,22 @@@ svmul(fact, dx, fm); } break; + case efbposresCYLINDERX: + /* cylindrical flat-bottom posres in y-z plane. fm[XX] = 0. */ + fbdim = XX; + v = do_fbposres_cylinder(fbdim, fm, dx, rfb, kk, bInvert); + break; + case efbposresCYLINDERY: + /* cylindrical flat-bottom posres in x-z plane. fm[YY] = 0. */ + fbdim = YY; + v = do_fbposres_cylinder(fbdim, fm, dx, rfb, kk, bInvert); + break; case efbposresCYLINDER: - /* cylidrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */ - dr2 = sqr(dx[XX])+sqr(dx[YY]); - if (dr2 > 0.0 && - ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) ) - ) - { - dr = sqrt(dr2); - invdr = 1./dr; - v = 0.5*kk*sqr(dr - rfb); - fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr; /* Force pointing to the center */ - fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr; - } + /* equivalent to efbposresCYLINDERZ for backwards compatibility */ + case efbposresCYLINDERZ: + /* cylindrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */ + fbdim = ZZ; + v = do_fbposres_cylinder(fbdim, fm, dx, rfb, kk, bInvert); break; case efbposresX: /* fbdim=XX */ case efbposresY: /* fbdim=YY */ @@@ -2434,11 -2528,12 +2580,11 @@@ real posres(int nbonds real lambda, real *dvdlambda, int refcoord_scaling, int ePBC, rvec comA, rvec comB) { - int i, ai, m, d, type, ki, npbcdim = 0; + int i, ai, m, d, type, npbcdim = 0; const t_iparams *pr; real L1; real vtot, kk, fm; - real posA, posB, ref = 0; - rvec comA_sc, comB_sc, rdist, dpdl, pos, dx; + rvec comA_sc, comB_sc, rdist, dpdl, dx; gmx_bool bForceValid = TRUE; if ((f == NULL) || (vir_diag == NULL)) /* should both be null together! */ @@@ -2631,7 -2726,7 +2777,7 @@@ real dihres(int nbonds real vtot = 0; int ai, aj, ak, al, i, k, type, t1, t2, t3; real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac; - real phi, ddphi, ddp, ddp2, dp, sign, d2r, fc, L1; + real phi, ddphi, ddp, ddp2, dp, sign, d2r, L1; rvec r_ij, r_kj, r_kl, m, n; L1 = 1.0-lambda; @@@ -2738,6 -2833,7 +2884,6 @@@ real restrangles(int nbonds { int i, d, ai, aj, ak, type, m; int t1, t2; - rvec r_ij, r_kj; real v, vtot; ivec jt, dt_ij, dt_kj; rvec f_i, f_j, f_k; @@@ -2865,7 -2961,7 +3011,7 @@@ real restrdihs(int nbonds t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp); pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante); t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt); - t3 = pbc_rvec_sub(pbc, x[ak], x[al], vec_temp); + pbc_rvec_sub(pbc, x[ak], x[al], vec_temp); pbc_rvec_sub(pbc, x[al], x[ak], delta_post); /* This function computes factors needed for restricted angle potential. @@@ -2976,7 -3072,7 +3122,7 @@@ real cbtdihs(int nbonds pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante); t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp); pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt); - t3 = pbc_rvec_sub(pbc, x[ak], x[al], vec_temp); + pbc_rvec_sub(pbc, x[ak], x[al], vec_temp); pbc_rvec_sub(pbc, x[al], x[ak], delta_post); /* \brief Compute factors for CBT potential @@@ -3151,11 -3247,7 +3297,11 @@@ real rbdihs(int nbonds return vtot; } -int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2) +//! \endcond + +/*! \brief Mysterious undocumented function */ +static int +cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2) { int im1, ip1, ip2; @@@ -3194,16 -3286,14 +3340,16 @@@ } -real cmap_dihs(int nbonds, - const t_iatom forceatoms[], const t_iparams forceparams[], - const gmx_cmap_t *cmap_grid, - const rvec x[], rvec f[], rvec fshift[], - const t_pbc *pbc, const t_graph *g, - real gmx_unused lambda, real gmx_unused *dvdlambda, - const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd, - int gmx_unused *global_atom_index) +/*! \brief Compute CMAP dihedral energies and forces */ +static real +cmap_dihs(int nbonds, + const t_iatom forceatoms[], const t_iparams forceparams[], + const gmx_cmap_t *cmap_grid, + const rvec x[], rvec f[], rvec fshift[], + const t_pbc *pbc, const t_graph *g, + real gmx_unused lambda, real gmx_unused *dvdlambda, + const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd, + int gmx_unused *global_atom_index) { int i, j, k, n, idx; int ai, aj, ak, al, am; @@@ -3212,13 -3302,13 +3358,13 @@@ int t11, t21, t31, t12, t22, t32; int iphi1, ip1m1, ip1p1, ip1p2; int iphi2, ip2m1, ip2p1, ip2p2; - int l1, l2, l3, l4; - int pos1, pos2, pos3, pos4, tmp; + int l1, l2, l3; + int pos1, pos2, pos3, pos4; real ty[4], ty1[4], ty2[4], ty12[4], tc[16], tx[16]; - real phi1, psi1, cos_phi1, sin_phi1, sign1, xphi1; - real phi2, psi2, cos_phi2, sin_phi2, sign2, xphi2; - real dx, xx, tt, tu, e, df1, df2, ddf1, ddf2, ddf12, vtot; + real phi1, cos_phi1, sin_phi1, sign1, xphi1; + real phi2, cos_phi2, sin_phi2, sign2, xphi2; + real dx, xx, tt, tu, e, df1, df2, vtot; real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1; real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2; real fg1, hg1, fga1, hgb1, gaa1, gbb1; @@@ -3280,7 -3370,7 +3426,7 @@@ b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2]; b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */ - tmp = pbc_rvec_sub(pbc, x[a1l], x[a1k], h1); + pbc_rvec_sub(pbc, x[a1l], x[a1k], h1); ra21 = iprod(a1, a1); /* 5 */ rb21 = iprod(b1, b1); /* 5 */ @@@ -3341,7 -3431,7 +3487,7 @@@ b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2]; b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */ - tmp = pbc_rvec_sub(pbc, x[a2l], x[a2k], h2); + pbc_rvec_sub(pbc, x[a2l], x[a2k], h2); ra22 = iprod(a2, a2); /* 5 */ rb22 = iprod(b2, b2); /* 5 */ @@@ -3406,8 -3496,8 +3552,8 @@@ dx = 2*M_PI / cmap_grid->grid_spacing; /* Where on the grid are we */ - iphi1 = (int)(xphi1/dx); - iphi2 = (int)(xphi2/dx); + iphi1 = static_cast(xphi1/dx); + iphi2 = static_cast(xphi2/dx); iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2); iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2); @@@ -3472,6 -3562,9 +3618,6 @@@ e = 0; df1 = 0; df2 = 0; - ddf1 = 0; - ddf2 = 0; - ddf12 = 0; for (i = 3; i >= 0; i--) { @@@ -3482,11 -3575,19 +3628,11 @@@ e = tt * e + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4]; df1 = tu * df1 + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3]; df2 = tt * df2 + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1]; - ddf1 = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2]; - ddf2 = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2]; } - ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) + - 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt); - fac = RAD2DEG/dx; df1 = df1 * fac; df2 = df2 * fac; - ddf1 = ddf1 * fac * fac; - ddf2 = ddf2 * fac * fac; - ddf12 = ddf12 * fac * fac; /* CMAP energy */ vtot += e; @@@ -3593,7 -3694,7 +3739,7 @@@ } - +//! \cond /*********************************************************** * * G R O M O S 9 6 F U N C T I O N S @@@ -3852,7 -3953,7 +3998,7 @@@ real cross_bond_angle(int nbonds /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003) * pp. 842-847 */ - int i, ai, aj, ak, type, m, t1, t2, t3; + int i, ai, aj, ak, type, m, t1, t2; rvec r_ij, r_kj, r_ik; real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3; rvec f_i, f_j, f_k; @@@ -3873,7 -3974,7 +4019,7 @@@ /* Compute distance vectors ... */ t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij); t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj); - t3 = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); + pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /* ... and their lengths */ r1 = norm(r_ij); @@@ -3931,7 -4032,7 +4077,7 @@@ static real bonded_tab(const char *type { real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF; int n0, nnn; - real v, f, dvdlambda; + real dvdlambda; k = (1.0 - lambda)*kA + lambda*kB; @@@ -3939,7 -4040,7 +4085,7 @@@ VFtab = table->data; rt = r*tabscale; - n0 = rt; + n0 = static_cast(rt); if (n0 >= table->n) { gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d", @@@ -4066,7 -4167,7 +4212,7 @@@ real tab_angles(int nbonds if (cos_theta2 < 1) { int m; - real snt, st, sth; + real st, sth; real cik, cii, ckk; real nrkj2, nrij2; rvec f_i, f_j, f_k; @@@ -4160,10 -4261,11 +4306,10 @@@ real tab_dihs(int nbonds return vtot; } -/* Return if this is a potential calculated in bondfree.c, - * i.e. an interaction that actually calculates a potential and - * works on multiple atoms (not e.g. a connection or a position restraint). - */ -static gmx_inline gmx_bool ftype_is_bonded_potential(int ftype) +//! \endcond + +gmx_bool +ftype_is_bonded_potential(int ftype) { return (interaction_function[ftype].flags & IF_BOND) && @@@ -4171,7 -4273,173 +4317,7 @@@ (ftype < F_GB12 || ftype > F_GB14); } -static void divide_bondeds_over_threads(t_idef *idef, int nthreads) -{ - int ftype; - int nat1; - int t; - int il_nr_thread; - - idef->nthreads = nthreads; - - if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc) - { - idef->il_thread_division_nalloc = F_NRE*(nthreads+1); - snew(idef->il_thread_division, idef->il_thread_division_nalloc); - } - - for (ftype = 0; ftype < F_NRE; ftype++) - { - if (ftype_is_bonded_potential(ftype)) - { - nat1 = interaction_function[ftype].nratoms + 1; - - for (t = 0; t <= nthreads; t++) - { - /* Divide the interactions equally over the threads. - * When the different types of bonded interactions - * are distributed roughly equally over the threads, - * this should lead to well localized output into - * the force buffer on each thread. - * If this is not the case, a more advanced scheme - * (not implemented yet) will do better. - */ - il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1; - - /* Ensure that distance restraint pairs with the same label - * end up on the same thread. - * This is slighlty tricky code, since the next for iteration - * may have an initial il_nr_thread lower than the final value - * in the previous iteration, but this will anyhow be increased - * to the approriate value again by this while loop. - */ - while (ftype == F_DISRES && - il_nr_thread > 0 && - il_nr_thread < idef->il[ftype].nr && - idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label == - idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label) - { - il_nr_thread += nat1; - } - - idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread; - } - } - } -} - -static unsigned -calc_bonded_reduction_mask(const t_idef *idef, - int shift, - int t, int nt) -{ - unsigned mask; - int ftype, nb, nat1, nb0, nb1, i, a; - - mask = 0; - - for (ftype = 0; ftype < F_NRE; ftype++) - { - if (ftype_is_bonded_potential(ftype)) - { - nb = idef->il[ftype].nr; - if (nb > 0) - { - nat1 = interaction_function[ftype].nratoms + 1; - - /* Divide this interaction equally over the threads. - * This is not stored: should match division in calc_bonds. - */ - nb0 = idef->il_thread_division[ftype*(nt+1)+t]; - nb1 = idef->il_thread_division[ftype*(nt+1)+t+1]; - - for (i = nb0; i < nb1; i += nat1) - { - for (a = 1; a < nat1; a++) - { - mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift)); - } - } - } - } - } - - return mask; -} - -void setup_bonded_threading(t_forcerec *fr, t_idef *idef) -{ -#define MAX_BLOCK_BITS 32 - int t; - int ctot, c, b; - - assert(fr->nthreads >= 1); - - /* Divide the bonded interaction over the threads */ - divide_bondeds_over_threads(idef, fr->nthreads); - - if (fr->nthreads == 1) - { - fr->red_nblock = 0; - - return; - } - - /* We divide the force array in a maximum of 32 blocks. - * Minimum force block reduction size is 2^6=64. - */ - fr->red_ashift = 6; - while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<red_ashift))) - { - fr->red_ashift++; - } - if (debug) - { - fprintf(debug, "bonded force buffer block atom shift %d bits\n", - fr->red_ashift); - } - - /* Determine to which blocks each thread's bonded force calculation - * contributes. Store this is a mask for each thread. - */ -#pragma omp parallel for num_threads(fr->nthreads) schedule(static) - for (t = 1; t < fr->nthreads; t++) - { - fr->f_t[t].red_mask = - calc_bonded_reduction_mask(idef, fr->red_ashift, t, fr->nthreads); - } - - /* Determine the maximum number of blocks we need to reduce over */ - fr->red_nblock = 0; - ctot = 0; - for (t = 0; t < fr->nthreads; t++) - { - c = 0; - for (b = 0; b < MAX_BLOCK_BITS; b++) - { - if (fr->f_t[t].red_mask & (1U<red_nblock = max(fr->red_nblock, b+1); - c++; - } - } - if (debug) - { - fprintf(debug, "thread %d flags %x count %d\n", - t, fr->f_t[t].red_mask, c); - } - ctot += c; - } - if (debug) - { - fprintf(debug, "Number of blocks to reduce: %d of size %d\n", - fr->red_nblock, 1<red_ashift); - fprintf(debug, "Reduction density %.2f density/#thread %.2f\n", - ctot*(1<red_ashift)/(double)fr->natoms_force, - ctot*(1<red_ashift)/(double)(fr->natoms_force*fr->nthreads)); - } -} - +/*! \brief Zero thread-local force-output buffers */ static void zero_thread_forces(f_thread_t *f_t, int n, int nblock, int blocksize) { @@@ -4190,7 -4458,7 +4336,7 @@@ if (f_t->red_mask && (1U<f[a]); @@@ -4219,16 -4487,15 +4365,16 @@@ } } +/*! \brief The max thread number is arbitrary, we used a fixed number + * to avoid memory management. Using more than 16 threads is probably + * never useful performance wise. */ +#define MAX_BONDED_THREADS 256 + +/*! \brief Reduce thread-local force buffers */ static void reduce_thread_force_buffer(int n, rvec *f, int nthreads, f_thread_t *f_t, int nblock, int block_size) { - /* The max thread number is arbitrary, - * we used a fixed number to avoid memory management. - * Using more than 16 threads is probably never useful performance wise. - */ -#define MAX_BONDED_THREADS 256 int b; if (nthreads > MAX_BONDED_THREADS) @@@ -4261,7 -4528,7 +4407,7 @@@ /* Reduce force buffers for threads that contribute */ a0 = b *block_size; a1 = (b+1)*block_size; - a1 = min(a1, n); + a1 = std::min(a1, n); for (a = a0; a < a1; a++) { for (fb = 0; fb < nfb; fb++) @@@ -4273,7 -4540,6 +4419,7 @@@ } } +/*! \brief Reduce thread-local forces */ static void reduce_thread_forces(int n, rvec *f, rvec *fshift, real *ener, gmx_grppairener_t *grpp, real *dvdl, int nthreads, f_thread_t *f_t, @@@ -4331,11 -4597,9 +4477,11 @@@ } } -static real calc_one_bond(FILE *fplog, int thread, +/*! \brief Calculate one element of the list of bonded interactions + for this thread */ +static real calc_one_bond(int thread, int ftype, const t_idef *idef, - rvec x[], rvec f[], rvec fshift[], + const rvec x[], rvec f[], rvec fshift[], t_forcerec *fr, const t_pbc *pbc, const t_graph *g, gmx_grppairener_t *grpp, @@@ -4343,7 -4607,7 +4489,7 @@@ real *lambda, real *dvdl, const t_mdatoms *md, t_fcdata *fcd, gmx_bool bCalcEnerVir, - int *global_atom_index, gmx_bool bPrintSepPot) + int *global_atom_index) { int nat1, nbonds, efptFTYPE; real v = 0; @@@ -4372,7 -4636,7 +4518,7 @@@ { v = cmap_dihs(nbn, iatoms+nb0, idef->iparams, &idef->cmap_grid, - (const rvec*)x, f, fshift, + x, f, fshift, pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index); } @@@ -4383,7 -4647,7 +4529,7 @@@ /* No energies, shift forces, dvdl */ angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0, idef->iparams, - (const rvec*)x, f, + x, f, pbc, g, lambda[efptFTYPE], md, fcd, global_atom_index); v = 0; @@@ -4400,24 -4664,53 +4546,37 @@@ #endif (nbn, idef->il[ftype].iatoms+nb0, idef->iparams, - (const rvec*)x, f, + x, f, pbc, g, lambda[efptFTYPE], md, fcd, global_atom_index); v = 0; } + #ifdef GMX_SIMD_HAVE_REAL + else if (ftype == F_RBDIHS && + !bCalcEnerVir && fr->efep == efepNO) + { + /* No energies, shift forces, dvdl */ + rbdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0, + idef->iparams, + (const rvec*)x, f, + pbc, g, lambda[efptFTYPE], md, fcd, + global_atom_index); + v = 0; + } + #endif else { v = interaction_function[ftype].ifunc(nbn, iatoms+nb0, idef->iparams, - (const rvec*)x, f, fshift, + x, f, fshift, pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index); } - if (bPrintSepPot) - { - fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n", - interaction_function[ftype].longname, - nbonds, v, lambda[efptFTYPE]); - } } else { - v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift, + v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift, pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index); - - if (bPrintSepPot) - { - fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n", - interaction_function[ftype].longname, - interaction_function[F_LJ14].longname, nbonds, dvdl[efptVDW]); - fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n", - interaction_function[ftype].longname, - interaction_function[F_COUL14].longname, nbonds, dvdl[efptCOUL]); - } } if (thread == 0) @@@ -4428,22 -4721,25 +4587,22 @@@ return v; } -void calc_bonds(FILE *fplog, const gmx_multisim_t *ms, +void calc_bonds(const gmx_multisim_t *ms, const t_idef *idef, - rvec x[], history_t *hist, + const rvec x[], history_t *hist, rvec f[], t_forcerec *fr, - const t_pbc *pbc, const t_graph *g, + const struct t_pbc *pbc, const struct t_graph *g, gmx_enerdata_t *enerd, t_nrnb *nrnb, real *lambda, const t_mdatoms *md, t_fcdata *fcd, int *global_atom_index, - t_atomtypes gmx_unused *atype, gmx_genborn_t gmx_unused *born, - int force_flags, - gmx_bool bPrintSepPot, gmx_int64_t step) + int force_flags) { gmx_bool bCalcEnerVir; int i; - real v, dvdl[efptNR], dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values + real dvdl[efptNR]; /* The dummy array is to have a place to store the dhdl at other values of lambda, which will be thrown away in the end*/ const t_pbc *pbc_null; - char buf[22]; int thread; assert(fr->nthreads == idef->nthreads); @@@ -4462,6 -4758,11 +4621,6 @@@ { pbc_null = NULL; } - if (bPrintSepPot) - { - fprintf(fplog, "Step %s: bonded V and dVdl for this rank\n", - gmx_step_str(step, buf)); - } #ifdef DEBUG if (g && debug) @@@ -4476,14 -4777,14 +4635,14 @@@ enerd->term[F_ORIRESDEV] = calc_orires_dev(ms, idef->il[F_ORIRES].nr, idef->il[F_ORIRES].iatoms, - idef->iparams, md, (const rvec*)x, + idef->iparams, md, x, pbc_null, fcd, hist); } if (idef->il[F_DISRES].nr) { calc_disres_R_6(idef->il[F_DISRES].nr, idef->il[F_DISRES].iatoms, - idef->iparams, (const rvec*)x, pbc_null, + idef->iparams, x, pbc_null, fcd, hist); #ifdef GMX_MPI if (fcd->disres.nsystems > 1) @@@ -4527,11 -4828,11 +4686,11 @@@ { if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype)) { - v = calc_one_bond(fplog, thread, ftype, idef, x, + v = calc_one_bond(thread, ftype, idef, x, ft, fshift, fr, pbc_null, g, grpp, nrnb, lambda, dvdlt, md, fcd, bCalcEnerVir, - global_atom_index, bPrintSepPot); + global_atom_index); epot[ftype] += v; } } @@@ -4561,17 -4862,18 +4720,17 @@@ } } -void calc_bonds_lambda(FILE *fplog, - const t_idef *idef, - rvec x[], +void calc_bonds_lambda(const t_idef *idef, + const rvec x[], t_forcerec *fr, - const t_pbc *pbc, const t_graph *g, + const struct t_pbc *pbc, const struct t_graph *g, gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb, real *lambda, const t_mdatoms *md, t_fcdata *fcd, int *global_atom_index) { - int i, ftype, nr_nonperturbed, nr; + int ftype, nr_nonperturbed, nr; real v; real dvdl_dum[efptNR]; rvec *f, *fshift; @@@ -4612,11 -4914,11 +4771,11 @@@ if (nr - nr_nonperturbed > 0) { - v = calc_one_bond(fplog, 0, ftype, &idef_fe, + v = calc_one_bond(0, ftype, &idef_fe, x, f, fshift, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum, md, fcd, TRUE, - global_atom_index, FALSE); + global_atom_index); epot[ftype] += v; } } diff --combined src/gromacs/mdlib/domdec.cpp index 13934d5fa6,6cc6b73895..06f2602916 --- a/src/gromacs/mdlib/domdec.cpp +++ b/src/gromacs/mdlib/domdec.cpp @@@ -33,59 -33,53 +33,59 @@@ * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" -#include -#include +#include "gromacs/legacyheaders/domdec.h" + +#include "config.h" + +#include #include -#include +#include #include -#include +#include +#include + +#include -#include "typedefs.h" -#include "gromacs/utility/smalloc.h" -#include "gmx_fatal.h" -#include "gmx_fatal_collective.h" -#include "vec.h" -#include "domdec.h" -#include "domdec_network.h" -#include "nrnb.h" -#include "pbc.h" -#include "chargegroup.h" -#include "constr.h" -#include "mdatoms.h" -#include "names.h" -#include "force.h" -#include "pme.h" -#include "mdrun.h" -#include "nsgrid.h" -#include "shellfc.h" -#include "mtop_util.h" -#include "gmx_ga2la.h" -#include "macros.h" -#include "nbnxn_search.h" -#include "bondf.h" -#include "gmx_omp_nthreads.h" -#include "gpu_utils.h" - -#include "gromacs/fileio/futil.h" #include "gromacs/fileio/gmxfio.h" #include "gromacs/fileio/pdbio.h" +#include "gromacs/imd/imd.h" +#include "gromacs/legacyheaders/bonded-threading.h" +#include "gromacs/legacyheaders/chargegroup.h" +#include "gromacs/legacyheaders/constr.h" +#include "gromacs/legacyheaders/domdec_network.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/gmx_ga2la.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/gpu_utils.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/mdatoms.h" +#include "gromacs/legacyheaders/mdrun.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/network.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/nsgrid.h" +#include "gromacs/legacyheaders/pme.h" +#include "gromacs/legacyheaders/shellfc.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/listed-forces/bonded.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_search.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/pulling/pull_rotation.h" +#include "gromacs/swap/swapcoords.h" #include "gromacs/timing/wallcycle.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/basenetwork.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/futil.h" #include "gromacs/utility/gmxmpi.h" -#include "gromacs/swap/swapcoords.h" #include "gromacs/utility/qsort_threadsafe.h" -#include "gromacs/pulling/pull.h" -#include "gromacs/pulling/pull_rotation.h" -#include "gromacs/imd/imd.h" +#include "gromacs/utility/smalloc.h" #define DDRANK(dd, rank) (rank) #define DDMASTERRANK(dd) (dd->masterrank) @@@ -275,6 -269,8 +275,8 @@@ typedef struct gmx_domdec_com /* The DLB option */ int eDLB; + /* Is eDLB=edlbAUTO locked such that we currently can't turn it on? */ + gmx_bool bDLB_locked; /* Are we actually using DLB? */ gmx_bool bDynLoadBal; @@@ -391,9 -387,9 +393,9 @@@ int eFlop; double flop; int flop_n; - /* Have often have did we have load measurements */ + /* How many times have did we have load measurements */ int n_load_have; - /* Have often have we collected the load measurements */ + /* How many times have we collected the load measurements */ int n_load_collect; /* Statistics */ @@@ -772,7 -768,7 +774,7 @@@ void dd_move_f(gmx_domdec_t *dd, rvec f rvec *buf, *sbuf; ivec vis; int is; - gmx_bool bPBC, bScrew; + gmx_bool bShiftForcesNeedPbc, bScrew; comm = dd->comm; @@@ -780,17 -776,16 +782,17 @@@ buf = comm->vbuf.v; - n = 0; nzone = comm->zones.n/2; nat_tot = dd->nat_tot; for (d = dd->ndim-1; d >= 0; d--) { - bPBC = (dd->ci[dd->dim[d]] == 0); - bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX); + /* Only forces in domains near the PBC boundaries need to + consider PBC in the treatment of fshift */ + bShiftForcesNeedPbc = (dd->ci[dd->dim[d]] == 0); + bScrew = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX); if (fshift == NULL && !bScrew) { - bPBC = FALSE; + bShiftForcesNeedPbc = FALSE; } /* Determine which shift vector we need */ clear_ivec(vis); @@@ -826,7 -821,7 +828,7 @@@ index = ind->index; /* Add the received forces */ n = 0; - if (!bPBC) + if (!bShiftForcesNeedPbc) { for (i = 0; i < ind->nsend[nzone]; i++) { @@@ -841,9 -836,6 +843,9 @@@ } else if (!bScrew) { + /* fshift should always be defined if this function is + * called when bShiftForcesNeedPbc is true */ + assert(NULL != fshift); for (i = 0; i < ind->nsend[nzone]; i++) { at0 = cgindex[index[i]]; @@@ -964,6 -956,7 +966,6 @@@ void dd_atom_sum_real(gmx_domdec_t *dd buf = &comm->vbuf.v[0][0]; - n = 0; nzone = comm->zones.n/2; nat_tot = dd->nat_tot; for (d = dd->ndim-1; d >= 0; d--) @@@ -1069,7 -1062,7 +1071,7 @@@ static void dd_sendrecv_ddzone(const gm static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox, rvec cell_ns_x0, rvec cell_ns_x1) { - int d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min; + int d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min; gmx_ddzone_t *zp; gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE]; gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE]; @@@ -1141,7 -1134,7 +1143,7 @@@ if (bPBC) { /* Take the minimum to avoid double communication */ - npulse_min = min(npulse, dd->nc[dim]-1-npulse); + npulse_min = std::min(npulse, dd->nc[dim]-1-npulse); } else { @@@ -1165,9 -1158,9 +1167,9 @@@ { for (d1 = d; d1 < dd->ndim-1; d1++) { - extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]); - extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]); - extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]); + extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]); + extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]); + extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]); } } } @@@ -1228,9 -1221,9 +1230,9 @@@ { if (bUse) { - buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0); - buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1); - buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1); + buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0); + buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1); + buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1); } if (dd->ndim == 3 && d == 0 && i == buf_size - 1) @@@ -1243,8 -1236,8 +1245,8 @@@ } if (bUse && dh[d1] >= 0) { - buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]); - buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]); + buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]); + buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]); } } /* Copy the received buffer to the send buffer, @@@ -1260,9 -1253,9 +1262,9 @@@ for (d1 = d; d1 < dd->ndim-1; d1++) { - extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0); - extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1); - extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1); + extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0); + extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1); + extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1); pos++; } @@@ -1292,8 -1285,8 +1294,8 @@@ { print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]); } - cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0); - cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1); + cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0); + cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1); } } if (dd->ndim >= 3) @@@ -1307,8 -1300,8 +1309,8 @@@ { print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]); } - cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0); - cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1); + cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0); + cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1); } } } @@@ -1534,6 -1527,10 +1536,6 @@@ static void dd_collect_vec_gatherv(gmx_ void dd_collect_vec(gmx_domdec_t *dd, t_state *state_local, rvec *lv, rvec *v) { - gmx_domdec_master_t *ma; - int n, i, c, a, nalloc = 0; - rvec *buf = NULL; - dd_collect_cg(dd, state_local); if (dd->nnodes <= GMX_DD_NNODES_SENDRECV) @@@ -1755,7 -1752,7 +1757,7 @@@ static void dd_distribute_vec_scatterv( { gmx_domdec_master_t *ma; int *scounts = NULL, *disps = NULL; - int n, i, c, a, nalloc = 0; + int n, i, c, a; rvec *buf = NULL; if (DDMASTER(dd)) @@@ -1949,7 -1946,7 +1951,7 @@@ static void write_dd_grid_pdb(const cha snew(grid_r, 2*dd->nnodes); } - dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL); + dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL); if (DDMASTER(dd)) { @@@ -2092,15 -2089,15 +2094,15 @@@ real dd_cutoff_mbody(gmx_domdec_t *dd r = comm->cellsize_min[dd->dim[0]]; for (di = 1; di < dd->ndim; di++) { - r = min(r, comm->cellsize_min[dd->dim[di]]); + r = std::min(r, comm->cellsize_min[dd->dim[di]]); } if (comm->bBondComm) { - r = max(r, comm->cutoff_mbody); + r = std::max(r, comm->cutoff_mbody); } else { - r = min(r, comm->cutoff); + r = std::min(r, comm->cutoff); } } } @@@ -2114,7 -2111,7 +2116,7 @@@ real dd_cutoff_twobody(gmx_domdec_t *dd r_mb = dd_cutoff_mbody(dd); - return max(dd->comm->cutoff, r_mb); + return std::max(dd->comm->cutoff, r_mb); } @@@ -2176,7 -2173,7 +2178,7 @@@ static int *dd_pmenodes(t_commrec *cr static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z) { gmx_domdec_t *dd; - ivec coords, coords_pme, nc; + ivec coords; int slab; dd = cr->dd; @@@ -2245,6 -2242,7 +2247,6 @@@ static int dd_simnode2pmenode(t_commre { gmx_domdec_t *dd; gmx_domdec_comm_t *comm; - ivec coord, coord_pme; int i; int pmenode = -1; @@@ -2255,7 -2253,6 +2257,7 @@@ if (comm->bCartesianPP_PME) { #ifdef GMX_MPI + ivec coord, coord_pme; MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord); if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim]) { @@@ -2393,7 -2390,7 +2395,7 @@@ void get_pme_ddnodes(t_commrec *cr, in static gmx_bool receive_vir_ener(t_commrec *cr) { gmx_domdec_comm_t *comm; - int pmenode, coords[DIM], rank; + int pmenode; gmx_bool bReceive; bReceive = TRUE; @@@ -2404,12 -2401,10 +2406,12 @@@ { pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid); #ifdef GMX_MPI + ivec coords; MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords); coords[comm->cartpmedim]++; if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim]) { + int rank; MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank); if (dd_simnode2pmenode(cr, rank) == pmenode) { @@@ -2517,8 -2512,12 +2519,8 @@@ static void make_dd_indices(gmx_domdec_ { int nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl; int *zone2cg, *zone_ncg1, *index_gl, *gatindex; - gmx_ga2la_t *ga2la; - char *bLocalCG; gmx_bool bCGs; - bLocalCG = dd->comm->bLocalCG; - if (dd->nat_tot > dd->gatindex_nalloc) { dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot); @@@ -2583,7 -2582,7 +2585,7 @@@ static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG, const char *where) { - int ncg, i, ngl, nerr; + int i, ngl, nerr; nerr = 0; if (bLocalCG == NULL) @@@ -2743,12 -2742,12 +2745,12 @@@ static real cellsize_min_dlb(gmx_domdec /* The cut-off might have changed, e.g. by PME load balacning, * from the value used to set comm->cellsize_min, so check it. */ - cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb); + cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb); if (comm->bPMELoadBalDLBLimits) { /* Check for the cut-off limit set by the PME load balancing */ - cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb); + cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb); } } @@@ -2771,10 -2770,10 +2773,10 @@@ static real grid_jump_limit(gmx_domdec_ { if (comm->bPMELoadBalDLBLimits) { - cutoff = max(cutoff, comm->PMELoadBal_max_cutoff); + cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff); } - grid_jump_limit = max(grid_jump_limit, - cutoff/comm->cd[dim_ind].np); + grid_jump_limit = std::max(grid_jump_limit, + cutoff/comm->cd[dim_ind].np); } return grid_jump_limit; @@@ -2961,8 -2960,8 +2963,8 @@@ static void init_ddpme(gmx_domdec_t *dd { slab = pmeindex % ddpme->nslab; } - ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]); - ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]); + ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]); + ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]); } } @@@ -3163,7 -3162,7 +3165,7 @@@ static void set_dd_cell_sizes_slb(gmx_d { npulse[d]++; } - cellsize_min[d] = min(cellsize_min[d], cellsize); + cellsize_min[d] = std::min(cellsize_min[d], cellsize); } if (setmode == setcellsizeslbLOCAL) { @@@ -3433,7 -3432,7 +3435,7 @@@ static void set_dd_cell_sizes_dlb_root( gmx_bool bUniform, gmx_int64_t step) { gmx_domdec_comm_t *comm; - int ncd, d1, i, j, pos; + int ncd, d1, i, pos; real *cell_size; real load_aver, load_i, imbalance, change, change_max, sc; real cellsize_limit_f, dist_min_f, dist_min_f_hard, space; @@@ -3465,7 -3464,7 +3467,7 @@@ cell_size[i] = 1.0/ncd; } } - else if (dd_load_count(comm)) + else if (dd_load_count(comm) > 0) { load_aver = comm->load[d].sum_m/ncd; change_max = 0; @@@ -3476,7 -3475,7 +3478,7 @@@ imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1); /* Determine the change of the cell size using underrelaxation */ change = -relax*imbalance; - change_max = max(change_max, max(change, -change)); + change_max = std::max(change_max, std::max(change, -change)); } /* Limit the amount of scaling. * We need to use the same rescaling for all cells in one row, @@@ -3619,7 -3618,7 +3621,7 @@@ static void distribute_dd_cell_sizes_dl gmx_ddbox_t *ddbox) { gmx_domdec_comm_t *comm; - int d1, dim1, pos; + int d1, pos; comm = dd->comm; @@@ -3907,13 -3906,13 +3909,13 @@@ static void distribute_cg(FILE *fplog, { gmx_domdec_master_t *ma; int **tmp_ind = NULL, *tmp_nalloc = NULL; - int i, icg, j, k, k0, k1, d, npbcdim; + int i, icg, j, k, k0, k1, d; matrix tcm; - rvec box_size, cg_cm; + rvec cg_cm; ivec ind; real nrcg, inv_ncg, pos_d; atom_id *cgindex; - gmx_bool bUnbounded, bScrew; + gmx_bool bScrew; ma = dd->ma; @@@ -4121,9 -4120,9 +4123,9 @@@ static void get_cg_distribution(FILE *f } dd_scatterv(dd, - DDMASTER(dd) ? ma->ibuf : NULL, - DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL, - DDMASTER(dd) ? ma->cg : NULL, + bMaster ? ma->ibuf : NULL, + bMaster ? ma->ibuf+dd->nnodes : NULL, + bMaster ? ma->cg : NULL, dd->ncg_home*sizeof(int), dd->index_gl); /* Determine the home charge group sizes */ @@@ -4336,7 -4335,7 +4338,7 @@@ static void clear_and_mark_ind(int ncg static void print_cg_move(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step, int cg, int dim, int dir, - gmx_bool bHaveLimitdAndCMOld, real limitd, + gmx_bool bHaveCgcmOld, real limitd, rvec cm_old, rvec cm_new, real pos_d) { gmx_domdec_comm_t *comm; @@@ -4345,19 -4344,22 +4347,22 @@@ comm = dd->comm; fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf)); - if (bHaveLimitdAndCMOld) + if (limitd > 0) { - fprintf(fplog, "The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n", + fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n", + dd->comm->bCGs ? "The charge group starting at atom" : "Atom", ddglatnr(dd, dd->cgindex[cg]), limitd, dim2char(dim)); } else { - fprintf(fplog, "The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n", + /* We don't have a limiting distance available: don't print it */ + fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n", + dd->comm->bCGs ? "The charge group starting at atom" : "Atom", ddglatnr(dd, dd->cgindex[cg]), dim2char(dim)); } fprintf(fplog, "distance out of cell %f\n", dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]); - if (bHaveLimitdAndCMOld) + if (bHaveCgcmOld) { fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n", cm_old[XX], cm_old[YY], cm_old[ZZ]); @@@ -4375,19 -4377,20 +4380,20 @@@ static void cg_move_error(FILE *fplog, gmx_domdec_t *dd, gmx_int64_t step, int cg, int dim, int dir, - gmx_bool bHaveLimitdAndCMOld, real limitd, + gmx_bool bHaveCgcmOld, real limitd, rvec cm_old, rvec cm_new, real pos_d) { if (fplog) { print_cg_move(fplog, dd, step, cg, dim, dir, - bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d); + bHaveCgcmOld, limitd, cm_old, cm_new, pos_d); } print_cg_move(stderr, dd, step, cg, dim, dir, - bHaveLimitdAndCMOld, limitd, cm_old, cm_new, pos_d); + bHaveCgcmOld, limitd, cm_old, cm_new, pos_d); gmx_fatal(FARGS, - "A charge group moved too far between two domain decomposition steps\n" - "This usually means that your system is not well equilibrated"); + "%s moved too far between two domain decomposition steps\n" + "This usually means that your system is not well equilibrated", + dd->comm->bCGs ? "A charge group" : "An atom"); } static void rotate_state_atom(t_state *state, int a) @@@ -4454,8 -4457,8 +4460,8 @@@ static void calc_cg_move(FILE *fplog, g int *move) { int npbcdim; - int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d; - int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec; + int cg, k, k0, k1, d, dim, d2; + int mc, nrcg; int flag; gmx_bool bScrew; ivec dev; @@@ -4509,7 -4512,8 +4515,8 @@@ { if (pos_d >= limit1[d]) { - cg_move_error(fplog, dd, step, cg, d, 1, TRUE, limitd[d], + cg_move_error(fplog, dd, step, cg, d, 1, + cg_cm != state->x, limitd[d], cg_cm[cg], cm_new, pos_d); } dev[d] = 1; @@@ -4535,7 -4539,8 +4542,8 @@@ { if (pos_d < limit0[d]) { - cg_move_error(fplog, dd, step, cg, d, -1, TRUE, limitd[d], + cg_move_error(fplog, dd, step, cg, d, -1, + cg_cm != state->x, limitd[d], cg_cm[cg], cm_new, pos_d); } dev[d] = -1; @@@ -4629,15 -4634,17 +4637,15 @@@ static void dd_redistribute_cg(FILE *fp int *move; int npbcdim; int ncg[DIM*2], nat[DIM*2]; - int c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d; - int mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec; + int c, i, cg, k, d, dim, dim2, dir, d2, d3; + int mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec; int sbuf[2], rbuf[2]; int home_pos_cg, home_pos_at, buf_pos; int flag; gmx_bool bV = FALSE, bSDX = FALSE, bCGP = FALSE; - gmx_bool bScrew; - ivec dev; - real inv_ncg, pos_d; + real pos_d; matrix tcm; - rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new; + rvec *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1; atom_id *cgindex; cginfo_mb_t *cginfo_mb; gmx_domdec_comm_t *comm; @@@ -4894,6 -4901,7 +4902,6 @@@ { dim = dd->dim[d]; ncg_recv = 0; - nat_recv = 0; nvr = 0; for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++) { @@@ -4928,6 -4936,7 +4936,6 @@@ comm->cgcm_state[cdd], nvs, comm->vbuf.v+nvr, i); ncg_recv += rbuf[0]; - nat_recv += rbuf[1]; nvr += i; } @@@ -4949,7 -4958,7 +4957,7 @@@ { cg_move_error(fplog, dd, step, cg, dim, (flag & DD_FLAG_FW(d)) ? 1 : 0, - FALSE, 0, + fr->cutoff_scheme == ecutsGROUP, 0, comm->vbuf.v[buf_pos], comm->vbuf.v[buf_pos], comm->vbuf.v[buf_pos][dim]); @@@ -5231,7 -5240,7 +5239,7 @@@ static void get_load_distribution(gmx_d gmx_domdec_comm_t *comm; gmx_domdec_load_t *load; gmx_domdec_root_t *root = NULL; - int d, dim, cid, i, pos; + int d, dim, i, pos; float cell_frac = 0, sbuf[DD_NLOAD_MAX]; gmx_bool bSepPME; @@@ -5327,7 -5336,7 +5335,7 @@@ for (i = 0; i < dd->nc[dim]; i++) { load->sum += load->load[pos++]; - load->max = max(load->max, load->load[pos]); + load->max = std::max(load->max, load->load[pos]); pos++; if (dd->bGridJump) { @@@ -5336,14 -5345,14 +5344,14 @@@ /* This direction could not be load balanced properly, * therefore we need to use the maximum iso the average load. */ - load->sum_m = max(load->sum_m, load->load[pos]); + load->sum_m = std::max(load->sum_m, load->load[pos]); } else { load->sum_m += load->load[pos]; } pos++; - load->cvol_min = min(load->cvol_min, load->load[pos]); + load->cvol_min = std::min(load->cvol_min, load->load[pos]); pos++; if (d < dd->ndim-1) { @@@ -5357,9 -5366,9 +5365,9 @@@ } if (bSepPME) { - load->mdf = max(load->mdf, load->load[pos]); + load->mdf = std::max(load->mdf, load->load[pos]); pos++; - load->pme = max(load->pme, load->load[pos]); + load->pme = std::max(load->pme, load->load[pos]); pos++; } } @@@ -5753,6 -5762,7 +5761,6 @@@ static void make_load_communicators(gmx void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd) { - gmx_bool bZYX; int d, dim, i, j, m; ivec tmp, s; int nzone, nzonep; @@@ -5911,16 -5921,17 +5919,16 @@@ static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder) { gmx_domdec_t *dd; + dd = cr->dd; + +#ifdef GMX_MPI gmx_domdec_comm_t *comm; - int i, rank, *buf; + int rank, *buf; ivec periods; -#ifdef GMX_MPI MPI_Comm comm_cart; -#endif - dd = cr->dd; comm = dd->comm; -#ifdef GMX_MPI if (comm->bCartesianPP) { /* Set up cartesian communication for the particle-particle part */ @@@ -5930,7 -5941,7 +5938,7 @@@ dd->nc[XX], dd->nc[YY], dd->nc[ZZ]); } - for (i = 0; i < DIM; i++) + for (int i = 0; i < DIM; i++) { periods[i] = TRUE; } @@@ -5994,7 -6005,7 +6002,7 @@@ /* Determine the master coordinates and rank. * The DD master should be the same node as the master of this sim. */ - for (i = 0; i < dd->nnodes; i++) + for (int i = 0; i < dd->nnodes; i++) { if (comm->ddindex2simnodeid[i] == 0) { @@@ -6032,27 -6043,30 +6040,27 @@@ } } -static void receive_ddindex2simnodeid(t_commrec *cr) +static void receive_ddindex2simnodeid(t_commrec gmx_unused *cr) { +#ifdef GMX_MPI gmx_domdec_t *dd; - gmx_domdec_comm_t *comm; - int *buf; dd = cr->dd; comm = dd->comm; -#ifdef GMX_MPI if (!comm->bCartesianPP_PME && comm->bCartesianPP) { + int *buf; snew(comm->ddindex2simnodeid, dd->nnodes); snew(buf, dd->nnodes); if (cr->duty & DUTY_PP) { buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid; } -#ifdef GMX_MPI /* Communicate the ddindex to simulation nodeid index */ MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim); -#endif sfree(buf); } #endif @@@ -6094,8 -6108,9 +6102,8 @@@ static void split_communicator(FILE *fp { gmx_domdec_t *dd; gmx_domdec_comm_t *comm; - int i, rank; + int i; gmx_bool bDiv[DIM]; - ivec periods; #ifdef GMX_MPI MPI_Comm comm_cart; #endif @@@ -6143,9 -6158,6 +6151,9 @@@ #ifdef GMX_MPI if (comm->bCartesianPP_PME) { + int rank; + ivec periods; + if (fplog) { fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]); @@@ -6157,6 -6169,7 +6165,6 @@@ } MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder, &comm_cart); - MPI_Comm_rank(comm_cart, &rank); if (MASTERNODE(cr) && rank != 0) { @@@ -6340,7 -6353,7 +6348,7 @@@ static real *get_slb_frac(FILE *fplog, for (i = 0; i < nc; i++) { dbl = 0; - sscanf(size_string, "%lf%n", &dbl, &n); + sscanf(size_string, "%20lf%n", &dbl, &n); if (dbl == 0) { gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string); @@@ -6403,7 -6416,7 +6411,7 @@@ static int dd_getenv(FILE *fplog, cons val = getenv(env_var); if (val) { - if (sscanf(val, "%d", &nst) <= 0) + if (sscanf(val, "%20d", &nst) <= 0) { nst = 1; } @@@ -6464,7 -6477,7 +6472,7 @@@ static real average_cellsize_min(gmx_do { d = dd->dim[di]; /* Check using the initial average cell size */ - r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]); + r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]); } return r; @@@ -6474,6 -6487,7 +6482,6 @@@ static int check_dlb_support(FILE *fplo const char *dlb_opt, gmx_bool bRecordLoad, unsigned long Flags, t_inputrec *ir) { - gmx_domdec_t *dd; int eDLB = -1; char buf[STRLEN]; @@@ -6615,10 -6629,10 +6623,10 @@@ gmx_domdec_t *init_domain_decomposition gmx_domdec_t *dd; gmx_domdec_comm_t *comm; int recload; - int d, i, j; real r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs; gmx_bool bC; char buf[STRLEN]; + const real tenPercentMargin = 1.1; if (fplog) { @@@ -6673,7 -6687,8 +6681,8 @@@ /* Initialize to GPU share count to 0, might change later */ comm->nrank_gpu_shared = 0; - comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir); + comm->eDLB = check_dlb_support(fplog, cr, dlb_opt, comm->bRecordLoad, Flags, ir); + comm->bDLB_locked = FALSE; comm->bDynLoadBal = (comm->eDLB == edlbYES); if (fplog) @@@ -6739,6 -6754,13 +6748,13 @@@ comm->cellsize_limit = 0; comm->bBondComm = FALSE; + /* Atoms should be able to move by up to half the list buffer size (if > 0) + * within nstlist steps. Since boundaries are allowed to displace by half + * a cell size, DD cells should be at least the size of the list buffer. + */ - comm->cellsize_limit = max(comm->cellsize_limit, - ir->rlistlong - max(ir->rvdw, ir->rcoulomb)); ++ comm->cellsize_limit = std::max(comm->cellsize_limit, ++ ir->rlistlong - std::max(ir->rvdw, ir->rcoulomb)); + if (comm->bInterCGBondeds) { if (comm_distance_min > 0) @@@ -6750,7 -6772,7 +6766,7 @@@ } else { - comm->cutoff = max(comm->cutoff, comm->cutoff_mbody); + comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody); } r_bonded_limit = comm->cutoff_mbody; } @@@ -6776,16 -6798,16 +6792,16 @@@ */ if (Flags & MD_DDBONDCOMM) { - if (max(r_2b, r_mb) > comm->cutoff) + if (std::max(r_2b, r_mb) > comm->cutoff) { - r_bonded = max(r_2b, r_mb); - r_bonded_limit = 1.1*r_bonded; + r_bonded = std::max(r_2b, r_mb); + r_bonded_limit = tenPercentMargin*r_bonded; comm->bBondComm = TRUE; } else { r_bonded = r_mb; - r_bonded_limit = min(1.1*r_bonded, comm->cutoff); + r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff); } /* We determine cutoff_mbody later */ } @@@ -6794,12 -6816,12 +6810,12 @@@ /* No special bonded communication, * simply increase the DD cut-off. */ - r_bonded_limit = 1.1*max(r_2b, r_mb); + r_bonded_limit = tenPercentMargin*std::max(r_2b, r_mb); comm->cutoff_mbody = r_bonded_limit; - comm->cutoff = max(comm->cutoff, comm->cutoff_mbody); + comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody); } } - comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit); + comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit); if (fplog) { fprintf(fplog, @@@ -6833,7 -6855,7 +6849,7 @@@ "User supplied maximum distance required for P-LINCS: %.3f nm\n", rconstr); } - comm->cellsize_limit = max(comm->cellsize_limit, rconstr); + comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr); comm->cgs_gl = gmx_mtop_global_cgs(mtop); @@@ -6989,12 -7011,12 +7005,12 @@@ if (comm->eDLB != edlbNO) { /* Check if this does not limit the scaling */ - comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs); + comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs); } if (!comm->bBondComm) { /* Without bBondComm do not go beyond the n.b. cut-off */ - comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff); + comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff); if (comm->cellsize_limit >= comm->cutoff) { /* We don't loose a lot of efficieny @@@ -7006,7 -7028,7 +7022,7 @@@ } } /* Check if we did not end up below our original limit */ - comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit); + comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit); if (comm->cutoff_mbody > comm->cellsize_limit) { @@@ -7069,7 -7091,7 +7085,7 @@@ static void turn_on_dlb(FILE *fplog, t_ cellsize_min = comm->cellsize_min[dd->dim[0]]; for (d = 1; d < dd->ndim; d++) { - cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]); + cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]); } if (cellsize_min < comm->cellsize_limit*1.05) @@@ -7134,6 -7156,8 +7150,6 @@@ void dd_init_bondeds(FILE *fplog t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb) { gmx_domdec_comm_t *comm; - gmx_bool bBondComm; - int d; dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck); @@@ -7244,7 -7268,7 +7260,7 @@@ static void print_dd_settings(FILE *fpl limit = dd->comm->cellsize_min[XX]; for (d = 1; d < DIM; d++) { - limit = min(limit, dd->comm->cellsize_min[d]); + limit = std::min(limit, dd->comm->cellsize_min[d]); } } @@@ -7252,10 -7276,10 +7268,10 @@@ { fprintf(fplog, "%40s %-7s %6.3f nm\n", "two-body bonded interactions", "(-rdd)", - max(comm->cutoff, comm->cutoff_mbody)); + std::max(comm->cutoff, comm->cutoff_mbody)); fprintf(fplog, "%40s %-7s %6.3f nm\n", "multi-body bonded interactions", "(-rdd)", - (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit)); + (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : std::min(comm->cutoff, limit)); } if (dd->vsite_comm) { @@@ -7290,7 -7314,7 +7306,7 @@@ static void set_cell_limits_dlb(gmx_dom /* Determine the maximum number of comm. pulses in one dimension */ - comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody); + comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody); /* Determine the maximum required number of grid pulses */ if (comm->cellsize_limit >= comm->cutoff) @@@ -7311,7 -7335,7 +7327,7 @@@ else { /* There is no cell size limit */ - npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1)); + npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1)); } if (!bNoCutOff && npulse > 1) @@@ -7323,9 -7347,9 +7339,9 @@@ dim = dd->dim[d]; npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale)); - npulse_d_max = max(npulse_d_max, npulse_d); + npulse_d_max = std::max(npulse_d_max, npulse_d); } - npulse = min(npulse, npulse_d_max); + npulse = std::min(npulse, npulse_d_max); } /* This env var can override npulse */ @@@ -7339,10 -7363,10 +7355,10 @@@ comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE); for (d = 0; d < dd->ndim; d++) { - comm->cd[d].np_dlb = min(npulse, dd->nc[dd->dim[d]]-1); + comm->cd[d].np_dlb = std::min(npulse, dd->nc[dd->dim[d]]-1); comm->cd[d].np_nalloc = comm->cd[d].np_dlb; snew(comm->cd[d].ind, comm->cd[d].np_nalloc); - comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb); + comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb); if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1) { comm->bVacDLBNoLimit = FALSE; @@@ -7352,10 -7376,10 +7368,10 @@@ /* cellsize_limit is set for LINCS in init_domain_decomposition */ if (!comm->bVacDLBNoLimit) { - comm->cellsize_limit = max(comm->cellsize_limit, - comm->cutoff/comm->maxpulse); + comm->cellsize_limit = std::max(comm->cellsize_limit, + comm->cutoff/comm->maxpulse); } - comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody); + comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody); /* Set the minimum cell size for each DD dimension */ for (d = 0; d < dd->ndim; d++) { @@@ -7372,7 -7396,7 +7388,7 @@@ } if (comm->cutoff_mbody <= 0) { - comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit); + comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit); } if (comm->bDynLoadBal) { @@@ -7463,7 -7487,7 +7479,7 @@@ void set_dd_parameters(FILE *fplog, gmx } natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr]; - dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot); + dd->ga2la = ga2la_init(natoms_tot, static_cast(vol_frac*natoms_tot)); } static gmx_bool test_dd_cutoff(t_commrec *cr, @@@ -7564,6 -7588,20 +7580,20 @@@ void change_dd_dlb_cutoff_limit(t_commr comm->PMELoadBal_max_cutoff = comm->cutoff; } + gmx_bool dd_dlb_is_locked(const gmx_domdec_t *dd) + { + return dd->comm->bDLB_locked; + } + + void dd_dlb_set_lock(gmx_domdec_t *dd, gmx_bool bValue) + { + /* We can only lock the DLB when it is set to auto, otherwise don't lock */ + if (dd->comm->eDLB == edlbAUTO) + { + dd->comm->bDLB_locked = bValue; + } + } + static void merge_cg_buffers(int ncell, gmx_domdec_comm_dim_t *cd, int pulse, int *ncg_cell, @@@ -7727,11 -7765,11 +7757,11 @@@ set_dd_corners(const gmx_domdec_t *dd c->c[1][1] = comm->cell_x0[dim1]; if (dd->bGridJump) { - c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0); + c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0); if (bDistMB) { /* For the multi-body distance we need the maximum */ - c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0); + c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0); } } /* Set the upper-right corner for rounding */ @@@ -7754,8 -7792,8 +7784,8 @@@ if (j >= 4) { c->c[2][j-4] = - max(c->c[2][j-4], - comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0); + std::max(c->c[2][j-4], + comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0); } } } @@@ -7767,7 -7805,7 +7797,7 @@@ { for (j = 0; j < 2; j++) { - c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0); + c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0); } } } @@@ -7781,11 -7819,11 +7811,11 @@@ c->cr1[3] = comm->cell_x1[dim1]; if (dd->bGridJump) { - c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1); + c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1); if (bDistMB) { /* For the multi-body distance we need the maximum */ - c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1); + c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1); } } } @@@ -8065,7 -8103,7 +8095,7 @@@ static void setup_dd_communication(gmx_ { int dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot; int nzone, nzone_send, zone, zonei, cg0, cg1; - int c, i, j, cg, cg_gl, nrcg; + int c, i, cg, cg_gl, nrcg; int *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i; gmx_domdec_comm_t *comm; gmx_domdec_zones_t *zones; @@@ -8073,7 -8111,7 +8103,7 @@@ gmx_domdec_ind_t *ind; cginfo_mb_t *cginfo_mb; gmx_bool bBondComm, bDist2B, bDistMB, bDistBonded; - real r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg; + real r_comm2, r_bcomm2; dd_corners_t corners; ivec tric_dist; rvec *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr; @@@ -8104,6 -8142,8 +8134,6 @@@ for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++) { - dim = dd->dim[dim_ind]; - /* Check if we need to use triclinic distances */ tric_dist[dim_ind] = 0; for (i = 0; i <= dim_ind; i++) @@@ -8395,7 -8435,7 +8425,7 @@@ /* The rvec buffer is also required for atom buffers * of size nrecv in dd_move_x and dd_move_f. */ - i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]); + i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]); vec_rvec_check_alloc(&comm->vbuf2, i); } } @@@ -8540,9 -8580,10 +8570,9 @@@ static void set_zones_size(gmx_domdec_ gmx_domdec_comm_t *comm; gmx_domdec_zones_t *zones; gmx_bool bDistMB; - int z, zi, zj0, zj1, d, dim; + int z, zi, d, dim; real rcs, rcmbs; int i, j; - real size_j, add_tric; real vol; comm = dd->comm; @@@ -8640,8 -8681,8 +8670,8 @@@ * With multiple pulses this will lead * to a larger zone then strictly necessary. */ - zones->size[z].x1[dim] = max(zones->size[z].x1[dim], - zones->size[zi].x1[dim]+rcmbs); + zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim], + zones->size[zi].x1[dim]+rcmbs); } } } @@@ -8660,8 -8701,8 +8690,8 @@@ { if (zones->shift[z][dim] > 0) { - zones->size[z].x1[dim] = max(zones->size[z].x1[dim], - zones->size[zi].x1[dim]+rcs); + zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim], + zones->size[zi].x1[dim]+rcs); } } } @@@ -8725,8 -8766,8 +8755,8 @@@ { for (i = 0; i < DIM; i++) { - corner_min[i] = min(corner_min[i], corner[i]); - corner_max[i] = max(corner_max[i], corner[i]); + corner_min[i] = std::min(corner_min[i], corner[i]); + corner_max[i] = std::max(corner_max[i], corner[i]); } } } @@@ -8896,7 -8937,8 +8926,7 @@@ static int dd_sort_order(gmx_domdec_t * { gmx_domdec_sort_t *sort; gmx_cgsort_t *cgsort, *sort_i; - int ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf; - int sort_last, sort_skip; + int ncg_new, nsort2, nsort_new, i, *a, moved; sort = dd->comm->sort; @@@ -9007,7 -9049,7 +9037,7 @@@ static void dd_sort_state(gmx_domdec_t int ncg_home_old) { gmx_domdec_sort_t *sort; - gmx_cgsort_t *cgsort, *sort_i; + gmx_cgsort_t *cgsort; int *cgindex; int ncg_new, i, *ibuf, cgsize; rvec *vbuf; @@@ -9264,7 -9306,7 +9294,7 @@@ void dd_partition_system(FIL t_block *cgs_gl; gmx_int64_t step_pcoupl; rvec cell_ns_x0, cell_ns_x1; - int i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum; + int i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum; gmx_bool bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad; gmx_bool bRedist, bSortCG, bResortAll; ivec ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np; @@@ -9324,17 -9366,18 +9354,17 @@@ } /* Check if we have recorded loads on the nodes */ - if (comm->bRecordLoad && dd_load_count(comm)) + if (comm->bRecordLoad && dd_load_count(comm) > 0) { - if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal) + if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal && !dd_dlb_is_locked(dd)) { /* Check if we should use DLB at the second partitioning * and every 100 partitionings, * so the extra communication cost is negligible. */ - n = std::max(100, nstglobalcomm); + const int nddp_chk_dlb = 100; - bCheckDLB = (comm->n_load_collect == 0 || - comm->n_load_have % n == n-1); + comm->n_load_have % nddp_chk_dlb == nddp_chk_dlb - 1); } else { @@@ -9372,8 -9415,26 +9402,26 @@@ /* Since the timings are node dependent, the master decides */ if (DDMASTER(dd)) { - bTurnOnDLB = - (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON); + /* Here we check if the max PME rank load is more than 0.98 + * the max PP force load. If so, PP DLB will not help, + * since we are (almost) limited by PME. Furthermore, + * DLB will cause a significant extra x/f redistribution + * cost on the PME ranks, which will then surely result + * in lower total performance. + * This check might be fragile, since one measurement + * below 0.98 (although only done once every 100 DD part.) + * could turn on DLB for the rest of the run. + */ + if (cr->npmenodes > 0 && + dd_pme_f_ratio(dd) > 1 - DD_PERF_LOSS_DLB_ON) + { + bTurnOnDLB = FALSE; + } + else + { + bTurnOnDLB = + (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS_DLB_ON); + } if (debug) { fprintf(debug, "step %s, imb loss %f\n", diff --combined src/gromacs/mdlib/force.c index f98b073fee,d4e6445491..4313d3e75e --- a/src/gromacs/mdlib/force.c +++ b/src/gromacs/mdlib/force.c @@@ -34,39 -34,38 +34,39 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "gromacs/legacyheaders/force.h" + +#include "config.h" +#include #include #include -#include -#include "sysstuff.h" -#include "typedefs.h" -#include "macros.h" -#include "gromacs/utility/smalloc.h" -#include "macros.h" -#include "physics.h" -#include "force.h" -#include "nonbonded.h" -#include "names.h" -#include "network.h" -#include "pbc.h" -#include "ns.h" -#include "nrnb.h" -#include "bondf.h" -#include "mshift.h" -#include "txtdump.h" -#include "coulomb.h" -#include "pme.h" -#include "mdrun.h" -#include "domdec.h" -#include "qmmm.h" -#include "gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/coulomb.h" +#include "gromacs/legacyheaders/domdec.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/mdrun.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/network.h" +#include "gromacs/legacyheaders/nonbonded.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/ns.h" +#include "gromacs/legacyheaders/pme.h" +#include "gromacs/legacyheaders/qmmm.h" +#include "gromacs/legacyheaders/txtdump.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/legacyheaders/types/commrec.h" +#include "gromacs/listed-forces/bonded.h" +#include "gromacs/math/vec.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" #include "gromacs/timing/wallcycle.h" -#include "gmx_fatal.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" void ns(FILE *fp, t_forcerec *fr, @@@ -118,9 -117,11 +118,11 @@@ static void reduce_thread_forces(int n int nthreads, f_thread_t *f_t) { int t, i; + int nthreads_loop gmx_unused; /* This reduction can run over any number of threads */ - #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) + nthreads_loop = gmx_omp_nthreads_get(emntBonded); + #pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static) for (i = 0; i < n; i++) { for (t = 1; t < nthreads; t++) @@@ -139,7 -140,13 +141,7 @@@ } } -void gmx_print_sepdvdl(FILE *fplog, const char *s, real v, real dvdlambda) -{ - fprintf(fplog, " %-30s V %12.5e dVdl %12.5e\n", s, v, dvdlambda); -} - -void do_force_lowlevel(FILE *fplog, gmx_int64_t step, - t_forcerec *fr, t_inputrec *ir, +void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, t_idef *idef, t_commrec *cr, t_nrnb *nrnb, gmx_wallcycle_t wcycle, t_mdatoms *md, @@@ -150,6 -157,7 +152,6 @@@ t_fcdata *fcd, gmx_localtop_t *top, gmx_genborn_t *born, - t_atomtypes *atype, gmx_bool bBornRadii, matrix box, t_lambda *fepvals, @@@ -162,7 -170,7 +164,7 @@@ { int i, j; int donb_flags; - gmx_bool bDoEpot, bSepDVDL, bSB; + gmx_bool bDoEpot, bSB; int pme_flags; matrix boxs; rvec box_size; @@@ -176,6 -184,8 +178,6 @@@ double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ #endif -#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) { gmx_print_sepdvdl(fplog, s, v, dvdlambda); } - set_pbc(&pbc, fr->ePBC, box); /* reset free energy components */ @@@ -191,6 -201,7 +193,6 @@@ box_size[i] = box[i][i]; } - bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); debug_gmx(); /* do QMMM first if requested */ @@@ -199,6 -210,12 +201,6 @@@ enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); } - if (bSepDVDL) - { - fprintf(fplog, "Step %s: non-bonded V and dVdl for rank %d:\n", - gmx_step_str(step, buf), cr->nodeid); - } - /* Call the short range functions all in one go. */ #ifdef GMX_MPI @@@ -216,6 -233,7 +218,6 @@@ /* foreign lambda component for walls */ real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], enerd->grpp.ener[egLJSR], nrnb); - PRINT_SEPDVDL("Walls", 0.0, dvdl_walls); enerd->dvdl_lin[efptVDW] += dvdl_walls; } @@@ -298,10 -316,10 +300,10 @@@ /* MRS: Eventually, many need to include free energy contribution here! */ if (ir->implicit_solvent) { - wallcycle_sub_start(wcycle, ewcsBONDED); + wallcycle_sub_start(wcycle, ewcsLISTED); calc_gb_forces(cr, md, born, top, x, f, fr, idef, ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); - wallcycle_sub_stop(wcycle, ewcsBONDED); + wallcycle_sub_stop(wcycle, ewcsLISTED); } #ifdef GMX_MPI @@@ -333,6 -351,24 +335,6 @@@ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; } - if (bSepDVDL) - { - real V_short_range = 0; - real dvdl_short_range = 0; - - for (i = 0; i < enerd->grpp.nener; i++) - { - V_short_range += - (fr->bBHAM ? - enerd->grpp.ener[egBHAMSR][i] : - enerd->grpp.ener[egLJSR][i]) - + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; - } - dvdl_short_range = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; - PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", - V_short_range, - dvdl_short_range); - } debug_gmx(); @@@ -341,9 -377,9 +343,9 @@@ pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); } - /* Shift the coordinates. Must be done before bonded forces and PPPM, + /* Shift the coordinates. Must be done before listed forces and PPPM, * but is also necessary for SHAKE and update, therefore it can NOT - * go when no bonded forces have to be evaluated. + * go when no listed forces have to be evaluated. */ /* Here sometimes we would not need to shift with NBFonly, @@@ -361,9 -397,9 +363,9 @@@ inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); } } - /* Check whether we need to do bondeds or correct for exclusions */ + /* Check whether we need to do listed interactions or correct for exclusions */ if (fr->bMolPBC && - ((flags & GMX_FORCE_BONDED) + ((flags & GMX_FORCE_LISTED) || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) { /* Since all atoms are in the rectangular or triclinic unit-cell, @@@ -373,13 -409,14 +375,13 @@@ } debug_gmx(); - if (flags & GMX_FORCE_BONDED) + if (flags & GMX_FORCE_LISTED) { - wallcycle_sub_start(wcycle, ewcsBONDED); - calc_bonds(fplog, cr->ms, - idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, - DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, - flags, - fr->bSepDVDL && do_per_step(step, ir->nstlog), step); + wallcycle_sub_start(wcycle, ewcsLISTED); + calc_bonds(cr->ms, + idef, (const rvec *) x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, + flags); /* Check if we have to determine energy differences * at foreign lambda's. @@@ -398,7 -435,7 +400,7 @@@ { lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); } - calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, + calc_bonds_lambda(idef, (const rvec *) x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; @@@ -406,25 -443,17 +408,25 @@@ } debug_gmx(); - wallcycle_sub_stop(wcycle, ewcsBONDED); + wallcycle_sub_stop(wcycle, ewcsLISTED); } where(); *cycles_pme = 0; + clear_mat(fr->vir_el_recip); + clear_mat(fr->vir_lj_recip); + + /* Do long-range electrostatics and/or LJ-PME, including related short-range + * corrections. + */ if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) { - real Vlr = 0, Vcorr = 0; - real dvdl_long_range = 0; - int status = 0; + real Vlr = 0, Vcorr = 0; + real dvdl_long_range = 0; + int status = 0; + real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; + real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; bSB = (ir->nwall == 2); if (bSB) @@@ -433,6 -462,20 +435,6 @@@ svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); box_size[ZZ] *= ir->wall_ewald_zfac; } - } - - /* Do long-range electrostatics and/or LJ-PME, including related short-range - * corrections. - */ - - clear_mat(fr->vir_el_recip); - clear_mat(fr->vir_lj_recip); - - if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) - { - real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; - real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; - int status = 0; if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) { @@@ -534,10 -577,15 +536,10 @@@ fr->vir_el_recip); } - PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr_q, dvdl_long_range_correction_q); - PRINT_SEPDVDL("Ewald excl. corr. LJ", Vcorr_lj, dvdl_long_range_correction_lj); enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; - } - if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))) - { - if (cr->duty & DUTY_PME) + if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME)) { /* Do reciprocal PME for Coulomb and/or LJ. */ assert(fr->n_tpi >= 0); @@@ -609,6 -657,7 +611,6 @@@ md->chargeA + md->homenr - fr->n_tpi, &Vlr_q); } - PRINT_SEPDVDL("PME mesh", Vlr_q + Vlr_lj, dvdl_long_range_q+dvdl_long_range_lj); } } @@@ -619,6 -668,7 +621,6 @@@ box_size, cr, md->homenr, fr->vir_el_recip, fr->ewaldcoeff_q, lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); - PRINT_SEPDVDL("Ewald long-range", Vlr_q, dvdl_long_range_q); } /* Note that with separate PME nodes we get the real energies later */ @@@ -653,6 -703,8 +655,6 @@@ fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; - PRINT_SEPDVDL("RF exclusion correction", - enerd->term[F_RF_EXCL], dvdl_rf_excl); } } } diff --combined src/gromacs/mdlib/mdatom.c index 01c5c55bd6,b8d51a76da..d1722dedf1 --- a/src/gromacs/mdlib/mdatom.c +++ b/src/gromacs/mdlib/mdatom.c @@@ -34,16 -34,19 +34,16 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" #include -#include "typedefs.h" -#include "mdatoms.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/mdatoms.h" +#include "gromacs/legacyheaders/qmmm.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/topology/mtop_util.h" #include "gromacs/utility/smalloc.h" -#include "main.h" -#include "qmmm.h" -#include "mtop_util.h" -#include "gmx_omp_nthreads.h" #define ALMOST_ZERO 1e-30 @@@ -118,6 -121,7 +118,7 @@@ void atoms2md(gmx_mtop_t *mtop, t_input t_grpopts *opts; gmx_groups_t *groups; gmx_molblock_t *molblock; + int nthreads gmx_unused; bLJPME = EVDW_PME(ir->vdwtype); @@@ -230,7 -234,8 +231,8 @@@ alook = gmx_mtop_atomlookup_init(mtop); - #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntDefault)) schedule(static) + nthreads = gmx_omp_nthreads_get(emntDefault); + #pragma omp parallel for num_threads(nthreads) schedule(static) for (i = 0; i < md->nr; i++) { int g, ag, molb; diff --combined src/gromacs/mdlib/minimize.c index 234bcbdd8f,69008f53fa..5255d544d1 --- a/src/gromacs/mdlib/minimize.c +++ b/src/gromacs/mdlib/minimize.c @@@ -34,51 -34,51 +34,51 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "config.h" +#include #include #include -#include -#include "sysstuff.h" -#include "gromacs/utility/cstringutil.h" -#include "network.h" -#include "gromacs/utility/smalloc.h" -#include "nrnb.h" -#include "main.h" -#include "force.h" -#include "macros.h" -#include "names.h" -#include "gmx_fatal.h" -#include "txtdump.h" -#include "typedefs.h" -#include "update.h" -#include "constr.h" -#include "vec.h" -#include "tgroup.h" -#include "mdebin.h" -#include "vsite.h" -#include "force.h" -#include "mdrun.h" -#include "md_support.h" -#include "sim_util.h" -#include "domdec.h" -#include "mdatoms.h" -#include "ns.h" -#include "mtop_util.h" -#include "pme.h" -#include "bondf.h" -#include "gmx_omp_nthreads.h" -#include "md_logging.h" #include "gromacs/fileio/confio.h" +#include "gromacs/fileio/mtxio.h" #include "gromacs/fileio/trajectory_writing.h" -#include "gromacs/linearalgebra/mtxio.h" +#include "gromacs/imd/imd.h" +#include "gromacs/legacyheaders/bonded-threading.h" +#include "gromacs/legacyheaders/constr.h" +#include "gromacs/legacyheaders/domdec.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/md_logging.h" +#include "gromacs/legacyheaders/md_support.h" +#include "gromacs/legacyheaders/mdatoms.h" +#include "gromacs/legacyheaders/mdebin.h" +#include "gromacs/legacyheaders/mdrun.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/network.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/ns.h" +#include "gromacs/legacyheaders/pme.h" +#include "gromacs/legacyheaders/sim_util.h" +#include "gromacs/legacyheaders/tgroup.h" +#include "gromacs/legacyheaders/txtdump.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/legacyheaders/update.h" +#include "gromacs/legacyheaders/vsite.h" +#include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/linearalgebra/sparsematrix.h" +#include "gromacs/math/vec.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" #include "gromacs/timing/wallcycle.h" #include "gromacs/timing/walltime_accounting.h" -#include "gromacs/imd/imd.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" typedef struct { t_state s; @@@ -550,6 -550,7 +550,7 @@@ static void do_em_step(t_commrec *cr, t int start, end; rvec *x1, *x2; real dvdl_constr; + int nthreads gmx_unused; s1 = &ems1->s; s2 = &ems2->s; @@@ -587,7 -588,8 +588,8 @@@ x1 = s1->x; x2 = s2->x; - #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate)) + nthreads = gmx_omp_nthreads_get(emntUpdate); + #pragma omp parallel num_threads(nthreads) { int gf, i, m; @@@ -771,7 -773,7 +773,7 @@@ static void evaluate_energy(FILE *fplog } /* Calculate long range corrections to pressure and energy */ - calc_dispcorr(fplog, inputrec, fr, count, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW], + calc_dispcorr(inputrec, fr, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW], pres, force_vir, &prescorr, &enercorr, &dvdlcorr); enerd->term[F_DISPCORR] = enercorr; enerd->term[F_EPOT] += enercorr; @@@ -790,6 -792,10 +792,6 @@@ ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, ems->s.lambda[efptBONDED], &dvdl_constr, NULL, &shake_vir, nrnb, econqForceDispl, FALSE, 0, 0); - if (fr->bSepDVDL && fplog) - { - gmx_print_sepdvdl(fplog, "Constraints", t, dvdl_constr); - } enerd->term[F_DVDL_CONSTR] += dvdl_constr; m_add(force_vir, shake_vir, vir); wallcycle_stop(wcycle, ewcCONSTR); @@@ -1786,6 -1792,7 +1788,6 @@@ double do_lbfgs(FILE *fplog, t_commrec } stepsize = 1.0/fnorm; - converged = FALSE; /* Start the loop over BFGS steps. * Each successful step is counted, and we continue until diff --combined src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu index 799425edab,d02e903410..c024f7b02c --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu @@@ -32,14 -32,12 +32,14 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "nbnxn_cuda.h" + +#include "config.h" -#include #include +#include #if defined(_MSVC) #include @@@ -47,21 -45,21 +47,21 @@@ #include -#include "types/simple.h" -#include "types/nbnxn_pairlist.h" -#include "types/nb_verlet.h" -#include "types/ishift.h" -#include "types/force_flags.h" -#include "../nbnxn_consts.h" - #ifdef TMPI_ATOMICS #include "thread_mpi/atomic.h" #endif +#include "gromacs/gmxlib/cuda_tools/cudautils.cuh" +#include "gromacs/legacyheaders/types/force_flags.h" +#include "gromacs/legacyheaders/types/simple.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_consts.h" +#include "gromacs/mdlib/nbnxn_pairlist.h" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/utility/cstringutil.h" + #include "nbnxn_cuda_types.h" -#include "../../gmxlib/cuda_tools/cudautils.cuh" -#include "nbnxn_cuda.h" -#include "nbnxn_cuda_data_mgmt.h" #if defined TEXOBJ_SUPPORTED && __CUDA_ARCH__ >= 300 #define USE_TEXOBJ @@@ -81,7 -79,7 +81,7 @@@ textureprop.major >= 3) { /* Default kernel on sm 3.x 48/16 kB Shared/L1 */ - stat = cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferShared); - stat = cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferShared); - stat = cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferShared); + cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferShared); + cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferShared); + cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferShared); stat = cudaFuncSetCacheConfig(nb_kfunc_noener_noprune_ptr[i][j], cudaFuncCachePreferShared); } else { /* On Fermi prefer L1 gives 2% higher performance */ /* Default kernel on sm_2.x 16/48 kB Shared/L1 */ - stat = cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferL1); - stat = cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferL1); - stat = cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(nb_kfunc_ener_prune_ptr[i][j], cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(nb_kfunc_ener_noprune_ptr[i][j], cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(nb_kfunc_noener_prune_ptr[i][j], cudaFuncCachePreferL1); stat = cudaFuncSetCacheConfig(nb_kfunc_noener_noprune_ptr[i][j], cudaFuncCachePreferL1); } CU_RET_ERR(stat, "cudaFuncSetCacheConfig failed"); diff --combined src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh index c468306899,3bdb17f8fe..3b37869e6d --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh @@@ -52,30 -52,30 +52,30 @@@ /* cut-off + V shift LJ */ #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJ ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w geometric combination rules */ #define LJ_EWALD_COMB_GEOM #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJEwCombGeom ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_GEOM #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w LB combination rules */ #define LJ_EWALD_COMB_LB #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJEwCombLB ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_LB #undef NB_KERNEL_FUNC_NAME /* F switch LJ */ #define LJ_FORCE_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJFsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_FORCE_SWITCH #undef NB_KERNEL_FUNC_NAME /* V switch LJ */ #define LJ_POT_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecCut_VdwLJPsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_POT_SWITCH #undef NB_KERNEL_FUNC_NAME @@@ -88,30 -88,30 +88,30 @@@ /* cut-off + V shift LJ */ #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJ ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w geometric combination rules */ #define LJ_EWALD_COMB_GEOM #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJEwCombGeom ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_GEOM #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w LB combination rules */ #define LJ_EWALD_COMB_LB #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJEwCombLB ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_LB #undef NB_KERNEL_FUNC_NAME /* F switch LJ */ #define LJ_FORCE_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJFsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_FORCE_SWITCH #undef NB_KERNEL_FUNC_NAME /* V switch LJ */ #define LJ_POT_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecRF_VdwLJPsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_POT_SWITCH #undef NB_KERNEL_FUNC_NAME @@@ -124,30 -124,30 +124,30 @@@ /* cut-off + V shift LJ */ #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJ ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w geometric combination rules */ #define LJ_EWALD_COMB_GEOM #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJEwCombGeom ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_GEOM #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w LB combination rules */ #define LJ_EWALD_COMB_LB #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJEwCombLB ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_LB #undef NB_KERNEL_FUNC_NAME /* F switch LJ */ #define LJ_FORCE_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJFsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_FORCE_SWITCH #undef NB_KERNEL_FUNC_NAME /* V switch LJ */ #define LJ_POT_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEw_VdwLJPsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_POT_SWITCH #undef NB_KERNEL_FUNC_NAME @@@ -157,39 -157,39 +157,39 @@@ /* Analytical Ewald interaction kernels with twin-range cut-off */ #define EL_EWALD_ANA - #define LJ_CUTOFF_CHECK + #define VDW_CUTOFF_CHECK /* cut-off + V shift LJ */ #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJ ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w geometric combination rules */ #define LJ_EWALD_COMB_GEOM #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJEwCombGeom ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_GEOM #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w LB combination rules */ #define LJ_EWALD_COMB_LB #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJEwCombLB ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_LB #undef NB_KERNEL_FUNC_NAME /* F switch LJ */ #define LJ_FORCE_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJFsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_FORCE_SWITCH #undef NB_KERNEL_FUNC_NAME /* V switch LJ */ #define LJ_POT_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwTwinCut_VdwLJPsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_POT_SWITCH #undef NB_KERNEL_FUNC_NAME #undef EL_EWALD_ANA - #undef LJ_CUTOFF_CHECK + #undef VDW_CUTOFF_CHECK /* Tabulated Ewald interaction kernels */ @@@ -197,30 -197,30 +197,30 @@@ /* cut-off + V shift LJ */ #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJ ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w geometric combination rules */ #define LJ_EWALD_COMB_GEOM #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJEwCombGeom ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_GEOM #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w LB combination rules */ #define LJ_EWALD_COMB_LB #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJEwCombLB ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_LB #undef NB_KERNEL_FUNC_NAME /* F switch LJ */ #define LJ_FORCE_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJFsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_FORCE_SWITCH #undef NB_KERNEL_FUNC_NAME /* V switch LJ */ #define LJ_POT_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTab_VdwLJPsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_POT_SWITCH #undef NB_KERNEL_FUNC_NAME @@@ -229,36 -229,36 +229,36 @@@ /* Tabulated Ewald interaction kernels with twin-range cut-off */ #define EL_EWALD_TAB - #define LJ_CUTOFF_CHECK + #define VDW_CUTOFF_CHECK /* cut-off + V shift LJ */ #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJ ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w geometric combination rules */ #define LJ_EWALD_COMB_GEOM #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJEwCombGeom ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_GEOM #undef NB_KERNEL_FUNC_NAME /* LJ-Ewald w LB combination rules */ #define LJ_EWALD_COMB_LB #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJEwCombLB ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_EWALD_COMB_LB #undef NB_KERNEL_FUNC_NAME /* F switch LJ */ #define LJ_FORCE_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJFsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_FORCE_SWITCH #undef NB_KERNEL_FUNC_NAME /* V switch LJ */ #define LJ_POT_SWITCH #define NB_KERNEL_FUNC_NAME(x, ...) x ## _ElecEwQSTabTwinCut_VdwLJPsw ## __VA_ARGS__ -#include "nbnxn_cuda_kernel.cuh" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #undef LJ_POT_SWITCH #undef NB_KERNEL_FUNC_NAME #undef EL_EWALD_TAB - #undef LJ_CUTOFF_CHECK + #undef VDW_CUTOFF_CHECK diff --combined src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre index 9432b2f311,2aa4fa9fb5..5e410cdecc --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.c.pre @@@ -32,12 -32,12 +32,12 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" -#include "typedefs.h" +#include "config.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/mdlib/nb_verlet.h" #include "gromacs/mdlib/nbnxn_simd.h" #ifdef {0} @@@ -47,11 -47,10 +47,11 @@@ {2} #define GMX_SIMD_J_UNROLL_SIZE {3} #include "{4}" -#include "../nbnxn_kernel_common.h" -#include "gmx_omp_nthreads.h" -#include "types/force_flags.h" -#include "gmx_fatal.h" + +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/types/force_flags.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h" +#include "gromacs/utility/fatalerror.h" /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels */ @@@ -118,7 -117,7 +118,7 @@@ reduce_group_energies(int ng, int ng_2l #else /* {0} */ -#include "gmx_fatal.h" +#include "gromacs/utility/fatalerror.h" #endif /* {0} */ @@@ -139,6 -138,7 +139,7 @@@ voi nbnxn_pairlist_t **nbl; int coulkt, vdwkt = 0; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@@ -210,7 -210,8 +211,8 @@@ gmx_incons("Unsupported VdW interaction type"); }} - #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); + #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) {{ nbnxn_atomdata_output_t *out; diff --combined src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c index 3db03bbec5,a104652965..4cad2a2f56 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c @@@ -32,24 -32,21 +32,24 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "nbnxn_kernel_ref.h" + +#include "config.h" -#include #include +#include -#include "typedefs.h" -#include "vec.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_consts.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h" +#include "gromacs/pbcutil/ishift.h" #include "gromacs/utility/smalloc.h" -#include "force.h" -#include "gmx_omp_nthreads.h" -#include "nbnxn_kernel_ref.h" -#include "../nbnxn_consts.h" -#include "nbnxn_kernel_common.h" /*! \brief Typedefs for declaring lookup tables of kernel functions. */ @@@ -73,21 -70,21 +73,21 @@@ typedef void (*p_nbk_func_ener)(const n /* Analytical reaction-field kernels */ #define CALC_COUL_RF #define LJ_CUT -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_CUT #define LJ_FORCE_SWITCH -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_FORCE_SWITCH #define LJ_POT_SWITCH -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_POT_SWITCH #define LJ_EWALD #define LJ_CUT #define LJ_EWALD_COMB_GEOM -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_EWALD_COMB_GEOM #define LJ_EWALD_COMB_LB -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_EWALD_COMB_LB #undef LJ_CUT #undef LJ_EWALD @@@ -97,42 -94,42 +97,42 @@@ /* Tabulated exclusion interaction electrostatics kernels */ #define CALC_COUL_TAB #define LJ_CUT -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_CUT #define LJ_FORCE_SWITCH -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_FORCE_SWITCH #define LJ_POT_SWITCH -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_POT_SWITCH #define LJ_EWALD #define LJ_CUT #define LJ_EWALD_COMB_GEOM -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_EWALD_COMB_GEOM #define LJ_EWALD_COMB_LB -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_EWALD_COMB_LB #undef LJ_CUT #undef LJ_EWALD /* Twin-range cut-off kernels */ #define VDW_CUTOFF_CHECK #define LJ_CUT -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_CUT #define LJ_FORCE_SWITCH -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_FORCE_SWITCH #define LJ_POT_SWITCH -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_POT_SWITCH #define LJ_EWALD #define LJ_CUT #define LJ_EWALD_COMB_GEOM -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_EWALD_COMB_GEOM #define LJ_EWALD_COMB_LB -#include "nbnxn_kernel_ref_includes.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref_includes.h" #undef LJ_EWALD_COMB_LB #undef LJ_CUT #undef LJ_EWALD @@@ -185,6 -182,7 +185,7 @@@ nbnxn_kernel_ref(const nbnxn_pairlist_s int coult; int vdwt; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@@ -242,7 -240,8 +243,8 @@@ gmx_incons("Unsupported vdwtype in nbnxn reference kernel"); } - #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); + #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { nbnxn_atomdata_output_t *out; diff --combined src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c index ed1456873b,35181d43eb..1d8567b206 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c @@@ -37,12 -37,12 +37,12 @@@ * kernel type 2xnn. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" -#include "typedefs.h" +#include "config.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/mdlib/nb_verlet.h" #include "gromacs/mdlib/nbnxn_simd.h" #ifdef GMX_NBNXN_SIMD_2XNN @@@ -56,11 -56,10 +56,11 @@@ #define GMX_SIMD_J_UNROLL_SIZE 2 #include "nbnxn_kernel_simd_2xnn.h" -#include "../nbnxn_kernel_common.h" -#include "gmx_omp_nthreads.h" -#include "types/force_flags.h" -#include "gmx_fatal.h" + +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/types/force_flags.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h" +#include "gromacs/utility/fatalerror.h" /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels */ @@@ -253,7 -252,7 +253,7 @@@ reduce_group_energies(int ng, int ng_2l #else /* GMX_NBNXN_SIMD_2XNN */ -#include "gmx_fatal.h" +#include "gromacs/utility/fatalerror.h" #endif /* GMX_NBNXN_SIMD_2XNN */ @@@ -274,6 -273,7 +274,7 @@@ nbnxn_kernel_simd_2xnn(nbnxn_pairlist_s nbnxn_pairlist_t **nbl; int coulkt, vdwkt = 0; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@@ -345,7 -345,8 +346,8 @@@ gmx_incons("Unsupported VdW interaction type"); } - #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); + #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { nbnxn_atomdata_output_t *out; diff --combined src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c index d17ba1e7bd,5974873a07..3531299efd --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c @@@ -37,12 -37,12 +37,12 @@@ * kernel type 4xn. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" -#include "typedefs.h" +#include "config.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/mdlib/nb_verlet.h" #include "gromacs/mdlib/nbnxn_simd.h" #ifdef GMX_NBNXN_SIMD_4XN @@@ -55,11 -55,10 +55,11 @@@ #define GMX_SIMD_J_UNROLL_SIZE 1 #include "nbnxn_kernel_simd_4xn.h" -#include "../nbnxn_kernel_common.h" -#include "gmx_omp_nthreads.h" -#include "types/force_flags.h" -#include "gmx_fatal.h" + +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/types/force_flags.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h" +#include "gromacs/utility/fatalerror.h" /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels */ @@@ -252,7 -251,7 +252,7 @@@ reduce_group_energies(int ng, int ng_2l #else /* GMX_NBNXN_SIMD_4XN */ -#include "gmx_fatal.h" +#include "gromacs/utility/fatalerror.h" #endif /* GMX_NBNXN_SIMD_4XN */ @@@ -273,6 -272,7 +273,7 @@@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_se nbnxn_pairlist_t **nbl; int coulkt, vdwkt = 0; int nb; + int nthreads gmx_unused; nnbl = nbl_list->nnbl; nbl = nbl_list->nbl; @@@ -344,7 -344,8 +345,8 @@@ gmx_incons("Unsupported VdW interaction type"); } - #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded)) + nthreads = gmx_omp_nthreads_get(emntNonbonded); + #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { nbnxn_atomdata_output_t *out; diff --combined src/gromacs/mdlib/nbnxn_search.c index f13de8c7db,3114bebc22..56e616967c --- a/src/gromacs/mdlib/nbnxn_search.c +++ b/src/gromacs/mdlib/nbnxn_search.c @@@ -33,35 -33,34 +33,35 @@@ * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "nbnxn_search.h" + +#include "config.h" +#include #include #include -#include -#include "sysstuff.h" -#include "gromacs/utility/smalloc.h" -#include "types/commrec.h" -#include "macros.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/ns.h" +#include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/math/utilities.h" -#include "vec.h" -#include "pbc.h" -#include "nbnxn_consts.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_atomdata.h" +#include "gromacs/mdlib/nbnxn_consts.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/utility/smalloc.h" + /* nbnxn_internal.h included gromacs/simd/macros.h */ -#include "nbnxn_internal.h" -#ifdef GMX_NBNXN_SIMD +#include "gromacs/mdlib/nbnxn_internal.h" +#ifdef GMX_SIMD #include "gromacs/simd/vector_operations.h" #endif -#include "nbnxn_atomdata.h" -#include "nbnxn_search.h" -#include "gmx_omp_nthreads.h" -#include "nrnb.h" -#include "ns.h" - -#include "gromacs/fileio/gmxfio.h" #ifdef NBNXN_SEARCH_BB_SIMD4 /* Always use 4-wide SIMD for bounding box calculations */ @@@ -1383,13 -1382,14 +1383,13 @@@ static void sort_columns_supersub(cons int cxy_start, int cxy_end, int *sort_work) { - int cxy; - int cx, cy, cz = -1, c = -1, ncz; - int na, ash, na_c, ind, a; - int subdiv_z, sub_z, na_z, ash_z; - int subdiv_y, sub_y, na_y, ash_y; - int subdiv_x, sub_x, na_x, ash_x; + int cxy; + int cx, cy, cz = -1, c = -1, ncz; + int na, ash, na_c, ind, a; + int subdiv_z, sub_z, na_z, ash_z; + int subdiv_y, sub_y, na_y, ash_y; + int subdiv_x, sub_x, na_x, ash_x; - /* cppcheck-suppress unassignedVariable */ nbnxn_bb_t bb_work_array[2], *bb_work_aligned; bb_work_aligned = (nbnxn_bb_t *)(((size_t)(bb_work_array+1)) & (~((size_t)15))); @@@ -1931,6 -1931,7 +1931,7 @@@ void nbnxn_grid_add_simple(nbnxn_search float *bbcz; nbnxn_bb_t *bb; int ncd, sc; + int nthreads gmx_unused; grid = &nbs->grid[0]; @@@ -1957,7 -1958,8 +1958,8 @@@ bbcz = grid->bbcz_simple; bb = grid->bb_simple; - #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static) + nthreads = gmx_omp_nthreads_get(emntPairsearch); + #pragma omp parallel for num_threads(nthreads) schedule(static) for (sc = 0; sc < grid->nc; sc++) { int c, tx, na; @@@ -2942,10 -2944,10 +2944,10 @@@ static void make_cluster_list_simple(co } #ifdef GMX_NBNXN_SIMD_4XN -#include "nbnxn_search_simd_4xn.h" +#include "gromacs/mdlib/nbnxn_search_simd_4xn.h" #endif #ifdef GMX_NBNXN_SIMD_2XNN -#include "nbnxn_search_simd_2xnn.h" +#include "gromacs/mdlib/nbnxn_search_simd_2xnn.h" #endif /* Plain C or SIMD4 code for making a pair list of super-cell sci vs scj. @@@ -4473,6 -4475,7 +4475,7 @@@ static void combine_nblists(int nnbl, n { int nsci, ncj4, nexcl; int n, i; + int nthreads gmx_unused; if (nblc->bSimple) { @@@ -4513,7 -4516,8 +4516,8 @@@ /* Each thread should copy its own data to the combined arrays, * as otherwise data will go back and forth between different caches. */ - #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntPairsearch)) schedule(static) + nthreads = gmx_omp_nthreads_get(emntPairsearch); + #pragma omp parallel for num_threads(nthreads) schedule(static) for (n = 0; n < nnbl; n++) { int sci_offset; diff --combined src/gromacs/mdlib/ns.c index 9145929c47,430e26d304..3a1396cd40 --- a/src/gromacs/mdlib/ns.c +++ b/src/gromacs/mdlib/ns.c @@@ -34,33 -34,33 +34,33 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "gromacs/legacyheaders/ns.h" #include +#include #include -#include "sysstuff.h" -#include "gromacs/utility/smalloc.h" -#include "macros.h" + +#include "gromacs/legacyheaders/domdec.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/network.h" +#include "gromacs/legacyheaders/nonbonded.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/nsgrid.h" +#include "gromacs/legacyheaders/txtdump.h" +#include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/math/utilities.h" -#include "vec.h" -#include "types/commrec.h" -#include "network.h" -#include "nsgrid.h" -#include "force.h" -#include "nonbonded.h" -#include "ns.h" -#include "pbc.h" -#include "names.h" -#include "gmx_fatal.h" -#include "nrnb.h" -#include "txtdump.h" -#include "mtop_util.h" - -#include "domdec.h" -#include "adress.h" +#include "gromacs/math/vec.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" +#include "adress.h" /* * E X C L U S I O N H A N D L I N G @@@ -287,8 -287,11 +287,8 @@@ void init_neighbor_list(FILE *log, t_fo /* Did we get the solvent loops so we can use optimized water kernels? */ if (nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf == NULL || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf == NULL -#ifndef DISABLE_WATERWATER_NLIST || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf == NULL - || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf == NULL -#endif - ) + || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf == NULL) { fr->solvent_opt = esolNO; if (log != NULL) @@@ -427,6 -430,14 +427,14 @@@ static gmx_inline void close_i_nblist(t nlist->jindex[nri+1] = nlist->nrj; len = nlist->nrj - nlist->jindex[nri]; + /* If there are no j-particles we have to reduce the + * number of i-particles again, to prevent errors in the + * kernel functions. + */ + if ((len == 0) && (nlist->nri > 0)) + { + nlist->nri--; + } } } @@@ -661,11 -672,13 +669,11 @@@ put_in_list_at(gmx_bool bH if (iwater != esolNO) { - vdwc = &nlist[eNL_VDWQQ_WATER]; - vdw = &nlist[eNL_VDW]; - coul = &nlist[eNL_QQ_WATER]; -#ifndef DISABLE_WATERWATER_NLIST + vdwc = &nlist[eNL_VDWQQ_WATER]; + vdw = &nlist[eNL_VDW]; + coul = &nlist[eNL_QQ_WATER]; vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER]; coul_ww = &nlist[eNL_QQ_WATERWATER]; -#endif } else { @@@ -685,7 -698,9 +693,7 @@@ if (bDoCoul && bDoVdW) { new_i_nblist(vdwc, i_atom, shift, gid); -#ifndef DISABLE_WATERWATER_NLIST new_i_nblist(vdwc_ww, i_atom, shift, gid); -#endif } if (bDoVdW) { @@@ -694,7 -709,9 +702,7 @@@ if (bDoCoul) { new_i_nblist(coul, i_atom, shift, gid); -#ifndef DISABLE_WATERWATER_NLIST new_i_nblist(coul_ww, i_atom, shift, gid); -#endif } /* Loop over the j charge groups */ for (j = 0; (j < nj); j++) @@@ -719,6 -736,19 +727,6 @@@ } else { -#ifdef DISABLE_WATERWATER_NLIST - /* Add entries for the three atoms - only do VdW if we need to */ - if (!bDoVdW) - { - add_j_to_nblist(coul, jj0, bLR); - } - else - { - add_j_to_nblist(vdwc, jj0, bLR); - } - add_j_to_nblist(coul, jj0+1, bLR); - add_j_to_nblist(coul, jj0+2, bLR); -#else /* One entry for the entire water-water interaction */ if (!bDoVdW) { @@@ -728,6 -758,7 +736,6 @@@ { add_j_to_nblist(vdwc_ww, jj0, bLR); } -#endif } } else if (iwater == esolTIP4P && jwater == esolTIP4P) @@@ -740,6 -771,16 +748,6 @@@ } else { -#ifdef DISABLE_WATERWATER_NLIST - /* Add entries for the four atoms - only do VdW if we need to */ - if (bDoVdW) - { - add_j_to_nblist(vdw, jj0, bLR); - } - add_j_to_nblist(coul, jj0+1, bLR); - add_j_to_nblist(coul, jj0+2, bLR); - add_j_to_nblist(coul, jj0+3, bLR); -#else /* One entry for the entire water-water interaction */ if (!bDoVdW) { @@@ -749,6 -790,7 +757,6 @@@ { add_j_to_nblist(vdwc_ww, jj0, bLR); } -#endif } } else @@@ -809,8 -851,10 +817,8 @@@ close_i_nblist(vdw); close_i_nblist(coul); close_i_nblist(vdwc); -#ifndef DISABLE_WATERWATER_NLIST close_i_nblist(coul_ww); close_i_nblist(vdwc_ww); -#endif } else { @@@ -2087,7 -2131,7 +2095,7 @@@ static int nsgrid_core(t_commrec *cr, t gmx_ns_t *ns; atom_id **nl_lr_ljc, **nl_lr_one, **nl_sr; int *nlr_ljc, *nlr_one, *nsr; - gmx_domdec_t *dd = NULL; + gmx_domdec_t *dd; t_block *cgs = &(top->cgs); int *cginfo = fr->cginfo; /* atom_id *i_atoms,*cgsindex=cgs->index; */ @@@ -2115,7 -2159,10 +2123,7 @@@ ns = &fr->ns; bDomDec = DOMAINDECOMP(cr); - if (bDomDec) - { - dd = cr->dd; - } + dd = cr->dd; bTriclinicX = ((YY < grid->npbcdim && (!bDomDec || dd->nc[YY] == 1) && box[YY][XX] != 0) || diff --combined src/gromacs/mdlib/sim_util.c index e6686abd02,a6b166ccfd..b35dd827ac --- a/src/gromacs/mdlib/sim_util.c +++ b/src/gromacs/mdlib/sim_util.c @@@ -34,71 -34,70 +34,71 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" + +#include "gromacs/legacyheaders/sim_util.h" + +#include "config.h" #include #include #include #include + #ifdef HAVE_SYS_TIME_H #include #endif -#include "typedefs.h" -#include "gromacs/utility/cstringutil.h" -#include "gromacs/utility/smalloc.h" -#include "names.h" -#include "txtdump.h" -#include "pbc.h" -#include "chargegroup.h" -#include "vec.h" -#include "nrnb.h" -#include "mshift.h" -#include "mdrun.h" -#include "sim_util.h" -#include "update.h" -#include "physics.h" -#include "main.h" -#include "mdatoms.h" -#include "force.h" -#include "bondf.h" -#include "pme.h" -#include "disre.h" -#include "orires.h" -#include "network.h" -#include "calcmu.h" -#include "constr.h" -#include "xvgr.h" -#include "copyrite.h" -#include "domdec.h" -#include "genborn.h" -#include "nbnxn_atomdata.h" -#include "nbnxn_search.h" -#include "nbnxn_kernels/nbnxn_kernel_ref.h" -#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h" -#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h" -#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h" -#include "nonbonded.h" -#include "../gmxlib/nonbonded/nb_kernel.h" -#include "../gmxlib/nonbonded/nb_free_energy.h" - -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" -#include "gromacs/utility/gmxmpi.h" #include "gromacs/essentialdynamics/edsam.h" +#include "gromacs/gmxlib/nonbonded/nb_free_energy.h" +#include "gromacs/gmxlib/nonbonded/nb_kernel.h" +#include "gromacs/imd/imd.h" +#include "gromacs/legacyheaders/calcmu.h" +#include "gromacs/legacyheaders/chargegroup.h" +#include "gromacs/legacyheaders/constr.h" +#include "gromacs/legacyheaders/copyrite.h" +#include "gromacs/legacyheaders/disre.h" +#include "gromacs/legacyheaders/domdec.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/genborn.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/mdatoms.h" +#include "gromacs/legacyheaders/mdrun.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/network.h" +#include "gromacs/legacyheaders/nonbonded.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/orires.h" +#include "gromacs/legacyheaders/pme.h" +#include "gromacs/legacyheaders/qmmm.h" +#include "gromacs/legacyheaders/txtdump.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/legacyheaders/update.h" +#include "gromacs/legacyheaders/types/commrec.h" +#include "gromacs/listed-forces/bonded.h" +#include "gromacs/math/units.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_atomdata.h" +#include "gromacs/mdlib/nbnxn_search.h" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.h" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h" +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h" +#include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h" +#include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" #include "gromacs/pulling/pull.h" #include "gromacs/pulling/pull_rotation.h" -#include "gromacs/imd/imd.h" -#include "adress.h" -#include "qmmm.h" - -#include "gmx_omp_nthreads.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/smalloc.h" -#include "nbnxn_cuda_data_mgmt.h" -#include "nbnxn_cuda/nbnxn_cuda.h" +#include "adress.h" void print_time(FILE *out, gmx_walltime_accounting_t walltime_accounting, @@@ -314,7 -313,9 +314,7 @@@ static void calc_virial(int start, int } } -static void posres_wrapper(FILE *fplog, - int flags, - gmx_bool bSepDVDL, +static void posres_wrapper(int flags, t_inputrec *ir, t_nrnb *nrnb, gmx_localtop_t *top, @@@ -336,6 -337,10 +336,6 @@@ ir->ePBC == epbcNONE ? NULL : &pbc, lambda[efptRESTRAINT], &dvdl, fr->rc_scaling, fr->ePBC, fr->posres_com, fr->posres_comB); - if (bSepDVDL) - { - gmx_print_sepdvdl(fplog, interaction_function[F_POSRES].longname, v, dvdl); - } enerd->term[F_POSRES] += v; /* If just the force constant changes, the FEP term is linear, * but if k changes, it is not. @@@ -381,7 -386,9 +381,7 @@@ static void fbposres_wrapper(t_inputre inc_nrnb(nrnb, eNR_FBPOSRES, top->idef.il[F_FBPOSRES].nr/2); } -static void pull_potential_wrapper(FILE *fplog, - gmx_bool bSepDVDL, - t_commrec *cr, +static void pull_potential_wrapper(t_commrec *cr, t_inputrec *ir, matrix box, rvec x[], rvec f[], @@@ -406,11 -413,17 +406,11 @@@ enerd->term[F_COM_PULL] += pull_potential(ir->ePull, ir->pull, mdatoms, &pbc, cr, t, lambda[efptRESTRAINT], x, f, vir_force, &dvdl); - if (bSepDVDL) - { - gmx_print_sepdvdl(fplog, "Com pull", enerd->term[F_COM_PULL], dvdl); - } enerd->dvdl_lin[efptRESTRAINT] += dvdl; wallcycle_stop(wcycle, ewcPULLPOT); } -static void pme_receive_force_ener(FILE *fplog, - gmx_bool bSepDVDL, - t_commrec *cr, +static void pme_receive_force_ener(t_commrec *cr, gmx_wallcycle_t wcycle, gmx_enerdata_t *enerd, t_forcerec *fr) @@@ -430,6 -443,11 +430,6 @@@ gmx_pme_receive_f(cr, fr->f_novirsum, fr->vir_el_recip, &e_q, fr->vir_lj_recip, &e_lj, &dvdl_q, &dvdl_lj, &cycles_seppme); - if (bSepDVDL) - { - gmx_print_sepdvdl(fplog, "Electrostatic PME mesh", e_q, dvdl_q); - gmx_print_sepdvdl(fplog, "Lennard-Jones PME mesh", e_lj, dvdl_lj); - } enerd->term[F_COUL_RECIP] += e_q; enerd->term[F_LJ_RECIP] += e_lj; enerd->dvdl_lin[efptCOUL] += dvdl_q; @@@ -784,11 -802,6 +784,11 @@@ static void do_nb_verlet_fep(nbnxn_pair wallcycle_sub_stop(wcycle, ewcsNONBONDED); } +gmx_bool use_GPU(const nonbonded_verlet_t *nbv) +{ + return nbv != NULL && nbv->bUseGPU; +} + void do_force_cutsVERLET(FILE *fplog, t_commrec *cr, t_inputrec *inputrec, gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle, @@@ -810,7 -823,7 +810,7 @@@ int start, homenr; int nb_kernel_type; double mu[2*DIM]; - gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; + gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; gmx_bool bDoLongRange, bDoForces, bSepLRF, bUseGPU, bUseOrEmulGPU; gmx_bool bDiffKernels = FALSE; matrix boxs; @@@ -827,6 -840,8 +827,6 @@@ start = 0; homenr = mdatoms->homenr; - bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog)); - clear_mat(vir_force); cg0 = 0; @@@ -1236,11 -1251,10 +1236,11 @@@ } /* We calculate the non-bonded forces, when done on the CPU, here. - * We do this before calling do_force_lowlevel, as in there bondeds - * forces are calculated before PME, which does communication. - * With this order, non-bonded and bonded force calculation imbalance - * can be balanced out by the domain decomposition load balancing. + * We do this before calling do_force_lowlevel, because in that + * function, the listed forces are calculated before PME, which + * does communication. With this order, non-bonded and listed + * force calculation imbalance can be balanced out by the domain + * decomposition load balancing. */ if (!bUseOrEmulGPU) @@@ -1320,22 -1334,22 +1320,22 @@@ update_QMMMrec(cr, fr, x, mdatoms, box, top); } - if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) + if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_POSRES].nr > 0) { - posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x, + posres_wrapper(flags, inputrec, nrnb, top, box, x, enerd, lambda, fr); } - if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0) + if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_FBPOSRES].nr > 0) { fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr); } /* Compute the bonded and non-bonded energies and optionally forces */ - do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef), + do_force_lowlevel(fr, inputrec, &(top->idef), cr, nrnb, wcycle, mdatoms, x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born, - &(top->atomtypes), bBornRadii, box, + bBornRadii, box, inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot, flags, &cycles_pme); @@@ -1399,6 -1413,16 +1399,16 @@@ if (bDoForces && DOMAINDECOMP(cr)) { + if (bUseGPU) + { + /* We are done with the CPU compute, but the GPU local non-bonded + * kernel can still be running while we communicate the forces. + * We start a counter here, so we can, hopefully, time the rest + * of the GPU kernel execution and data transfer. + */ + wallcycle_start(wcycle, ewcWAIT_GPU_NB_L_EST); + } + /* Communicate the forces */ wallcycle_start(wcycle, ewcMOVEF); dd_move_f(cr->dd, f, fr->fshift); @@@ -1429,13 -1453,44 +1439,44 @@@ /* wait for local forces (or calculate in emulation mode) */ if (bUseGPU) { + float cycles_tmp, cycles_wait_est; + const float cuda_api_overhead_margin = 50000.0f; /* cycles */ + wallcycle_start(wcycle, ewcWAIT_GPU_NB_L); nbnxn_cuda_wait_gpu(nbv->cu_nbv, nbv->grp[eintLocal].nbat, flags, eatLocal, enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR], fr->fshift); - cycles_wait_gpu += wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L); + cycles_tmp = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L); + + if (bDoForces && DOMAINDECOMP(cr)) + { + cycles_wait_est = wallcycle_stop(wcycle, ewcWAIT_GPU_NB_L_EST); + + if (cycles_tmp < cuda_api_overhead_margin) + { + /* We measured few cycles, it could be that the kernel + * and transfer finished earlier and there was no actual + * wait time, only API call overhead. + * Then the actual time could be anywhere between 0 and + * cycles_wait_est. As a compromise, we use half the time. + */ + cycles_wait_est *= 0.5f; + } + } + else + { + /* No force communication so we actually timed the wait */ + cycles_wait_est = cycles_tmp; + } + /* Even though this is after dd_move_f, the actual task we are + * waiting for runs asynchronously with dd_move_f and we usually + * have nothing to balance it with, so we can and should add + * the time to the force time for load balancing. + */ + cycles_force += cycles_wait_est; + cycles_wait_gpu += cycles_wait_est; /* now clear the GPU outputs while we finish the step on the CPU */ @@@ -1519,7 -1574,7 +1560,7 @@@ /* Since the COM pulling is always done mass-weighted, no forces are * applied to vsites and this call can be done after vsite spreading. */ - pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, + pull_potential_wrapper(cr, inputrec, box, x, f, vir_force, mdatoms, enerd, lambda, t, wcycle); } @@@ -1540,7 -1595,7 +1581,7 @@@ /* In case of node-splitting, the PP nodes receive the long-range * forces, virial and energy from the PME nodes here. */ - pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr); + pme_receive_force_ener(cr, wcycle, enerd, fr); } if (bDoForces) @@@ -1573,7 -1628,7 +1614,7 @@@ void do_force_cutsGROUP(FILE *fplog, t_ int cg0, cg1, i, j; int start, homenr; double mu[2*DIM]; - gmx_bool bSepDVDL, bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; + gmx_bool bStateChanged, bNS, bFillGrid, bCalcCGCM, bBS; gmx_bool bDoLongRangeNS, bDoForces, bDoPotential, bSepLRF; gmx_bool bDoAdressWF; matrix boxs; @@@ -1585,6 -1640,8 +1626,6 @@@ start = 0; homenr = mdatoms->homenr; - bSepDVDL = (fr->bSepDVDL && do_per_step(step, inputrec->nstlog)); - clear_mat(vir_force); cg0 = 0; @@@ -1869,22 -1926,22 +1910,22 @@@ update_QMMMrec(cr, fr, x, mdatoms, box, top); } - if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0) + if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_POSRES].nr > 0) { - posres_wrapper(fplog, flags, bSepDVDL, inputrec, nrnb, top, box, x, + posres_wrapper(flags, inputrec, nrnb, top, box, x, enerd, lambda, fr); } - if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0) + if ((flags & GMX_FORCE_LISTED) && top->idef.il[F_FBPOSRES].nr > 0) { fbposres_wrapper(inputrec, nrnb, top, box, x, enerd, fr); } /* Compute the bonded and non-bonded energies and optionally forces */ - do_force_lowlevel(fplog, step, fr, inputrec, &(top->idef), + do_force_lowlevel(fr, inputrec, &(top->idef), cr, nrnb, wcycle, mdatoms, x, hist, f, bSepLRF ? fr->f_twin : f, enerd, fcd, top, fr->born, - &(top->atomtypes), bBornRadii, box, + bBornRadii, box, inputrec->fepvals, lambda, graph, &(top->excls), fr->mu_tot, flags, @@@ -1992,7 -2049,7 +2033,7 @@@ if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F) { - pull_potential_wrapper(fplog, bSepDVDL, cr, inputrec, box, x, + pull_potential_wrapper(cr, inputrec, box, x, f, vir_force, mdatoms, enerd, lambda, t, wcycle); } @@@ -2013,7 -2070,7 +2054,7 @@@ /* In case of node-splitting, the PP nodes receive the long-range * forces, virial and energy from the PME nodes here. */ - pme_receive_force_ener(fplog, bSepDVDL, cr, wcycle, enerd, fr); + pme_receive_force_ener(cr, wcycle, enerd, fr); } if (bDoForces) @@@ -2429,8 -2486,8 +2470,8 @@@ void calc_enervirdiff(FILE *fplog, int } } -void calc_dispcorr(FILE *fplog, t_inputrec *ir, t_forcerec *fr, - gmx_int64_t step, int natoms, +void calc_dispcorr(t_inputrec *ir, t_forcerec *fr, + int natoms, matrix box, real lambda, tensor pres, tensor virial, real *prescorr, real *enercorr, real *dvdlcorr) { @@@ -2531,6 -2588,10 +2572,6 @@@ } } - if (fr->bSepDVDL && do_per_step(step, ir->nstlog)) - { - gmx_print_sepdvdl(fplog, "Dispersion correction", *enercorr, dvdlambda); - } if (fr->efep != efepNO) { *dvdlcorr += dvdlambda; @@@ -2635,7 -2696,7 +2676,7 @@@ void finish_run(FILE *fplog, t_commrec t_inputrec *inputrec, t_nrnb nrnb[], gmx_wallcycle_t wcycle, gmx_walltime_accounting_t walltime_accounting, - wallclock_gpu_t *gputimes, + nonbonded_verlet_t *nbv, gmx_bool bWriteStat) { int i, j; @@@ -2699,8 -2760,6 +2740,8 @@@ if (SIMMASTER(cr)) { + wallclock_gpu_t* gputimes = use_GPU(nbv) ? + nbnxn_cuda_get_timings(nbv->cu_nbv) : NULL; wallcycle_print(fplog, cr->nnodes, cr->npmenodes, elapsed_time_over_all_ranks, wcycle, gputimes); diff --combined src/gromacs/mdlib/update.c index 3be84a8a93,93b7f59c1e..373212ca8a --- a/src/gromacs/mdlib/update.c +++ b/src/gromacs/mdlib/update.c @@@ -34,37 -34,41 +34,37 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" +#include "gromacs/legacyheaders/update.h" -#include #include - -#include "types/commrec.h" -#include "sysstuff.h" -#include "gromacs/utility/smalloc.h" -#include "typedefs.h" -#include "nrnb.h" -#include "physics.h" -#include "macros.h" -#include "vec.h" -#include "main.h" -#include "update.h" -#include "gromacs/random/random.h" -#include "mshift.h" -#include "tgroup.h" -#include "force.h" -#include "names.h" -#include "txtdump.h" -#include "mdrun.h" -#include "constr.h" -#include "disre.h" -#include "orires.h" -#include "gmx_omp_nthreads.h" +#include #include "gromacs/fileio/confio.h" -#include "gromacs/fileio/futil.h" +#include "gromacs/legacyheaders/constr.h" +#include "gromacs/legacyheaders/disre.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/gmx_omp_nthreads.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/mdrun.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/orires.h" +#include "gromacs/legacyheaders/tgroup.h" +#include "gromacs/legacyheaders/txtdump.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/legacyheaders/types/commrec.h" +#include "gromacs/math/units.h" +#include "gromacs/math/vec.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/random/random.h" #include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/futil.h" #include "gromacs/utility/gmxomp.h" -#include "gromacs/pulling/pull.h" +#include "gromacs/utility/smalloc.h" /*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */ /*#define STARTFROMDT2*/ @@@ -1767,7 -1771,9 +1767,9 @@@ void update_constraints(FIL } else { - #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntUpdate)) schedule(static) + nth = gmx_omp_nthreads_get(emntUpdate); + + #pragma omp parallel for num_threads(nth) schedule(static) for (i = start; i < nrend; i++) { copy_rvec(upd->xp[i], state->x[i]); diff --combined src/gromacs/timing/wallcycle.c index 27eb275cd0,ae26c2cec4..3b0d906636 --- a/src/gromacs/timing/wallcycle.c +++ b/src/gromacs/timing/wallcycle.c @@@ -34,22 -34,21 +34,22 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#include "gromacs/timing/wallcycle.h" +#include "gmxpre.h" -#ifdef HAVE_CONFIG_H -#include -#endif +#include "wallcycle.h" -#include +#include "config.h" -#include "gromacs/utility/smalloc.h" -#include "gmx_fatal.h" -#include "md_logging.h" -#include "gromacs/utility/cstringutil.h" +#include +#include +#include "gromacs/legacyheaders/md_logging.h" +#include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/timing/cyclecounter.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/smalloc.h" /* DEBUG_WCYCLE adds consistency checking for the counters. * It checks if you stop a counter different from the last @@@ -99,7 -98,7 +99,7 @@@ static const char *wcn[ewcNR] "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.", "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh", "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve LJ", "PME solve Elec", - "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.", + "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "Wait GPU loc. est.", "NB X/F buffer ops.", "Vsite spread", "COM pull force", "Write traj.", "Update", "Constraints", "Comm. energies", "Enforced rotation", "Add rot. forces", "Coordinate swapping", "IMD", "Test" @@@ -110,7 -109,7 +110,7 @@@ static const char *wcsn[ewcsNR] "DD redist.", "DD NS grid + sort", "DD setup comm.", "DD make top.", "DD make constr.", "DD top. other", "NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.", - "Bonded F", "Nonbonded F", "Ewald F correction", + "Listed F", "Nonbonded F", "Ewald F correction", "NB X buffer ops.", "NB F buffer ops." }; @@@ -388,6 -387,12 +388,12 @@@ void wallcycle_sum(t_commrec *cr, gmx_w wcc = wc->wcc; + /* The GPU wait estimate counter is used for load balancing only + * and will mess up the total due to double counting: clear it. + */ + wcc[ewcWAIT_GPU_NB_L_EST].n = 0; + wcc[ewcWAIT_GPU_NB_L_EST].c = 0; + for (i = 0; i < ewcNR; i++) { if (is_pme_counter(i) || (i == ewcRUN && cr->duty == DUTY_PME)) diff --combined src/gromacs/timing/wallcycle.h index 104173598a,7b48849707..69c92cde72 --- a/src/gromacs/timing/wallcycle.h +++ b/src/gromacs/timing/wallcycle.h @@@ -38,23 -38,19 +38,23 @@@ #define GMX_TIMING_WALLCYCLE_H #include -#include "gromacs/legacyheaders/typedefs.h" -#include "gromacs/legacyheaders/types/commrec.h" + +#include "gromacs/legacyheaders/types/commrec_fwd.h" +#include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h" +#include "gromacs/utility/basedefinitions.h" #ifdef __cplusplus extern "C" { #endif +typedef struct gmx_wallcycle *gmx_wallcycle_t; + enum { ewcRUN, ewcSTEP, ewcPPDURINGPME, ewcDOMDEC, ewcDDCOMMLOAD, ewcDDCOMMBOUND, ewcVSITECONSTR, ewcPP_PMESENDX, ewcNS, ewcLAUNCH_GPU_NB, ewcMOVEX, ewcGB, ewcFORCE, ewcMOVEF, ewcPMEMESH, ewcPME_REDISTXF, ewcPME_SPREADGATHER, ewcPME_FFT, ewcPME_FFTCOMM, ewcLJPME, ewcPME_SOLVE, - ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF, ewcWAIT_GPU_NB_NL, ewcWAIT_GPU_NB_L, ewcNB_XF_BUF_OPS, + ewcPMEWAITCOMM, ewcPP_PMEWAITRECVF, ewcWAIT_GPU_NB_NL, ewcWAIT_GPU_NB_L, ewcWAIT_GPU_NB_L_EST, ewcNB_XF_BUF_OPS, ewcVSITESPREAD, ewcPULLPOT, ewcTRAJ, ewcUPDATE, ewcCONSTR, ewcMoveE, ewcROT, ewcROTadd, ewcSWAP, ewcIMD, ewcTEST, ewcNR @@@ -66,7 -62,7 +66,7 @@@ enum ewcsDD_MAKETOP, ewcsDD_MAKECONSTR, ewcsDD_TOPOTHER, ewcsNBS_GRID_LOCAL, ewcsNBS_GRID_NONLOCAL, ewcsNBS_SEARCH_LOCAL, ewcsNBS_SEARCH_NONLOCAL, - ewcsBONDED, ewcsNONBONDED, ewcsEWALD_CORRECTION, + ewcsLISTED, ewcsNONBONDED, ewcsEWALD_CORRECTION, ewcsNB_X_BUF_OPS, ewcsNB_F_BUF_OPS, ewcsNR }; diff --combined src/programs/mdrun/md.cpp index 9864f8789f,3d98d597c7..0504150c00 --- a/src/programs/mdrun/md.cpp +++ b/src/programs/mdrun/md.cpp @@@ -34,67 -34,67 +34,67 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" -#include "typedefs.h" -#include "gromacs/utility/smalloc.h" -#include "sysstuff.h" -#include "vec.h" -#include "vcm.h" -#include "mdebin.h" -#include "nrnb.h" -#include "calcmu.h" -#include "index.h" -#include "vsite.h" -#include "update.h" -#include "ns.h" -#include "mdrun.h" -#include "md_support.h" -#include "md_logging.h" -#include "network.h" -#include "xvgr.h" -#include "physics.h" -#include "names.h" -#include "force.h" -#include "disre.h" -#include "orires.h" -#include "pme.h" -#include "mdatoms.h" -#include "repl_ex.h" -#include "deform.h" -#include "qmmm.h" -#include "domdec.h" -#include "domdec_network.h" -#include "gromacs/gmxlib/topsort.h" -#include "coulomb.h" -#include "constr.h" -#include "shellfc.h" -#include "gromacs/gmxpreprocess/compute_io.h" -#include "checkpoint.h" -#include "mtop_util.h" -#include "sighandler.h" -#include "txtdump.h" -#include "gromacs/utility/cstringutil.h" -#include "pme_loadbal.h" -#include "bondf.h" -#include "membed.h" -#include "types/nlistheuristics.h" -#include "types/iteratedconstraints.h" -#include "nbnxn_cuda_data_mgmt.h" +#include "config.h" + +#include -#include "gromacs/utility/gmxmpi.h" #include "gromacs/fileio/confio.h" +#include "gromacs/fileio/mdoutf.h" #include "gromacs/fileio/trajectory_writing.h" #include "gromacs/fileio/trnio.h" #include "gromacs/fileio/trxio.h" #include "gromacs/fileio/xtcio.h" -#include "gromacs/timing/wallcycle.h" -#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/gmxpreprocess/compute_io.h" +#include "gromacs/imd/imd.h" +#include "gromacs/legacyheaders/bonded-threading.h" +#include "gromacs/legacyheaders/calcmu.h" +#include "gromacs/legacyheaders/checkpoint.h" +#include "gromacs/legacyheaders/constr.h" +#include "gromacs/legacyheaders/coulomb.h" +#include "gromacs/legacyheaders/disre.h" +#include "gromacs/legacyheaders/domdec.h" +#include "gromacs/legacyheaders/domdec_network.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/md_logging.h" +#include "gromacs/legacyheaders/md_support.h" +#include "gromacs/legacyheaders/mdatoms.h" +#include "gromacs/legacyheaders/mdebin.h" +#include "gromacs/legacyheaders/mdrun.h" +#include "gromacs/legacyheaders/names.h" +#include "gromacs/legacyheaders/network.h" +#include "gromacs/legacyheaders/nrnb.h" +#include "gromacs/legacyheaders/ns.h" +#include "gromacs/legacyheaders/orires.h" +#include "gromacs/legacyheaders/pme.h" +#include "gromacs/legacyheaders/qmmm.h" +#include "gromacs/legacyheaders/shellfc.h" +#include "gromacs/legacyheaders/sighandler.h" +#include "gromacs/legacyheaders/txtdump.h" +#include "gromacs/legacyheaders/typedefs.h" +#include "gromacs/legacyheaders/update.h" +#include "gromacs/legacyheaders/vcm.h" +#include "gromacs/legacyheaders/vsite.h" +#include "gromacs/legacyheaders/types/iteratedconstraints.h" +#include "gromacs/legacyheaders/types/nlistheuristics.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" #include "gromacs/pulling/pull.h" #include "gromacs/swap/swapcoords.h" -#include "gromacs/imd/imd.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/smalloc.h" + +#include "deform.h" +#include "membed.h" +#include "pme_loadbal.h" +#include "repl_ex.h" #ifdef GMX_FAHCORE #include "corewrap.h" @@@ -105,7 -105,7 +105,7 @@@ static void reset_all_counters(FILE *fp gmx_int64_t *step_rel, t_inputrec *ir, gmx_wallcycle_t wcycle, t_nrnb *nrnb, gmx_walltime_accounting_t walltime_accounting, - nbnxn_cuda_ptr_t cu_nbv) + struct nonbonded_verlet_t *nbv) { char sbuf[STEPSTRSIZE]; @@@ -113,7 -113,10 +113,7 @@@ md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", gmx_step_str(step, sbuf)); - if (cu_nbv) - { - nbnxn_cuda_reset_timings(cu_nbv); - } + nbnxn_cuda_reset_timings(nbv); wallcycle_stop(wcycle, ewcRUN); wallcycle_reset_all(wcycle); @@@ -167,7 -170,7 +167,7 @@@ double do_md(FILE *fplog, t_commrec *cr rvec mu_tot; t_vcm *vcm; t_state *bufstate = NULL; - matrix *scale_tot, pcoupl_mu, M, ebox; + matrix pcoupl_mu, M; gmx_nlheur_t nlh; t_trxframe rerun_fr; gmx_repl_ex_t repl_ex = NULL; @@@ -186,8 -189,10 +186,8 @@@ gmx_ekindata_t *ekind, *ekind_save; gmx_shellfc_t shellfc; int count, nconverged = 0; - real timestep = 0; - double tcount = 0; - gmx_bool bConverged = TRUE, bOK, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; - gmx_bool bAppend; + double tcount = 0; + gmx_bool bConverged = TRUE, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; gmx_bool bResetCountersHalfMaxH = FALSE; gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter; gmx_bool bUpdateDoLR; @@@ -202,6 -207,7 +202,6 @@@ double cycles; real saved_conserved_quantity = 0; real last_ekin = 0; - int iter_i; t_extmass MassQ; int **trotter_seq; char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; @@@ -225,6 -231,7 +225,6 @@@ /* Check for special mdrun options */ bRerunMD = (Flags & MD_RERUN); - bAppend = (Flags & MD_APPENDFILES); if (Flags & MD_RESETCOUNTERSHALFWAY) { if (ir->nsteps > 0) @@@ -476,7 -483,7 +476,7 @@@ */ if ((Flags & MD_TUNEPME) && EEL_PME(fr->eeltype) && - ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) && + ( use_GPU(fr->nbv) || !(cr->duty & DUTY_PME)) && !bRerunMD) { pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata); @@@ -719,7 -726,9 +719,7 @@@ bStateFromTPX = !bStateFromCP; bInitStep = bFirstStep && (bStateFromTPX || bVV); bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; - bLastStep = FALSE; bSumEkinhOld = FALSE; - bDoReplEx = FALSE; bExchanged = FALSE; bNeedRepartition = FALSE; @@@ -728,7 -737,10 +728,7 @@@ step = ir->init_step; step_rel = 0; - if (ir->nstlist == -1) - { - init_nlistheuristics(&nlh, bGStatEveryStep, step); - } + init_nlistheuristics(&nlh, bGStatEveryStep, step); if (MULTISIM(cr) && (repl_ex_nst <= 0 )) { @@@ -1149,6 -1161,7 +1149,6 @@@ } } - bOK = TRUE; if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ { wallcycle_stop(wcycle, ewcUPDATE); @@@ -1164,6 -1177,12 +1164,6 @@@ /* Correct the virial for multiple time stepping */ m_sub(shake_vir, fr->vir_twin_constr, shake_vir); } - - if (!bOK) - { - gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); - } - } else if (graph) { @@@ -1464,6 -1483,7 +1464,6 @@@ */ copy_mat(state->box, lastbox); - bOK = TRUE; dvdl_constr = 0; if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate)) @@@ -1573,7 -1593,15 +1573,7 @@@ FALSE, bCalcVir, state->veta); } - if (!bOK) - { - gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); - } - if (fr->bSepDVDL && fplog && do_log) - { - gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr); - } if (bVV) { /* this factor or 2 correction is necessary @@@ -1881,6 -1909,21 +1881,21 @@@ } dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); + if (bPMETuneRunning && - fr->nbv->bUseGPU && DOMAINDECOMP(cr) && ++ use_GPU(fr->nbv) && DOMAINDECOMP(cr) && + !(cr->duty & DUTY_PME)) + { + /* Lock DLB=auto to off (does nothing when DLB=yes/no). + * With GPUs + separate PME ranks, we don't want DLB. + * This could happen when we scan coarse grids and + * it would then never be turned off again. + * This would hurt performance at the final, optimal + * grid spacing, where DLB almost never helps. + * Also, DLB can limit the cut-off for PME tuning. + */ + dd_dlb_set_lock(cr->dd, TRUE); + } + if (bPMETuneRunning || step_rel > ir->nstlist*50) { bPMETuneTry = FALSE; @@@ -1911,6 -1954,16 +1926,16 @@@ { calc_enervirdiff(NULL, ir->eDispCorr, fr); } + + if (!bPMETuneRunning && + DOMAINDECOMP(cr) && + dd_dlb_is_locked(cr->dd)) + { + /* Unlock the DLB=auto, DLB is allowed to activate + * (but we don't expect it to activate in most cases). + */ + dd_dlb_set_lock(cr->dd, FALSE); + } } cycles_pmes = 0; } @@@ -1921,7 -1974,7 +1946,7 @@@ { /* Reset all the counters related to performance over the run */ reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, - fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL); + use_GPU(fr->nbv) ? fr->nbv : NULL); wcycle_set_reset_counters(wcycle, -1); if (!(cr->duty & DUTY_PME)) { @@@ -1980,7 -2033,7 +2005,7 @@@ if (pme_loadbal != NULL) { pme_loadbal_done(pme_loadbal, cr, fplog, - fr->nbv != NULL && fr->nbv->bUseGPU); + use_GPU(fr->nbv)); } if (shellfc && fplog) diff --combined src/programs/mdrun/pme_loadbal.c index 5ba76e63e0,d6fd68f372..dd2cd1a2e9 --- a/src/programs/mdrun/pme_loadbal.c +++ b/src/programs/mdrun/pme_loadbal.c @@@ -32,27 -32,23 +32,27 @@@ * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org. */ -#ifdef HAVE_CONFIG_H -#include -#endif +#include "gmxpre.h" -#include "gromacs/utility/smalloc.h" -#include "types/commrec.h" -#include "network.h" -#include "calcgrid.h" -#include "pme.h" -#include "vec.h" -#include "domdec.h" -#include "nbnxn_cuda_data_mgmt.h" -#include "force.h" -#include "macros.h" -#include "md_logging.h" #include "pme_loadbal.h" +#include "config.h" + +#include "gromacs/legacyheaders/calcgrid.h" +#include "gromacs/legacyheaders/domdec.h" +#include "gromacs/legacyheaders/force.h" +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/legacyheaders/md_logging.h" +#include "gromacs/legacyheaders/network.h" +#include "gromacs/legacyheaders/pme.h" +#include "gromacs/legacyheaders/sim_util.h" +#include "gromacs/legacyheaders/types/commrec.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/smalloc.h" + /* Parameters and setting for one PP-PME setup */ typedef struct { real rcut_coulomb; /* Coulomb cut-off */ @@@ -265,6 -261,15 +265,15 @@@ static gmx_bool pme_loadbal_increase_cu while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing || !grid_ok); set->rcut_coulomb = pme_lb->cut_spacing*sp; + if (set->rcut_coulomb < pme_lb->rcut_coulomb_start) + { + /* This is unlikely, but can happen when e.g. continuing from + * a checkpoint after equilibration where the box shrank a lot. + * We want to avoid rcoulomb getting smaller than rvdw + * and there might be more issues with decreasing rcoulomb. + */ + set->rcut_coulomb = pme_lb->rcut_coulomb_start; + } if (pme_lb->cutoff_scheme == ecutsVERLET) { @@@ -431,17 -436,17 +440,17 @@@ static void switch_to_stage1(pme_load_b pme_lb->cur = pme_lb->start - 1; } -gmx_bool pme_load_balance(pme_load_balancing_t pme_lb, - t_commrec *cr, - FILE *fp_err, - FILE *fp_log, - t_inputrec *ir, - t_state *state, - double cycles, - interaction_const_t *ic, - nonbonded_verlet_t *nbv, - gmx_pme_t *pmedata, - gmx_int64_t step) +gmx_bool pme_load_balance(pme_load_balancing_t pme_lb, + t_commrec *cr, + FILE *fp_err, + FILE *fp_log, + t_inputrec *ir, + t_state *state, + double cycles, + interaction_const_t *ic, + struct nonbonded_verlet_t *nbv, + gmx_pme_t *pmedata, + gmx_int64_t step) { gmx_bool OK; pme_setup_t *set; @@@ -693,26 -698,30 +702,26 @@@ } bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0); - if (pme_lb->cutoff_scheme == ecutsVERLET && - nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA) - { - nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic); - - /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore - * also sharing texture references. To keep the code simple, we don't - * treat texture references as shared resources, but this means that - * the coulomb_tab texture ref will get updated by multiple threads. - * Hence, to ensure that the non-bonded kernels don't start before all - * texture binding operations are finished, we need to wait for all ranks - * to arrive here before continuing. - * - * Note that we could omit this barrier if GPUs are not shared (or - * texture objects are used), but as this is initialization code, there - * is not point in complicating things. - */ + nbnxn_cuda_pme_loadbal_update_param(nbv, ic); + + /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore + * also sharing texture references. To keep the code simple, we don't + * treat texture references as shared resources, but this means that + * the coulomb_tab texture ref will get updated by multiple threads. + * Hence, to ensure that the non-bonded kernels don't start before all + * texture binding operations are finished, we need to wait for all ranks + * to arrive here before continuing. + * + * Note that we could omit this barrier if GPUs are not shared (or + * texture objects are used), but as this is initialization code, there + * is not point in complicating things. + */ #ifdef GMX_THREAD_MPI - if (PAR(cr)) - { - gmx_barrier(cr); - } -#endif /* GMX_THREAD_MPI */ + if (PAR(cr) && use_GPU(nbv)) + { + gmx_barrier(cr); } +#endif /* GMX_THREAD_MPI */ /* Usually we won't need the simple tables with GPUs. * But we do with hybrid acceleration and with free energy.