endif (CUSTOM_VERSION_STRING)
set(SOVERSION 6)
# It is a bit irritating, but this has to be set separately for now!
-SET(CPACK_PACKAGE_VERSION_MAJOR "4")
-SET(CPACK_PACKAGE_VERSION_MINOR "6")
+SET(CPACK_PACKAGE_VERSION_MAJOR "5")
+SET(CPACK_PACKAGE_VERSION_MINOR "0")
#SET(CPACK_PACKAGE_VERSION_PATCH "0")
+ # The numerical gromacs version. It is 40600 for 4.6.0.
+ # The #define GMX_VERSION in gmx_header_config_h is set to this value.
+ math(EXPR NUM_VERSION
+ "${CPACK_PACKAGE_VERSION_MAJOR}*10000 + ${CPACK_PACKAGE_VERSION_MINOR}*100")
+ if(CPACK_PACKAGE_VERSION_PATCH)
+ math(EXPR NUM_VERSION
+ "${NUM_VERSION} + ${CPACK_PACKAGE_VERSION_PATCH}")
+ endif()
+
+ # The API version tracks the numerical Gromacs version (for now).
+ # It is potentially different from the Gromacs version in the future, if
+ # the programs/libraries diverge from the presumably more stable API.
+ # The #define GMX_API_VERSION in version.h is set to this value to
+ # provide backward compatibility of software written against the Gromacs API.
+ set(API_VERSION ${NUM_VERSION})
# Cmake modules/macros are in a subdirectory to keep this file cleaner
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
# Basic system tests (standard libraries, headers, functions, types) #
########################################################################
include(CheckIncludeFiles)
-check_include_files(string.h HAVE_STRING_H)
-check_include_files(math.h HAVE_MATH_H)
-check_include_files(limits.h HAVE_LIMITS_H)
-check_include_files(memory.h HAVE_MEMORY_H)
+include(CheckIncludeFileCXX)
check_include_files(unistd.h HAVE_UNISTD_H)
-check_include_files(direct.h HAVE_DIRECT_H)
check_include_files(pwd.h HAVE_PWD_H)
-check_include_files(stdint.h HAVE_STDINT_H)
-check_include_files(stdlib.h HAVE_STDLIB_H)
check_include_files(pthread.h HAVE_PTHREAD_H)
check_include_files(dirent.h HAVE_DIRENT_H)
-check_include_files(inttypes.h HAVE_INTTYPES_H)
-check_include_files(regex.h HAVE_REGEX_H)
-check_include_files(sys/types.h HAVE_SYS_TYPES_H)
-check_include_files(sys/stat.h HAVE_SYS_STAT_H)
check_include_files(sys/time.h HAVE_SYS_TIME_H)
-check_include_files(rpc/rpc.h HAVE_RPC_RPC_H)
-check_include_files("rpc/rpc.h;rpc/xdr.h" HAVE_RPC_XDR_H)
check_include_files(io.h HAVE_IO_H)
+ check_include_files(sched.h HAVE_SCHED_H)
+check_include_files(regex.h HAVE_POSIX_REGEX)
+check_include_file_cxx(regex HAVE_CXX11_REGEX)
+# TODO: It could be nice to inform the user if no regex support is found,
+# as selections won't be fully functional.
+
include(CheckFunctionExists)
-check_function_exists(strcasecmp HAVE_STRCASECMP)
check_function_exists(strdup HAVE_STRDUP)
-check_function_exists(vprintf HAVE_VPRINTF)
-check_function_exists(memcmp HAVE_MEMCMP)
check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN)
check_function_exists(memalign HAVE_MEMALIGN)
check_function_exists(_aligned_malloc HAVE__ALIGNED_MALLOC)
gmx_test__isfinite(HAVE__ISFINITE)
gmx_test__finite(HAVE__FINITE)
+include(gmxTestCXX11)
+gmx_test_cxx11(GMX_CXX11 CXX11_FLAG)
+set(GROMACS_CXX_FLAGS "${CXX11_FLAG} ${GROMACS_CXX_FLAGS}")
++if(CXX11_FLAG)
++ #FIXME: add a proper solution for propagating all flags except the C++11 flag
++ set(CUDA_PROPAGATE_HOST_FLAGS no)
++ message(WARNING "Please manually add compiler flags to CUDA_NVCC_FLAGS. Automatic propogation temporary not working.")
++endif()
+
include(gmxTestXDR)
gmx_test_xdr(GMX_SYSTEM_XDR)
if(NOT GMX_SYSTEM_XDR)
# Request that the thread_mpi C++ support sources be included.
# After this patch the macro only records the request (TMPI_CXX_LIB);
# the actual source list is assembled by TMPI_GET_SOURCE_LIST below,
# which is why the old THREAD_MPI_CXX_SRC assignment is deleted here.
MACRO(TMPI_MAKE_CXX_LIB)
set(TMPI_CXX_LIB 1)
- # the C++ library
- set(THREAD_MPI_CXX_SRC
- thread_mpi/system_error.cpp )
ENDMACRO(TMPI_MAKE_CXX_LIB)
+# Assemble the list of thread_mpi source files into ${SRC_VARIABLE}.
+# Optional arguments (via ARGN):
+#   CXX   - also include the C++ support source (system_error.cpp)
+#   NOMPI - build only the threading/base layer, omitting the MPI
+#           implementation sources
+# Any other argument is a hard configuration error.
+MACRO(TMPI_GET_SOURCE_LIST SRC_VARIABLE)
+ foreach (_option IN ITEMS ${ARGN})
+ if (_option STREQUAL "CXX")
+ set(TMPI_CXX_LIB 1)
+ elseif (_option STREQUAL "NOMPI")
+ set(TMPI_NO_MPI_LIB 1)
+ else ()
+ message(FATAL_ERROR "Unknown thread_mpi option '${_option}'")
+ endif ()
+ endforeach ()
+ # Sources always needed, regardless of options.
+ set(${SRC_VARIABLE}
+ thread_mpi/errhandler.c
+ thread_mpi/tmpi_malloc.c)
+ # Threading backend: THREAD_PTHREADS/THREAD_WINDOWS are assumed to be
+ # set by the platform thread checks elsewhere — TODO confirm.
+ if (THREAD_PTHREADS)
+ list(APPEND ${SRC_VARIABLE} thread_mpi/pthreads.c)
+ elseif (THREAD_WINDOWS)
+ list(APPEND ${SRC_VARIABLE} thread_mpi/winthreads.c)
+ endif (THREAD_PTHREADS)
+ if (TMPI_CXX_LIB)
+ list(APPEND ${SRC_VARIABLE} thread_mpi/system_error.cpp)
+ endif (TMPI_CXX_LIB)
+ # The full MPI implementation, skipped when NOMPI was requested.
+ if (NOT TMPI_NO_MPI_LIB)
+ list(APPEND ${SRC_VARIABLE}
+ thread_mpi/alltoall.c thread_mpi/p2p_protocol.c
+ thread_mpi/barrier.c thread_mpi/p2p_send_recv.c
+ thread_mpi/bcast.c thread_mpi/p2p_wait.c
+ thread_mpi/collective.c thread_mpi/profile.c
+ thread_mpi/comm.c thread_mpi/reduce.c
+ thread_mpi/event.c thread_mpi/reduce_fast.c
+ thread_mpi/gather.c thread_mpi/scatter.c
+ thread_mpi/group.c thread_mpi/tmpi_init.c
+ thread_mpi/topology.c thread_mpi/list.c
+ thread_mpi/type.c thread_mpi/lock.c
+ thread_mpi/numa_malloc.c thread_mpi/once.c
+ thread_mpi/scan.c)
+ endif()
+ENDMACRO(TMPI_GET_SOURCE_LIST)
+
+ test_tmpi_atomics(TMPI_ATOMICS)
+
+ # do we want to use only the atomics of tMPI (with GPU + MPI)
+ if(NOT TEST_TMPI_ATOMICS_ONLY)
include(FindThreads)
if (CMAKE_USE_PTHREADS_INIT)
check_include_files(pthread.h HAVE_PTHREAD_H)
message(STATUS "Detecting best acceleration for this CPU")
# Get CPU acceleration information
- try_run(GMX_DETECTCPU_RUN_ACC GMX_DETECTCPU_COMPILED
+ try_run(GMX_CPUID_RUN_ACC GMX_CPUID_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_detectcpu.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_DETECTCPU_STANDALONE"
- ${CMAKE_SOURCE_DIR}/src/gmxlib/gmx_cpuid.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/include -DGMX_CPUID_STANDALONE"
++ ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
++ COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_CPUID_STANDALONE"
RUN_OUTPUT_VARIABLE OUTPUT_TMP
- COMPILE_OUTPUT_VARIABLE GMX_DETECTCPU_COMPILE_OUTPUT
+ COMPILE_OUTPUT_VARIABLE GMX_CPUID_COMPILE_OUTPUT
ARGS "-acceleration")
- if(NOT GMX_DETECTCPU_COMPILED)
- message(WARNING "Cannot compile CPU detection code, which means no optimization.")
- message(STATUS "Compile output: ${GMX_DETECTCPU_COMPILE_OUTPUT}")
+ if(NOT GMX_CPUID_COMPILED)
+ message(WARNING "Cannot compile CPUID code, which means no CPU-specific acceleration.")
+ message(STATUS "Compile output: ${GMX_CPUID_COMPILE_OUTPUT}")
set(OUTPUT_TMP "None")
- elseif(NOT GMX_DETECTCPU_RUN_ACC EQUAL 0)
- message(WARNING "Cannot run CPU detection code, which means no optimization.")
+ elseif(NOT GMX_CPUID_RUN_ACC EQUAL 0)
+ message(WARNING "Cannot run CPUID code, which means no CPU-specific optimization.")
message(STATUS "Run output: ${OUTPUT_TMP}")
set(OUTPUT_TMP "None")
- endif(NOT GMX_DETECTCPU_COMPILED)
+ endif(NOT GMX_CPUID_COMPILED)
string(STRIP "@OUTPUT_TMP@" OUTPUT_ACC)
if(NOT CMAKE_CROSSCOMPILING)
# Get CPU acceleration information
- try_run(GMX_DETECTCPU_RUN_VENDOR GMX_DETECTCPU_COMPILED
+ try_run(GMX_CPUID_RUN_VENDOR GMX_CPUID_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_detectcpu.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_DETECTCPU_STANDALONE"
- ${CMAKE_SOURCE_DIR}/src/gmxlib/gmx_cpuid.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/include -DGMX_CPUID_STANDALONE"
++ ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
++ COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_CPUID_STANDALONE"
RUN_OUTPUT_VARIABLE OUTPUT_CPU_VENDOR ARGS "-vendor")
- try_run(GMX_DETECTCPU_RUN_BRAND GMX_DETECTCPU_COMPILED
+ try_run(GMX_CPUID_RUN_BRAND GMX_CPUID_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_detectcpu.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_DETECTCPU_STANDALONE"
- ${CMAKE_SOURCE_DIR}/src/gmxlib/gmx_cpuid.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/include -DGMX_CPUID_STANDALONE"
++ ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
++ COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_CPUID_STANDALONE"
RUN_OUTPUT_VARIABLE OUTPUT_CPU_BRAND ARGS "-brand")
- try_run(GMX_DETECTCPU_RUN_FAMILY GMX_DETECTCPU_COMPILED
+ try_run(GMX_CPUID_RUN_FAMILY GMX_CPUID_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_detectcpu.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_DETECTCPU_STANDALONE"
- ${CMAKE_SOURCE_DIR}/src/gmxlib/gmx_cpuid.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/include -DGMX_CPUID_STANDALONE"
++ ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
++ COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_CPUID_STANDALONE"
RUN_OUTPUT_VARIABLE OUTPUT_CPU_FAMILY ARGS "-family")
- try_run(GMX_DETECTCPU_RUN_MODEL GMX_DETECTCPU_COMPILED
+ try_run(GMX_CPUID_RUN_MODEL GMX_CPUID_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_detectcpu.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_DETECTCPU_STANDALONE"
- ${CMAKE_SOURCE_DIR}/src/gmxlib/gmx_cpuid.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/include -DGMX_CPUID_STANDALONE"
++ ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
++ COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_CPUID_STANDALONE"
RUN_OUTPUT_VARIABLE OUTPUT_CPU_MODEL ARGS "-model")
- try_run(GMX_DETECTCPU_RUN_STEPPING GMX_DETECTCPU_COMPILED
+ try_run(GMX_CPUID_RUN_STEPPING GMX_CPUID_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_detectcpu.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_DETECTCPU_STANDALONE"
- ${CMAKE_SOURCE_DIR}/src/gmxlib/gmx_cpuid.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/include -DGMX_CPUID_STANDALONE"
++ ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
++ COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_CPUID_STANDALONE"
RUN_OUTPUT_VARIABLE OUTPUT_CPU_STEPPING ARGS "-stepping")
- try_run(GMX_DETECTCPU_RUN_FEATURES GMX_DETECTCPU_COMPILED
+ try_run(GMX_CPUID_RUN_FEATURES GMX_CPUID_COMPILED
${CMAKE_BINARY_DIR}
- ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_detectcpu.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_DETECTCPU_STANDALONE"
- ${CMAKE_SOURCE_DIR}/src/gmxlib/gmx_cpuid.c
- COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/include -DGMX_CPUID_STANDALONE"
++ ${CMAKE_SOURCE_DIR}/src/gromacs/gmxlib/gmx_cpuid.c
++ COMPILE_DEFINITIONS "@GCC_INLINE_ASM_DEFINE@ -I${CMAKE_SOURCE_DIR}/src/gromacs/legacyheaders/ -DGMX_CPUID_STANDALONE"
RUN_OUTPUT_VARIABLE OUTPUT_CPU_FEATURES ARGS "-features")
string(STRIP "@OUTPUT_CPU_VENDOR@" OUTPUT_CPU_VENDOR)
/* Enable x86 gcc inline assembly */
#cmakedefine GMX_X86_GCC_INLINE_ASM
-/* Define to 1 if the system has the type gmx_bool. */
-#cmakedefine HAVE_BOOL
-
+ /* Use GPU native acceleration */
+ #cmakedefine GMX_GPU
+
/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
#cmakedefine HAVE_FSEEKO
/* Define to 1 if you have the <x86intrin.h> header file */
#cmakedefine HAVE_X86INTRIN_H
- /* Define for sched.h (this is for thread_mpi)*/
- #define HAVE_SCHED_H
+ /* Define to 1 if you have the <sched.h> header */
+ #cmakedefine HAVE_SCHED_H
-/* Define to 1 if you have the vprintf() function. */
-#cmakedefine HAVE_VPRINTF
+/* Define to 1 if you have the POSIX <regex.h> header file. */
+#cmakedefine HAVE_POSIX_REGEX
+
+/* Define to 1 if you have the C++11 <regex> header file. */
+#cmakedefine HAVE_CXX11_REGEX
+ /* Define to 1 if you have the sysconf() function */
+ #cmakedefine HAVE_SYSCONF
+
+ /* Define to 1 if you have the sched_setaffinity() function */
+ #cmakedefine HAVE_SCHED_SETAFFINITY
+
/* Bytes in IEEE fp word are in big-endian order if set, little-endian if not.
Only relevant when FLOAT_FORMAT_IEEE754 is defined. */
#cmakedefine GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER
--- /dev/null
- target_link_libraries(libgromacs
+# Build rules for the monolithic libgromacs library: collect sources from
+# the module subdirectories, then create, link and install the target.
+set(LIBGROMACS_SOURCES)
+
+add_subdirectory(legacyheaders)
+add_subdirectory(gmxlib)
+add_subdirectory(mdlib)
+add_subdirectory(gmxpreprocess)
+add_subdirectory(analysisdata)
+add_subdirectory(commandline)
+add_subdirectory(linearalgebra)
+add_subdirectory(onlinehelp)
+add_subdirectory(options)
+add_subdirectory(selection)
+add_subdirectory(trajectoryanalysis)
+add_subdirectory(utility)
+
+# Install the top-level public headers for the development component.
+file(GLOB LIBGROMACS_HEADERS *.h)
+install(FILES ${LIBGROMACS_HEADERS} DESTINATION ${INCL_INSTALL_DIR}/gromacs
+ COMPONENT development)
+
+list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES})
+
++# Generate and install the public version.h header from its template.
++configure_file(${CMAKE_CURRENT_SOURCE_DIR}/version.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/version.h)
++install(FILES ${CMAKE_CURRENT_BINARY_DIR}/version.h
++ DESTINATION ${INCL_INSTALL_DIR}/gromacs
++ COMPONENT development)
+# add target that regenerates version.c on every build (custom targets are
+# always considered out of date), but only when we generate the version
+if (USE_VERSION_H)
+ add_custom_target(gmx_version ALL
+ COMMAND ${CMAKE_COMMAND}
+ -D GIT_EXECUTABLE="${GIT_EXECUTABLE}"
+ -D GIT_VERSION="${GIT_VERSION}"
+ -D PROJECT_VERSION="${PROJECT_VERSION}"
+ -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}"
+ -D VERSION_C_CMAKEIN="${CMAKE_CURRENT_SOURCE_DIR}/version.c.cmakein"
+ -D VERSION_C_OUT="${CMAKE_CURRENT_BINARY_DIR}/version.c"
+ -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/gmxlib
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/version.c.cmakein
+ COMMENT "Generating version information")
+ list(APPEND LIBGROMACS_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/version.c) # auto-generated
+ set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/version.c
+ PROPERTIES GENERATED true)
+endif (USE_VERSION_H)
+
++# apply gcc 4.4.x bug workaround (per-file compile-flag adjustment)
++if(GMX_USE_GCC44_BUG_WORKAROUND)
++ include(gmxGCC44O3BugWorkaround)
++ gmx_apply_gcc44_bug_workaround("gmxlib/bondfree.c")
++ gmx_apply_gcc44_bug_workaround("mdlib/force.c")
++ gmx_apply_gcc44_bug_workaround("mdlib/constr.c")
++endif()
++
+# Everything collected above goes into one library target; gmx_version must
+# run first so the generated version.c exists.
+add_library(libgromacs ${LIBGROMACS_SOURCES})
+if (USE_VERSION_H)
+ add_dependencies(libgromacs gmx_version)
+endif (USE_VERSION_H)
++
++target_link_libraries(libgromacs ${GMX_GPU_LIBRARIES}
+ ${GMX_EXTRA_LIBRARIES} ${FFT_LIBRARIES} ${XML_LIBRARIES}
+ ${THREAD_LIB} ${OpenMP_SHARED_LINKER_FLAGS})
+set_target_properties(libgromacs PROPERTIES
+ OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
+ SOVERSION ${SOVERSION}
+ INSTALL_NAME_DIR "${LIB_INSTALL_DIR}"
+ COMPILE_FLAGS "${OpenMP_C_FLAGS}")
+
+install(TARGETS libgromacs DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
+
+# pkg-config file, renamed to carry the library suffix on install.
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein
+ ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc
+ DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
+ RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc"
+ COMPONENT development)
--- /dev/null
+# Build rules for src/gromacs/gmxlib: collects the module's sources into
+# GMXLIB_SOURCES and exports the list (plus thread_mpi and nonbonded
+# sources) to the parent scope for linking into libgromacs.
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+add_subdirectory(nonbonded)
+
+# The nonbonded directory contains subdirectories that are only
+# conditionally built, so we cannot use a GLOB_RECURSE here.
+file(GLOB GMXLIB_SOURCES *.c *.cpp statistics/*.c)
+
+# This would be the standard way to include thread_mpi, but we want libgmx
+# to link the functions directly
+#if(GMX_THREAD_MPI)
+# add_subdirectory(thread_mpi)
+#endif(GMX_THREAD_MPI)
+#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB})
+
+# Files called xxx_test.c are test drivers with a main() function for module xxx.c,
+# so they should not be included in the library
+file(GLOB_RECURSE NOT_GMXLIB_SOURCES *_test.c *\#*)
+list(REMOVE_ITEM GMXLIB_SOURCES ${NOT_GMXLIB_SOURCES})
+
++# gpu utils + cuda tools module; exports GMX_GPU_LIBRARIES for the
++# parent-scope target_link_libraries call.
++if(GMX_GPU)
++ add_subdirectory(cuda_tools)
++ add_subdirectory(gpu_utils)
++ set(GMX_GPU_LIBRARIES ${GMX_GPU_LIBRARIES} gpu_utils cuda_tools PARENT_SCOPE)
++endif()
++
+# Plugin support needs vmddlopen.c; without it, vmdio.c must be excluded.
+if(GMX_USE_PLUGINS)
+ set(GMXLIB_SOURCES ${GMXLIB_SOURCES} ${CMAKE_SOURCE_DIR}/src/external/vmd_molfile/vmddlopen.c)
+else()
+ list(REMOVE_ITEM GMXLIB_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/vmdio.c)
+endif()
+
+# An ugly hack to get absolute paths...
+file(GLOB THREAD_MPI_SOURCES ${THREAD_MPI_SRC})
+
+set(GMXLIB_SOURCES ${GMXLIB_SOURCES} ${THREAD_MPI_SOURCES} ${NONBONDED_SOURCES}
+ PARENT_SCOPE)
--- /dev/null
- #include "mdrun.h"
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "physics.h"
+#include "vec.h"
+#include "maths.h"
+#include "txtdump.h"
+#include "bondf.h"
+#include "smalloc.h"
+#include "pbc.h"
+#include "ns.h"
+#include "macros.h"
+#include "names.h"
+#include "gmx_fatal.h"
+#include "mshift.h"
+#include "main.h"
+#include "disre.h"
+#include "orires.h"
+#include "force.h"
+#include "nonbonded.h"
- const t_iatom forceatoms[],const t_iparams forceparams[],
- const rvec x[],rvec f[],rvec fshift[],
- const t_pbc *pbc,const t_graph *g,
- real lambda,real *dvdlambda,
- const t_mdatoms *md,t_fcdata *fcd,
- int *global_atom_index)
++
++#if !defined GMX_DOUBLE && defined GMX_X86_SSE2
++#include "gmx_x86_simd_single.h"
++#define SSE_PROPER_DIHEDRALS
++#endif
+
+/* 16x16 integer coefficient matrix used by the CMAP (correction map)
+ * interpolation code; presumably the standard bicubic spline
+ * coefficient matrix — verify against the CMAP routines.
+ * Find a better place for this? */
+const int cmap_coeff_matrix[] = {
+1, 0, -3, 2, 0, 0, 0, 0, -3, 0, 9, -6, 2, 0, -6, 4 ,
+0, 0, 0, 0, 0, 0, 0, 0, 3, 0, -9, 6, -2, 0, 6, -4,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -6, 0, 0, -6, 4 ,
+0, 0, 3, -2, 0, 0, 0, 0, 0, 0, -9, 6, 0, 0, 6, -4,
+0, 0, 0, 0, 1, 0, -3, 2, -2, 0, 6, -4, 1, 0, -3, 2 ,
+0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 3, -2, 1, 0, -3, 2 ,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 2, 0, 0, 3, -2,
+0, 0, 0, 0, 0, 0, 3, -2, 0, 0, -6, 4, 0, 0, 3, -2,
+0, 1, -2, 1, 0, 0, 0, 0, 0, -3, 6, -3, 0, 2, -4, 2 ,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -6, 3, 0, -2, 4, -2,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 2, -2,
+0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 3, -3, 0, 0, -2, 2 ,
+0, 0, 0, 0, 0, 1, -2, 1, 0, -2, 4, -2, 0, 1, -2, 1,
+0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -1, 0, 1, -2, 1,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0, -1, 1,
+0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 2, -2, 0, 0, -1, 1
+};
+
+
+
+/* Map a local atom index to a 1-based global atom number for use in
+ * messages.  When no local-to-global lookup table is supplied
+ * (global_atom_index == NULL) the local index itself is used. */
+int glatnr(int *global_atom_index,int i)
+{
+ int atnr;
+
+ if (global_atom_index == NULL) {
+ atnr = i + 1;
+ } else {
+ atnr = global_atom_index[i] + 1;
+ }
+
+ return atnr;
+}
+
+/* dx = xi - xj, using the nearest periodic image when pbc is non-NULL.
+ * Returns the PBC shift index (CENTRAL when no PBC is applied). */
+static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
+{
+ if (pbc) {
+ return pbc_dx_aiuc(pbc,xi,xj,dx);
+ }
+ else {
+ rvec_sub(xi,xj,dx);
+ return CENTRAL;
+ }
+}
+
+/*
+ * Morse potential bond by Frank Everdij
+ *
+ * Three parameters needed:
+ *
+ * b0 = equilibrium distance in nm
+ * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
+ * cb = well depth in kJ/mol
+ *
+ * Note: the potential is referenced to be +cb at infinite separation
+ * and zero at the equilibrium distance!
+ */
+
+/* Morse bond interaction (see parameter description in the comment
+ * above).  The A and B parameter sets are interpolated by lambda;
+ * forces are accumulated into f/fshift, dV/dlambda into *dvdlambda.
+ * Returns the total potential energy. */
+real morse_bonds(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ const real one=1.0;
+ const real two=2.0;
+ real dr,dr2,temp,omtemp,cbomtemp,fbond,vbond,fij,vtot;
+ real b0,be,cb,b0A,beA,cbA,b0B,beB,cbB,L1;
+ rvec dx;
+ int i,m,ki,type,ai,aj;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+
+ b0A = forceparams[type].morse.b0A;
+ beA = forceparams[type].morse.betaA;
+ cbA = forceparams[type].morse.cbA;
+
+ b0B = forceparams[type].morse.b0B;
+ beB = forceparams[type].morse.betaB;
+ cbB = forceparams[type].morse.cbB;
+
+ L1 = one-lambda; /* 1 */
+ b0 = L1*b0A + lambda*b0B; /* 3 */
+ be = L1*beA + lambda*beB; /* 3 */
+ cb = L1*cbA + lambda*cbB; /* 3 */
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+ temp = exp(-be*(dr-b0)); /* 12 */
+
+ if (temp == one)
+ {
+ /* bonds are constrained. This may _not_ include bond constraints if they are lambda dependent */
+ *dvdlambda += cbB-cbA;
+ continue;
+ }
+
+ omtemp = one-temp; /* 1 */
+ cbomtemp = cb*omtemp; /* 1 */
+ vbond = cbomtemp*omtemp; /* 1 */
+ fbond = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /* 9 */
+ vtot += vbond; /* 1 */
+
+ *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki = IVEC2IS(dt);
+ }
+
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 83 TOTAL */
+ return vtot;
+}
+
+/* Cubic bond potential: V = kb*d^2 + kb*kcub*d^3 with d = r - b0.
+ * No free-energy perturbation (A parameters only).  Accumulates
+ * forces/shift forces and returns the total potential energy. */
+real cubic_bonds(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ const real three = 3.0;
+ const real two = 2.0;
+ real kb,b0,kcub;
+ real dr,dr2,dist,kdist,kdist2,fbond,vbond,fij,vtot;
+ rvec dx;
+ int i,m,ki,type,ai,aj;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+
+ b0 = forceparams[type].cubic.b0;
+ kb = forceparams[type].cubic.kb;
+ kcub = forceparams[type].cubic.kcub;
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+
+ /* zero distance: no direction for a force, skip */
+ if (dr2 == 0.0)
+ continue;
+
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+ dist = dr-b0;
+ kdist = kb*dist;
+ kdist2 = kdist*dist;
+
+ vbond = kdist2 + kcub*kdist2*dist;
+ fbond = -(two*kdist + three*kdist2*kcub)/dr;
+
+ vtot += vbond; /* 21 */
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 54 TOTAL */
+ return vtot;
+}
+
+/* FENE (finitely extensible nonlinear elastic) bond:
+ * V = -0.5*kb*bm^2*ln(1 - r^2/bm^2).  A fatal error is raised when
+ * r^2 >= bm^2, since the potential diverges there.  Returns the total
+ * potential energy. */
+real FENE_bonds(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ const real half=0.5;
+ const real one=1.0;
+ real bm,kb;
+ real dr,dr2,bm2,omdr2obm2,fbond,vbond,fij,vtot;
+ rvec dx;
+ int i,m,ki,type,ai,aj;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+
+ bm = forceparams[type].fene.bm;
+ kb = forceparams[type].fene.kb;
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+
+ if (dr2 == 0.0)
+ continue;
+
+ bm2 = bm*bm;
+
+ if (dr2 >= bm2)
+ gmx_fatal(FARGS,
+ "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
+ dr2,bm2,
+ glatnr(global_atom_index,ai),
+ glatnr(global_atom_index,aj));
+
+ omdr2obm2 = one - dr2/bm2;
+
+ vbond = -half*kb*bm2*log(omdr2obm2);
+ fbond = -kb/omdr2obm2;
+
+ vtot += vbond; /* 35 */
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 58 TOTAL */
+ return vtot;
+}
+
+/* Generic harmonic potential around x0 with force constant kk, both
+ * linearly interpolated between the A state (kA,xA) and B state (kB,xB)
+ * by lambda.  Writes the potential to *V and the scalar force to *F;
+ * returns dV/dlambda. */
+real harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
+ real *V,real *F)
+{
+ const real half=0.5;
+ real L1,kk,x0,dx,dx2;
+ real v,f,dvdlambda;
+
+ L1 = 1.0-lambda;
+ kk = L1*kA+lambda*kB;
+ x0 = L1*xA+lambda*xB;
+
+ dx = x-x0;
+ dx2 = dx*dx;
+
+ f = -kk*dx;
+ v = half*kk*dx2;
+ dvdlambda = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
+
+ *F = f;
+ *V = v;
+
+ return dvdlambda;
+
+ /* That was 19 flops */
+}
+
+
+/* Harmonic bond stretching; per-bond parameters are lambda-interpolated
+ * via harmonic(), which also accumulates dV/dlambda.  Returns the total
+ * bond potential energy. */
+real bonds(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ki,ai,aj,type;
+ real dr,dr2,fbond,vbond,fij,vtot;
+ rvec dx;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+
+ *dvdlambda += harmonic(forceparams[type].harmonic.krA,
+ forceparams[type].harmonic.krB,
+ forceparams[type].harmonic.rA,
+ forceparams[type].harmonic.rB,
+ dr,lambda,&vbond,&fbond); /* 19 */
+
+ /* note: dvdlambda is accumulated even for zero distance, but the
+ * energy/force contribution is skipped (no defined direction) */
+ if (dr2 == 0.0)
+ continue;
+
+
+ vtot += vbond;/* 1*/
+ fbond *= gmx_invsqrt(dr2); /* 6 */
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"BONDS: dr = %10g vbond = %10g fbond = %10g\n",
+ dr,vbond,fbond);
+#endif
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 59 TOTAL */
+ return vtot;
+}
+
+/* Flat-bottomed distance restraint: harmonic below 'low' and between
+ * up1 and up2, linear beyond up2, and zero inside [low, up1].  All
+ * limits and the force constant are lambda-interpolated between the
+ * A and B states; dV/dlambda is accumulated accordingly. */
+real restraint_bonds(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ki,ai,aj,type;
+ real dr,dr2,fbond,vbond,fij,vtot;
+ real L1;
+ real low,dlow,up1,dup1,up2,dup2,k,dk;
+ real drh,drh2;
+ rvec dx;
+ ivec dt;
+
+ L1 = 1.0 - lambda;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+
+ /* Interpolated limits/force constant and their lambda derivatives. */
+ low = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
+ dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
+ up1 = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
+ dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
+ up2 = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
+ dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
+ k = L1*forceparams[type].restraint.kA + lambda*forceparams[type].restraint.kB;
+ dk = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
+ /* 24 */
+
+ if (dr < low)
+ {
+ /* harmonic wall below the flat region */
+ drh = dr - low;
+ drh2 = drh*drh;
+ vbond = 0.5*k*drh2;
+ fbond = -k*drh;
+ *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
+ } /* 11 */
+ else if (dr <= up1)
+ {
+ /* flat bottom: no contribution */
+ vbond = 0;
+ fbond = 0;
+ }
+ else if (dr <= up2)
+ {
+ /* harmonic wall above the flat region */
+ drh = dr - up1;
+ drh2 = drh*drh;
+ vbond = 0.5*k*drh2;
+ fbond = -k*drh;
+ *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
+ } /* 11 */
+ else
+ {
+ /* linear continuation beyond up2 */
+ drh = dr - up2;
+ vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
+ fbond = -k*(up2 - up1);
+ *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
+ + k*(dup2 - dup1)*(up2 - up1 + drh)
+ - k*(up2 - up1)*dup2;
+ }
+
+ if (dr2 == 0.0)
+ continue;
+
+ vtot += vbond;/* 1*/
+ fbond *= gmx_invsqrt(dr2); /* 6 */
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"BONDS: dr = %10g vbond = %10g fbond = %10g\n",
+ dr,vbond,fbond);
+#endif
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 59 TOTAL */
+
+ return vtot;
+}
+
+/* Shell (Drude) polarization: a harmonic shell-core bond whose force
+ * constant is derived from the shell charge and the polarizability,
+ * ksh = q_j^2 * ONE_4PI_EPS0 / alpha.  Returns the total potential. */
+real polarize(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ki,ai,aj,type;
+ real dr,dr2,fbond,vbond,fij,vtot,ksh;
+ rvec dx;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ksh = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
+ if (debug)
+ fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+
+ /* ksh is identical for A and B, so lambda only affects x0=0 here */
+ *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond); /* 19 */
+
+ if (dr2 == 0.0)
+ continue;
+
+ vtot += vbond;/* 1*/
+ fbond *= gmx_invsqrt(dr2); /* 6 */
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 59 TOTAL */
+ return vtot;
+}
+
+/* Shell polarization as in polarize(), plus an anharmonic (quartic)
+ * wall khyp*(dr-drcut)^4 once the shell-core distance exceeds drcut,
+ * to keep the shell from escaping.  Returns the total potential. */
+real anharm_polarize(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ki,ai,aj,type;
+ real dr,dr2,fbond,vbond,fij,vtot,ksh,khyp,drcut,ddr,ddr3;
+ rvec dx;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ksh = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
+ khyp = forceparams[type].anharm_polarize.khyp;
+ drcut = forceparams[type].anharm_polarize.drcut;
+ if (debug)
+ fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+
+ *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond); /* 19 */
+
+ if (dr2 == 0.0)
+ continue;
+
+ if (dr > drcut) {
+ /* quartic wall: V += khyp*ddr^4, F -= 4*khyp*ddr^3 */
+ ddr = dr-drcut;
+ ddr3 = ddr*ddr*ddr;
+ vbond += khyp*ddr*ddr3;
+ fbond -= 4*khyp*ddr3;
+ }
+ fbond *= gmx_invsqrt(dr2); /* 6 */
+ vtot += vbond;/* 1*/
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 72 TOTAL */
+ return vtot;
+}
+
+real water_pol(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ /* This routine implements anisotropic polarizibility for water, through
+ * a shell connected to a dummy with spring constant that differ in the
+ * three spatial dimensions in the molecular frame.
+ */
+ int i,m,aO,aH1,aH2,aD,aS,type,type0;
+ rvec dOH1,dOH2,dHH,dOD,dDS,nW,kk,dx,kdx,proj;
+#ifdef DEBUG
+ rvec df;
+#endif
+ real vtot,fij,r_HH,r_OD,r_nW,tx,ty,tz,qS;
+
+ vtot = 0.0;
+ if (nbonds > 0) {
+ type0 = forceatoms[0];
+ aS = forceatoms[5];
+ qS = md->chargeA[aS];
+ kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
+ kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
+ kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
+ r_HH = 1.0/forceparams[type0].wpol.rHH;
+ r_OD = 1.0/forceparams[type0].wpol.rOD;
+ if (debug) {
+ fprintf(debug,"WPOL: qS = %10.5f aS = %5d\n",qS,aS);
+ fprintf(debug,"WPOL: kk = %10.3f %10.3f %10.3f\n",
+ kk[XX],kk[YY],kk[ZZ]);
+ fprintf(debug,"WPOL: rOH = %10.3f rHH = %10.3f rOD = %10.3f\n",
+ forceparams[type0].wpol.rOH,
+ forceparams[type0].wpol.rHH,
+ forceparams[type0].wpol.rOD);
+ }
+ for(i=0; (i<nbonds); i+=6) {
+ type = forceatoms[i];
+ if (type != type0)
+ gmx_fatal(FARGS,"Sorry, type = %d, type0 = %d, file = %s, line = %d",
+ type,type0,__FILE__,__LINE__);
+ aO = forceatoms[i+1];
+ aH1 = forceatoms[i+2];
+ aH2 = forceatoms[i+3];
+ aD = forceatoms[i+4];
+ aS = forceatoms[i+5];
+
+ /* Compute vectors describing the water frame */
+ rvec_sub(x[aH1],x[aO], dOH1);
+ rvec_sub(x[aH2],x[aO], dOH2);
+ rvec_sub(x[aH2],x[aH1],dHH);
+ rvec_sub(x[aD], x[aO], dOD);
+ rvec_sub(x[aS], x[aD], dDS);
+ cprod(dOH1,dOH2,nW);
+
+ /* Compute inverse length of normal vector
+ * (this one could be precomputed, but I'm too lazy now)
+ */
+ r_nW = gmx_invsqrt(iprod(nW,nW));
+ /* This is for precision, but does not make a big difference,
+ * it can go later.
+ */
+ r_OD = gmx_invsqrt(iprod(dOD,dOD));
+
+ /* Normalize the vectors in the water frame */
+ svmul(r_nW,nW,nW);
+ svmul(r_HH,dHH,dHH);
+ svmul(r_OD,dOD,dOD);
+
+ /* Compute displacement of shell along components of the vector */
+ dx[ZZ] = iprod(dDS,dOD);
+ /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
+ for(m=0; (m<DIM); m++)
+ proj[m] = dDS[m]-dx[ZZ]*dOD[m];
+
+ /*dx[XX] = iprod(dDS,nW);
+ dx[YY] = iprod(dDS,dHH);*/
+ dx[XX] = iprod(proj,nW);
+ for(m=0; (m<DIM); m++)
+ proj[m] -= dx[XX]*nW[m];
+ dx[YY] = iprod(proj,dHH);
+ /*#define DEBUG*/
+#ifdef DEBUG
+ if (debug) {
+ fprintf(debug,"WPOL: dx2=%10g dy2=%10g dz2=%10g sum=%10g dDS^2=%10g\n",
+ sqr(dx[XX]),sqr(dx[YY]),sqr(dx[ZZ]),iprod(dx,dx),iprod(dDS,dDS));
+ fprintf(debug,"WPOL: dHH=(%10g,%10g,%10g)\n",dHH[XX],dHH[YY],dHH[ZZ]);
+ fprintf(debug,"WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
+ dOD[XX],dOD[YY],dOD[ZZ],1/r_OD);
+ fprintf(debug,"WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
+ nW[XX],nW[YY],nW[ZZ],1/r_nW);
+ fprintf(debug,"WPOL: dx =%10g, dy =%10g, dz =%10g\n",
+ dx[XX],dx[YY],dx[ZZ]);
+ fprintf(debug,"WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
+ dDS[XX],dDS[YY],dDS[ZZ]);
+ }
+#endif
+ /* Now compute the forces and energy */
+ kdx[XX] = kk[XX]*dx[XX];
+ kdx[YY] = kk[YY]*dx[YY];
+ kdx[ZZ] = kk[ZZ]*dx[ZZ];
+ vtot += iprod(dx,kdx);
+ for(m=0; (m<DIM); m++) {
+ /* This is a tensor operation but written out for speed */
+ tx = nW[m]*kdx[XX];
+ ty = dHH[m]*kdx[YY];
+ tz = dOD[m]*kdx[ZZ];
+ fij = -tx-ty-tz;
+#ifdef DEBUG
+ df[m] = fij;
+#endif
+ f[aS][m] += fij;
+ f[aD][m] -= fij;
+ }
+#ifdef DEBUG
+ if (debug) {
+ fprintf(debug,"WPOL: vwpol=%g\n",0.5*iprod(dx,kdx));
+ fprintf(debug,"WPOL: df = (%10g, %10g, %10g)\n",df[XX],df[YY],df[ZZ]);
+ }
+#endif
+ }
+ }
+ return 0.5*vtot;
+}
+
+static real do_1_thole(const rvec xi,const rvec xj,rvec fi,rvec fj,
+ const t_pbc *pbc,real qq,
+ rvec fshift[],real afac)
+{
+ rvec r12;
+ real r12sq,r12_1,r12n,r12bar,v0,v1,fscal,ebar,fff;
+ int m,t;
+
+ t = pbc_rvec_sub(pbc,xi,xj,r12); /* 3 */
+
+ r12sq = iprod(r12,r12); /* 5 */
+ r12_1 = gmx_invsqrt(r12sq); /* 5 */
+ r12bar = afac/r12_1; /* 5 */
+ v0 = qq*ONE_4PI_EPS0*r12_1; /* 2 */
+ ebar = exp(-r12bar); /* 5 */
+ v1 = (1-(1+0.5*r12bar)*ebar); /* 4 */
+ fscal = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
+ if (debug)
+ fprintf(debug,"THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f ebar = %.3f\n",v0,v1,1/r12_1,r12bar,fscal,ebar);
+
+ for(m=0; (m<DIM); m++) {
+ fff = fscal*r12[m];
+ fi[m] += fff;
+ fj[m] -= fff;
+ fshift[t][m] += fff;
+ fshift[CENTRAL][m] -= fff;
+ } /* 15 */
+
+ return v0*v1; /* 1 */
+ /* 54 */
+}
+
+real thole_pol(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ /* Interaction between two pairs of particles with opposite charge */
+ int i,type,a1,da1,a2,da2;
+ real q1,q2,qq,a,al1,al2,afac;
+ real V=0;
+
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ a1 = forceatoms[i++];
+ da1 = forceatoms[i++];
+ a2 = forceatoms[i++];
+ da2 = forceatoms[i++];
+ q1 = md->chargeA[da1];
+ q2 = md->chargeA[da2];
+ a = forceparams[type].thole.a;
+ al1 = forceparams[type].thole.alpha1;
+ al2 = forceparams[type].thole.alpha2;
+ qq = q1*q2;
+ afac = a*pow(al1*al2,-1.0/6.0);
+ V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq,fshift,afac);
+ V += do_1_thole(x[da1],x[a2], f[da1],f[a2], pbc,-qq,fshift,afac);
+ V += do_1_thole(x[a1], x[da2],f[a1], f[da2],pbc,-qq,fshift,afac);
+ V += do_1_thole(x[da1],x[da2],f[da1],f[da2],pbc, qq,fshift,afac);
+ }
+ /* 290 flops */
+ return V;
+}
+
+real bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
+ rvec r_ij,rvec r_kj,real *costh,
+ int *t1,int *t2)
+/* Return value is the angle between the bonds i-j and j-k */
+{
+ /* 41 FLOPS */
+ real th;
+
+ *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij); /* 3 */
+ *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj); /* 3 */
+
+ *costh=cos_angle(r_ij,r_kj); /* 25 */
+ th=acos(*costh); /* 10 */
+ /* 41 TOTAL */
+ return th;
+}
+
+real angles(int nbonds,
- int i,ai,aj,ak,t1,t2,type;
- rvec r_ij,r_kj;
- real cos_theta,cos_theta2,theta,dVdt,va,vtot;
- ivec jt,dt_ij,dt_kj;
-
- vtot = 0.0;
- for(i=0; (i<nbonds); ) {
- type = forceatoms[i++];
- ai = forceatoms[i++];
- aj = forceatoms[i++];
- ak = forceatoms[i++];
-
- theta = bond_angle(x[ai],x[aj],x[ak],pbc,
- r_ij,r_kj,&cos_theta,&t1,&t2); /* 41 */
++ const t_iatom forceatoms[],const t_iparams forceparams[],
++ const rvec x[],rvec f[],rvec fshift[],
++ const t_pbc *pbc,const t_graph *g,
++ real lambda,real *dvdlambda,
++ const t_mdatoms *md,t_fcdata *fcd,
++ int *global_atom_index)
+{
- *dvdlambda += harmonic(forceparams[type].harmonic.krA,
- forceparams[type].harmonic.krB,
- forceparams[type].harmonic.rA*DEG2RAD,
- forceparams[type].harmonic.rB*DEG2RAD,
- theta,lambda,&va,&dVdt); /* 21 */
- vtot += va;
-
- cos_theta2 = sqr(cos_theta);
- if (cos_theta2 < 1) {
- int m;
- real st,sth;
- real cik,cii,ckk;
- real nrkj2,nrij2;
- rvec f_i,f_j,f_k;
-
- st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
- sth = st*cos_theta; /* 1 */
++ int i,ai,aj,ak,t1,t2,type;
++ rvec r_ij,r_kj;
++ real cos_theta,cos_theta2,theta,dVdt,va,vtot;
++ ivec jt,dt_ij,dt_kj;
++
++ vtot = 0.0;
++ for(i=0; i<nbonds; )
++ {
++ type = forceatoms[i++];
++ ai = forceatoms[i++];
++ aj = forceatoms[i++];
++ ak = forceatoms[i++];
++
++ theta = bond_angle(x[ai],x[aj],x[ak],pbc,
++ r_ij,r_kj,&cos_theta,&t1,&t2); /* 41 */
+
- if (debug)
- fprintf(debug,"ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
- theta*RAD2DEG,va,dVdt);
++ *dvdlambda += harmonic(forceparams[type].harmonic.krA,
++ forceparams[type].harmonic.krB,
++ forceparams[type].harmonic.rA*DEG2RAD,
++ forceparams[type].harmonic.rB*DEG2RAD,
++ theta,lambda,&va,&dVdt); /* 21 */
++ vtot += va;
++
++ cos_theta2 = sqr(cos_theta);
++ if (cos_theta2 < 1)
++ {
++ int m;
++ real st,sth;
++ real cik,cii,ckk;
++ real nrkj2,nrij2;
++ real nrkj_1,nrij_1;
++ rvec f_i,f_j,f_k;
++
++ st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
++ sth = st*cos_theta; /* 1 */
+#ifdef DEBUG
- nrkj2=iprod(r_kj,r_kj); /* 5 */
- nrij2=iprod(r_ij,r_ij);
-
- cik=st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
- cii=sth/nrij2; /* 10 */
- ckk=sth/nrkj2; /* 10 */
-
- for (m=0; (m<DIM); m++) { /* 39 */
- f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
- f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
- f_j[m]=-f_i[m]-f_k[m];
- f[ai][m]+=f_i[m];
- f[aj][m]+=f_j[m];
- f[ak][m]+=f_k[m];
- }
- if (g) {
- copy_ivec(SHIFT_IVEC(g,aj),jt);
++ if (debug)
++ fprintf(debug,"ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
++ theta*RAD2DEG,va,dVdt);
+#endif
- ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
- ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
- t1=IVEC2IS(dt_ij);
- t2=IVEC2IS(dt_kj);
- }
- rvec_inc(fshift[t1],f_i);
- rvec_inc(fshift[CENTRAL],f_j);
- rvec_inc(fshift[t2],f_k);
- } /* 161 TOTAL */
- }
- return vtot;
++ nrij2 = iprod(r_ij,r_ij); /* 5 */
++ nrkj2 = iprod(r_kj,r_kj); /* 5 */
++
++ nrij_1 = gmx_invsqrt(nrij2); /* 10 */
++ nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
++
++ cik = st*nrij_1*nrkj_1; /* 2 */
++ cii = sth*nrij_1*nrij_1; /* 2 */
++ ckk = sth*nrkj_1*nrkj_1; /* 2 */
+
- real iprm,iprn,nrkj,nrkj2;
- real a,p,q,toler;
++ for (m=0; m<DIM; m++)
++ { /* 39 */
++ f_i[m] = -(cik*r_kj[m] - cii*r_ij[m]);
++ f_k[m] = -(cik*r_ij[m] - ckk*r_kj[m]);
++ f_j[m] = -f_i[m] - f_k[m];
++ f[ai][m] += f_i[m];
++ f[aj][m] += f_j[m];
++ f[ak][m] += f_k[m];
++ }
++ if (g != NULL)
++ {
++ copy_ivec(SHIFT_IVEC(g,aj),jt);
++
++ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
++ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
++ t1 = IVEC2IS(dt_ij);
++ t2 = IVEC2IS(dt_kj);
++ }
++ rvec_inc(fshift[t1],f_i);
++ rvec_inc(fshift[CENTRAL],f_j);
++ rvec_inc(fshift[t2],f_k);
++ } /* 161 TOTAL */
++ }
++
++ return vtot;
+}
+
+real linear_angles(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ai,aj,ak,t1,t2,type;
+ rvec f_i,f_j,f_k;
+ real L1,kA,kB,aA,aB,dr,dr2,va,vtot,a,b,klin;
+ ivec jt,dt_ij,dt_kj;
+ rvec r_ij,r_kj,r_ik,dx;
+
+ L1 = 1-lambda;
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+
+ kA = forceparams[type].linangle.klinA;
+ kB = forceparams[type].linangle.klinB;
+ klin = L1*kA + lambda*kB;
+
+ aA = forceparams[type].linangle.aA;
+ aB = forceparams[type].linangle.aB;
+ a = L1*aA+lambda*aB;
+ b = 1-a;
+
+ t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
+ t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
+ rvec_sub(r_ij,r_kj,r_ik);
+
+ dr2 = 0;
+ for(m=0; (m<DIM); m++)
+ {
+ dr = - a * r_ij[m] - b * r_kj[m];
+ dr2 += dr*dr;
+ dx[m] = dr;
+ f_i[m] = a*klin*dr;
+ f_k[m] = b*klin*dr;
+ f_j[m] = -(f_i[m]+f_k[m]);
+ f[ai][m] += f_i[m];
+ f[aj][m] += f_j[m];
+ f[ak][m] += f_k[m];
+ }
+ va = 0.5*klin*dr2;
+ *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx,r_ik);
+
+ vtot += va;
+
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,aj),jt);
+
+ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_inc(fshift[CENTRAL],f_j);
+ rvec_inc(fshift[t2],f_k);
+ } /* 57 TOTAL */
+ return vtot;
+}
+
+real urey_bradley(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ai,aj,ak,t1,t2,type,ki;
+ rvec r_ij,r_kj,r_ik;
+ real cos_theta,cos_theta2,theta;
+ real dVdt,va,vtot,dr,dr2,vbond,fbond,fik;
+ real kthA,th0A,kUBA,r13A,kthB,th0B,kUBB,r13B;
+ ivec jt,dt_ij,dt_kj,dt_ik;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ th0A = forceparams[type].u_b.thetaA*DEG2RAD;
+ kthA = forceparams[type].u_b.kthetaA;
+ r13A = forceparams[type].u_b.r13A;
+ kUBA = forceparams[type].u_b.kUBA;
+ th0B = forceparams[type].u_b.thetaB*DEG2RAD;
+ kthB = forceparams[type].u_b.kthetaB;
+ r13B = forceparams[type].u_b.r13B;
+ kUBB = forceparams[type].u_b.kUBB;
+
+ theta = bond_angle(x[ai],x[aj],x[ak],pbc,
+ r_ij,r_kj,&cos_theta,&t1,&t2); /* 41 */
+
+ *dvdlambda += harmonic(kthA,kthB,th0A,th0B,theta,lambda,&va,&dVdt); /* 21 */
+ vtot += va;
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik); /* 3 */
+ dr2 = iprod(r_ik,r_ik); /* 5 */
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+
+ *dvdlambda += harmonic(kUBA,kUBB,r13A,r13B,dr,lambda,&vbond,&fbond); /* 19 */
+
+ cos_theta2 = sqr(cos_theta); /* 1 */
+ if (cos_theta2 < 1) {
+ real st,sth;
+ real cik,cii,ckk;
+ real nrkj2,nrij2;
+ rvec f_i,f_j,f_k;
+
+ st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
+ sth = st*cos_theta; /* 1 */
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
+ theta*RAD2DEG,va,dVdt);
+#endif
+ nrkj2=iprod(r_kj,r_kj); /* 5 */
+ nrij2=iprod(r_ij,r_ij);
+
+ cik=st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
+ cii=sth/nrij2; /* 10 */
+ ckk=sth/nrkj2; /* 10 */
+
+ for (m=0; (m<DIM); m++) { /* 39 */
+ f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
+ f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
+ f_j[m]=-f_i[m]-f_k[m];
+ f[ai][m]+=f_i[m];
+ f[aj][m]+=f_j[m];
+ f[ak][m]+=f_k[m];
+ }
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,aj),jt);
+
+ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_inc(fshift[CENTRAL],f_j);
+ rvec_inc(fshift[t2],f_k);
+ } /* 161 TOTAL */
+ /* Time for the bond calculations */
+ if (dr2 == 0.0)
+ continue;
+
+ vtot += vbond; /* 1*/
+ fbond *= gmx_invsqrt(dr2); /* 6 */
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),dt_ik);
+ ki=IVEC2IS(dt_ik);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fik=fbond*r_ik[m];
+ f[ai][m]+=fik;
+ f[ak][m]-=fik;
+ fshift[ki][m]+=fik;
+ fshift[CENTRAL][m]-=fik;
+ }
+ }
+ return vtot;
+}
+
+real quartic_angles(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,j,ai,aj,ak,t1,t2,type;
+ rvec r_ij,r_kj;
+ real cos_theta,cos_theta2,theta,dt,dVdt,va,dtp,c,vtot;
+ ivec jt,dt_ij,dt_kj;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+
+ theta = bond_angle(x[ai],x[aj],x[ak],pbc,
+ r_ij,r_kj,&cos_theta,&t1,&t2); /* 41 */
+
+ dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2 */
+
+ dVdt = 0;
+ va = forceparams[type].qangle.c[0];
+ dtp = 1.0;
+ for(j=1; j<=4; j++) {
+ c = forceparams[type].qangle.c[j];
+ dVdt -= j*c*dtp;
+ dtp *= dt;
+ va += c*dtp;
+ }
+ /* 20 */
+
+ vtot += va;
+
+ cos_theta2 = sqr(cos_theta); /* 1 */
+ if (cos_theta2 < 1) {
+ int m;
+ real st,sth;
+ real cik,cii,ckk;
+ real nrkj2,nrij2;
+ rvec f_i,f_j,f_k;
+
+ st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
+ sth = st*cos_theta; /* 1 */
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
+ theta*RAD2DEG,va,dVdt);
+#endif
+ nrkj2=iprod(r_kj,r_kj); /* 5 */
+ nrij2=iprod(r_ij,r_ij);
+
+ cik=st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
+ cii=sth/nrij2; /* 10 */
+ ckk=sth/nrkj2; /* 10 */
+
+ for (m=0; (m<DIM); m++) { /* 39 */
+ f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
+ f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
+ f_j[m]=-f_i[m]-f_k[m];
+ f[ai][m]+=f_i[m];
+ f[aj][m]+=f_j[m];
+ f[ak][m]+=f_k[m];
+ }
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,aj),jt);
+
+ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_inc(fshift[CENTRAL],f_j);
+ rvec_inc(fshift[t2],f_k);
+ } /* 153 TOTAL */
+ }
+ return vtot;
+}
+
+real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
+ const t_pbc *pbc,
+ rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n,
+ real *sign,int *t1,int *t2,int *t3)
+{
+ real ipr,phi;
+
+ *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij); /* 3 */
+ *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj); /* 3 */
+ *t3 = pbc_rvec_sub(pbc,xk,xl,r_kl); /* 3 */
+
+ cprod(r_ij,r_kj,m); /* 9 */
+ cprod(r_kj,r_kl,n); /* 9 */
+ phi=gmx_angle(m,n); /* 49 (assuming 25 for atan2) */
+ ipr=iprod(r_ij,n); /* 5 */
+ (*sign)=(ipr<0.0)?-1.0:1.0;
+ phi=(*sign)*phi; /* 1 */
+ /* 82 TOTAL */
+ return phi;
+}
+
+
++#ifdef SSE_PROPER_DIHEDRALS
++
++/* x86 SIMD inner-product of 4 float vectors */
++#define GMX_MM_IPROD_PS(ax,ay,az,bx,by,bz) \
++ _mm_add_ps(_mm_add_ps(_mm_mul_ps(ax,bx),_mm_mul_ps(ay,by)),_mm_mul_ps(az,bz))
++
++/* x86 SIMD norm^2 of 4 float vectors */
++#define GMX_MM_NORM2_PS(ax,ay,az) GMX_MM_IPROD_PS(ax,ay,az,ax,ay,az)
++
++/* x86 SIMD cross-product of 4 float vectors */
++#define GMX_MM_CPROD_PS(ax,ay,az,bx,by,bz,cx,cy,cz) \
++{ \
++ cx = _mm_sub_ps(_mm_mul_ps(ay,bz),_mm_mul_ps(az,by)); \
++ cy = _mm_sub_ps(_mm_mul_ps(az,bx),_mm_mul_ps(ax,bz)); \
++ cz = _mm_sub_ps(_mm_mul_ps(ax,by),_mm_mul_ps(ay,bx)); \
++}
++
++/* load 4 rvec's into 3 x86 SIMD float registers */
++#define load_rvec4(r0,r1,r2,r3,rx_SSE,ry_SSE,rz_SSE) \
++{ \
++ __m128 tmp; \
++ rx_SSE = _mm_load_ps(r0); \
++ ry_SSE = _mm_load_ps(r1); \
++ rz_SSE = _mm_load_ps(r2); \
++ tmp = _mm_load_ps(r3); \
++ _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp); \
++}
++
++#define store_rvec4(rx_SSE,ry_SSE,rz_SSE,r0,r1,r2,r3) \
++{ \
++ __m128 tmp=_mm_setzero_ps(); \
++ _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp); \
++ _mm_store_ps(r0,rx_SSE); \
++ _mm_store_ps(r1,ry_SSE); \
++ _mm_store_ps(r2,rz_SSE); \
++ _mm_store_ps(r3,tmp ); \
++}
++
++/* An rvec in a structure which can be allocated 16-byte aligned */
++typedef struct {
++ rvec v;
++ float f;
++} rvec_sse_t;
++
++/* As dih_angle above, but calculates 4 dihedral angles at once using SSE,
++ * also calculates the pre-factor required for the dihedral force update.
++ * Note that bv and buf should be 16-byte aligned.
++ */
++static void
++dih_angle_sse(const rvec *x,
++ int ai[4],int aj[4],int ak[4],int al[4],
++ const t_pbc *pbc,
++ int t1[4],int t2[4],int t3[4],
++ rvec_sse_t *bv,
++ real *buf)
++{
++ int s;
++ __m128 rijx_SSE,rijy_SSE,rijz_SSE;
++ __m128 rkjx_SSE,rkjy_SSE,rkjz_SSE;
++ __m128 rklx_SSE,rkly_SSE,rklz_SSE;
++ __m128 mx_SSE,my_SSE,mz_SSE;
++ __m128 nx_SSE,ny_SSE,nz_SSE;
++ __m128 cx_SSE,cy_SSE,cz_SSE;
++ __m128 cn_SSE;
++ __m128 s_SSE;
++ __m128 phi_SSE;
++ __m128 ipr_SSE;
++ int signs;
++ __m128 iprm_SSE,iprn_SSE;
++ __m128 nrkj2_SSE,nrkj_1_SSE,nrkj_2_SSE,nrkj_SSE;
++ __m128 nrkj_m2_SSE,nrkj_n2_SSE;
++ __m128 p_SSE,q_SSE;
++ __m128 fmin_SSE=_mm_set1_ps(GMX_FLOAT_MIN);
++
++ for(s=0; s<4; s++)
++ {
++ t1[s] = pbc_rvec_sub(pbc,x[ai[s]],x[aj[s]],bv[0+s].v);
++ t2[s] = pbc_rvec_sub(pbc,x[ak[s]],x[aj[s]],bv[4+s].v);
++ t3[s] = pbc_rvec_sub(pbc,x[ak[s]],x[al[s]],bv[8+s].v);
++ }
++
++ load_rvec4(bv[0].v,bv[1].v,bv[2].v,bv[3].v,rijx_SSE,rijy_SSE,rijz_SSE);
++ load_rvec4(bv[4].v,bv[5].v,bv[6].v,bv[7].v,rkjx_SSE,rkjy_SSE,rkjz_SSE);
++ load_rvec4(bv[8].v,bv[9].v,bv[10].v,bv[11].v,rklx_SSE,rkly_SSE,rklz_SSE);
++
++ GMX_MM_CPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
++ rkjx_SSE,rkjy_SSE,rkjz_SSE,
++ mx_SSE,my_SSE,mz_SSE);
++
++ GMX_MM_CPROD_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE,
++ rklx_SSE,rkly_SSE,rklz_SSE,
++ nx_SSE,ny_SSE,nz_SSE);
++
++ GMX_MM_CPROD_PS(mx_SSE,my_SSE,mz_SSE,
++ nx_SSE,ny_SSE,nz_SSE,
++ cx_SSE,cy_SSE,cz_SSE);
++
++ cn_SSE = gmx_mm_sqrt_ps(GMX_MM_NORM2_PS(cx_SSE,cy_SSE,cz_SSE));
++
++ s_SSE = GMX_MM_IPROD_PS(mx_SSE,my_SSE,mz_SSE,nx_SSE,ny_SSE,nz_SSE);
++
++ phi_SSE = gmx_mm_atan2_ps(cn_SSE,s_SSE);
++ _mm_store_ps(buf+16,phi_SSE);
++
++ ipr_SSE = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
++ nx_SSE,ny_SSE,nz_SSE);
++
++ signs = _mm_movemask_ps(ipr_SSE);
++
++ for(s=0; s<4; s++)
++ {
++ if (signs & (1<<s))
++ {
++ buf[16+s] = -buf[16+s];
++ }
++ }
++
++ iprm_SSE = GMX_MM_NORM2_PS(mx_SSE,my_SSE,mz_SSE);
++ iprn_SSE = GMX_MM_NORM2_PS(nx_SSE,ny_SSE,nz_SSE);
++
++ /* store_rvec4 messes with the input, don't use it after this! */
++ store_rvec4(mx_SSE,my_SSE,mz_SSE,bv[0].v,bv[1].v,bv[2].v,bv[3].v);
++ store_rvec4(nx_SSE,ny_SSE,nz_SSE,bv[4].v,bv[5].v,bv[6].v,bv[7].v);
++
++ nrkj2_SSE = GMX_MM_NORM2_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE);
++
++ /* Avoid division by zero. When zero, the result is multiplied by 0
++ * anyhow, so the 3 max below do not affect the final result.
++ */
++ nrkj2_SSE = _mm_max_ps(nrkj2_SSE,fmin_SSE);
++ nrkj_1_SSE = gmx_mm_invsqrt_ps(nrkj2_SSE);
++ nrkj_2_SSE = _mm_mul_ps(nrkj_1_SSE,nrkj_1_SSE);
++ nrkj_SSE = _mm_mul_ps(nrkj2_SSE,nrkj_1_SSE);
++
++ iprm_SSE = _mm_max_ps(iprm_SSE,fmin_SSE);
++ iprn_SSE = _mm_max_ps(iprn_SSE,fmin_SSE);
++ nrkj_m2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprm_SSE));
++ nrkj_n2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprn_SSE));
++
++ _mm_store_ps(buf+0,nrkj_m2_SSE);
++ _mm_store_ps(buf+4,nrkj_n2_SSE);
++
++ p_SSE = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
++ rkjx_SSE,rkjy_SSE,rkjz_SSE);
++ p_SSE = _mm_mul_ps(p_SSE,nrkj_2_SSE);
++
++ q_SSE = GMX_MM_IPROD_PS(rklx_SSE,rkly_SSE,rklz_SSE,
++ rkjx_SSE,rkjy_SSE,rkjz_SSE);
++ q_SSE = _mm_mul_ps(q_SSE,nrkj_2_SSE);
++
++ _mm_store_ps(buf+8 ,p_SSE);
++ _mm_store_ps(buf+12,q_SSE);
++}
++
++#endif /* SSE_PROPER_DIHEDRALS */
++
+
+void do_dih_fup(int i,int j,int k,int l,real ddphi,
+ rvec r_ij,rvec r_kj,rvec r_kl,
+ rvec m,rvec n,rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ const rvec x[],int t1,int t2,int t3)
+{
+ /* 143 FLOPS */
+ rvec f_i,f_j,f_k,f_l;
+ rvec uvec,vvec,svec,dx_jl;
- nrkj = nrkj2*gmx_invsqrt(nrkj2); /* 10 */
++ real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
++ real a,b,p,q,toler;
+ ivec jt,dt_ij,dt_kj,dt_lj;
+
+ iprm = iprod(m,m); /* 5 */
+ iprn = iprod(n,n); /* 5 */
+ nrkj2 = iprod(r_kj,r_kj); /* 5 */
+ toler = nrkj2*GMX_REAL_EPS;
+ if ((iprm > toler) && (iprn > toler)) {
- a = ddphi*nrkj/iprn; /* 11 */
- svmul(a,n,f_l); /* 3 */
++ nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
++ nrkj_2 = nrkj_1*nrkj_1; /* 1 */
++ nrkj = nrkj2*nrkj_1; /* 1 */
+ a = -ddphi*nrkj/iprm; /* 11 */
+ svmul(a,m,f_i); /* 3 */
- p /= nrkj2; /* 10 */
++ b = ddphi*nrkj/iprn; /* 11 */
++ svmul(b,n,f_l); /* 3 */
+ p = iprod(r_ij,r_kj); /* 5 */
- q /= nrkj2; /* 10 */
++ p *= nrkj_2; /* 1 */
+ q = iprod(r_kl,r_kj); /* 5 */
- real calc_one_bond(FILE *fplog,int ftype, const t_idef *idef,
- rvec x[], rvec f[], t_forcerec *fr,
- const t_pbc *pbc,const t_graph *g,
- gmx_enerdata_t *enerd, t_nrnb *nrnb,
- real *lambda, real *dvdl,
- const t_mdatoms *md,t_fcdata *fcd,
- int *global_atom_index, gmx_bool bPrintSepPot)
++ q *= nrkj_2; /* 1 */
+ svmul(p,f_i,uvec); /* 3 */
+ svmul(q,f_l,vvec); /* 3 */
+ rvec_sub(uvec,vvec,svec); /* 3 */
+ rvec_sub(f_i,svec,f_j); /* 3 */
+ rvec_add(f_l,svec,f_k); /* 3 */
+ rvec_inc(f[i],f_i); /* 3 */
+ rvec_dec(f[j],f_j); /* 3 */
+ rvec_dec(f[k],f_k); /* 3 */
+ rvec_inc(f[l],f_l); /* 3 */
+
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,j),jt);
+ ivec_sub(SHIFT_IVEC(g,i),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,k),jt,dt_kj);
+ ivec_sub(SHIFT_IVEC(g,l),jt,dt_lj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ t3=IVEC2IS(dt_lj);
+ } else if (pbc) {
+ t3 = pbc_rvec_sub(pbc,x[l],x[j],dx_jl);
+ } else {
+ t3 = CENTRAL;
+ }
+
+ rvec_inc(fshift[t1],f_i);
+ rvec_dec(fshift[CENTRAL],f_j);
+ rvec_dec(fshift[t2],f_k);
+ rvec_inc(fshift[t3],f_l);
+ }
+ /* 112 TOTAL */
+}
+
++/* As do_dih_fup above, but without shift forces */
++static void
++do_dih_fup_noshiftf(int i,int j,int k,int l,real ddphi,
++ rvec r_ij,rvec r_kj,rvec r_kl,
++ rvec m,rvec n,rvec f[])
++{
++ rvec f_i,f_j,f_k,f_l;
++ rvec uvec,vvec,svec,dx_jl;
++ real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
++ real a,b,p,q,toler;
++ ivec jt,dt_ij,dt_kj,dt_lj;
++
++ iprm = iprod(m,m); /* 5 */
++ iprn = iprod(n,n); /* 5 */
++ nrkj2 = iprod(r_kj,r_kj); /* 5 */
++ toler = nrkj2*GMX_REAL_EPS;
++ if ((iprm > toler) && (iprn > toler)) {
++ nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
++ nrkj_2 = nrkj_1*nrkj_1; /* 1 */
++ nrkj = nrkj2*nrkj_1; /* 1 */
++ a = -ddphi*nrkj/iprm; /* 11 */
++ svmul(a,m,f_i); /* 3 */
++ b = ddphi*nrkj/iprn; /* 11 */
++ svmul(b,n,f_l); /* 3 */
++ p = iprod(r_ij,r_kj); /* 5 */
++ p *= nrkj_2; /* 1 */
++ q = iprod(r_kl,r_kj); /* 5 */
++ q *= nrkj_2; /* 1 */
++ svmul(p,f_i,uvec); /* 3 */
++ svmul(q,f_l,vvec); /* 3 */
++ rvec_sub(uvec,vvec,svec); /* 3 */
++ rvec_sub(f_i,svec,f_j); /* 3 */
++ rvec_add(f_l,svec,f_k); /* 3 */
++ rvec_inc(f[i],f_i); /* 3 */
++ rvec_dec(f[j],f_j); /* 3 */
++ rvec_dec(f[k],f_k); /* 3 */
++ rvec_inc(f[l],f_l); /* 3 */
++ }
++}
++
++/* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
++static void
++do_dih_fup_noshiftf_precalc(int i,int j,int k,int l,real ddphi,
++ real nrkj_m2,real nrkj_n2,
++ real p,real q,
++ rvec m,rvec n,rvec f[])
++{
++ rvec f_i,f_j,f_k,f_l;
++ rvec uvec,vvec,svec,dx_jl;
++ real a,b,toler;
++ ivec jt,dt_ij,dt_kj,dt_lj;
++
++ a = -ddphi*nrkj_m2;
++ svmul(a,m,f_i);
++ b = ddphi*nrkj_n2;
++ svmul(b,n,f_l);
++ svmul(p,f_i,uvec);
++ svmul(q,f_l,vvec);
++ rvec_sub(uvec,vvec,svec);
++ rvec_sub(f_i,svec,f_j);
++ rvec_add(f_l,svec,f_k);
++ rvec_inc(f[i],f_i);
++ rvec_dec(f[j],f_j);
++ rvec_dec(f[k],f_k);
++ rvec_inc(f[l],f_l);
++}
++
+
+real dopdihs(real cpA,real cpB,real phiA,real phiB,int mult,
+ real phi,real lambda,real *V,real *F)
+{
+ real v,dvdlambda,mdphi,v1,sdphi,ddphi;
+ real L1 = 1.0 - lambda;
+ real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
+ real dph0 = (phiB - phiA)*DEG2RAD;
+ real cp = L1*cpA + lambda*cpB;
+
+ mdphi = mult*phi - ph0;
+ sdphi = sin(mdphi);
+ ddphi = -cp*mult*sdphi;
+ v1 = 1.0 + cos(mdphi);
+ v = cp*v1;
+
+ dvdlambda = (cpB - cpA)*v1 + cp*dph0*sdphi;
+
+ *V = v;
+ *F = ddphi;
+
+ return dvdlambda;
+
+ /* That was 40 flops */
+}
+
++static void
++dopdihs_noener(real cpA,real cpB,real phiA,real phiB,int mult,
++ real phi,real lambda,real *F)
++{
++ real mdphi,sdphi,ddphi;
++ real L1 = 1.0 - lambda;
++ real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
++ real cp = L1*cpA + lambda*cpB;
++
++ mdphi = mult*phi - ph0;
++ sdphi = sin(mdphi);
++ ddphi = -cp*mult*sdphi;
++
++ *F = ddphi;
++
++ /* That was 20 flops */
++}
++
++static void
++dopdihs_mdphi(real cpA,real cpB,real phiA,real phiB,int mult,
++ real phi,real lambda,real *cp,real *mdphi)
++{
++ real L1 = 1.0 - lambda;
++ real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
++
++ *cp = L1*cpA + lambda*cpB;
++
++ *mdphi = mult*phi - ph0;
++}
++
+static real dopdihs_min(real cpA,real cpB,real phiA,real phiB,int mult,
+ real phi,real lambda,real *V,real *F)
+ /* similar to dopdihs, except for a minus sign *
+ * and a different treatment of mult/phi0 */
+{
+ real v,dvdlambda,mdphi,v1,sdphi,ddphi;
+ real L1 = 1.0 - lambda;
+ real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
+ real dph0 = (phiB - phiA)*DEG2RAD;
+ real cp = L1*cpA + lambda*cpB;
+
+ mdphi = mult*(phi-ph0);
+ sdphi = sin(mdphi);
+ ddphi = cp*mult*sdphi;
+ v1 = 1.0-cos(mdphi);
+ v = cp*v1;
+
+ dvdlambda = (cpB-cpA)*v1 + cp*dph0*sdphi;
+
+ *V = v;
+ *F = ddphi;
+
+ return dvdlambda;
+
+ /* That was 40 flops */
+}
+
+real pdihs(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,type,ai,aj,ak,al;
+ int t1,t2,t3;
+ rvec r_ij,r_kj,r_kl,m,n;
+ real phi,sign,ddphi,vpd,vtot;
+
+ vtot = 0.0;
+
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+
+ phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
+ &sign,&t1,&t2,&t3); /* 84 */
+ *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
+ forceparams[type].pdihs.cpB,
+ forceparams[type].pdihs.phiA,
+ forceparams[type].pdihs.phiB,
+ forceparams[type].pdihs.mult,
+ phi,lambda,&vpd,&ddphi);
+
+ vtot += vpd;
+ do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
+ f,fshift,pbc,g,x,t1,t2,t3); /* 112 */
+
+#ifdef DEBUG
+ fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
+ ai,aj,ak,al,phi);
+#endif
+ } /* 223 TOTAL */
+
+ return vtot;
+}
+
+void make_dp_periodic(real *dp) /* 1 flop? */
+{
+ /* dp cannot be outside (-pi,pi) */
+ if (*dp >= M_PI)
+ {
+ *dp -= 2*M_PI;
+ }
+ else if (*dp < -M_PI)
+ {
+ *dp += 2*M_PI;
+ }
+ return;
+}
+
++/* As pdihs above, but without calculating energies and shift forces */
++static void
++pdihs_noener(int nbonds,
++ const t_iatom forceatoms[],const t_iparams forceparams[],
++ const rvec x[],rvec f[],
++ const t_pbc *pbc,const t_graph *g,
++ real lambda,
++ const t_mdatoms *md,t_fcdata *fcd,
++ int *global_atom_index)
++{
++ int i,type,ai,aj,ak,al;
++ int t1,t2,t3;
++ rvec r_ij,r_kj,r_kl,m,n;
++ real phi,sign,ddphi_tot,ddphi;
++
++ for(i=0; (i<nbonds); )
++ {
++ ai = forceatoms[i+1];
++ aj = forceatoms[i+2];
++ ak = forceatoms[i+3];
++ al = forceatoms[i+4];
++
++ phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
++ &sign,&t1,&t2,&t3);
++
++ ddphi_tot = 0;
++
++ /* Loop over dihedrals working on the same atoms,
++ * so we avoid recalculating angles and force distributions.
++ */
++ do
++ {
++ type = forceatoms[i];
++ dopdihs_noener(forceparams[type].pdihs.cpA,
++ forceparams[type].pdihs.cpB,
++ forceparams[type].pdihs.phiA,
++ forceparams[type].pdihs.phiB,
++ forceparams[type].pdihs.mult,
++ phi,lambda,&ddphi);
++ ddphi_tot += ddphi;
++
++ i += 5;
++ }
++ while(i < nbonds &&
++ forceatoms[i+1] == ai &&
++ forceatoms[i+2] == aj &&
++ forceatoms[i+3] == ak &&
++ forceatoms[i+4] == al);
++
++ do_dih_fup_noshiftf(ai,aj,ak,al,ddphi_tot,r_ij,r_kj,r_kl,m,n,f);
++ }
++}
++
++
++#ifdef SSE_PROPER_DIHEDRALS
++
++/* As pdihs_noener above, but using SSE to calculate 4 dihedrals at once */
++static void
++pdihs_noener_sse(int nbonds,
++ const t_iatom forceatoms[],const t_iparams forceparams[],
++ const rvec x[],rvec f[],
++ const t_pbc *pbc,const t_graph *g,
++ real lambda,
++ const t_mdatoms *md,t_fcdata *fcd,
++ int *global_atom_index)
++{
++ int i,i4,s;
++ int type,ai[4],aj[4],ak[4],al[4];
++ int t1[4],t2[4],t3[4];
++ int mult[4];
++ real cp[4],mdphi[4];
++ real ddphi;
++ rvec_sse_t rs_array[13],*rs;
++ real buf_array[24],*buf;
++ __m128 mdphi_SSE,sin_SSE,cos_SSE;
++
++ /* Ensure 16-byte alignment */
++ rs = (rvec_sse_t *)(((size_t)(rs_array +1)) & (~((size_t)15)));
++ buf = (float *)(((size_t)(buf_array+3)) & (~((size_t)15)));
++
++ for(i=0; (i<nbonds); i+=20)
++ {
++ /* Collect atoms quadruplets for 4 dihedrals */
++ i4 = i;
++ for(s=0; s<4; s++)
++ {
++ ai[s] = forceatoms[i4+1];
++ aj[s] = forceatoms[i4+2];
++ ak[s] = forceatoms[i4+3];
++ al[s] = forceatoms[i4+4];
++ /* At the end fill the arrays with identical entries */
++ if (i4 + 5 < nbonds)
++ {
++ i4 += 5;
++ }
++ }
++
++        /* Calculate 4 dihedral angles at once */
++ dih_angle_sse(x,ai,aj,ak,al,pbc,t1,t2,t3,rs,buf);
++
++ i4 = i;
++ for(s=0; s<4; s++)
++ {
++ if (i4 < nbonds)
++ {
++ /* Calculate the coefficient and angle deviation */
++ type = forceatoms[i4];
++ dopdihs_mdphi(forceparams[type].pdihs.cpA,
++ forceparams[type].pdihs.cpB,
++ forceparams[type].pdihs.phiA,
++ forceparams[type].pdihs.phiB,
++ forceparams[type].pdihs.mult,
++ buf[16+s],lambda,&cp[s],&buf[16+s]);
++ mult[s] = forceparams[type].pdihs.mult;
++ }
++ else
++ {
++ buf[16+s] = 0;
++ }
++ i4 += 5;
++ }
++
++ /* Calculate 4 sines at once */
++ mdphi_SSE = _mm_load_ps(buf+16);
++ gmx_mm_sincos_ps(mdphi_SSE,&sin_SSE,&cos_SSE);
++ _mm_store_ps(buf+16,sin_SSE);
++
++ i4 = i;
++ s = 0;
++ do
++ {
++ ddphi = -cp[s]*mult[s]*buf[16+s];
++
++ do_dih_fup_noshiftf_precalc(ai[s],aj[s],ak[s],al[s],ddphi,
++ buf[ 0+s],buf[ 4+s],
++ buf[ 8+s],buf[12+s],
++ rs[0+s].v,rs[4+s].v,
++ f);
++ s++;
++ i4 += 5;
++ }
++ while (s < 4 && i4 < nbonds);
++ }
++}
++
++#endif /* SSE_PROPER_DIHEDRALS */
++
+
+real idihs(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,type,ai,aj,ak,al;
+ int t1,t2,t3;
+ real phi,phi0,dphi0,ddphi,sign,vtot;
+ rvec r_ij,r_kj,r_kl,m,n;
+ real L1,kk,dp,dp2,kA,kB,pA,pB,dvdl_term;
+
+ L1 = 1.0-lambda;
+ dvdl_term = 0;
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+
+ phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
+ &sign,&t1,&t2,&t3); /* 84 */
+
+ /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
+ * force changes if we just apply a normal harmonic.
+ * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
+ * This means we will never have the periodicity problem, unless
+     * the dihedral is Pi away from phi0, which is very unlikely due to
+ * the potential.
+ */
+ kA = forceparams[type].harmonic.krA;
+ kB = forceparams[type].harmonic.krB;
+ pA = forceparams[type].harmonic.rA;
+ pB = forceparams[type].harmonic.rB;
+
+ kk = L1*kA + lambda*kB;
+ phi0 = (L1*pA + lambda*pB)*DEG2RAD;
+ dphi0 = (pB - pA)*DEG2RAD;
+
+ dp = phi-phi0;
+
+ make_dp_periodic(&dp);
+
+ dp2 = dp*dp;
+
+ vtot += 0.5*kk*dp2;
+ ddphi = -kk*dp;
+
+ dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
+
+ do_dih_fup(ai,aj,ak,al,(real)(-ddphi),r_ij,r_kj,r_kl,m,n,
+ f,fshift,pbc,g,x,t1,t2,t3); /* 112 */
+ /* 218 TOTAL */
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"idih: (%d,%d,%d,%d) phi=%g\n",
+ ai,aj,ak,al,phi);
+#endif
+ }
+
+ *dvdlambda += dvdl_term;
+ return vtot;
+}
+
+
+/*! \brief returns dx, rdist, and dpdl for functions posres() and fbposres()
+ */
+static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
+ const rvec comA_sc, const rvec comB_sc,
+ real lambda,
+ t_pbc *pbc, int refcoord_scaling,int npbcdim,
+ rvec dx, rvec rdist, rvec dpdl)
+{
+ int m,d;
+ real posA, posB, L1, ref=0.;
+ rvec pos;
+
+ L1=1.0-lambda;
+
+ for(m=0; m<DIM; m++)
+ {
+ posA = pos0A[m];
+ posB = pos0B[m];
+ if (m < npbcdim)
+ {
+ switch (refcoord_scaling)
+ {
+ case erscNO:
+ ref = 0;
+ rdist[m] = L1*posA + lambda*posB;
+ dpdl[m] = posB - posA;
+ break;
+ case erscALL:
+ /* Box relative coordinates are stored for dimensions with pbc */
+ posA *= pbc->box[m][m];
+ posB *= pbc->box[m][m];
+ for(d=m+1; d<npbcdim; d++)
+ {
+ posA += pos0A[d]*pbc->box[d][m];
+ posB += pos0B[d]*pbc->box[d][m];
+ }
+ ref = L1*posA + lambda*posB;
+ rdist[m] = 0;
+ dpdl[m] = posB - posA;
+ break;
+ case erscCOM:
+ ref = L1*comA_sc[m] + lambda*comB_sc[m];
+ rdist[m] = L1*posA + lambda*posB;
+ dpdl[m] = comB_sc[m] - comA_sc[m] + posB - posA;
+ break;
+ default:
+ gmx_fatal(FARGS, "No such scaling method implemented");
+ }
+ }
+ else
+ {
+ ref = L1*posA + lambda*posB;
+ rdist[m] = 0;
+ dpdl[m] = posB - posA;
+ }
+
+ /* We do pbc_dx with ref+rdist,
+ * since with only ref we can be up to half a box vector wrong.
+ */
+ pos[m] = ref + rdist[m];
+ }
+
+ if (pbc)
+ {
+ pbc_dx(pbc,x,pos,dx);
+ }
+ else
+ {
+ rvec_sub(x,pos,dx);
+ }
+}
+
+/*! \brief Adds forces of flat-bottomed position restraints to f[]
+ * and fixes vir_diag. Returns the flat-bottomed potential. */
+real fbposres(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec vir_diag,
+ t_pbc *pbc,
+ int refcoord_scaling,int ePBC,rvec com)
+/* compute flat-bottomed position restraints */
+{
+ int i,ai,m,d,type,npbcdim=0,fbdim;
+ const t_iparams *pr;
+ real vtot,kk,v;
+ real ref=0,dr,dr2,rpot,rfb,rfb2,fact,invdr;
+ rvec com_sc,rdist,pos,dx,dpdl,fm;
+ gmx_bool bInvert;
+
+ npbcdim = ePBC2npbcdim(ePBC);
+
+ if (refcoord_scaling == erscCOM)
+ {
+ clear_rvec(com_sc);
+ for(m=0; m<npbcdim; m++)
+ {
+ for(d=m; d<npbcdim; d++)
+ {
+ com_sc[m] += com[d]*pbc->box[d][m];
+ }
+ }
+ }
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ pr = &forceparams[type];
+
+ /* same calculation as for normal posres, but with identical A and B states, and lambda==0 */
+ posres_dx(x[ai],forceparams[type].fbposres.pos0, forceparams[type].fbposres.pos0,
+ com_sc, com_sc, 0.0,
+ pbc, refcoord_scaling, npbcdim,
+ dx, rdist, dpdl);
+
+ clear_rvec(fm);
+ v=0.0;
+
+ kk=pr->fbposres.k;
+ rfb=pr->fbposres.r;
+ rfb2=sqr(rfb);
+
+ /* with rfb<0, push particle out of the sphere/cylinder/layer */
+ bInvert=FALSE;
+ if (rfb<0.){
+ bInvert=TRUE;
+ rfb=-rfb;
+ }
+
+ switch (pr->fbposres.geom)
+ {
+ case efbposresSPHERE:
+ /* spherical flat-bottom posres */
+ dr2=norm2(dx);
+ if ( dr2 > 0.0 &&
+ ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
+ )
+ {
+ dr=sqrt(dr2);
+ v = 0.5*kk*sqr(dr - rfb);
+ fact = -kk*(dr-rfb)/dr; /* Force pointing to the center pos0 */
+ svmul(fact,dx,fm);
+ }
+ break;
+ case efbposresCYLINDER:
+            /* cylindrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
+ dr2=sqr(dx[XX])+sqr(dx[YY]);
+ if ( dr2 > 0.0 &&
+ ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
+ )
+ {
+ dr=sqrt(dr2);
+ invdr=1./dr;
+ v = 0.5*kk*sqr(dr - rfb);
+ fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr; /* Force pointing to the center */
+ fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
+ }
+ break;
+ case efbposresX: /* fbdim=XX */
+ case efbposresY: /* fbdim=YY */
+ case efbposresZ: /* fbdim=ZZ */
+ /* 1D flat-bottom potential */
+ fbdim = pr->fbposres.geom - efbposresX;
+ dr=dx[fbdim];
+ if ( ( dr>rfb && bInvert==FALSE ) || ( 0<dr && dr<rfb && bInvert==TRUE ) )
+ {
+ v = 0.5*kk*sqr(dr - rfb);
+ fm[fbdim] = -kk*(dr - rfb);
+ }
+ else if ( (dr < (-rfb) && bInvert==FALSE ) || ( (-rfb)<dr && dr<0 && bInvert==TRUE ))
+ {
+ v = 0.5*kk*sqr(dr + rfb);
+ fm[fbdim] = -kk*(dr + rfb);
+ }
+ break;
+ }
+
+ vtot += v;
+
+ for (m=0; (m<DIM); m++)
+ {
+ f[ai][m] += fm[m];
+ /* Here we correct for the pbc_dx which included rdist */
+ vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm[m];
+ }
+ }
+
+ return vtot;
+}
+
+
+real posres(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec vir_diag,
+ t_pbc *pbc,
+ real lambda,real *dvdlambda,
+ int refcoord_scaling,int ePBC,rvec comA,rvec comB)
+{
+ int i,ai,m,d,type,ki,npbcdim=0;
+ const t_iparams *pr;
+ real L1;
+ real vtot,kk,fm;
+ real posA,posB,ref=0;
+ rvec comA_sc,comB_sc,rdist,dpdl,pos,dx;
+ gmx_bool bForceValid = TRUE;
+
+ if ((f==NULL) || (vir_diag==NULL)) { /* should both be null together! */
+ bForceValid = FALSE;
+ }
+
+ npbcdim = ePBC2npbcdim(ePBC);
+
+ if (refcoord_scaling == erscCOM)
+ {
+ clear_rvec(comA_sc);
+ clear_rvec(comB_sc);
+ for(m=0; m<npbcdim; m++)
+ {
+ for(d=m; d<npbcdim; d++)
+ {
+ comA_sc[m] += comA[d]*pbc->box[d][m];
+ comB_sc[m] += comB[d]*pbc->box[d][m];
+ }
+ }
+ }
+
+ L1 = 1.0 - lambda;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ pr = &forceparams[type];
+
+ /* return dx, rdist, and dpdl */
+ posres_dx(x[ai],forceparams[type].posres.pos0A, forceparams[type].posres.pos0B,
+ comA_sc, comB_sc, lambda,
+ pbc, refcoord_scaling, npbcdim,
+ dx, rdist, dpdl);
+
+ for (m=0; (m<DIM); m++)
+ {
+ kk = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
+ fm = -kk*dx[m];
+ vtot += 0.5*kk*dx[m]*dx[m];
+ *dvdlambda +=
+ 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
+ -fm*dpdl[m];
+
+ /* Here we correct for the pbc_dx which included rdist */
+ if (bForceValid) {
+ f[ai][m] += fm;
+ vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
+ }
+ }
+ }
+
+ return vtot;
+}
+
+static real low_angres(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ gmx_bool bZAxis)
+{
+ int i,m,type,ai,aj,ak,al;
+ int t1,t2;
+ real phi,cos_phi,cos_phi2,vid,vtot,dVdphi;
+ rvec r_ij,r_kl,f_i,f_k={0,0,0};
+ real st,sth,nrij2,nrkl2,c,cij,ckl;
+
+ ivec dt;
+ t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
+
+ vtot = 0.0;
+ ak=al=0; /* to avoid warnings */
+ for(i=0; i<nbonds; ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ t1 = pbc_rvec_sub(pbc,x[aj],x[ai],r_ij); /* 3 */
+ if (!bZAxis) {
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+ t2 = pbc_rvec_sub(pbc,x[al],x[ak],r_kl); /* 3 */
+ } else {
+ r_kl[XX] = 0;
+ r_kl[YY] = 0;
+ r_kl[ZZ] = 1;
+ }
+
+ cos_phi = cos_angle(r_ij,r_kl); /* 25 */
+ phi = acos(cos_phi); /* 10 */
+
+ *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
+ forceparams[type].pdihs.cpB,
+ forceparams[type].pdihs.phiA,
+ forceparams[type].pdihs.phiB,
+ forceparams[type].pdihs.mult,
+ phi,lambda,&vid,&dVdphi); /* 40 */
+
+ vtot += vid;
+
+ cos_phi2 = sqr(cos_phi); /* 1 */
+ if (cos_phi2 < 1) {
+ st = -dVdphi*gmx_invsqrt(1 - cos_phi2); /* 12 */
+ sth = st*cos_phi; /* 1 */
+ nrij2 = iprod(r_ij,r_ij); /* 5 */
+ nrkl2 = iprod(r_kl,r_kl); /* 5 */
+
+ c = st*gmx_invsqrt(nrij2*nrkl2); /* 11 */
+ cij = sth/nrij2; /* 10 */
+ ckl = sth/nrkl2; /* 10 */
+
+ for (m=0; m<DIM; m++) { /* 18+18 */
+ f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
+ f[ai][m] += f_i[m];
+ f[aj][m] -= f_i[m];
+ if (!bZAxis) {
+ f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
+ f[ak][m] += f_k[m];
+ f[al][m] -= f_k[m];
+ }
+ }
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ t1=IVEC2IS(dt);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_dec(fshift[CENTRAL],f_i);
+ if (!bZAxis) {
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,al),dt);
+ t2=IVEC2IS(dt);
+ }
+ rvec_inc(fshift[t2],f_k);
+ rvec_dec(fshift[CENTRAL],f_k);
+ }
+ }
+ }
+
+ return vtot; /* 184 / 157 (bZAxis) total */
+}
+
+real angres(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
+ lambda,dvdlambda,FALSE);
+}
+
+real angresz(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
+ lambda,dvdlambda,TRUE);
+}
+
+real dihres(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ real vtot = 0;
+ int ai,aj,ak,al,i,k,type,t1,t2,t3;
+ real phi0A,phi0B,dphiA,dphiB,kfacA,kfacB,phi0,dphi,kfac;
+ real phi,ddphi,ddp,ddp2,dp,sign,d2r,fc,L1;
+ rvec r_ij,r_kj,r_kl,m,n;
+
+ L1 = 1.0-lambda;
+
+ d2r = DEG2RAD;
+ k = 0;
+
+ for (i=0; (i<nbonds); )
+ {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+
+ phi0A = forceparams[type].dihres.phiA*d2r;
+ dphiA = forceparams[type].dihres.dphiA*d2r;
+ kfacA = forceparams[type].dihres.kfacA;
+
+ phi0B = forceparams[type].dihres.phiB*d2r;
+ dphiB = forceparams[type].dihres.dphiB*d2r;
+ kfacB = forceparams[type].dihres.kfacB;
+
+ phi0 = L1*phi0A + lambda*phi0B;
+ dphi = L1*dphiA + lambda*dphiB;
+ kfac = L1*kfacA + lambda*kfacB;
+
+ phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
+ &sign,&t1,&t2,&t3);
+ /* 84 flops */
+
+ if (debug)
+ {
+ fprintf(debug,"dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
+ k++,ai,aj,ak,al,phi0,dphi,kfac);
+ }
+ /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
+ * force changes if we just apply a normal harmonic.
+ * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
+ * This means we will never have the periodicity problem, unless
+         * the dihedral is Pi away from phi0, which is very unlikely due to
+ * the potential.
+ */
+ dp = phi-phi0;
+ make_dp_periodic(&dp);
+
+ if (dp > dphi)
+ {
+ ddp = dp-dphi;
+ }
+ else if (dp < -dphi)
+ {
+ ddp = dp+dphi;
+ }
+ else
+ {
+ ddp = 0;
+ }
+
+ if (ddp != 0.0)
+ {
+ ddp2 = ddp*ddp;
+ vtot += 0.5*kfac*ddp2;
+ ddphi = kfac*ddp;
+
+ *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
+ /* lambda dependence from changing restraint distances */
+ if (ddp > 0)
+ {
+ *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
+ }
+ else if (ddp < 0)
+ {
+ *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
+ }
+ do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
+ f,fshift,pbc,g,x,t1,t2,t3); /* 112 */
+ }
+ }
+ return vtot;
+}
+
+
+real unimplemented(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ gmx_impl("*** you are using a not implemented function");
+
+ return 0.0; /* To make the compiler happy */
+}
+
+real rbdihs(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ const real c0=0.0,c1=1.0,c2=2.0,c3=3.0,c4=4.0,c5=5.0;
+ int type,ai,aj,ak,al,i,j;
+ int t1,t2,t3;
+ rvec r_ij,r_kj,r_kl,m,n;
+ real parmA[NR_RBDIHS];
+ real parmB[NR_RBDIHS];
+ real parm[NR_RBDIHS];
+ real cos_phi,phi,rbp,rbpBA;
+ real v,sign,ddphi,sin_phi;
+ real cosfac,vtot;
+ real L1 = 1.0-lambda;
+ real dvdl_term=0;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+
+ phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
+ &sign,&t1,&t2,&t3); /* 84 */
+
+ /* Change to polymer convention */
+ if (phi < c0)
+ phi += M_PI;
+ else
+ phi -= M_PI; /* 1 */
+
+ cos_phi = cos(phi);
+ /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
+ sin_phi = sin(phi);
+
+ for(j=0; (j<NR_RBDIHS); j++) {
+ parmA[j] = forceparams[type].rbdihs.rbcA[j];
+ parmB[j] = forceparams[type].rbdihs.rbcB[j];
+ parm[j] = L1*parmA[j]+lambda*parmB[j];
+ }
+ /* Calculate cosine powers */
+ /* Calculate the energy */
+ /* Calculate the derivative */
+
+ v = parm[0];
+ dvdl_term += (parmB[0]-parmA[0]);
+ ddphi = c0;
+ cosfac = c1;
+
+ rbp = parm[1];
+ rbpBA = parmB[1]-parmA[1];
+ ddphi += rbp*cosfac;
+ cosfac *= cos_phi;
+ v += cosfac*rbp;
+ dvdl_term += cosfac*rbpBA;
+ rbp = parm[2];
+ rbpBA = parmB[2]-parmA[2];
+ ddphi += c2*rbp*cosfac;
+ cosfac *= cos_phi;
+ v += cosfac*rbp;
+ dvdl_term += cosfac*rbpBA;
+ rbp = parm[3];
+ rbpBA = parmB[3]-parmA[3];
+ ddphi += c3*rbp*cosfac;
+ cosfac *= cos_phi;
+ v += cosfac*rbp;
+ dvdl_term += cosfac*rbpBA;
+ rbp = parm[4];
+ rbpBA = parmB[4]-parmA[4];
+ ddphi += c4*rbp*cosfac;
+ cosfac *= cos_phi;
+ v += cosfac*rbp;
+ dvdl_term += cosfac*rbpBA;
+ rbp = parm[5];
+ rbpBA = parmB[5]-parmA[5];
+ ddphi += c5*rbp*cosfac;
+ cosfac *= cos_phi;
+ v += cosfac*rbp;
+ dvdl_term += cosfac*rbpBA;
+
+ ddphi = -ddphi*sin_phi; /* 11 */
+
+ do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
+ f,fshift,pbc,g,x,t1,t2,t3); /* 112 */
+ vtot += v;
+ }
+ *dvdlambda += dvdl_term;
+
+ return vtot;
+}
+
+int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
+{
+ int im1, ip1, ip2;
+
+ if(ip<0)
+ {
+ ip = ip + grid_spacing - 1;
+ }
+ else if(ip > grid_spacing)
+ {
+ ip = ip - grid_spacing - 1;
+ }
+
+ im1 = ip - 1;
+ ip1 = ip + 1;
+ ip2 = ip + 2;
+
+ if(ip == 0)
+ {
+ im1 = grid_spacing - 1;
+ }
+ else if(ip == grid_spacing-2)
+ {
+ ip2 = 0;
+ }
+ else if(ip == grid_spacing-1)
+ {
+ ip1 = 0;
+ ip2 = 1;
+ }
+
+ *ipm1 = im1;
+ *ipp1 = ip1;
+ *ipp2 = ip2;
+
+ return ip;
+
+}
+
+real cmap_dihs(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const gmx_cmap_t *cmap_grid,
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,j,k,n,idx;
+ int ai,aj,ak,al,am;
+ int a1i,a1j,a1k,a1l,a2i,a2j,a2k,a2l;
+ int type,cmapA;
+ int t11,t21,t31,t12,t22,t32;
+ int iphi1,ip1m1,ip1p1,ip1p2;
+ int iphi2,ip2m1,ip2p1,ip2p2;
+ int l1,l2,l3,l4;
+ int pos1,pos2,pos3,pos4,tmp;
+
+ real ty[4],ty1[4],ty2[4],ty12[4],tc[16],tx[16];
+ real phi1,psi1,cos_phi1,sin_phi1,sign1,xphi1;
+ real phi2,psi2,cos_phi2,sin_phi2,sign2,xphi2;
+ real dx,xx,tt,tu,e,df1,df2,ddf1,ddf2,ddf12,vtot;
+ real ra21,rb21,rg21,rg1,rgr1,ra2r1,rb2r1,rabr1;
+ real ra22,rb22,rg22,rg2,rgr2,ra2r2,rb2r2,rabr2;
+ real fg1,hg1,fga1,hgb1,gaa1,gbb1;
+ real fg2,hg2,fga2,hgb2,gaa2,gbb2;
+ real fac;
+
+ rvec r1_ij, r1_kj, r1_kl,m1,n1;
+ rvec r2_ij, r2_kj, r2_kl,m2,n2;
+ rvec f1_i,f1_j,f1_k,f1_l;
+ rvec f2_i,f2_j,f2_k,f2_l;
+ rvec a1,b1,a2,b2;
+ rvec f1,g1,h1,f2,g2,h2;
+ rvec dtf1,dtg1,dth1,dtf2,dtg2,dth2;
+ ivec jt1,dt1_ij,dt1_kj,dt1_lj;
+ ivec jt2,dt2_ij,dt2_kj,dt2_lj;
+
+ const real *cmapd;
+
+ int loop_index[4][4] = {
+ {0,4,8,12},
+ {1,5,9,13},
+ {2,6,10,14},
+ {3,7,11,15}
+ };
+
+ /* Total CMAP energy */
+ vtot = 0;
+
+ for(n=0;n<nbonds; )
+ {
+ /* Five atoms are involved in the two torsions */
+ type = forceatoms[n++];
+ ai = forceatoms[n++];
+ aj = forceatoms[n++];
+ ak = forceatoms[n++];
+ al = forceatoms[n++];
+ am = forceatoms[n++];
+
+ /* Which CMAP type is this */
+ cmapA = forceparams[type].cmap.cmapA;
+ cmapd = cmap_grid->cmapdata[cmapA].cmap;
+
+ /* First torsion */
+ a1i = ai;
+ a1j = aj;
+ a1k = ak;
+ a1l = al;
+
+ phi1 = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
+ &sign1, &t11, &t21, &t31); /* 84 */
+
+ cos_phi1 = cos(phi1);
+
+ a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
+ a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
+ a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
+
+ b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
+ b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
+ b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
+
+ tmp = pbc_rvec_sub(pbc,x[a1l],x[a1k],h1);
+
+ ra21 = iprod(a1,a1); /* 5 */
+ rb21 = iprod(b1,b1); /* 5 */
+ rg21 = iprod(r1_kj,r1_kj); /* 5 */
+ rg1 = sqrt(rg21);
+
+ rgr1 = 1.0/rg1;
+ ra2r1 = 1.0/ra21;
+ rb2r1 = 1.0/rb21;
+ rabr1 = sqrt(ra2r1*rb2r1);
+
+ sin_phi1 = rg1 * rabr1 * iprod(a1,h1) * (-1);
+
+ if(cos_phi1 < -0.5 || cos_phi1 > 0.5)
+ {
+ phi1 = asin(sin_phi1);
+
+ if(cos_phi1 < 0)
+ {
+ if(phi1 > 0)
+ {
+ phi1 = M_PI - phi1;
+ }
+ else
+ {
+ phi1 = -M_PI - phi1;
+ }
+ }
+ }
+ else
+ {
+ phi1 = acos(cos_phi1);
+
+ if(sin_phi1 < 0)
+ {
+ phi1 = -phi1;
+ }
+ }
+
+ xphi1 = phi1 + M_PI; /* 1 */
+
+ /* Second torsion */
+ a2i = aj;
+ a2j = ak;
+ a2k = al;
+ a2l = am;
+
+ phi2 = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
+ &sign2, &t12, &t22, &t32); /* 84 */
+
+ cos_phi2 = cos(phi2);
+
+ a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
+ a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
+ a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
+
+ b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
+ b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
+ b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
+
+ tmp = pbc_rvec_sub(pbc,x[a2l],x[a2k],h2);
+
+ ra22 = iprod(a2,a2); /* 5 */
+ rb22 = iprod(b2,b2); /* 5 */
+ rg22 = iprod(r2_kj,r2_kj); /* 5 */
+ rg2 = sqrt(rg22);
+
+ rgr2 = 1.0/rg2;
+ ra2r2 = 1.0/ra22;
+ rb2r2 = 1.0/rb22;
+ rabr2 = sqrt(ra2r2*rb2r2);
+
+ sin_phi2 = rg2 * rabr2 * iprod(a2,h2) * (-1);
+
+ if(cos_phi2 < -0.5 || cos_phi2 > 0.5)
+ {
+ phi2 = asin(sin_phi2);
+
+ if(cos_phi2 < 0)
+ {
+ if(phi2 > 0)
+ {
+ phi2 = M_PI - phi2;
+ }
+ else
+ {
+ phi2 = -M_PI - phi2;
+ }
+ }
+ }
+ else
+ {
+ phi2 = acos(cos_phi2);
+
+ if(sin_phi2 < 0)
+ {
+ phi2 = -phi2;
+ }
+ }
+
+ xphi2 = phi2 + M_PI; /* 1 */
+
+ /* Range mangling */
+ if(xphi1<0)
+ {
+ xphi1 = xphi1 + 2*M_PI;
+ }
+ else if(xphi1>=2*M_PI)
+ {
+ xphi1 = xphi1 - 2*M_PI;
+ }
+
+ if(xphi2<0)
+ {
+ xphi2 = xphi2 + 2*M_PI;
+ }
+ else if(xphi2>=2*M_PI)
+ {
+ xphi2 = xphi2 - 2*M_PI;
+ }
+
+ /* Number of grid points */
+ dx = 2*M_PI / cmap_grid->grid_spacing;
+
+ /* Where on the grid are we */
+ iphi1 = (int)(xphi1/dx);
+ iphi2 = (int)(xphi2/dx);
+
+ iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1,&ip1p1,&ip1p2);
+ iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1,&ip2p1,&ip2p2);
+
+ pos1 = iphi1*cmap_grid->grid_spacing+iphi2;
+ pos2 = ip1p1*cmap_grid->grid_spacing+iphi2;
+ pos3 = ip1p1*cmap_grid->grid_spacing+ip2p1;
+ pos4 = iphi1*cmap_grid->grid_spacing+ip2p1;
+
+ ty[0] = cmapd[pos1*4];
+ ty[1] = cmapd[pos2*4];
+ ty[2] = cmapd[pos3*4];
+ ty[3] = cmapd[pos4*4];
+
+ ty1[0] = cmapd[pos1*4+1];
+ ty1[1] = cmapd[pos2*4+1];
+ ty1[2] = cmapd[pos3*4+1];
+ ty1[3] = cmapd[pos4*4+1];
+
+ ty2[0] = cmapd[pos1*4+2];
+ ty2[1] = cmapd[pos2*4+2];
+ ty2[2] = cmapd[pos3*4+2];
+ ty2[3] = cmapd[pos4*4+2];
+
+ ty12[0] = cmapd[pos1*4+3];
+ ty12[1] = cmapd[pos2*4+3];
+ ty12[2] = cmapd[pos3*4+3];
+ ty12[3] = cmapd[pos4*4+3];
+
+ /* Switch to degrees */
+ dx = 360.0 / cmap_grid->grid_spacing;
+ xphi1 = xphi1 * RAD2DEG;
+ xphi2 = xphi2 * RAD2DEG;
+
+ for(i=0;i<4;i++) /* 16 */
+ {
+ tx[i] = ty[i];
+ tx[i+4] = ty1[i]*dx;
+ tx[i+8] = ty2[i]*dx;
+ tx[i+12] = ty12[i]*dx*dx;
+ }
+
+ idx=0;
+ for(i=0;i<4;i++) /* 1056 */
+ {
+ for(j=0;j<4;j++)
+ {
+ xx = 0;
+ for(k=0;k<16;k++)
+ {
+ xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
+ }
+
+ idx++;
+ tc[i*4+j]=xx;
+ }
+ }
+
+ tt = (xphi1-iphi1*dx)/dx;
+ tu = (xphi2-iphi2*dx)/dx;
+
+ e = 0;
+ df1 = 0;
+ df2 = 0;
+ ddf1 = 0;
+ ddf2 = 0;
+ ddf12 = 0;
+
+ for(i=3;i>=0;i--)
+ {
+ l1 = loop_index[i][3];
+ l2 = loop_index[i][2];
+ l3 = loop_index[i][1];
+
+ e = tt * e + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
+ df1 = tu * df1 + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
+ df2 = tt * df2 + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
+ ddf1 = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
+ ddf2 = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
+ }
+
+ ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
+ 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
+
+ fac = RAD2DEG/dx;
+ df1 = df1 * fac;
+ df2 = df2 * fac;
+ ddf1 = ddf1 * fac * fac;
+ ddf2 = ddf2 * fac * fac;
+ ddf12 = ddf12 * fac * fac;
+
+ /* CMAP energy */
+ vtot += e;
+
+ /* Do forces - first torsion */
+ fg1 = iprod(r1_ij,r1_kj);
+ hg1 = iprod(r1_kl,r1_kj);
+ fga1 = fg1*ra2r1*rgr1;
+ hgb1 = hg1*rb2r1*rgr1;
+ gaa1 = -ra2r1*rg1;
+ gbb1 = rb2r1*rg1;
+
+ for(i=0;i<DIM;i++)
+ {
+ dtf1[i] = gaa1 * a1[i];
+ dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
+ dth1[i] = gbb1 * b1[i];
+
+ f1[i] = df1 * dtf1[i];
+ g1[i] = df1 * dtg1[i];
+ h1[i] = df1 * dth1[i];
+
+ f1_i[i] = f1[i];
+ f1_j[i] = -f1[i] - g1[i];
+ f1_k[i] = h1[i] + g1[i];
+ f1_l[i] = -h1[i];
+
+ f[a1i][i] = f[a1i][i] + f1_i[i];
+ f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
+ f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
+ f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
+ }
+
+ /* Do forces - second torsion */
+ fg2 = iprod(r2_ij,r2_kj);
+ hg2 = iprod(r2_kl,r2_kj);
+ fga2 = fg2*ra2r2*rgr2;
+ hgb2 = hg2*rb2r2*rgr2;
+ gaa2 = -ra2r2*rg2;
+ gbb2 = rb2r2*rg2;
+
+ for(i=0;i<DIM;i++)
+ {
+ dtf2[i] = gaa2 * a2[i];
+ dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
+ dth2[i] = gbb2 * b2[i];
+
+ f2[i] = df2 * dtf2[i];
+ g2[i] = df2 * dtg2[i];
+ h2[i] = df2 * dth2[i];
+
+ f2_i[i] = f2[i];
+ f2_j[i] = -f2[i] - g2[i];
+ f2_k[i] = h2[i] + g2[i];
+ f2_l[i] = -h2[i];
+
+ f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
+ f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
+ f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
+ f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
+ }
+
+ /* Shift forces */
+ if(g)
+ {
+ copy_ivec(SHIFT_IVEC(g,a1j), jt1);
+ ivec_sub(SHIFT_IVEC(g,a1i), jt1,dt1_ij);
+ ivec_sub(SHIFT_IVEC(g,a1k), jt1,dt1_kj);
+ ivec_sub(SHIFT_IVEC(g,a1l), jt1,dt1_lj);
+ t11 = IVEC2IS(dt1_ij);
+ t21 = IVEC2IS(dt1_kj);
+ t31 = IVEC2IS(dt1_lj);
+
+ copy_ivec(SHIFT_IVEC(g,a2j), jt2);
+ ivec_sub(SHIFT_IVEC(g,a2i), jt2,dt2_ij);
+ ivec_sub(SHIFT_IVEC(g,a2k), jt2,dt2_kj);
+ ivec_sub(SHIFT_IVEC(g,a2l), jt2,dt2_lj);
+ t12 = IVEC2IS(dt2_ij);
+ t22 = IVEC2IS(dt2_kj);
+ t32 = IVEC2IS(dt2_lj);
+ }
+ else if(pbc)
+ {
+ t31 = pbc_rvec_sub(pbc,x[a1l],x[a1j],h1);
+ t32 = pbc_rvec_sub(pbc,x[a2l],x[a2j],h2);
+ }
+ else
+ {
+ t31 = CENTRAL;
+ t32 = CENTRAL;
+ }
+
+ rvec_inc(fshift[t11],f1_i);
+ rvec_inc(fshift[CENTRAL],f1_j);
+ rvec_inc(fshift[t21],f1_k);
+ rvec_inc(fshift[t31],f1_l);
+
+ rvec_inc(fshift[t21],f2_i);
+ rvec_inc(fshift[CENTRAL],f2_j);
+ rvec_inc(fshift[t22],f2_k);
+ rvec_inc(fshift[t32],f2_l);
+ }
+ return vtot;
+}
+
+
+
+/***********************************************************
+ *
+ * G R O M O S 9 6 F U N C T I O N S
+ *
+ ***********************************************************/
+real g96harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
+ real *V,real *F)
+{
+ const real half=0.5;
+ real L1,kk,x0,dx,dx2;
+ real v,f,dvdlambda;
+
+ L1 = 1.0-lambda;
+ kk = L1*kA+lambda*kB;
+ x0 = L1*xA+lambda*xB;
+
+ dx = x-x0;
+ dx2 = dx*dx;
+
+ f = -kk*dx;
+ v = half*kk*dx2;
+ dvdlambda = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
+
+ *F = f;
+ *V = v;
+
+ return dvdlambda;
+
+ /* That was 21 flops */
+}
+
+real g96bonds(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ki,ai,aj,type;
+ real dr2,fbond,vbond,fij,vtot;
+ rvec dx;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+
+ *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
+ forceparams[type].harmonic.krB,
+ forceparams[type].harmonic.rA,
+ forceparams[type].harmonic.rB,
+ dr2,lambda,&vbond,&fbond);
+
+ vtot += 0.5*vbond; /* 1*/
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"G96-BONDS: dr = %10g vbond = %10g fbond = %10g\n",
+ sqrt(dr2),vbond,fbond);
+#endif
+
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 44 TOTAL */
+ return vtot;
+}
+
+real g96bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
+ rvec r_ij,rvec r_kj,
+ int *t1,int *t2)
+/* Return value is the angle between the bonds i-j and j-k */
+{
+ real costh;
+
+ *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij); /* 3 */
+ *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj); /* 3 */
+
+ costh=cos_angle(r_ij,r_kj); /* 25 */
+ /* 41 TOTAL */
+ return costh;
+}
+
+real g96angles(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,ai,aj,ak,type,m,t1,t2;
+ rvec r_ij,r_kj;
+ real cos_theta,dVdt,va,vtot;
+ real rij_1,rij_2,rkj_1,rkj_2,rijrkj_1;
+ rvec f_i,f_j,f_k;
+ ivec jt,dt_ij,dt_kj;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+
+ cos_theta = g96bond_angle(x[ai],x[aj],x[ak],pbc,r_ij,r_kj,&t1,&t2);
+
+ *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
+ forceparams[type].harmonic.krB,
+ forceparams[type].harmonic.rA,
+ forceparams[type].harmonic.rB,
+ cos_theta,lambda,&va,&dVdt);
+ vtot += va;
+
+ rij_1 = gmx_invsqrt(iprod(r_ij,r_ij));
+ rkj_1 = gmx_invsqrt(iprod(r_kj,r_kj));
+ rij_2 = rij_1*rij_1;
+ rkj_2 = rkj_1*rkj_1;
+ rijrkj_1 = rij_1*rkj_1; /* 23 */
+
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"G96ANGLES: costheta = %10g vth = %10g dV/dct = %10g\n",
+ cos_theta,va,dVdt);
+#endif
+ for (m=0; (m<DIM); m++) { /* 42 */
+ f_i[m]=dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
+ f_k[m]=dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
+ f_j[m]=-f_i[m]-f_k[m];
+ f[ai][m]+=f_i[m];
+ f[aj][m]+=f_j[m];
+ f[ak][m]+=f_k[m];
+ }
+
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,aj),jt);
+
+ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_inc(fshift[CENTRAL],f_j);
+ rvec_inc(fshift[t2],f_k); /* 9 */
+ /* 163 TOTAL */
+ }
+ return vtot;
+}
+
+real cross_bond_bond(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+    /* Potential from Lawrence and Skinner, Chem. Phys. Lett. 372 (2003)
+ * pp. 842-847
+ */
+ int i,ai,aj,ak,type,m,t1,t2;
+ rvec r_ij,r_kj;
+ real vtot,vrr,s1,s2,r1,r2,r1e,r2e,krr;
+ rvec f_i,f_j,f_k;
+ ivec jt,dt_ij,dt_kj;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ r1e = forceparams[type].cross_bb.r1e;
+ r2e = forceparams[type].cross_bb.r2e;
+ krr = forceparams[type].cross_bb.krr;
+
+ /* Compute distance vectors ... */
+ t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
+ t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
+
+ /* ... and their lengths */
+ r1 = norm(r_ij);
+ r2 = norm(r_kj);
+
+ /* Deviations from ideality */
+ s1 = r1-r1e;
+ s2 = r2-r2e;
+
+ /* Energy (can be negative!) */
+ vrr = krr*s1*s2;
+ vtot += vrr;
+
+ /* Forces */
+ svmul(-krr*s2/r1,r_ij,f_i);
+ svmul(-krr*s1/r2,r_kj,f_k);
+
+ for (m=0; (m<DIM); m++) { /* 12 */
+ f_j[m] = -f_i[m] - f_k[m];
+ f[ai][m] += f_i[m];
+ f[aj][m] += f_j[m];
+ f[ak][m] += f_k[m];
+ }
+
+ /* Virial stuff */
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,aj),jt);
+
+ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_inc(fshift[CENTRAL],f_j);
+ rvec_inc(fshift[t2],f_k); /* 9 */
+ /* 163 TOTAL */
+ }
+ return vtot;
+}
+
+real cross_bond_angle(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+    /* Potential from Lawrence and Skinner, Chem. Phys. Lett. 372 (2003)
+ * pp. 842-847
+ */
+ int i,ai,aj,ak,type,m,t1,t2,t3;
+ rvec r_ij,r_kj,r_ik;
+ real vtot,vrt,s1,s2,s3,r1,r2,r3,r1e,r2e,r3e,krt,k1,k2,k3;
+ rvec f_i,f_j,f_k;
+ ivec jt,dt_ij,dt_kj;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ r1e = forceparams[type].cross_ba.r1e;
+ r2e = forceparams[type].cross_ba.r2e;
+ r3e = forceparams[type].cross_ba.r3e;
+ krt = forceparams[type].cross_ba.krt;
+
+ /* Compute distance vectors ... */
+ t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
+ t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
+ t3 = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);
+
+ /* ... and their lengths */
+ r1 = norm(r_ij);
+ r2 = norm(r_kj);
+ r3 = norm(r_ik);
+
+ /* Deviations from ideality */
+ s1 = r1-r1e;
+ s2 = r2-r2e;
+ s3 = r3-r3e;
+
+ /* Energy (can be negative!) */
+ vrt = krt*s3*(s1+s2);
+ vtot += vrt;
+
+ /* Forces */
+ k1 = -krt*(s3/r1);
+ k2 = -krt*(s3/r2);
+ k3 = -krt*(s1+s2)/r3;
+ for(m=0; (m<DIM); m++) {
+ f_i[m] = k1*r_ij[m] + k3*r_ik[m];
+ f_k[m] = k2*r_kj[m] - k3*r_ik[m];
+ f_j[m] = -f_i[m] - f_k[m];
+ }
+
+ for (m=0; (m<DIM); m++) { /* 12 */
+ f[ai][m] += f_i[m];
+ f[aj][m] += f_j[m];
+ f[ak][m] += f_k[m];
+ }
+
+ /* Virial stuff */
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,aj),jt);
+
+ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_inc(fshift[CENTRAL],f_j);
+ rvec_inc(fshift[t2],f_k); /* 9 */
+ /* 163 TOTAL */
+ }
+ return vtot;
+}
+
+static real bonded_tab(const char *type,int table_nr,
+ const bondedtable_t *table,real kA,real kB,real r,
+ real lambda,real *V,real *F)
+{
+ real k,tabscale,*VFtab,rt,eps,eps2,Yt,Ft,Geps,Heps2,Fp,VV,FF;
+ int n0,nnn;
+ real v,f,dvdlambda;
+
+ k = (1.0 - lambda)*kA + lambda*kB;
+
+ tabscale = table->scale;
+ VFtab = table->tab;
+
+ rt = r*tabscale;
+ n0 = rt;
+ if (n0 >= table->n) {
+ gmx_fatal(FARGS,"A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
+ type,table_nr,r,n0,n0+1,table->n);
+ }
+ eps = rt - n0;
+ eps2 = eps*eps;
+ nnn = 4*n0;
+ Yt = VFtab[nnn];
+ Ft = VFtab[nnn+1];
+ Geps = VFtab[nnn+2]*eps;
+ Heps2 = VFtab[nnn+3]*eps2;
+ Fp = Ft + Geps + Heps2;
+ VV = Yt + Fp*eps;
+ FF = Fp + Geps + 2.0*Heps2;
+
+ *F = -k*FF*tabscale;
+ *V = k*VV;
+ dvdlambda = (kB - kA)*VV;
+
+ return dvdlambda;
+
+ /* That was 22 flops */
+}
+
+real tab_bonds(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,m,ki,ai,aj,type,table;
+ real dr,dr2,fbond,vbond,fij,vtot;
+ rvec dx;
+ ivec dt;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+
+ ki = pbc_rvec_sub(pbc,x[ai],x[aj],dx); /* 3 */
+ dr2 = iprod(dx,dx); /* 5 */
+ dr = dr2*gmx_invsqrt(dr2); /* 10 */
+
+ table = forceparams[type].tab.table;
+
+ *dvdlambda += bonded_tab("bond",table,
+ &fcd->bondtab[table],
+ forceparams[type].tab.kA,
+ forceparams[type].tab.kB,
+ dr,lambda,&vbond,&fbond); /* 22 */
+
+ if (dr2 == 0.0)
+ continue;
+
+
+ vtot += vbond;/* 1*/
+ fbond *= gmx_invsqrt(dr2); /* 6 */
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"TABBONDS: dr = %10g vbond = %10g fbond = %10g\n",
+ dr,vbond,fbond);
+#endif
+ if (g) {
+ ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+ ki=IVEC2IS(dt);
+ }
+ for (m=0; (m<DIM); m++) { /* 15 */
+ fij=fbond*dx[m];
+ f[ai][m]+=fij;
+ f[aj][m]-=fij;
+ fshift[ki][m]+=fij;
+ fshift[CENTRAL][m]-=fij;
+ }
+ } /* 62 TOTAL */
+ return vtot;
+}
+
+real tab_angles(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,ai,aj,ak,t1,t2,type,table;
+ rvec r_ij,r_kj;
+ real cos_theta,cos_theta2,theta,dVdt,va,vtot;
+ ivec jt,dt_ij,dt_kj;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+
+ theta = bond_angle(x[ai],x[aj],x[ak],pbc,
+ r_ij,r_kj,&cos_theta,&t1,&t2); /* 41 */
+
+ table = forceparams[type].tab.table;
+
+ *dvdlambda += bonded_tab("angle",table,
+ &fcd->angletab[table],
+ forceparams[type].tab.kA,
+ forceparams[type].tab.kB,
+ theta,lambda,&va,&dVdt); /* 22 */
+ vtot += va;
+
+ cos_theta2 = sqr(cos_theta); /* 1 */
+ if (cos_theta2 < 1) {
+ int m;
+ real snt,st,sth;
+ real cik,cii,ckk;
+ real nrkj2,nrij2;
+ rvec f_i,f_j,f_k;
+
+ st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
+ sth = st*cos_theta; /* 1 */
+#ifdef DEBUG
+ if (debug)
+ fprintf(debug,"ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
+ theta*RAD2DEG,va,dVdt);
+#endif
+ nrkj2=iprod(r_kj,r_kj); /* 5 */
+ nrij2=iprod(r_ij,r_ij);
+
+ cik=st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
+ cii=sth/nrij2; /* 10 */
+ ckk=sth/nrkj2; /* 10 */
+
+ for (m=0; (m<DIM); m++) { /* 39 */
+ f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
+ f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
+ f_j[m]=-f_i[m]-f_k[m];
+ f[ai][m]+=f_i[m];
+ f[aj][m]+=f_j[m];
+ f[ak][m]+=f_k[m];
+ }
+ if (g) {
+ copy_ivec(SHIFT_IVEC(g,aj),jt);
+
+ ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
+ ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
+ t1=IVEC2IS(dt_ij);
+ t2=IVEC2IS(dt_kj);
+ }
+ rvec_inc(fshift[t1],f_i);
+ rvec_inc(fshift[CENTRAL],f_j);
+ rvec_inc(fshift[t2],f_k);
+ } /* 169 TOTAL */
+ }
+ return vtot;
+}
+
+real tab_dihs(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ real lambda,real *dvdlambda,
+ const t_mdatoms *md,t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,type,ai,aj,ak,al,table;
+ int t1,t2,t3;
+ rvec r_ij,r_kj,r_kl,m,n;
+ real phi,sign,ddphi,vpd,vtot;
+
+ vtot = 0.0;
+ for(i=0; (i<nbonds); ) {
+ type = forceatoms[i++];
+ ai = forceatoms[i++];
+ aj = forceatoms[i++];
+ ak = forceatoms[i++];
+ al = forceatoms[i++];
+
+ phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
+ &sign,&t1,&t2,&t3); /* 84 */
+
+ table = forceparams[type].tab.table;
+
+ /* Hopefully phi+M_PI never results in values < 0 */
+ *dvdlambda += bonded_tab("dihedral",table,
+ &fcd->dihtab[table],
+ forceparams[type].tab.kA,
+ forceparams[type].tab.kB,
+ phi+M_PI,lambda,&vpd,&ddphi);
+
+ vtot += vpd;
+ do_dih_fup(ai,aj,ak,al,-ddphi,r_ij,r_kj,r_kl,m,n,
+ f,fshift,pbc,g,x,t1,t2,t3); /* 112 */
+
+#ifdef DEBUG
+ fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
+ ai,aj,ak,al,phi);
+#endif
+ } /* 227 TOTAL */
+
+ return vtot;
+}
+
- int ind,nat1,nbonds,efptFTYPE;
- real v=0;
- t_iatom *iatoms;
++static unsigned
++calc_bonded_reduction_mask(const t_idef *idef,
++ int shift,
++ int t,int nt)
+{
- if (IS_RESTRAINT_TYPE(ftype))
++ unsigned mask;
++ int ftype,nb,nat1,nb0,nb1,i,a;
+
- efptFTYPE = efptRESTRAINT;
++ mask = 0;
++
++ for(ftype=0; ftype<F_NRE; ftype++)
+ {
- else
++ if (interaction_function[ftype].flags & IF_BOND &&
++ !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
++ (ftype<F_GB12 || ftype>F_GB14))
++ {
++ nb = idef->il[ftype].nr;
++ if (nb > 0)
++ {
++ nat1 = interaction_function[ftype].nratoms + 1;
++
++ /* Divide this interaction equally over the threads.
++ * This is not stored: should match division in calc_bonds.
++ */
++ nb0 = (((nb/nat1)* t )/nt)*nat1;
++ nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
++
++ for(i=nb0; i<nb1; i+=nat1)
++ {
++ for(a=1; a<nat1; a++)
++ {
++ mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
++ }
++ }
++ }
++ }
+ }
- efptFTYPE = efptBONDED;
++
++ return mask;
++}
++
++void init_bonded_thread_force_reduction(t_forcerec *fr,
++ const t_idef *idef)
++{
++#define MAX_BLOCK_BITS 32
++ int t;
++ int ctot,c,b;
++
++ if (fr->nthreads <= 1)
+ {
- if (ftype<F_GB12 || ftype>F_GB14)
++ fr->red_nblock = 0;
++
++ return;
+ }
+
- if (interaction_function[ftype].flags & IF_BOND &&
- !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
++ /* We divide the force array in a maximum of 32 blocks.
++ * Minimum force block reduction size is 2^6=64.
++ */
++ fr->red_ashift = 6;
++ while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
+ {
- ind = interaction_function[ftype].nrnb_ind;
- nat1 = interaction_function[ftype].nratoms+1;
- nbonds = idef->il[ftype].nr;
- iatoms = idef->il[ftype].iatoms;
- if (nbonds > 0)
++ fr->red_ashift++;
++ }
++ if (debug)
++ {
++ fprintf(debug,"bonded force buffer block atom shift %d bits\n",
++ fr->red_ashift);
++ }
++
++ /* Determine to which blocks each thread's bonded force calculation
++ * contributes. Store this in a mask for each thread.
++ */
++#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
++ for(t=1; t<fr->nthreads; t++)
++ {
++ fr->f_t[t].red_mask =
++ calc_bonded_reduction_mask(idef,fr->red_ashift,t,fr->nthreads);
++ }
++
++ /* Determine the maximum number of blocks we need to reduce over */
++ fr->red_nblock = 0;
++ ctot = 0;
++ for(t=0; t<fr->nthreads; t++)
++ {
++ c = 0;
++ for(b=0; b<MAX_BLOCK_BITS; b++)
+ {
- if (ftype < F_LJ14 || ftype > F_LJC_PAIRS_NB)
++ if (fr->f_t[t].red_mask & (1U<<b))
+ {
- if(ftype==F_CMAP)
- {
- v = cmap_dihs(nbonds,iatoms,
- idef->iparams,&idef->cmap_grid,
- (const rvec*)x,f,fr->fshift,
- pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
- md,fcd,global_atom_index);
- }
- else
- {
- v = interaction_function[ftype].ifunc(nbonds,iatoms,
- idef->iparams,
- (const rvec*)x,f,fr->fshift,
- pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
- md,fcd,global_atom_index);
- }
- enerd->dvdl_nonlin[efptFTYPE] += dvdl[efptFTYPE];
- if (bPrintSepPot)
- {
- fprintf(fplog," %-23s #%4d V %12.5e dVdl %12.5e\n",
- interaction_function[ftype].longname,
- nbonds/nat1,v,lambda[efptFTYPE]);
- }
++ fr->red_nblock = max(fr->red_nblock,b+1);
++ c++;
++ }
++ }
++ if (debug)
++ {
++ fprintf(debug,"thread %d flags %x count %d\n",
++ t,fr->f_t[t].red_mask,c);
++ }
++ ctot += c;
++ }
++ if (debug)
++ {
++ fprintf(debug,"Number of blocks to reduce: %d of size %d\n",
++ fr->red_nblock,1<<fr->red_ashift);
++ fprintf(debug,"Reduction density %.2f density/#thread %.2f\n",
++ ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
++ ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
++ }
++}
++
++static void zero_thread_forces(f_thread_t *f_t,int n,
++ int nblock,int blocksize)
++{
++ int b,a0,a1,a,i,j;
++
++ if (n > f_t->f_nalloc)
++ {
++ f_t->f_nalloc = over_alloc_large(n);
++ srenew(f_t->f,f_t->f_nalloc);
++ }
++
++ if (f_t->red_mask != 0)
++ {
++ for(b=0; b<nblock; b++)
++ {
++ if (f_t->red_mask & (1U<<b))
++ {
++ a0 = b*blocksize;
++ a1 = min((b+1)*blocksize,n);
++ for(a=a0; a<a1; a++)
+ {
- else
++ clear_rvec(f_t->f[a]);
+ }
- v = do_listed_vdw_q(ftype,nbonds,iatoms,
- idef->iparams,
- (const rvec*)x,f,fr->fshift,
- pbc,g,lambda,dvdl,
- md,fr,&enerd->grpp,global_atom_index);
- enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
- enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
++ }
++ }
++ }
++ for(i=0; i<SHIFTS; i++)
++ {
++ clear_rvec(f_t->fshift[i]);
++ }
++ for(i=0; i<F_NRE; i++)
++ {
++ f_t->ener[i] = 0;
++ }
++ for(i=0; i<egNR; i++)
++ {
++ for(j=0; j<f_t->grpp.nener; j++)
++ {
++ f_t->grpp.ener[i][j] = 0;
++ }
++ }
++ for(i=0; i<efptNR; i++)
++ {
++ f_t->dvdl[i] = 0;
++ }
++}
++
++static void reduce_thread_force_buffer(int n,rvec *f,
++ int nthreads,f_thread_t *f_t,
++ int nblock,int block_size)
++{
++ /* The max thread number is arbitrary,
++ * we use a fixed number to avoid memory management.
++ * Using more than MAX_BONDED_THREADS (256) threads is probably
++ * never useful performance wise.
++ */
++#define MAX_BONDED_THREADS 256
++ int b;
++
++ if (nthreads > MAX_BONDED_THREADS)
++ {
++ gmx_fatal(FARGS,"Can not reduce bonded forces on more than %d threads",
++ MAX_BONDED_THREADS);
++ }
++
++ /* This reduction can run on any number of threads,
++ * independently of nthreads.
++ */
++#pragma omp parallel for num_threads(nthreads) schedule(static)
++ for(b=0; b<nblock; b++)
++ {
++ rvec *fp[MAX_BONDED_THREADS];
++ int nfb,ft,fb;
++ int a0,a1,a;
++
++ /* Determine which threads contribute to this block */
++ nfb = 0;
++ for(ft=1; ft<nthreads; ft++)
++ {
++ if (f_t[ft].red_mask & (1U<<b))
++ {
++ fp[nfb++] = f_t[ft].f;
++ }
++ }
++ if (nfb > 0)
++ {
++ /* Reduce force buffers for threads that contribute */
++ a0 = b *block_size;
++ a1 = (b+1)*block_size;
++ a1 = min(a1,n);
++ for(a=a0; a<a1; a++)
++ {
++ for(fb=0; fb<nfb; fb++)
+ {
- if (bPrintSepPot)
- {
- fprintf(fplog," %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_LJ14].longname,nbonds/nat1,dvdl[efptVDW]);
- fprintf(fplog," %-5s + %-15s #%4d dVdl %12.5e\n",
- interaction_function[ftype].longname,
- interaction_function[F_COUL14].longname,nbonds/nat1,dvdl[efptCOUL]);
- }
++ rvec_inc(f[a],fp[fb][a]);
++ }
++ }
++ }
++ }
++}
+
- if (ind != -1)
++static void reduce_thread_forces(int n,rvec *f,rvec *fshift,
++ real *ener,gmx_grppairener_t *grpp,real *dvdl,
++ int nthreads,f_thread_t *f_t,
++ int nblock,int block_size,
++ gmx_bool bCalcEnerVir,
++ gmx_bool bDHDL)
++{
++ if (nblock > 0)
++ {
++ /* Reduce the bonded force buffer */
++ reduce_thread_force_buffer(n,f,nthreads,f_t,nblock,block_size);
++ }
++
++ /* When necessary, reduce energy and virial using one thread only */
++ if (bCalcEnerVir)
++ {
++ int t,i,j;
++
++ for(i=0; i<SHIFTS; i++)
++ {
++ for(t=1; t<nthreads; t++)
++ {
++ rvec_inc(fshift[i],f_t[t].fshift[i]);
++ }
++ }
++ for(i=0; i<F_NRE; i++)
++ {
++ for(t=1; t<nthreads; t++)
++ {
++ ener[i] += f_t[t].ener[i];
++ }
++ }
++ for(i=0; i<egNR; i++)
++ {
++ for(j=0; j<f_t[1].grpp.nener; j++)
++ {
++ for(t=1; t<nthreads; t++)
++ {
++
++ grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
+ }
- inc_nrnb(nrnb,ind,nbonds/nat1);
++ }
++ }
++ if (bDHDL)
++ {
++ for(i=0; i<efptNR; i++)
++ {
++
++ for(t=1; t<nthreads; t++)
+ {
- /* WARNING! THIS FUNCTION MUST EXACTLY TRACK THE calc_one_bond
++ dvdl[i] += f_t[t].dvdl[i];
+ }
+ }
+ }
+ }
++}
++
++static real calc_one_bond(FILE *fplog,int thread,
++ int ftype,const t_idef *idef,
++ rvec x[], rvec f[], rvec fshift[],
++ t_forcerec *fr,
++ const t_pbc *pbc,const t_graph *g,
++ gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
++ t_nrnb *nrnb,
++ real *lambda, real *dvdl,
++ const t_mdatoms *md,t_fcdata *fcd,
++ gmx_bool bCalcEnerVir,
++ int *global_atom_index, gmx_bool bPrintSepPot)
++{
++ int ind,nat1,nbonds,efptFTYPE;
++ real v=0;
++ t_iatom *iatoms;
++ int nb0,nbn;
++
++ if (IS_RESTRAINT_TYPE(ftype))
++ {
++ efptFTYPE = efptRESTRAINT;
++ }
++ else
++ {
++ efptFTYPE = efptBONDED;
++ }
++
++ if (interaction_function[ftype].flags & IF_BOND &&
++ !(ftype == F_CONNBONDS || ftype == F_POSRES))
++ {
++ ind = interaction_function[ftype].nrnb_ind;
++ nat1 = interaction_function[ftype].nratoms + 1;
++ nbonds = idef->il[ftype].nr/nat1;
++ iatoms = idef->il[ftype].iatoms;
++
++ nb0 = ((nbonds* thread )/(fr->nthreads))*nat1;
++ nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
++
++ if (!IS_LISTED_LJ_C(ftype))
++ {
++ if(ftype==F_CMAP)
++ {
++ v = cmap_dihs(nbn,iatoms+nb0,
++ idef->iparams,&idef->cmap_grid,
++ (const rvec*)x,f,fshift,
++ pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
++ md,fcd,global_atom_index);
++ }
++ else if (ftype == F_PDIHS &&
++ !bCalcEnerVir && fr->efep==efepNO)
++ {
++ /* No energies, shift forces, dvdl */
++#ifndef SSE_PROPER_DIHEDRALS
++ pdihs_noener
++#else
++ pdihs_noener_sse
++#endif
++ (nbn,idef->il[ftype].iatoms+nb0,
++ idef->iparams,
++ (const rvec*)x,f,
++ pbc,g,lambda[efptFTYPE],md,fcd,
++ global_atom_index);
++ v = 0;
++ dvdl[efptFTYPE] = 0;
++ }
++ else
++ {
++ v = interaction_function[ftype].ifunc(nbn,iatoms+nb0,
++ idef->iparams,
++ (const rvec*)x,f,fshift,
++ pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
++ md,fcd,global_atom_index);
++ }
++ enerd->dvdl_nonlin[efptFTYPE] += dvdl[efptFTYPE];
++ if (bPrintSepPot)
++ {
++ fprintf(fplog," %-23s #%4d V %12.5e dVdl %12.5e\n",
++ interaction_function[ftype].longname,
++ nbonds/nat1,v,lambda[efptFTYPE]);
++ }
++ }
++ else
++ {
++ v = do_listed_vdw_q(ftype,nbn,iatoms+nb0,
++ idef->iparams,
++ (const rvec*)x,f,fshift,
++ pbc,g,lambda,dvdl,
++ md,fr,grpp,global_atom_index);
++ enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
++ enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
++
++ if (bPrintSepPot)
++ {
++ fprintf(fplog," %-5s + %-15s #%4d dVdl %12.5e\n",
++ interaction_function[ftype].longname,
++ interaction_function[F_LJ14].longname,nbonds/nat1,dvdl[efptVDW]);
++ fprintf(fplog," %-5s + %-15s #%4d dVdl %12.5e\n",
++ interaction_function[ftype].longname,
++ interaction_function[F_COUL14].longname,nbonds/nat1,dvdl[efptCOUL]);
++ }
++ }
++ if (ind != -1 && thread == 0)
++ {
++ inc_nrnb(nrnb,ind,nbonds);
++ }
++ }
++
+ return v;
+}
+
- real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
- rvec x[], rvec f[], t_forcerec *fr,
- const t_pbc *pbc,const t_graph *g,
- gmx_enerdata_t *enerd, t_nrnb *nrnb,
- real *lambda, real *dvdl,
- const t_mdatoms *md,t_fcdata *fcd,
- int *global_atom_index, gmx_bool bPrintSepPot)
++/* WARNING! THIS FUNCTION MUST EXACTLY TRACK THE calc_one_bond
+ function, or horrible things will happen when doing free energy
+ calculations! In a good coding world, this would not be a
+ different function, but for speed reasons, it needs to be made a
+ separate function. TODO for 5.0 - figure out a way to reorganize
+ to reduce duplication.
+*/
+
- if (ftype < F_LJ14 || ftype > F_LJC_PAIRS_NB)
++static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
++ rvec x[], rvec f[], t_forcerec *fr,
++ const t_pbc *pbc,const t_graph *g,
++ gmx_enerdata_t *enerd, t_nrnb *nrnb,
++ real *lambda, real *dvdl,
++ const t_mdatoms *md,t_fcdata *fcd,
++ int *global_atom_index, gmx_bool bPrintSepPot)
+{
+ int ind,nat1,nbonds,efptFTYPE,nbonds_np;
+ real v=0;
+ t_iatom *iatoms;
+
+ if (IS_RESTRAINT_TYPE(ftype))
+ {
+ efptFTYPE = efptRESTRAINT;
+ }
+ else
+ {
+ efptFTYPE = efptBONDED;
+ }
+
+ if (ftype<F_GB12 || ftype>F_GB14)
+ {
+ if (interaction_function[ftype].flags & IF_BOND &&
+ !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
+ {
+ ind = interaction_function[ftype].nrnb_ind;
+ nat1 = interaction_function[ftype].nratoms+1;
+ nbonds_np = idef->il[ftype].nr_nonperturbed;
+ nbonds = idef->il[ftype].nr - nbonds_np;
+ iatoms = idef->il[ftype].iatoms + nbonds_np;
+ if (nbonds > 0)
+ {
- int i,ftype,nbonds,ind,nat;
++ if (!IS_LISTED_LJ_C(ftype))
+ {
+ if(ftype==F_CMAP)
+ {
+ v = cmap_dihs(nbonds,iatoms,
+ idef->iparams,&idef->cmap_grid,
+ (const rvec*)x,f,fr->fshift,
+ pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),md,fcd,
+ global_atom_index);
+ }
+ else
+ {
+ v = interaction_function[ftype].ifunc(nbonds,iatoms,
+ idef->iparams,
+ (const rvec*)x,f,fr->fshift,
+ pbc,g,lambda[efptFTYPE],&dvdl[efptFTYPE],
+ md,fcd,global_atom_index);
+ }
+ }
+ else
+ {
+ v = do_listed_vdw_q(ftype,nbonds,iatoms,
+ idef->iparams,
+ (const rvec*)x,f,fr->fshift,
+ pbc,g,lambda,dvdl,
+ md,fr,&enerd->grpp,global_atom_index);
+ }
+ if (ind != -1)
+ {
+ inc_nrnb(nrnb,ind,nbonds/nat1);
+ }
+ }
+ }
+ }
+ return v;
+}
+
+void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
+ const t_idef *idef,
+ rvec x[],history_t *hist,
+ rvec f[],t_forcerec *fr,
+ const t_pbc *pbc,const t_graph *g,
+ gmx_enerdata_t *enerd,t_nrnb *nrnb,
+ real *lambda,
+ const t_mdatoms *md,
+ t_fcdata *fcd,int *global_atom_index,
+ t_atomtypes *atype, gmx_genborn_t *born,
++ int force_flags,
+ gmx_bool bPrintSepPot,gmx_large_int_t step)
+{
- real *epot;
++ gmx_bool bCalcEnerVir;
++ int i;
+ real v,dvdl[efptNR],dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
+ of lambda, which will be thrown away in the end*/
- epot = enerd->term;
-
+ const t_pbc *pbc_null;
+ char buf[22];
++ int thread;
++
++ bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
+
+ for (i=0;i<efptNR;i++)
+ {
+ dvdl[i] = 0.0;
+ }
+ if (fr->bMolPBC)
+ {
+ pbc_null = pbc;
+ }
+ else
+ {
+ pbc_null = NULL;
+ }
+ if (bPrintSepPot)
+ {
+ fprintf(fplog,"Step %s: bonded V and dVdl for this node\n",
+ gmx_step_str(step,buf));
+ }
+
+#ifdef DEBUG
+ if (g && debug)
+ {
+ p_graph(debug,"Bondage is fun",g);
+ }
+#endif
+
- if (idef->il[F_ORIRES].nr) {
- epot[F_ORIRESDEV] = calc_orires_dev(ms,idef->il[F_ORIRES].nr,
- idef->il[F_ORIRES].iatoms,
- idef->iparams,md,(const rvec*)x,
- pbc_null,fcd,hist);
+ /* Do pre force calculation stuff which might require communication */
- if (idef->il[F_DISRES].nr) {
++ if (idef->il[F_ORIRES].nr)
++ {
++ enerd->term[F_ORIRESDEV] =
++ calc_orires_dev(ms,idef->il[F_ORIRES].nr,
++ idef->il[F_ORIRES].iatoms,
++ idef->iparams,md,(const rvec*)x,
++ pbc_null,fcd,hist);
+ }
- /* Loop over all bonded force types to calculate the bonded forces */
- for(ftype=0; (ftype<F_NRE); ftype++)
++ if (idef->il[F_DISRES].nr)
++ {
+ calc_disres_R_6(ms,idef->il[F_DISRES].nr,
+ idef->il[F_DISRES].iatoms,
+ idef->iparams,(const rvec*)x,pbc_null,
+ fcd,hist);
+ }
+
- v = calc_one_bond(fplog,ftype,idef,x,
- f,fr,pbc_null,g,enerd,nrnb,lambda,dvdl,
- md,fcd,global_atom_index,bPrintSepPot);
- epot[ftype] += v;
++#pragma omp parallel for num_threads(fr->nthreads) schedule(static)
++ for(thread=0; thread<fr->nthreads; thread++)
+ {
- epot[F_DISRESVIOL] = fcd->disres.sumviol;
++ int ftype,nbonds,ind,nat1;
++ real *epot,v;
++ /* thread stuff */
++ rvec *ft,*fshift;
++ real *dvdlt;
++ gmx_grppairener_t *grpp;
++ int nb0,nbn;
++
++ if (thread == 0)
++ {
++ ft = f;
++ fshift = fr->fshift;
++ epot = enerd->term;
++ grpp = &enerd->grpp;
++ dvdlt = dvdl;
++ }
++ else
++ {
++ zero_thread_forces(&fr->f_t[thread],fr->natoms_force,
++ fr->red_nblock,1<<fr->red_ashift);
++
++ ft = fr->f_t[thread].f;
++ fshift = fr->f_t[thread].fshift;
++ epot = fr->f_t[thread].ener;
++ grpp = &fr->f_t[thread].grpp;
++ dvdlt = fr->f_t[thread].dvdl;
++ }
++ /* Loop over all bonded force types to calculate the bonded forces */
++ for(ftype=0; (ftype<F_NRE); ftype++)
++ {
++ if (idef->il[ftype].nr > 0 &&
++ (interaction_function[ftype].flags & IF_BOND) &&
++ (ftype < F_GB12 || ftype > F_GB14) &&
++ !(ftype == F_CONNBONDS || ftype == F_POSRES))
++ {
++ v = calc_one_bond(fplog,thread,ftype,idef,x,
++ ft,fshift,fr,pbc_null,g,enerd,grpp,
++ nrnb,lambda,dvdlt,
++ md,fcd,bCalcEnerVir,
++ global_atom_index,bPrintSepPot);
++ epot[ftype] += v;
++ }
++ }
++ }
++ if (fr->nthreads > 1)
++ {
++ reduce_thread_forces(fr->natoms_force,f,fr->fshift,
++ enerd->term,&enerd->grpp,dvdl,
++ fr->nthreads,fr->f_t,
++ fr->red_nblock,1<<fr->red_ashift,
++ bCalcEnerVir,
++ force_flags & GMX_FORCE_DHDL);
+ }
++
+ /* Copy the sum of violations for the distance restraints from fcd */
+ if (fcd)
+ {
++ enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
++
+ }
+}
+
+void calc_bonds_lambda(FILE *fplog,
+ const t_idef *idef,
+ rvec x[],
+ t_forcerec *fr,
+ const t_pbc *pbc,const t_graph *g,
+ gmx_enerdata_t *enerd,t_nrnb *nrnb,
+ real *lambda,
+ const t_mdatoms *md,
+ t_fcdata *fcd,
+ int *global_atom_index)
+{
+ int i,ftype,nbonds_np,nbonds,ind,nat;
+ real v,dr,dr2,*epot;
+ real dvdl_dum[efptNR];
+ rvec *f,*fshift_orig;
+ const t_pbc *pbc_null;
+ t_iatom *iatom_fe;
+
+ if (fr->bMolPBC)
+ {
+ pbc_null = pbc;
+ }
+ else
+ {
+ pbc_null = NULL;
+ }
+
+ epot = enerd->term;
+
+ snew(f,fr->natoms_force);
+ /* We want to preserve the fshift array in forcerec */
+ fshift_orig = fr->fshift;
+ snew(fr->fshift,SHIFTS);
+
+ /* Loop over all bonded force types to calculate the bonded forces */
+ for(ftype=0; (ftype<F_NRE); ftype++)
+ {
+ v = calc_one_bond_foreign(fplog,ftype,idef,x,
+ f,fr,pbc_null,g,enerd,nrnb,lambda,dvdl_dum,
+ md,fcd,global_atom_index,FALSE);
+ epot[ftype] += v;
+ }
+
+ sfree(fr->fshift);
+ fr->fshift = fshift_orig;
+ sfree(f);
+}
--- /dev/null
- #include "mdrun.h"
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This file is part of Gromacs Copyright (c) 1991-2008
+ * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+
+/* The source code in this file should be thread-safe.
+ Please keep it that way. */
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include <time.h>
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef GMX_NATIVE_WINDOWS
+/* _chsize_s */
+#include <io.h>
+#include <sys/locking.h>
+#endif
+
+
+#include "filenm.h"
+#include "names.h"
+#include "typedefs.h"
+#include "smalloc.h"
+#include "gmxfio.h"
+#include "xdrf.h"
+#include "statutil.h"
+#include "txtdump.h"
+#include "vec.h"
+#include "network.h"
+#include "gmx_random.h"
+#include "checkpoint.h"
+#include "futil.h"
+#include "string2.h"
+#include <fcntl.h>
+
+
+#ifdef GMX_FAHCORE
+#include "corewrap.h"
+#endif
+
+
+/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
+char *
+gmx_ctime_r(const time_t *clock,char *buf, int n);
+
+
+#define CPT_MAGIC1 171817
+#define CPT_MAGIC2 171819
+#define CPTSTRLEN 1024
+
+#ifdef GMX_DOUBLE
+#define GMX_CPT_BUILD_DP 1
+#else
+#define GMX_CPT_BUILD_DP 0
+#endif
+
+/* cpt_version should normally only be changed
+ * when the header or footer format changes.
+ * The state data format itself is backward and forward compatible.
+ * But old code can not read a new entry that is present in the file
+ * (but can read a new format when new entries are not present).
+ */
+static const int cpt_version = 14;
+
+
+const char *est_names[estNR]=
+{
+ "FE-lambda",
+ "box", "box-rel", "box-v", "pres_prev",
+ "nosehoover-xi", "thermostat-integral",
+ "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
+ "disre_initf", "disre_rm3tav",
+ "orire_initf", "orire_Dtav",
+ "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
+};
+
+enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
+
+const char *eeks_names[eeksNR]=
+{
+ "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
+ "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
+};
+
+enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
+ eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
+ eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
+ eenhENERGY_DELTA_H_NN,
+ eenhENERGY_DELTA_H_LIST,
+ eenhENERGY_DELTA_H_STARTTIME,
+ eenhENERGY_DELTA_H_STARTLAMBDA,
+ eenhNR };
+
+const char *eenh_names[eenhNR]=
+{
+ "energy_n", "energy_aver", "energy_sum", "energy_nsum",
+ "energy_sum_sim", "energy_nsum_sim",
+ "energy_nsteps", "energy_nsteps_sim",
+ "energy_delta_h_nn",
+ "energy_delta_h_list",
+ "energy_delta_h_start_time",
+ "energy_delta_h_start_lambda"
+};
+
+/* free energy history variables -- need to be preserved over checkpoint */
+enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
+ edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
+/* free energy history variable names */
+const char *edfh_names[edfhNR]=
+{
+ "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
+ "accumulated_plus", "accumulated_minus", "accumulated_plus_2", "accumulated_minus_2", "Tij", "Tij_empirical"
+};
+
+#ifdef GMX_NATIVE_WINDOWS
+static int
+gmx_wintruncate(const char *filename, __int64 size)
+{
+#ifdef GMX_FAHCORE
+ /*we do this elsewhere*/
+ return 0;
+#else
+ FILE *fp;
+ int rc;
+
+ fp=fopen(filename,"rb+");
+
+ if(fp==NULL)
+ {
+ return -1;
+ }
+
+ rc = _chsize_s( fileno(fp), size);
+ fclose(fp);
+
+ return rc;
+#endif
+}
+#endif
+
+
+enum { ecprREAL, ecprRVEC, ecprMATRIX };
+
+enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
+/* enums for the different components of checkpoint variables, replacing the hard coded ones.
+ cptpEST - state variables.
+ cptpEEKS - Kinetic energy state variables.
+ cptpEENH - Energy history state variables.
+ cptpEDFH - free energy history variables.
+*/
+
+
+static const char *st_names(int cptp,int ecpt)
+{
+ switch (cptp)
+ {
+ case cptpEST: return est_names [ecpt]; break;
+ case cptpEEKS: return eeks_names[ecpt]; break;
+ case cptpEENH: return eenh_names[ecpt]; break;
+ case cptpEDFH: return edfh_names[ecpt]; break;
+ }
+
+ return NULL;
+}
+
+static void cp_warning(FILE *fp)
+{
+ fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
+}
+
+static void cp_error()
+{
+ gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
+}
+
+static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
+{
+ bool_t res=0;
+
+ if (bRead)
+ {
+ snew(*s,CPTSTRLEN);
+ }
+ res = xdr_string(xd,s,CPTSTRLEN);
+ if (res == 0)
+ {
+ cp_error();
+ }
+ if (list)
+ {
+ fprintf(list,"%s = %s\n",desc,*s);
+ sfree(*s);
+ }
+}
+
+static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
+{
+ bool_t res=0;
+
+ res = xdr_int(xd,i);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (list)
+ {
+ fprintf(list,"%s = %d\n",desc,*i);
+ }
+ return 0;
+}
+
+static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
+{
+ bool_t res=1;
+ int j;
+ if (list)
+ {
+ fprintf(list,"%s = ",desc);
+ }
+ for (j=0; j<n && res; j++)
+ {
+ res &= xdr_u_char(xd,&i[j]);
+ if (list)
+ {
+ fprintf(list,"%02x",i[j]);
+ }
+ }
+ if (list)
+ {
+ fprintf(list,"\n");
+ }
+ if (res == 0)
+ {
+ return -1;
+ }
+
+ return 0;
+}
+
+static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
+{
+ if (do_cpt_int(xd,desc,i,list) < 0)
+ {
+ cp_error();
+ }
+}
+
+static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
+{
+ bool_t res=0;
+ char buf[STEPSTRSIZE];
+
+ res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
+ if (res == 0)
+ {
+ cp_error();
+ }
+ if (list)
+ {
+ fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
+ }
+}
+
+static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
+{
+ bool_t res=0;
+
+ res = xdr_double(xd,f);
+ if (res == 0)
+ {
+ cp_error();
+ }
+ if (list)
+ {
+ fprintf(list,"%s = %f\n",desc,*f);
+ }
+}
+
+/* If nval >= 0, nval is used; on read this should match the passed value.
+ * If nval n<0, *nptr is used; on read the value is stored in nptr
+ */
+static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
+ int nval,int *nptr,real **v,
+ FILE *list,int erealtype)
+{
+ bool_t res=0;
+#ifndef GMX_DOUBLE
+ int dtc=xdr_datatype_float;
+#else
+ int dtc=xdr_datatype_double;
+#endif
+ real *vp,*va=NULL;
+ float *vf;
+ double *vd;
+ int nf,dt,i;
+
+ if (list == NULL)
+ {
+ if (nval >= 0)
+ {
+ nf = nval;
+ }
+ else
+ {
+ if (nptr == NULL)
+ {
+ gmx_incons("*nptr=NULL in do_cpte_reals_low");
+ }
+ nf = *nptr;
+ }
+ }
+ res = xdr_int(xd,&nf);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (list == NULL)
+ {
+ if (nval >= 0)
+ {
+ if (nf != nval)
+ {
+ gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
+ }
+ }
+ else
+ {
+ *nptr = nf;
+ }
+ }
+ dt = dtc;
+ res = xdr_int(xd,&dt);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (dt != dtc)
+ {
+ fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
+ st_names(cptp,ecpt),xdr_datatype_names[dtc],
+ xdr_datatype_names[dt]);
+ }
+ if (list || !(sflags & (1<<ecpt)))
+ {
+ snew(va,nf);
+ vp = va;
+ }
+ else
+ {
+ if (*v == NULL)
+ {
+ snew(*v,nf);
+ }
+ vp = *v;
+ }
+ if (dt == xdr_datatype_float)
+ {
+ if (dtc == xdr_datatype_float)
+ {
+ vf = (float *)vp;
+ }
+ else
+ {
+ snew(vf,nf);
+ }
+ res = xdr_vector(xd,(char *)vf,nf,
+ (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (dtc != xdr_datatype_float)
+ {
+ for(i=0; i<nf; i++)
+ {
+ vp[i] = vf[i];
+ }
+ sfree(vf);
+ }
+ }
+ else
+ {
+ if (dtc == xdr_datatype_double)
+ {
+ vd = (double *)vp;
+ }
+ else
+ {
+ snew(vd,nf);
+ }
+ res = xdr_vector(xd,(char *)vd,nf,
+ (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (dtc != xdr_datatype_double)
+ {
+ for(i=0; i<nf; i++)
+ {
+ vp[i] = vd[i];
+ }
+ sfree(vd);
+ }
+ }
+
+ if (list)
+ {
+ switch (erealtype)
+ {
+ case ecprREAL:
+ pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
+ break;
+ case ecprRVEC:
+ pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
+ break;
+ default:
+ gmx_incons("Unknown checkpoint real type");
+ }
+ }
+ if (va)
+ {
+ sfree(va);
+ }
+
+ return 0;
+}
+
+
+/* This function stores n along with the reals for reading,
+ * but on reading it assumes that n matches the value in the checkpoint file,
+ * a fatal error is generated when this is not the case.
+ */
+static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
+ int n,real **v,FILE *list)
+{
+ return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
+}
+
+/* This function does the same as do_cpte_reals,
+ * except that on reading it ignores the passed value of *n
+ * and stored the value read from the checkpoint file in *n.
+ */
+static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
+ int *n,real **v,FILE *list)
+{
+ return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
+}
+
+static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
+ real *r,FILE *list)
+{
+ int n;
+
+ return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
+}
+
+/* Read/write an array of n ints for checkpoint entry ecpt of part cptp.
+ * The element count and an integer datatype id are stored in the file
+ * before the data; a mismatch of either on reading is a fatal error.
+ * Passing v==NULL (or list!=NULL) consumes the entry into a temporary
+ * buffer that is freed again, i.e. the data is read but ignored.
+ * Returns 0 on success, -1 on XDR read/write failure.
+ */
+static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
+ int n,int **v,FILE *list)
+{
+ bool_t res=0;
+ int dtc=xdr_datatype_int;
+ int *vp,*va=NULL;
+ int nf,dt,i;
+
+ nf = n;
+ res = xdr_int(xd,&nf);
+ if (res == 0)
+ {
+ return -1;
+ }
+ /* Only enforce the count when actually reading into caller storage */
+ if (list == NULL && v != NULL && nf != n)
+ {
+ gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
+ }
+ dt = dtc;
+ res = xdr_int(xd,&dt);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (dt != dtc)
+ {
+ gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
+ st_names(cptp,ecpt),xdr_datatype_names[dtc],
+ xdr_datatype_names[dt]);
+ }
+ /* Use a throw-away buffer when listing, when the entry is not in the
+ * state flags, or when the caller does not want the data */
+ if (list || !(sflags & (1<<ecpt)) || v == NULL)
+ {
+ snew(va,nf);
+ vp = va;
+ }
+ else
+ {
+ if (*v == NULL)
+ {
+ snew(*v,nf);
+ }
+ vp = *v;
+ }
+ res = xdr_vector(xd,(char *)vp,nf,
+ (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (list)
+ {
+ pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
+ }
+ if (va)
+ {
+ sfree(va);
+ }
+
+ return 0;
+}
+
+/* Read/write a single int checkpoint entry.
+ * &i is the address of the local copy of the caller's pointer, so on
+ * reading do_cpte_ints fills the caller's int through that pointer. */
+static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
+ int *i,FILE *list)
+{
+ return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
+}
+
+/* Read/write an array of n doubles for checkpoint entry ecpt of part cptp.
+ * The element count and a datatype id are stored in the file before the
+ * data; a mismatch of either on reading is a fatal error.
+ * Passing v==NULL (or list!=NULL) consumes the entry into a temporary
+ * buffer that is freed again, i.e. the data is read but ignored.
+ * Returns 0 on success, -1 on XDR read/write failure.
+ *
+ * Review fix: added the v==NULL guards that the sibling do_cpte_ints
+ * already has, so a NULL v no longer dereferences *v; also removed the
+ * unused local "i".
+ */
+static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
+ int n,double **v,FILE *list)
+{
+ bool_t res=0;
+ int dtc=xdr_datatype_double;
+ double *vp,*va=NULL;
+ int nf,dt;
+
+ nf = n;
+ res = xdr_int(xd,&nf);
+ if (res == 0)
+ {
+ return -1;
+ }
+ /* Only enforce the count when actually reading into caller storage */
+ if (list == NULL && v != NULL && nf != n)
+ {
+ gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
+ }
+ dt = dtc;
+ res = xdr_int(xd,&dt);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (dt != dtc)
+ {
+ gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
+ st_names(cptp,ecpt),xdr_datatype_names[dtc],
+ xdr_datatype_names[dt]);
+ }
+ /* Use a throw-away buffer when listing, when the entry is not in the
+ * state flags, or when the caller does not want the data */
+ if (list || !(sflags & (1<<ecpt)) || v == NULL)
+ {
+ snew(va,nf);
+ vp = va;
+ }
+ else
+ {
+ if (*v == NULL)
+ {
+ snew(*v,nf);
+ }
+ vp = *v;
+ }
+ res = xdr_vector(xd,(char *)vp,nf,
+ (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
+ if (res == 0)
+ {
+ return -1;
+ }
+ if (list)
+ {
+ pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
+ }
+ if (va)
+ {
+ sfree(va);
+ }
+
+ return 0;
+}
+
+/* Read/write a single double checkpoint entry; &r addresses the local
+ * pointer copy so reading fills the caller's double through it. */
+static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
+ double *r,FILE *list)
+{
+ return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
+}
+
+
+/* Read/write an array of n rvecs, stored as n*DIM flat reals.
+ * Returns 0 on success, -1 on XDR read/write failure.
+ */
+static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
+ int n,rvec **v,FILE *list)
+{
+ /* The original declared an unused local "int n3;" here; removed. */
+ return do_cpte_reals_low(xd,cptp,ecpt,sflags,
+ n*DIM,NULL,(real **)v,list,ecprRVEC);
+}
+
+/* Read/write a DIM*DIM matrix, stored as flat reals.
+ * Listing is done here (not in do_cpte_reals_low) so the matrix can be
+ * printed row-wise. Returns 0 on success, -1 on XDR failure.
+ *
+ * Review fix: ret was declared "real" although do_cpte_reals_low returns
+ * an int status and this function returns int; changed to int.
+ */
+static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
+ matrix v,FILE *list)
+{
+ real *vr;
+ int ret;
+
+ vr = (real *)&(v[0][0]);
+ ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
+ DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
+
+ if (list && ret == 0)
+ {
+ pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
+ }
+
+ return ret;
+}
+
+
+/* Read/write an n x n matrix stored as n rows of n reals (v[i] is row i).
+ * Returns 0 on success, non-zero (the first row's error code) on failure.
+ *
+ * Review fixes: ret/reti were declared "real" although they hold int
+ * status codes; the error check was "if (reti == 0) ret = 0;" which
+ * discarded all failures; the local vr was assigned but never used.
+ */
+static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
+ int n, real **v,FILE *list)
+{
+ int i;
+ int ret,reti;
+ char name[CPTSTRLEN];
+
+ ret = 0;
+ if (v==NULL)
+ {
+ /* Listing mode: allocate scratch row pointers.
+ * NOTE(review): this assigns only to the local parameter and the
+ * rows allocated below are never freed; presumably acceptable for
+ * the one-shot dump path -- confirm. */
+ snew(v,n);
+ }
+ for (i=0;i<n;i++)
+ {
+ reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
+ if (list && reti == 0)
+ {
+ sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
+ pr_reals(list,0,name,v[i],n);
+ }
+ if (reti != 0)
+ {
+ /* Propagate the error instead of resetting ret to 0 */
+ ret = reti;
+ }
+ }
+ return ret;
+}
+
+/* Read/write an array of n DIM*DIM matrices, serialized as one flat
+ * real array of nf*DIM*DIM elements. The count is stored first; a
+ * mismatch on reading (when not listing) is fatal.
+ * Returns 0 on success, -1 on XDR read/write failure.
+ */
+static int do_cpte_matrices(XDR *xd,gmx_bool bRead,
+ int fflags,ekinstate_t *ekins,
+ FILE *list)
+
+/* Read (bRead) or write the checkpoint file header: magic number, build
+ * identification strings, file version, and all the counts/flags needed
+ * to read the rest of the file. On reading, fields that were introduced
+ * in later file versions are given backward-compatible defaults.
+ * All errors here are fatal (corrupt or truncated file).
+ */
+static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
+ char **version,char **btime,char **buser,char **bhost,
+ int *double_prec,
+ char **fprog,char **ftime,
+ int *eIntegrator,int *simulation_part,
+ gmx_large_int_t *step,double *t,
+ int *nnodes,int *dd_nc,int *npme,
+ int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
+ int *nlambda, int *flags_state,
+ int *flags_eks,int *flags_enh, int *flags_dfh,
+ FILE *list)
+{
+ bool_t res=0;
+ int magic;
+ int idum=0;
+ int i;
+ char *fhost;
+
+ if (bRead)
+ {
+ magic = -1;
+ }
+ else
+ {
+ magic = CPT_MAGIC1;
+ }
+ res = xdr_int(xd,&magic);
+ if (res == 0)
+ {
+ gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
+ }
+ if (magic != CPT_MAGIC1)
+ {
+ gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
+ "The checkpoint file is corrupted or not a checkpoint file",
+ magic,CPT_MAGIC1);
+ }
+ if (!bRead)
+ {
+ /* When writing, record the host name (if available) */
+ snew(fhost,255);
+#ifdef HAVE_UNISTD_H
+ if (gethostname(fhost,255) != 0)
+ {
+ sprintf(fhost,"unknown");
+ }
+#else
+ sprintf(fhost,"unknown");
+#endif
+ }
+ do_cpt_string_err(xd,bRead,"GROMACS version" ,version,list);
+ do_cpt_string_err(xd,bRead,"GROMACS build time" ,btime,list);
+ do_cpt_string_err(xd,bRead,"GROMACS build user" ,buser,list);
+ do_cpt_string_err(xd,bRead,"GROMACS build host" ,bhost,list);
+ do_cpt_string_err(xd,bRead,"generating program" ,fprog,list);
+ do_cpt_string_err(xd,bRead,"generation time" ,ftime,list);
+ *file_version = cpt_version;
+ do_cpt_int_err(xd,"checkpoint file version",file_version,list);
+ if (*file_version > cpt_version)
+ {
+ gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
+ }
+ /* Fields below are conditional on the file version; older files get
+ * backward-compatible defaults on reading. */
+ if (*file_version >= 13)
+ {
+ do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
+ }
+ else
+ {
+ *double_prec = -1;
+ }
+ if (*file_version >= 12)
+ {
+ do_cpt_string_err(xd,bRead,"generating host" ,&fhost,list);
+ if (list == NULL)
+ {
+ sfree(fhost);
+ }
+ }
+ do_cpt_int_err(xd,"#atoms" ,natoms ,list);
+ do_cpt_int_err(xd,"#T-coupling groups",ngtc ,list);
+ if (*file_version >= 10)
+ {
+ do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
+ }
+ else
+ {
+ *nhchainlength = 1;
+ }
+ if (*file_version >= 11)
+ {
+ do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
+ }
+ else
+ {
+ *nnhpres = 0;
+ }
+ if (*file_version >= 14)
+ {
+ do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
+ }
+ else
+ {
+ *nlambda = 0;
+ }
+ do_cpt_int_err(xd,"integrator" ,eIntegrator,list);
+ if (*file_version >= 3)
+ {
+ do_cpt_int_err(xd,"simulation part #", simulation_part,list);
+ }
+ else
+ {
+ *simulation_part = 1;
+ }
+ if (*file_version >= 5)
+ {
+ do_cpt_step_err(xd,"step" ,step ,list);
+ }
+ else
+ {
+ /* Old files stored the step as a plain int */
+ do_cpt_int_err(xd,"step" ,&idum ,list);
+ *step = idum;
+ }
+ do_cpt_double_err(xd,"t" ,t ,list);
+ do_cpt_int_err(xd,"#PP-nodes" ,nnodes ,list);
+ idum = 1;
+ /* dd_nc may be NULL (no domain decomposition); use a dummy then */
+ do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
+ do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
+ do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
+ do_cpt_int_err(xd,"#PME-only nodes",npme,list);
+ do_cpt_int_err(xd,"state flags",flags_state,list);
+ if (*file_version >= 4)
+ {
+ do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
+ do_cpt_int_err(xd,"energy history flags",flags_enh,list);
+ }
+ else
+ {
+ /* Version <4 packed the energy-history flags into the state flags
+ * above estORIRE_DTAV; split them out here. */
+ *flags_eks = 0;
+ *flags_enh = (*flags_state >> (estORIRE_DTAV+1));
+ *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
+ (1<<(estORIRE_DTAV+2)) |
+ (1<<(estORIRE_DTAV+3))));
+ }
+ if (*file_version >= 14)
+ {
+ do_cpt_int_err(xd,"df history flags",flags_dfh,list);
+ } else {
+ *flags_dfh = 0;
+ }
+}
+
+/* Read/write the end-of-file magic number (file version >= 2 only).
+ * A failed XDR call is fatal; a wrong magic value returns -1 so the
+ * caller can report a truncated/corrupt file. Returns 0 on success.
+ */
+static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
+{
+ bool_t res=0;
+ int magic;
+
+ if (file_version >= 2)
+ {
+ magic = CPT_MAGIC2;
+ res = xdr_int(xd,&magic);
+ if (res == 0)
+ {
+ cp_error();
+ }
+ if (magic != CPT_MAGIC2)
+ {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/* Read/write all state entries selected by fflags. sflags (the state
+ * flags of the running code) decides per entry whether the data goes
+ * into the state struct or into a throw-away buffer.
+ * bReadRNG==FALSE skips storing the RNG data by passing NULL pointers.
+ * Returns 0 on success, or the first non-zero do_cpte_* error code.
+ */
+static int do_cpt_state(XDR *xd,gmx_bool bRead,
+ int fflags,t_state *state,
+ gmx_bool bReadRNG,FILE *list)
+{
+ int sflags;
+ int **rng_p,**rngi_p;
+ int i;
+ int ret;
+ int nnht,nnhtp;
+
+ ret = 0;
+
+ /* Total Nose-Hoover chain variables for T- and P-coupling */
+ nnht = state->nhchainlength*state->ngtc;
+ nnhtp = state->nhchainlength*state->nnhpres;
+
+ if (bReadRNG)
+ {
+ rng_p = (int **)&state->ld_rng;
+ rngi_p = &state->ld_rngi;
+ }
+ else
+ {
+ /* Do not read the RNG data */
+ rng_p = NULL;
+ rngi_p = NULL;
+ }
+ /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
+
+ sflags = state->flags;
+ for(i=0; (i<estNR && ret == 0); i++)
+ {
+ if (fflags & (1<<i))
+ {
+ switch (i)
+ {
+ case estLAMBDA: ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
+ case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
+ case estBOX: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
+ case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
+ case estBOXV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
+ case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
+ case estSVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
+ case estFVIR_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
+ case estNH_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
+ case estNH_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
+ case estNHPRES_XI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
+ case estNHPRES_VXI: ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
+ case estTC_INT: ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
+ case estVETA: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
+ case estVOL0: ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
+ case estX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
+ case estV: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
+ case estSDX: ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
+ case estLD_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
+ case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
+ case estMC_RNG: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
+ case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
+ case estDISRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
+ case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
+ case estORIRE_INITF: ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
+ case estORIRE_DTAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
+ default:
+ gmx_fatal(FARGS,"Unknown state entry %d\n"
+ "You are probably reading a new checkpoint file with old code",i);
+ }
+ }
+ }
+
+ return ret;
+}
+
+/* Read/write the kinetic-energy state entries selected by fflags.
+ * Returns 0 on success, or the first non-zero do_cpte_* error code.
+ *
+ * Review fix: the eeksVSCALE, eeksEKINSCALEH, eeksDEKINDL and eeksMVCOS
+ * cases passed (xd,1,cptpEEKS,fflags,...), i.e. cptp=1 and ecpt=cptpEEKS,
+ * shifting the arguments by one. That produced wrong entry names in
+ * errors/listings and tested the wrong sflags bit. They now use the
+ * same (xd,cptpEEKS,i,fflags,...) form as the other cases.
+ */
+static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
+ int fflags,ekinstate_t *ekins,
+ FILE *list)
+{
+ int i;
+ int ret;
+
+ ret = 0;
+
+ for(i=0; (i<eeksNR && ret == 0); i++)
+ {
+ if (fflags & (1<<i))
+ {
+ switch (i)
+ {
+
+ case eeksEKIN_N: ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
+ case eeksEKINH : ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
+ case eeksEKINF: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
+ case eeksEKINO: ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
+ case eeksEKINTOTAL: ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
+ case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
+ case eeksVSCALE: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
+ case eeksEKINSCALEH: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
+ case eeksDEKINDL : ret = do_cpte_real(xd,cptpEEKS,i,fflags,&ekins->dekindl,list); break;
+ case eeksMVCOS: ret = do_cpte_real(xd,cptpEEKS,i,fflags,&ekins->mvcos,list); break;
+ default:
+ gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
+ "You are probably reading a new checkpoint file with old code",i);
+ }
+ }
+ }
+
+ return ret;
+}
+
+
+/* Read/write the energy-history entries selected by fflags and, on
+ * reading, patch up data from older file formats (copy sum->sum_sim,
+ * nsum->nsteps, nsum_sim->nsteps_sim when the newer fields are absent).
+ * Returns 0 on success, or the first non-zero do_cpte_* error code.
+ * NOTE(review): the "fflags |= ..." statements near the end modify the
+ * by-value parameter only; the caller never sees the updated flags --
+ * presumably intentional since only the local compatibility copies
+ * depend on them, but confirm.
+ */
+static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
+ int fflags,energyhistory_t *enerhist,
+ FILE *list)
+{
+ int i;
+ int j;
+ int ret;
+
+ ret = 0;
+
+ if (bRead)
+ {
+ /* Initialize everything so entries absent from the file are sane */
+ enerhist->nsteps = 0;
+ enerhist->nsum = 0;
+ enerhist->nsteps_sim = 0;
+ enerhist->nsum_sim = 0;
+ enerhist->dht = NULL;
+
+ if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
+ {
+ snew(enerhist->dht,1);
+ enerhist->dht->ndh = NULL;
+ enerhist->dht->dh = NULL;
+ enerhist->dht->start_lambda_set=FALSE;
+ }
+ }
+
+ for(i=0; (i<eenhNR && ret == 0); i++)
+ {
+ if (fflags & (1<<i))
+ {
+ switch (i)
+ {
+ case eenhENERGY_N: ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
+ case eenhENERGY_AVER: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
+ case eenhENERGY_SUM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
+ case eenhENERGY_NSUM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
+ case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
+ case eenhENERGY_NSUM_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
+ case eenhENERGY_NSTEPS: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
+ case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
+ case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
+ if (bRead) /* now allocate memory for it */
+ {
+ snew(enerhist->dht->dh, enerhist->dht->nndh);
+ snew(enerhist->dht->ndh, enerhist->dht->nndh);
+ for(j=0;j<enerhist->dht->nndh;j++)
+ {
+ enerhist->dht->ndh[j] = 0;
+ enerhist->dht->dh[j] = NULL;
+ }
+ }
+ break;
+ case eenhENERGY_DELTA_H_LIST:
+ for(j=0;j<enerhist->dht->nndh;j++)
+ {
+ ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
+ }
+ break;
+ case eenhENERGY_DELTA_H_STARTTIME:
+ ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
+ case eenhENERGY_DELTA_H_STARTLAMBDA:
+ ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
+ default:
+ gmx_fatal(FARGS,"Unknown energy history entry %d\n"
+ "You are probably reading a new checkpoint file with old code",i);
+ }
+ }
+ }
+
+ if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
+ {
+ /* Assume we have an old file format and copy sum to sum_sim */
+ srenew(enerhist->ener_sum_sim,enerhist->nener);
+ for(i=0; i<enerhist->nener; i++)
+ {
+ enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
+ }
+ fflags |= (1<<eenhENERGY_SUM_SIM);
+ }
+
+ if ( (fflags & (1<<eenhENERGY_NSUM)) &&
+ !(fflags & (1<<eenhENERGY_NSTEPS)))
+ {
+ /* Assume we have an old file format and copy nsum to nsteps */
+ enerhist->nsteps = enerhist->nsum;
+ fflags |= (1<<eenhENERGY_NSTEPS);
+ }
+ if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
+ !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
+ {
+ /* Assume we have an old file format and copy nsum to nsteps */
+ enerhist->nsteps_sim = enerhist->nsum_sim;
+ fflags |= (1<<eenhENERGY_NSTEPS_SIM);
+ }
+
+ return ret;
+}
+
+/* Read/write the free-energy (expanded ensemble) history entries
+ * selected by fflags; all arrays are sized by dfhist->nlambda.
+ * Returns 0 on success, or the first non-zero do_cpte_* error code.
+ */
+static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
+{
+ int i,nlambda;
+ int ret;
+
+ nlambda = dfhist->nlambda;
+ ret = 0;
+
+ for(i=0; (i<edfhNR && ret == 0); i++)
+ {
+ if (fflags & (1<<i))
+ {
+ switch (i)
+ {
+ case edfhBEQUIL: ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
+ case edfhNATLAMBDA: ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
+ case edfhWLHISTO: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
+ case edfhWLDELTA: ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
+ case edfhSUMWEIGHTS: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
+ case edfhSUMDG: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
+ case edfhSUMMINVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
+ case edfhSUMVAR: ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
+ case edfhACCUMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
+ case edfhACCUMM: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
+ case edfhACCUMP2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
+ case edfhACCUMM2: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
+ case edfhTIJ: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
+ case edfhTIJEMP: ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
+
+ default:
+ gmx_fatal(FARGS,"Unknown df history entry %d\n"
+ "You are probably reading a new checkpoint file with old code",i);
+ }
+ }
+ }
+
+ return ret;
+}
+
+/* Read/write the list of open output files (name, offset, checksum) so
+ * that appending can later verify and truncate them. 64-bit offsets are
+ * split into two 32-bit XDR ints for portability.
+ * Returns 0 on success, -1 on read/write failure.
+ *
+ * Review fix: strncpy does not NUL-terminate when the source is at
+ * least CPTSTRLEN-1 chars long; an explicit terminator is now added.
+ */
+static int do_cpt_files(XDR *xd, gmx_bool bRead,
+ gmx_file_position_t **p_outputfiles, int *nfiles,
+ FILE *list, int file_version)
+{
+ int i,j;
+ gmx_off_t offset;
+ gmx_off_t mask = 0xFFFFFFFFL;
+ int offset_high,offset_low;
+ char *buf;
+ gmx_file_position_t *outputfiles;
+
+ if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
+ {
+ return -1;
+ }
+
+ if(bRead)
+ {
+ snew(*p_outputfiles,*nfiles);
+ }
+
+ outputfiles = *p_outputfiles;
+
+ for(i=0;i<*nfiles;i++)
+ {
+ /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
+ if(bRead)
+ {
+ do_cpt_string_err(xd,bRead,"output filename",&buf,list);
+ strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
+ /* strncpy does not terminate when strlen(buf) >= CPTSTRLEN-1 */
+ outputfiles[i].filename[CPTSTRLEN-1] = '\0';
+ if(list==NULL)
+ {
+ sfree(buf);
+ }
+
+ if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
+ {
+ return -1;
+ }
+ if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
+ {
+ return -1;
+ }
+#if (SIZEOF_GMX_OFF_T > 4)
+ outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
+#else
+ outputfiles[i].offset = offset_low;
+#endif
+ }
+ else
+ {
+ buf = outputfiles[i].filename;
+ do_cpt_string_err(xd,bRead,"output filename",&buf,list);
+ /* writing */
+ offset = outputfiles[i].offset;
+ if (offset == -1)
+ {
+ offset_low = -1;
+ offset_high = -1;
+ }
+ else
+ {
+#if (SIZEOF_GMX_OFF_T > 4)
+ offset_low = (int) (offset & mask);
+ offset_high = (int) ((offset >> 32) & mask);
+#else
+ offset_low = offset;
+ offset_high = 0;
+#endif
+ }
+ if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
+ {
+ return -1;
+ }
+ if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
+ {
+ return -1;
+ }
+ }
+ if (file_version >= 8)
+ {
+ if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
+ list) != 0)
+ {
+ return -1;
+ }
+ if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
+ {
+ return -1;
+ }
+ }
+ else
+ {
+ outputfiles[i].chksum_size = -1;
+ }
+ }
+ return 0;
+}
+
+
+/* Write a complete checkpoint file. The file is first written under a
+ * temporary step-suffixed name, fsynced together with all open output
+ * files, and only then renamed over fn (with the previous checkpoint
+ * kept as *_prev.cpt), so a crash never leaves a truncated state.cpt.
+ * NOTE(review): locals flag_64bit and i appear unused here.
+ */
+void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
+ FILE *fplog,t_commrec *cr,
+ int eIntegrator,int simulation_part,
+ gmx_bool bExpanded, int elamstats,
+ gmx_large_int_t step,double t,t_state *state)
+{
+ t_fileio *fp;
+ int file_version;
+ char *version;
+ char *btime;
+ char *buser;
+ char *bhost;
+ int double_prec;
+ char *fprog;
+ char *fntemp; /* the temporary checkpoint file name */
+ time_t now;
+ char timebuf[STRLEN];
+ int nppnodes,npmenodes,flag_64bit;
+ char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
+ gmx_file_position_t *outputfiles;
+ int noutputfiles;
+ char *ftime;
+ int flags_eks,flags_enh,flags_dfh,i;
+ t_fileio *ret;
+
+ /* Determine the PP/PME node counts to record in the header */
+ if (PAR(cr))
+ {
+ if (DOMAINDECOMP(cr))
+ {
+ nppnodes = cr->dd->nnodes;
+ npmenodes = cr->npmenodes;
+ }
+ else
+ {
+ nppnodes = cr->nnodes;
+ npmenodes = 0;
+ }
+ }
+ else
+ {
+ nppnodes = 1;
+ npmenodes = 0;
+ }
+
+ /* make the new temporary filename: insert "_step<N>" before the
+ * file extension of fn */
+ snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
+ strcpy(fntemp,fn);
+ fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
+ sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
+ strcat(fntemp,suffix);
+ strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
+
+ time(&now);
+ gmx_ctime_r(&now,timebuf,STRLEN);
+
+ if (fplog)
+ {
+ fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
+ gmx_step_str(step,buf),timebuf);
+ }
+
+ /* Get offsets for open files */
+ gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
+
+ fp = gmx_fio_open(fntemp,"w");
+
+ /* Select which kinetic-energy entries to store */
+ if (state->ekinstate.bUpToDate)
+ {
+ flags_eks =
+ ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
+ (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
+ (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
+ }
+ else
+ {
+ flags_eks = 0;
+ }
+
+ /* Select which energy-history entries to store */
+ flags_enh = 0;
+ if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
+ {
+ flags_enh |= (1<<eenhENERGY_N);
+ if (state->enerhist.nsum > 0)
+ {
+ flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
+ (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
+ }
+ if (state->enerhist.nsum_sim > 0)
+ {
+ flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
+ (1<<eenhENERGY_NSUM_SIM));
+ }
+ if (state->enerhist.dht)
+ {
+ flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
+ (1<< eenhENERGY_DELTA_H_LIST) |
+ (1<< eenhENERGY_DELTA_H_STARTTIME) |
+ (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
+ }
+ }
+
+ /* Select which free-energy-history entries to store */
+ if (bExpanded)
+ {
+ flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) | (1<<edfhSUMDG) |
+ (1<<edfhTIJ) | (1<<edfhTIJEMP));
+ if (EWL(elamstats))
+ {
+ flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
+ }
+ if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
+ {
+ flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
+ | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
+ }
+ } else {
+ flags_dfh = 0;
+ }
+
+ /* We can check many more things now (CPU, acceleration, etc), but
+ * it is highly unlikely to have two separate builds with exactly
+ * the same version, user, time, and build host!
+ */
+
+ version = gmx_strdup(VERSION);
+ btime = gmx_strdup(BUILD_TIME);
+ buser = gmx_strdup(BUILD_USER);
+ bhost = gmx_strdup(BUILD_HOST);
+
+ double_prec = GMX_CPT_BUILD_DP;
+ fprog = gmx_strdup(Program());
+
+ ftime = &(timebuf[0]);
+
+ do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
+ &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
+ &eIntegrator,&simulation_part,&step,&t,&nppnodes,
+ DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
+ &state->natoms,&state->ngtc,&state->nnhpres,
+ &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
+ NULL);
+
+ sfree(version);
+ sfree(btime);
+ sfree(buser);
+ sfree(bhost);
+ sfree(fprog);
+
+ if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0) ||
+ (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
+ (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0) ||
+ (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0) ||
+ (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
+ file_version) < 0))
+ {
+ gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
+ }
+
+ do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
+
+ /* we really, REALLY, want to make sure to physically write the checkpoint,
+ and all the files it depends on, out to disk. Because we've
+ opened the checkpoint with gmx_fio_open(), it's in our list
+ of open files. */
+ ret=gmx_fio_all_output_fsync();
+
+ if (ret)
+ {
+ char buf[STRLEN];
+ sprintf(buf,
+ "Cannot fsync '%s'; maybe you are out of disk space?",
+ gmx_fio_getname(ret));
+
+ if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
+ {
+ gmx_file(buf);
+ }
+ else
+ {
+ gmx_warning(buf);
+ }
+ }
+
+ if( gmx_fio_close(fp) != 0)
+ {
+ gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
+ }
+
+ /* we don't move the checkpoint if the user specified they didn't want it,
+ or if the fsyncs failed */
+ if (!bNumberAndKeep && !ret)
+ {
+ if (gmx_fexist(fn))
+ {
+ /* Rename the previous checkpoint file */
+ strcpy(buf,fn);
+ buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
+ strcat(buf,"_prev");
+ strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
+#ifndef GMX_FAHCORE
+ /* we copy here so that if something goes wrong between now and
+ * the rename below, there's always a state.cpt.
+ * If renames are atomic (such as in POSIX systems),
+ * this copying should be unneccesary.
+ */
+ gmx_file_copy(fn, buf, FALSE);
+ /* We don't really care if this fails:
+ * there's already a new checkpoint.
+ */
+#else
+ gmx_file_rename(fn, buf);
+#endif
+ }
+ if (gmx_file_rename(fntemp, fn) != 0)
+ {
+ gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
+ }
+ }
+
+ sfree(outputfiles);
+ sfree(fntemp);
+
+#ifdef GMX_FAHCORE
+ /*code for alternate checkpointing scheme. moved from top of loop over
+ steps */
+ fcRequestCheckPoint();
+ if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
+ gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
+ }
+#endif /* end GMX_FAHCORE block */
+}
+
+/* Print a table to fplog showing, for every state entry, whether it is
+ * present in the running simulation (sflags) and/or in the checkpoint
+ * file (fflags). Used to explain which entries cannot be restored. */
+static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
+{
+ int i;
+
+ fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
+ fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
+ fprintf(fplog," %24s %11s %11s\n","","simulation","checkpoint");
+ for(i=0; i<estNR; i++)
+ {
+ if ((sflags & (1<<i)) || (fflags & (1<<i)))
+ {
+ fprintf(fplog," %24s %11s %11s\n",
+ est_names[i],
+ (sflags & (1<<i)) ? " present " : "not present",
+ (fflags & (1<<i)) ? " present " : "not present");
+ }
+ }
+}
+
+/* Report a mismatch between the program value p and the checkpoint file
+ * value f of an int setting; sets *mm on mismatch. Writes to fplog, or
+ * to stderr when no log file is open yet. */
+static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
+{
+ FILE *fp = fplog ? fplog : stderr;
+
+ if (p != f)
+ {
+ fprintf(fp," %s mismatch,\n",type);
+ fprintf(fp," current program: %d\n",p);
+ fprintf(fp," checkpoint file: %d\n",f);
+ fprintf(fp,"\n");
+ *mm = TRUE;
+ }
+}
+
+/* Report a mismatch between the program value p and the checkpoint file
+ * value f of a string setting; sets *mm on mismatch. Writes to fplog,
+ * or to stderr when no log file is open yet. */
+static void check_string(FILE *fplog,const char *type,const char *p,
+ const char *f,gmx_bool *mm)
+{
+ FILE *fp = fplog ? fplog : stderr;
+
+ if (strcmp(p,f) != 0)
+ {
+ fprintf(fp," %s mismatch,\n",type);
+ fprintf(fp," current program: %s\n",p);
+ fprintf(fp," checkpoint file: %s\n",f);
+ fprintf(fp,"\n");
+ *mm = TRUE;
+ }
+}
+
+/* Compare the build identification and parallel setup of the running
+ * binary with the values read from the checkpoint file, and warn (to
+ * stderr and the log) if anything differs. Differences are allowed --
+ * continuation stays exact -- but results may not be binary identical. */
+static void check_match(FILE *fplog,
+ char *version,
+ char *btime,char *buser,char *bhost,int double_prec,
+ char *fprog,
+ t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
+ ivec dd_nc,ivec dd_nc_f)
+{
+ int npp;
+ gmx_bool mm;
+
+ mm = FALSE;
+
+ check_string(fplog,"Version" ,VERSION ,version,&mm);
+ check_string(fplog,"Build time" ,BUILD_TIME ,btime ,&mm);
+ check_string(fplog,"Build user" ,BUILD_USER ,buser ,&mm);
+ check_string(fplog,"Build host" ,BUILD_HOST ,bhost ,&mm);
+ check_int (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
+ check_string(fplog,"Program name" ,Program() ,fprog ,&mm);
+
+ check_int (fplog,"#nodes" ,cr->nnodes ,npp_f+npme_f ,&mm);
+ if (bPartDecomp)
+ {
+ /* Particle decomposition has no DD grid; compare against 1x1x1 */
+ dd_nc[XX] = 1;
+ dd_nc[YY] = 1;
+ dd_nc[ZZ] = 1;
+ }
+ if (cr->nnodes > 1)
+ {
+ check_int (fplog,"#PME-nodes" ,cr->npmenodes,npme_f ,&mm);
+
+ npp = cr->nnodes;
+ if (cr->npmenodes >= 0)
+ {
+ npp -= cr->npmenodes;
+ }
+ /* Only compare the DD grid when the PP node count matches */
+ if (npp == npp_f)
+ {
+ check_int (fplog,"#DD-cells[x]",dd_nc[XX] ,dd_nc_f[XX],&mm);
+ check_int (fplog,"#DD-cells[y]",dd_nc[YY] ,dd_nc_f[YY],&mm);
+ check_int (fplog,"#DD-cells[z]",dd_nc[ZZ] ,dd_nc_f[ZZ],&mm);
+ }
+ }
+
+ if (mm)
+ {
+ fprintf(stderr,
+ "Gromacs binary or parallel settings not identical to previous run.\n"
+ "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
+ fplog ? ",\n see the log file for details" : "");
+
+ if (fplog)
+ {
+ fprintf(fplog,
+ "Gromacs binary or parallel settings not identical to previous run.\n"
+ "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
+ }
+ }
+}
+
+static void read_checkpoint(const char *fn,FILE **pfplog,
+ t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
+ int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
+ t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
+ int *simulation_part,
+ gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
+{
+ t_fileio *fp;
+ int i,j,rc;
+ int file_version;
+ char *version,*btime,*buser,*bhost,*fprog,*ftime;
+ int double_prec;
+ char filename[STRLEN],buf[STEPSTRSIZE];
+ int nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
+ ivec dd_nc_f;
+ int natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
+ int d;
+ int ret;
+ gmx_file_position_t *outputfiles;
+ int nfiles;
+ t_fileio *chksum_file;
+ FILE* fplog = *pfplog;
+ unsigned char digest[16];
+#ifndef GMX_NATIVE_WINDOWS
+ struct flock fl; /* don't initialize here: the struct order is OS
+ dependent! */
+#endif
+
+ const char *int_warn=
+ "WARNING: The checkpoint file was generated with integrator %s,\n"
+ " while the simulation uses integrator %s\n\n";
+ const char *sd_note=
+ "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
+ " while the simulation uses %d SD or BD nodes,\n"
+ " continuation will be exact, except for the random state\n\n";
+
+#ifndef GMX_NATIVE_WINDOWS
+ fl.l_type=F_WRLCK;
+ fl.l_whence=SEEK_SET;
+ fl.l_start=0;
+ fl.l_len=0;
+ fl.l_pid=0;
+#endif
+
+ if (PARTDECOMP(cr))
+ {
+ gmx_fatal(FARGS,
+ "read_checkpoint not (yet) supported with particle decomposition");
+ }
+
+ fp = gmx_fio_open(fn,"r");
+ do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
+ &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
+ &eIntegrator_f,simulation_part,step,t,
+ &nppnodes_f,dd_nc_f,&npmenodes_f,
+ &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
+ &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
+
+ if (bAppendOutputFiles &&
+ file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
+ {
+ gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
+ }
+
+ if (cr == NULL || MASTER(cr))
+ {
+ fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
+ fn,ftime);
+ }
+
+ /* This will not be written if we do appending, since fplog is still NULL then */
+ if (fplog)
+ {
+ fprintf(fplog,"\n");
+ fprintf(fplog,"Reading checkpoint file %s\n",fn);
+ fprintf(fplog," file generated by: %s\n",fprog);
+ fprintf(fplog," file generated at: %s\n",ftime);
+ fprintf(fplog," GROMACS build time: %s\n",btime);
+ fprintf(fplog," GROMACS build user: %s\n",buser);
+ fprintf(fplog," GROMACS build host: %s\n",bhost);
+ fprintf(fplog," GROMACS double prec.: %d\n",double_prec);
+ fprintf(fplog," simulation part #: %d\n",*simulation_part);
+ fprintf(fplog," step: %s\n",gmx_step_str(*step,buf));
+ fprintf(fplog," time: %f\n",*t);
+ fprintf(fplog,"\n");
+ }
+
+ if (natoms != state->natoms)
+ {
+ gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
+ }
+ if (ngtc != state->ngtc)
+ {
+ gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
+ }
+ if (nnhpres != state->nnhpres)
+ {
+ gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
+ }
+
+ if (nlambda != state->dfhist.nlambda)
+ {
+ gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
+ }
+
+ init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
+ /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
+
+ if (eIntegrator_f != eIntegrator)
+ {
+ if (MASTER(cr))
+ {
+ fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
+ }
+ if(bAppendOutputFiles)
+ {
+ gmx_fatal(FARGS,
+ "Output file appending requested, but input/checkpoint integrators do not match.\n"
+ "Stopping the run to prevent you from ruining all your data...\n"
+ "If you _really_ know what you are doing, try with the -noappend option.\n");
+ }
+ if (fplog)
+ {
+ fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
+ }
+ }
+
+ if (!PAR(cr))
+ {
+ nppnodes = 1;
+ cr->npmenodes = 0;
+ }
+ else if (bPartDecomp)
+ {
+ nppnodes = cr->nnodes;
+ cr->npmenodes = 0;
+ }
+ else if (cr->nnodes == nppnodes_f + npmenodes_f)
+ {
+ if (cr->npmenodes < 0)
+ {
+ cr->npmenodes = npmenodes_f;
+ }
+ nppnodes = cr->nnodes - cr->npmenodes;
+ if (nppnodes == nppnodes_f)
+ {
+ for(d=0; d<DIM; d++)
+ {
+ if (dd_nc[d] == 0)
+ {
+ dd_nc[d] = dd_nc_f[d];
+ }
+ }
+ }
+ }
+ else
+ {
+ /* The number of PP nodes has not been set yet */
+ nppnodes = -1;
+ }
+
+ if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
+ {
+ /* Correct the RNG state size for the number of PP nodes.
+ * Such assignments should all be moved to one central function.
+ */
+ state->nrng = nppnodes*gmx_rng_n();
+ state->nrngi = nppnodes;
+ }
+
+ *bReadRNG = TRUE;
+ if (fflags != state->flags)
+ {
+
+ if (MASTER(cr))
+ {
+ if(bAppendOutputFiles)
+ {
+ gmx_fatal(FARGS,
+ "Output file appending requested, but input and checkpoint states are not identical.\n"
+ "Stopping the run to prevent you from ruining all your data...\n"
+ "You can try with the -noappend option, and get more info in the log file.\n");
+ }
+
+ if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
+ {
+ gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
+ }
+ else
+ {
+ fprintf(stderr,
+ "WARNING: The checkpoint state entries do not match the simulation,\n"
+ " see the log file for details\n\n");
+ }
+ }
+
+ if(fplog)
+ {
+ print_flag_mismatch(fplog,state->flags,fflags);
+ }
+ }
+ else
+ {
+ if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
+ nppnodes != nppnodes_f)
+ {
+ *bReadRNG = FALSE;
+ if (MASTER(cr))
+ {
+ fprintf(stderr,sd_note,nppnodes_f,nppnodes);
+ }
+ if (fplog)
+ {
+ fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
+ }
+ }
+ if (MASTER(cr))
+ {
+ check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
+ cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
+ }
+ }
+ ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
+ *init_fep_state = state->fep_state; /* there should be a better way to do this than setting it here.
+ Investigate for 5.0. */
+ if (ret)
+ {
+ cp_error();
+ }
+ ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
+ flags_eks,&state->ekinstate,NULL);
+ if (ret)
+ {
+ cp_error();
+ }
+ *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
+ ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
+
+ ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
+ flags_enh,&state->enerhist,NULL);
+ if (ret)
+ {
+ cp_error();
+ }
+
+ if (file_version < 6)
+ {
+ const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
+
+ fprintf(stderr,"\nWARNING: %s\n\n",warn);
+ if (fplog)
+ {
+ fprintf(fplog,"\nWARNING: %s\n\n",warn);
+ }
+ state->enerhist.nsum = *step;
+ state->enerhist.nsum_sim = *step;
+ }
+
+ ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
+ flags_dfh,&state->dfhist,NULL);
+ if (ret)
+ {
+ cp_error();
+ }
+
+ ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
+ if (ret)
+ {
+ cp_error();
+ }
+
+ ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
+ if (ret)
+ {
+ cp_error();
+ }
+ if( gmx_fio_close(fp) != 0)
+ {
+ gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
+ }
+
+ sfree(fprog);
+ sfree(ftime);
+ sfree(btime);
+ sfree(buser);
+ sfree(bhost);
+
+ /* If the user wants to append to output files,
+ * we use the file pointer positions of the output files stored
+ * in the checkpoint file and truncate the files such that any frames
+ * written after the checkpoint time are removed.
+ * All files are md5sum checked such that we can be sure that
+ * we do not truncate other (maybe important) files.
+ */
+ if (bAppendOutputFiles)
+ {
+ if (fn2ftp(outputfiles[0].filename)!=efLOG)
+ {
+ /* make sure first file is log file so that it is OK to use it for
+ * locking
+ */
+ gmx_fatal(FARGS,"The first output file should always be the log "
+ "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
+ }
+ for(i=0;i<nfiles;i++)
+ {
+ if (outputfiles[i].offset < 0)
+ {
+ gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
+ "is larger than 2 GB, but mdrun did not support large file"
+ " offsets. Can not append. Run mdrun with -noappend",
+ outputfiles[i].filename);
+ }
+#ifdef GMX_FAHCORE
+ chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
+
+#else
+ chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
+
+ /* lock log file */
+ if (i==0)
+ {
+ /* Note that there are systems where the lock operation
+ * will succeed, but a second process can also lock the file.
+ * We should probably try to detect this.
+ */
+#ifndef GMX_NATIVE_WINDOWS
+ if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
+ ==-1)
+#else
+ if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
+#endif
+ {
+ if (errno == ENOSYS)
+ {
+ if (!bForceAppend)
+ {
+ gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
+ }
+ else
+ {
+ fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
+ if (fplog)
+ {
+ fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
+ }
+ }
+ }
+ else if (errno == EACCES || errno == EAGAIN)
+ {
+ gmx_fatal(FARGS,"Failed to lock: %s. Already running "
+ "simulation?", outputfiles[i].filename);
+ }
+ else
+ {
+ gmx_fatal(FARGS,"Failed to lock: %s. %s.",
+ outputfiles[i].filename, strerror(errno));
+ }
+ }
+ }
+
+ /* compute md5 chksum */
+ if (outputfiles[i].chksum_size != -1)
+ {
+ if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
+ digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
+ {
+ gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
+ outputfiles[i].chksum_size,
+ outputfiles[i].filename);
+ }
+ }
+ if (i==0) /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
+ {
+ if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
+ {
+ gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
+ }
+ }
+#endif
+
+ if (i==0) /*open log file here - so that lock is never lifted
+ after chksum is calculated */
+ {
+ *pfplog = gmx_fio_getfp(chksum_file);
+ }
+ else
+ {
+ gmx_fio_close(chksum_file);
+ }
+#ifndef GMX_FAHCORE
+ /* compare md5 chksum */
+ if (outputfiles[i].chksum_size != -1 &&
+ memcmp(digest,outputfiles[i].chksum,16)!=0)
+ {
+ if (debug)
+ {
+ fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
+ for (j=0; j<16; j++)
+ {
+ fprintf(debug,"%02x",digest[j]);
+ }
+ fprintf(debug,"\n");
+ }
+ gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
+ outputfiles[i].filename);
+ }
+#endif
+
+
+ if (i!=0) /*log file is already seeked to correct position */
+ {
+#ifdef GMX_NATIVE_WINDOWS
+ rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
+#else
+ rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
+#endif
+ if(rc!=0)
+ {
+ gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
+ }
+ }
+ }
+ }
+
+ sfree(outputfiles);
+}
+
+
+/* Read the checkpoint file fn on the simulation master and broadcast the
+ * restored run setup (PME node count, DD grid, step, RNG/Ekin read flags)
+ * to all nodes. On return ir is set up for continuation: bContinuation is
+ * TRUE, nsteps is corrected for the steps already done, init_step is the
+ * checkpointed step and simulation_part is incremented.
+ */
+void load_checkpoint(const char *fn,FILE **fplog,
+ t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
+ t_inputrec *ir,t_state *state,
+ gmx_bool *bReadRNG,gmx_bool *bReadEkin,
+ gmx_bool bAppend,gmx_bool bForceAppend)
+{
+ gmx_large_int_t step;
+ double t;
+
+ if (SIMMASTER(cr)) {
+ /* Read the state from the checkpoint file */
+ read_checkpoint(fn,fplog,
+ cr,bPartDecomp,dd_nc,
+ ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
+ &ir->simulation_part,bAppend,bForceAppend);
+ }
+ if (PAR(cr)) {
+ /* Only the master read the file; share the results with all nodes */
+ gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
+ gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
+ gmx_bcast(sizeof(step),&step,cr);
+ gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
+ gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
+ }
+ ir->bContinuation = TRUE;
+ if (ir->nsteps >= 0)
+ {
+ /* Shift the remaining step count so the run still ends at the
+ * originally requested total step */
+ ir->nsteps += ir->init_step - step;
+ }
+ ir->init_step = step;
+ ir->simulation_part += 1;
+}
+
+/* Read all sections of the already opened checkpoint file fp: header,
+ * state (optionally including the RNG state, see bReadRNG), kinetic
+ * energy state, energy history, free-energy (dF) history, the table of
+ * output files and the footer. When outputfiles is non-NULL the output
+ * file table and its length (*nfiles) are returned to the caller, who
+ * must free it; otherwise the table is read into a local buffer that is
+ * freed here.
+ */
+static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
+ gmx_large_int_t *step,double *t,t_state *state,
+ gmx_bool bReadRNG,
+ int *nfiles,gmx_file_position_t **outputfiles)
+{
+ int file_version;
+ char *version,*btime,*buser,*bhost,*fprog,*ftime;
+ int double_prec;
+ int eIntegrator;
+ int nppnodes,npme;
+ ivec dd_nc;
+ int flags_eks,flags_enh,flags_dfh;
+ int nfiles_loc;
+ gmx_file_position_t *files_loc=NULL;
+ int ret;
+
+ do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
+ &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
+ &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
+ &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
+ &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
+ ret =
+ do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
+ if (ret)
+ {
+ cp_error();
+ }
+ ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
+ flags_eks,&state->ekinstate,NULL);
+ if (ret)
+ {
+ cp_error();
+ }
+ ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
+ flags_enh,&state->enerhist,NULL);
+ if (ret)
+ {
+ cp_error();
+ }
+ ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
+ flags_dfh,&state->dfhist,NULL);
+ if (ret)
+ {
+ cp_error();
+ }
+
+ /* Read the output file table; use the caller's storage when provided,
+ * otherwise read into a local list that is freed below */
+ ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
+ outputfiles != NULL ? outputfiles : &files_loc,
+ outputfiles != NULL ? nfiles : &nfiles_loc,
+ NULL,file_version);
+ if (files_loc != NULL)
+ {
+ sfree(files_loc);
+ }
+
+ if (ret)
+ {
+ cp_error();
+ }
+
+ ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
+ if (ret)
+ {
+ cp_error();
+ }
+
+ /* Strings allocated by do_cpt_header */
+ sfree(fprog);
+ sfree(ftime);
+ sfree(btime);
+ sfree(buser);
+ sfree(bhost);
+}
+
+/* Convenience wrapper: open checkpoint file fn, read simulation part,
+ * step, time and state (without the RNG state), then close the file. */
+void
+read_checkpoint_state(const char *fn,int *simulation_part,
+ gmx_large_int_t *step,double *t,t_state *state)
+{
+ t_fileio *fp;
+
+ fp = gmx_fio_open(fn,"r");
+ read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
+ if( gmx_fio_close(fp) != 0)
+ {
+ gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
+ }
+}
+
+/* Convert the contents of the open checkpoint file fp into a trajectory
+ * frame fr. Coordinates, velocities and box are filled in when present
+ * in the checkpoint state; the x/v arrays are handed over to the frame
+ * (the state pointers are NULLed so done_state does not free them).
+ * Forces are never available from a checkpoint. */
+void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
+{
+ t_state state;
+ int simulation_part;
+ gmx_large_int_t step;
+ double t;
+
+ init_state(&state,0,0,0,0,0);
+
+ read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
+
+ fr->natoms = state.natoms;
+ fr->bTitle = FALSE;
+ fr->bStep = TRUE;
+ fr->step = gmx_large_int_to_int(step,
+ "conversion of checkpoint to trajectory");
+ fr->bTime = TRUE;
+ fr->time = t;
+ fr->bLambda = TRUE;
+ fr->lambda = state.lambda[efptFEP];
+ fr->fep_state = state.fep_state;
+ fr->bAtoms = FALSE;
+ fr->bX = (state.flags & (1<<estX));
+ if (fr->bX)
+ {
+ /* Transfer ownership of the coordinate array to the frame */
+ fr->x = state.x;
+ state.x = NULL;
+ }
+ fr->bV = (state.flags & (1<<estV));
+ if (fr->bV)
+ {
+ /* Transfer ownership of the velocity array to the frame */
+ fr->v = state.v;
+ state.v = NULL;
+ }
+ fr->bF = FALSE;
+ fr->bBox = (state.flags & (1<<estBOX));
+ if (fr->bBox)
+ {
+ copy_mat(state.box,fr->box);
+ }
+ done_state(&state);
+}
+
+/* Print the contents of checkpoint file fn in readable form to out.
+ * Sections after the energy history are only read while no read error
+ * has occurred; on error a warning is printed rather than aborting. */
+void list_checkpoint(const char *fn,FILE *out)
+{
+ t_fileio *fp;
+ int file_version;
+ char *version,*btime,*buser,*bhost,*fprog,*ftime;
+ int double_prec;
+ int eIntegrator,simulation_part,nppnodes,npme;
+ gmx_large_int_t step;
+ double t;
+ ivec dd_nc;
+ t_state state;
+ int flags_eks,flags_enh,flags_dfh;
+ int indent;
+ int i,j;
+ int ret;
+ gmx_file_position_t *outputfiles;
+ int nfiles;
+
+ init_state(&state,-1,-1,-1,-1,0);
+
+ fp = gmx_fio_open(fn,"r");
+ do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
+ &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
+ &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
+ &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
+ &(state.dfhist.nlambda),&state.flags,
+ &flags_eks,&flags_enh,&flags_dfh,out);
+ ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
+ if (ret)
+ {
+ cp_error();
+ }
+ ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
+ flags_eks,&state.ekinstate,out);
+ if (ret)
+ {
+ cp_error();
+ }
+ ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
+ flags_enh,&state.enerhist,out);
+
+ if (ret == 0)
+ {
+ init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
+ ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
+ flags_dfh,&state.dfhist,out);
+ }
+ if (ret == 0)
+ {
+ do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
+ }
+
+ if (ret == 0)
+ {
+ ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
+ }
+
+ /* A read error while listing is only a warning, not fatal */
+ if (ret)
+ {
+ cp_warning(out);
+ }
+ if( gmx_fio_close(fp) != 0)
+ {
+ gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
+ }
+
+ done_state(&state);
+}
+
+
+/* Return TRUE when fnm_cp names one of mdrun's output files (as listed
+ * in fnm) AND that file currently exists on disk. */
+static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
+{
+ int i;
+
+ /* Check if the output file name stored in the checkpoint file
+ * is one of the output file names of mdrun.
+ */
+ i = 0;
+ while (i < nfile &&
+ !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
+ {
+ i++;
+ }
+
+ return (i < nfile && gmx_fexist(fnm_cp));
+}
+
+/* Determine from checkpoint file filename whether a run can be continued
+ * and whether the existing output files can be appended to. Returns
+ * whether appending will be done; sets *simulation_part (0 when no
+ * checkpoint is present/readable), the checkpointed step in *cpt_step
+ * (when non-NULL) and *bAddPart (whether a ".part" suffix must be added
+ * to output file names). Fatal error when appending is requested but
+ * only some of the checkpointed output files are present.
+ * This routine cannot print tons of data, since it is called before the
+ * log file is opened. */
+gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
+ gmx_large_int_t *cpt_step,t_commrec *cr,
+ gmx_bool bAppendReq,
+ int nfile,const t_filenm fnm[],
+ const char *part_suffix,gmx_bool *bAddPart)
+{
+ t_fileio *fp;
+ gmx_large_int_t step=0;
+ double t;
+ t_state state;
+ int nfiles;
+ gmx_file_position_t *outputfiles;
+ int nexist,f;
+ gmx_bool bAppend;
+ char *fn,suf_up[STRLEN];
+
+ bAppend = FALSE;
+
+ if (SIMMASTER(cr)) {
+ if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
+ {
+ /* No (readable) checkpoint: this is a fresh run */
+ *simulation_part = 0;
+ }
+ else
+ {
+ init_state(&state,0,0,0,0,0);
+
+ read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
+ &nfiles,&outputfiles);
+ if( gmx_fio_close(fp) != 0)
+ {
+ gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
+ }
+ done_state(&state);
+
+ if (bAppendReq)
+ {
+ /* Count how many of the checkpointed output files exist */
+ nexist = 0;
+ for(f=0; f<nfiles; f++)
+ {
+ if (exist_output_file(outputfiles[f].filename,nfile,fnm))
+ {
+ nexist++;
+ }
+ }
+ if (nexist == nfiles)
+ {
+ bAppend = bAppendReq;
+ }
+ else if (nexist > 0)
+ {
+ fprintf(stderr,
+ "Output file appending has been requested,\n"
+ "but some output files listed in the checkpoint file %s\n"
+ "are not present or are named differently by the current program:\n",
+ filename);
+ fprintf(stderr,"output files present:");
+ for(f=0; f<nfiles; f++)
+ {
+ if (exist_output_file(outputfiles[f].filename,
+ nfile,fnm))
+ {
+ fprintf(stderr," %s",outputfiles[f].filename);
+ }
+ }
+ fprintf(stderr,"\n");
+ fprintf(stderr,"output files not present or named differently:");
+ for(f=0; f<nfiles; f++)
+ {
+ if (!exist_output_file(outputfiles[f].filename,
+ nfile,fnm))
+ {
+ fprintf(stderr," %s",outputfiles[f].filename);
+ }
+ }
+ fprintf(stderr,"\n");
+
+ gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
+ }
+ }
+
+ if (bAppend)
+ {
+ if (nfiles == 0)
+ {
+ gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
+ }
+ fn = outputfiles[0].filename;
+ /* NOTE(review): this compares the last 4 chars of fn (which
+ * include the dot, e.g. ".log") against ftp2ext(efLOG); it
+ * looks like this check can never trigger for a regular
+ * "*.log" name — verify the intended condition. */
+ if (strlen(fn) < 4 ||
+ gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
+ {
+ gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file")
;
+ }
+ /* Set bAddPart to whether the suffix string '.part' is present
+ * in the log file name.
+ */
+ strcpy(suf_up,part_suffix);
+ upstring(suf_up);
+ *bAddPart = (strstr(fn,part_suffix) != NULL ||
+ strstr(fn,suf_up) != NULL);
+ }
+
+ sfree(outputfiles);
+ }
+ }
+ if (PAR(cr))
+ {
+ gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
+
+ if (*simulation_part > 0 && bAppendReq)
+ {
+ gmx_bcast(sizeof(bAppend),&bAppend,cr);
+ gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
+ }
+ }
+ if (NULL != cpt_step)
+ {
+ *cpt_step = step;
+ }
+
+ return bAppend;
+}
--- /dev/null
--- /dev/null
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
++ *
++ * This source code is part of
++ *
++ * G R O M A C S
++ *
++ * GROningen MAchine for Chemical Simulations
++ *
++ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
++ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
++ * Copyright (c) 2001-2010, The GROMACS development team,
++ * check out http://www.gromacs.org for more information.
++
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * If you want to redistribute modifications, please consider that
++ * scientific software is very special. Version control is crucial -
++ * bugs must be traceable. We will be happy to consider code for
++ * inclusion in the official distribution, but derived work must not
++ * be called official GROMACS. Details are found in the README & COPYING
++ * files - if they are missing, get the official version at www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the papers on the package - you can find them in the top README file.
++ *
++ * For more info, check our website at http://www.gromacs.org
++ *
++ * And Hey:
++ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
++ */
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <assert.h>
++
++#ifdef HAVE_CONFIG_H
++#include <config.h>
++#endif
++
++#include "gmx_fatal.h"
++#include "typedefs.h"
++#include "macros.h"
++#include "network.h"
++#include "statutil.h"
++#include "gmx_omp.h"
++#include "gmx_omp_nthreads.h"
++#include "md_logging.h"
++
++/** Structure with the number of threads for each OpenMP multi-threaded
++ * algorithmic module in mdrun. */
++typedef struct
++{
++ int gnth; /**< Global num. of threads per PP or PP+PME process/tMPI thread. */
++ int gnth_pme; /**< Global num. of threads per PME only process/tMPI thread. */
++
++ int nth[emntNR]; /**< Number of threads for each module, indexed with module_nth_t */
++ gmx_bool initialized; /**< TRUE if the module has been initialized. */
++} omp_module_nthreads_t;
++
++/** Names of environment variables to set the per module number of threads.
++ *
++ * Indexed with the values of module_nth_t.
++ */
++static const char *modth_env_var[emntNR] =
++{
++ "GMX_DEFAULT_NUM_THREADS should never be set",
++ "GMX_DOMDEC_NUM_THREADS", "GMX_PAIRSEARCH_NUM_THREADS",
++ "GMX_NONBONDED_NUM_THREADS", "GMX_BONDED_NUM_THREADS",
++ "GMX_PME_NUM_THREADS", "GMX_UPDATE_NUM_THREADS",
++ "GMX_LINCS_NUM_THREADS", "GMX_SETTLE_NUM_THREADS"
++};
++
++/** Human-readable names of the modules, indexed with module_nth_t;
++ * used in warning/override messages. */
++static const char *mod_name[emntNR] =
++{
++ "default", "domain decomposition", "pair search", "non-bonded",
++ "bonded", "PME", "update", "LINCS", "SETTLE"
++};
++
++/** Number of threads for each algorithmic module.
++ *
++ * File-scope global variable that gets set once in gmx_omp_nthreads_init
++ * and queried via gmx_omp_nthreads_get.
++ *
++ * All fields are initialized to 0 which should result in errors if
++ * the init call is omitted.
++ */
++static omp_module_nthreads_t modth = { 0, 0, {0, 0, 0, 0, 0, 0, 0, 0}, FALSE};
++
++
++/** Determine, store in modth.nth[m] and return the number of threads
++ * for module \p m.
++ *
++ * \p m takes values from the module_nth_t enum and maps these to the
++ * corresponding value in modth_env_var.
++ *
++ * Each number of threads per module takes the default value unless the
++ * GMX_*_NUM_THREADS env var is set, in which case its value overrides
++ * the default.
++ *
++ * The "group" scheme supports OpenMP only in PME and in this case all but
++ * the PME nthread values default to 1.
++ */
++static int pick_module_nthreads(FILE *fplog, int m,
++ gmx_bool bSimMaster,
++ gmx_bool bFullOmpSupport,
++ gmx_bool bSepPME)
++{
++ char *env;
++ int nth;
++ char sbuf[STRLEN];
++ gmx_bool bOMP;
++
++#ifdef GMX_OPENMP
++ bOMP = TRUE;
++#else
++ bOMP = FALSE;
++#endif /* GMX_OPENMP */
++
++ /* The default should never be set through a GMX_*_NUM_THREADS env var
++ * as it's always equal with gnth. */
++ if (m == emntDefault)
++ {
++ return modth.nth[emntDefault];
++ }
++
++ /* check the environment variable */
++ if ((env = getenv(modth_env_var[m])) != NULL)
++ {
++ sscanf(env, "%d", &nth);
++
++ if (!bOMP)
++ {
++ gmx_warning("%s=%d is set, but %s is compiled without OpenMP!",
++ modth_env_var[m], nth, ShortProgram());
++ }
++
++ /* with the verlet codepath, when any GMX_*_NUM_THREADS env var is set,
++ * OMP_NUM_THREADS also has to be set */
++ if (bFullOmpSupport && getenv("OMP_NUM_THREADS") == NULL)
++ {
++ gmx_fatal(FARGS, "%s=%d is set, the default number of threads also "
++ "needs to be set with OMP_NUM_THREADS!",
++ modth_env_var[m], nth);
++ }
++
++ /* with the group scheme warn if any env var except PME is set */
++ if (!bFullOmpSupport)
++ {
++ if (m != emntPME)
++ {
++ gmx_warning("%s=%d is set, but OpenMP multithreading is not "
++ "supported in %s!",
++ modth_env_var[m], nth, mod_name[m]);
++ nth = 1;
++ }
++ }
++
++ /* only babble if we are really overriding with a different value */
++ if ((bSepPME && m == emntPME && nth != modth.gnth_pme) || (nth != modth.gnth))
++ {
++ sprintf(sbuf, "%s=%d set, overriding the default number of %s threads",
++ modth_env_var[m], nth, mod_name[m]);
++ if (bSimMaster)
++ {
++ fprintf(stderr, "\n%s\n", sbuf);
++ }
++ if (fplog)
++ {
++ fprintf(fplog, "%s\n", sbuf);
++ }
++ }
++ }
++ else
++ {
++ /* pick the global PME node nthreads if we are setting the number
++ * of threads in separate PME nodes */
++ nth = (bSepPME && m == emntPME) ? modth.gnth_pme : modth.gnth;
++ }
++
++ return modth.nth[m] = nth;
++}
++
++/* Read the number of OpenMP threads from the OMP_NUM_THREADS environment
++ * variable into *nthreads_omp. Fatal error when the value is invalid or
++ * conflicts with a value already requested on the command line. Leaves
++ * *nthreads_omp untouched when the variable is not set. */
++void gmx_omp_nthreads_read_env(int *nthreads_omp)
++{
++ char *env;
++
++ assert(nthreads_omp);
++
++ if ((env = getenv("OMP_NUM_THREADS")) != NULL)
++ {
++ int nt_omp;
++
++ sscanf(env,"%d",&nt_omp);
++ if (nt_omp <= 0)
++ {
++ gmx_fatal(FARGS,"OMP_NUM_THREADS is invalid: '%s'",env);
++ }
++
++ if (*nthreads_omp > 0 && nt_omp != *nthreads_omp)
++ {
++ gmx_fatal(FARGS,"OMP_NUM_THREADS (%d) and the number of threads requested on the command line (%d) have different values",nt_omp,*nthreads_omp);
++ }
++
++ /* Setting the number of OpenMP threads.
++ * NOTE: with tMPI this function is only called on the master node,
++ * but with MPI on all nodes which means lots of messages on stderr.
++ */
++ fprintf(stderr,"Getting the number of OpenMP threads from OMP_NUM_THREADS: %d\n",nt_omp);
++ *nthreads_omp = nt_omp;
++ }
++}
++
++/* Determine the global and per-module OpenMP thread counts and store
++ * them in the file-scope modth structure; also sets the OpenMP runtime
++ * thread count and warns when the available hardware threads are
++ * oversubscribed. With thread-MPI the detection runs on the master only
++ * and other threads wait on a barrier. Returns early when modth has
++ * already been initialized. */
++void gmx_omp_nthreads_init(FILE *fplog, t_commrec *cr,
++ int nthreads_hw_avail,
++ int omp_nthreads_req,
++ int omp_nthreads_pme_req,
++ gmx_bool bThisNodePMEOnly,
++ gmx_bool bFullOmpSupport)
++{
++ int nth, nth_pmeonly, gmx_maxth, nppn;
++ char *env;
++ gmx_bool bSepPME, bOMP;
++
++#ifdef GMX_OPENMP
++ bOMP = TRUE;
++#else
++ bOMP = FALSE;
++#endif /* GMX_OPENMP */
++
++ /* number of processes per node */
++ nppn = cr->nnodes_intra;
++
++ /* TRUE when this process does only PP or only PME work */
++ bSepPME = ( (cr->duty & DUTY_PP) && !(cr->duty & DUTY_PME)) ||
++ (!(cr->duty & DUTY_PP) && (cr->duty & DUTY_PME));
++
++#ifdef GMX_THREAD_MPI
++ /* modth is shared among tMPI threads, so for thread safety the
++ * detection is done on the master only. It is not thread-safe with
++ * multiple simulations, but that's anyway not supported by tMPI. */
++ if (SIMMASTER(cr))
++#endif
++ {
++ /* just return if the initialization has already been done */
++ if (modth.initialized)
++ {
++ return;
++ }
++
++ /* With full OpenMP support (verlet scheme) set the number of threads
++ * per process / default:
++ * - 1 if not compiled with OpenMP or
++ * - OMP_NUM_THREADS if the env. var is set, or
++ * - omp_nthreads_req = #of threads requested by the user on the mdrun
++ * command line, otherwise
++ * - take the max number of available threads and distribute them
++ * on the processes/tMPI threads.
++ * ~ The GMX_*_NUM_THREADS env var overrides the number of threads of
++ * the respective module and it has to be used in conjunction with
++ * OMP_NUM_THREADS.
++ *
++ * With the group scheme OpenMP multithreading is only supported in PME,
++ * for all other modules nthreads is set to 1.
++ * The number of PME threads is equal to:
++ * - 1 if not compiled with OpenMP or
++ * - GMX_PME_NUM_THREADS if defined, otherwise
++ * - OMP_NUM_THREADS if defined, otherwise
++ * - 1
++ */
++ nth = 1;
++ if ((env = getenv("OMP_NUM_THREADS")) != NULL)
++ {
++ if (!bOMP && (strncmp(env, "1", 1) != 0))
++ {
++ gmx_warning("OMP_NUM_THREADS is set, but %s was compiled without OpenMP support!",
++ ShortProgram());
++ }
++ else
++ {
++ nth = gmx_omp_get_max_threads();
++ }
++ }
++ else if (omp_nthreads_req > 0)
++ {
++ nth = omp_nthreads_req;
++ }
++ else if (bFullOmpSupport && bOMP)
++ {
++ /* max available threads per node */
++ nth = nthreads_hw_avail;
++
++ /* divide the threads among the MPI processes/tMPI threads */
++ if (nth >= nppn)
++ {
++ nth /= nppn;
++ }
++ else
++ {
++ nth = 1;
++ }
++ }
++
++ /* now we have the global values, set them:
++ * - 1 if not compiled with OpenMP and for the group scheme
++ * - nth for the verlet scheme when compiled with OpenMP
++ */
++ if (bFullOmpSupport && bOMP)
++ {
++ modth.gnth = nth;
++ }
++ else
++ {
++ modth.gnth = 1;
++ }
++
++ if (bSepPME)
++ {
++ if (omp_nthreads_pme_req > 0)
++ {
++ modth.gnth_pme = omp_nthreads_pme_req;
++ }
++ else
++ {
++ modth.gnth_pme = nth;
++ }
++ }
++ else
++ {
++ modth.gnth_pme = 0;
++ }
++
++ /* now set the per-module values */
++ modth.nth[emntDefault] = modth.gnth;
++ pick_module_nthreads(fplog, emntDomdec, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++ pick_module_nthreads(fplog, emntPairsearch, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++ pick_module_nthreads(fplog, emntNonbonded, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++ pick_module_nthreads(fplog, emntBonded, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++ pick_module_nthreads(fplog, emntPME, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++ pick_module_nthreads(fplog, emntUpdate, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++ pick_module_nthreads(fplog, emntLINCS, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++ pick_module_nthreads(fplog, emntSETTLE, SIMMASTER(cr), bFullOmpSupport, bSepPME);
++
++ /* set the number of threads globally */
++ if (bOMP)
++ {
++#ifndef GMX_THREAD_MPI
++ if (bThisNodePMEOnly)
++ {
++ gmx_omp_set_num_threads(modth.gnth_pme);
++ }
++ else
++#endif /* GMX_THREAD_MPI */
++ {
++ if (bFullOmpSupport)
++ {
++ gmx_omp_set_num_threads(nth);
++ }
++ else
++ {
++ gmx_omp_set_num_threads(1);
++ }
++ }
++ }
++
++ modth.initialized = TRUE;
++ }
++#ifdef GMX_THREAD_MPI
++ /* Non-master threads have to wait for the detection to be done. */
++ if (PAR(cr))
++ {
++ MPI_Barrier(cr->mpi_comm_mysim);
++ }
++#endif
++
++ /* inform the user about the settings */
++ if (SIMMASTER(cr) && bOMP)
++ {
++#ifdef GMX_THREAD_MPI
++ const char *mpi_str="per tMPI thread";
++#else
++ const char *mpi_str="per MPI process";
++#endif
++
++ /* for group scheme we print PME threads info only */
++ if (bFullOmpSupport)
++ {
++ fprintf(stderr, "Using %d OpenMP thread%s %s\n",
++ modth.gnth,modth.gnth > 1 ? "s" : "",
++ cr->nnodes > 1 ? mpi_str : "");
++ }
++ if (bSepPME && modth.gnth_pme != modth.gnth)
++ {
++ fprintf(stderr, "Using %d OpenMP thread%s %s for PME\n",
++ modth.gnth_pme,modth.gnth_pme > 1 ? "s" : "",
++ cr->nnodes > 1 ? mpi_str : "");
++ }
++ }
++
++ /* detect and warn about oversubscription
++ * TODO: enable this for separate PME nodes as well! */
++ if (!bSepPME && cr->nodeid_intra == 0)
++ {
++ char sbuf[STRLEN], sbuf1[STRLEN], sbuf2[STRLEN];
++
++ if (modth.gnth*nppn > nthreads_hw_avail)
++ {
++ sprintf(sbuf, "threads");
++ sbuf1[0] = '\0';
++ sprintf(sbuf2, "O");
++#ifdef GMX_MPI
++ if (modth.gnth == 1)
++ {
++#ifdef GMX_THREAD_MPI
++ sprintf(sbuf, "thread-MPI threads");
++#else
++ sprintf(sbuf, "MPI processes");
++ sprintf(sbuf1, " per node");
++ sprintf(sbuf2, "On node %d: o", cr->sim_nodeid);
++#endif
++ }
++#endif
++ md_print_warn(cr, fplog,
++ "WARNING: %sversubscribing the available %d logical CPU cores%s with %d %s.\n"
++ " This will cause considerable performance loss!",
++ sbuf2, nthreads_hw_avail, sbuf1, nppn*modth.gnth, sbuf);
++ }
++ }
++}
++
++/* Return the number of threads to use for module mod (a module_nth_t
++ * value), or -1 when mod is out of range. */
++int gmx_omp_nthreads_get(int mod)
++{
++ if (mod < 0 || mod >= emntNR)
++ {
++ /* invalid module queried */
++ return -1;
++ }
++ else
++ {
++ return modth.nth[mod];
++ }
++}
--- /dev/null
- #include "mdrun.h"
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "gromacs/utility/gmx_header_config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include "smalloc.h"
+#include "gmx_fatal.h"
+#include "network.h"
+#include "main.h"
+#include "macros.h"
+#include "futil.h"
+#include "filenm.h"
- unsigned long Flags, FILE** fplog)
+#include "gmxfio.h"
+#include "string2.h"
+
+#ifdef GMX_THREAD_MPI
+#include "thread_mpi.h"
+#endif
+
+/* The source code in this file should be thread-safe.
+ Please keep it that way. */
+
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef GMX_NATIVE_WINDOWS
+#include <process.h>
+#endif
+
+
+/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
+char *
+gmx_ctime_r(const time_t *clock,char *buf, int n);
+
+
+#define BUFSIZE 1024
+
+
+static void par_fn(char *base,int ftp,const t_commrec *cr,
+ gmx_bool bAppendSimId,gmx_bool bAppendNodeId,
+ char buf[],int bufsize)
+{
+ int n;
+
+ if((size_t)bufsize<(strlen(base)+10))
+ gmx_mem("Character buffer too small!");
+
+ /* Copy to buf, and strip extension */
+ strcpy(buf,base);
+ buf[strlen(base) - strlen(ftp2ext(fn2ftp(base))) - 1] = '\0';
+
+ if (bAppendSimId) {
+ sprintf(buf+strlen(buf),"%d",cr->ms->sim);
+ }
+ if (bAppendNodeId) {
+ strcat(buf,"_node");
+ sprintf(buf+strlen(buf),"%d",cr->nodeid);
+ }
+ strcat(buf,".");
+
+ /* Add extension again */
+ strcat(buf,(ftp == efTPX) ? "tpr" : (ftp == efEDR) ? "edr" : ftp2ext(ftp));
+ if (debug)
+ {
+ fprintf(debug, "node %d par_fn '%s'\n",cr->nodeid,buf);
+ if (fn2ftp(buf) == efLOG)
+ {
+ fprintf(debug,"log\n");
+ }
+ }
+}
+
+void check_multi_int(FILE *log,const gmx_multisim_t *ms,int val,
+ const char *name)
+{
+ int *ibuf,p;
+ gmx_bool bCompatible;
+
+ if (NULL != log)
+ fprintf(log,"Multi-checking %s ... ",name);
+
+ if (ms == NULL)
+ gmx_fatal(FARGS,
+ "check_multi_int called with a NULL communication pointer");
+
+ snew(ibuf,ms->nsim);
+ ibuf[ms->sim] = val;
+ gmx_sumi_sim(ms->nsim,ibuf,ms);
+
+ bCompatible = TRUE;
+ for(p=1; p<ms->nsim; p++)
+ bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
+
+ if (bCompatible)
+ {
+ if (NULL != log)
+ fprintf(log,"OK\n");
+ }
+ else
+ {
+ if (NULL != log)
+ {
+ fprintf(log,"\n%s is not equal for all subsystems\n",name);
+ for(p=0; p<ms->nsim; p++)
+ fprintf(log," subsystem %d: %d\n",p,ibuf[p]);
+ }
+ gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
+ }
+
+ sfree(ibuf);
+}
+
+void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
+ gmx_large_int_t val, const char *name)
+{
+ gmx_large_int_t *ibuf;
+ int p;
+ gmx_bool bCompatible;
+
+ if (NULL != log)
+ fprintf(log,"Multi-checking %s ... ",name);
+
+ if (ms == NULL)
+ gmx_fatal(FARGS,
+ "check_multi_int called with a NULL communication pointer");
+
+ snew(ibuf,ms->nsim);
+ ibuf[ms->sim] = val;
+ gmx_sumli_sim(ms->nsim,ibuf,ms);
+
+ bCompatible = TRUE;
+ for(p=1; p<ms->nsim; p++)
+ bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
+
+ if (bCompatible)
+ {
+ if (NULL != log)
+ fprintf(log,"OK\n");
+ }
+ else
+ {
+ if (NULL != log)
+ {
+ fprintf(log,"\n%s is not equal for all subsystems\n",name);
+ for(p=0; p<ms->nsim; p++)
+ {
+ char strbuf[255];
+ /* first make the format string */
+ snprintf(strbuf, 255, " subsystem %%d: %s\n",
+ gmx_large_int_pfmt);
+ fprintf(log,strbuf,p,ibuf[p]);
+ }
+ }
+ gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
+ }
+
+ sfree(ibuf);
+}
+
+
++char *gmx_gethostname(char *name, size_t len)
++{
++ if (len < 8)
++ {
++ gmx_incons("gmx_gethostname called with len<8");
++ }
++#ifdef HAVE_UNISTD_H
++ if (gethostname(name, len-1) != 0)
++ {
++ strncpy(name, "unknown",8);
++ }
++#else
++ strncpy(name, "unknown",8);
++#endif
++
++ return name;
++}
++
++
+void gmx_log_open(const char *lognm,const t_commrec *cr,gmx_bool bMasterOnly,
-
- gmx_bool bAppend = Flags & MD_APPENDFILES;
++ gmx_bool bAppendFiles, FILE** fplog)
+{
+ int len,testlen,pid;
+ char buf[256],host[256];
+ time_t t;
+ char timebuf[STRLEN];
+ FILE *fp=*fplog;
+ char *tmpnm;
- fp = gmx_fio_fopen(buf, bAppend ? "a+" : "w+" );
+
+ debug_gmx();
+
+ /* Communicate the filename for logfile */
+ if (cr->nnodes > 1 && !bMasterOnly
+#ifdef GMX_THREAD_MPI
+ /* With thread MPI the non-master log files are opened later
+ * when the files names are already known on all nodes.
+ */
+ && FALSE
+#endif
+ )
+ {
+ if (MASTER(cr))
+ {
+ len = strlen(lognm) + 1;
+ }
+ gmx_bcast(sizeof(len),&len,cr);
+ if (!MASTER(cr))
+ {
+ snew(tmpnm,len+8);
+ }
+ else
+ {
+ tmpnm=gmx_strdup(lognm);
+ }
+ gmx_bcast(len*sizeof(*tmpnm),tmpnm,cr);
+ }
+ else
+ {
+ tmpnm=gmx_strdup(lognm);
+ }
+
+ debug_gmx();
+
+ if (!bMasterOnly && !MASTER(cr))
+ {
+ /* Since log always ends with '.log' let's use this info */
+ par_fn(tmpnm,efLOG,cr,FALSE,!bMasterOnly,buf,255);
- else if (!bAppend)
++ fp = gmx_fio_fopen(buf, bAppendFiles ? "a+" : "w+" );
+ }
- fp = gmx_fio_fopen(tmpnm, bAppend ? "a+" : "w+" );
++ else if (!bAppendFiles)
+ {
- #ifdef HAVE_UNISTD_H
- if (gethostname(host,255) != 0)
- {
- sprintf(host,"unknown");
- }
- #else
- sprintf(host,"unknown");
- #endif
++ fp = gmx_fio_fopen(tmpnm, bAppendFiles ? "a+" : "w+" );
+ }
+
+ sfree(tmpnm);
+
+ gmx_fatal_set_log_file(fp);
+
+ /* Get some machine parameters */
- if (bAppend)
++ gmx_gethostname(host,256);
+
+ time(&t);
+
+#ifndef NO_GETPID
+# ifdef GMX_NATIVE_WINDOWS
+ pid = _getpid();
+# else
+ pid = getpid();
+# endif
+#else
+ pid = 0;
+#endif
+
++ if (bAppendFiles)
+ {
+ fprintf(fp,
+ "\n"
+ "\n"
+ "-----------------------------------------------------------\n"
+ "Restarting from checkpoint, appending to previous log file.\n"
+ "\n"
+ );
+ }
+
+ gmx_ctime_r(&t,timebuf,STRLEN);
+
+ fprintf(fp,
+ "Log file opened on %s"
+ "Host: %s pid: %d nodeid: %d nnodes: %d\n",
+ timebuf,host,pid,cr->nodeid,cr->nnodes);
+ fprintf(fp,
+ "Built %s by %s\n"
+ "Build os/architecture: %s\n"
+ "Build CPU Vendor: %s Brand: %s\n"
+ "Build CPU Family: %d Model: %d Stepping: %d\n"
+ "Build CPU Features: %s\n"
+ "Compiler: %s\n"
+ "CFLAGS: %s\n\n",
+ BUILD_TIME,BUILD_USER,BUILD_HOST,
+ BUILD_CPU_VENDOR,BUILD_CPU_BRAND,
+ BUILD_CPU_FAMILY,BUILD_CPU_MODEL,BUILD_CPU_STEPPING,
+ BUILD_CPU_FEATURES,BUILD_COMPILER,BUILD_CFLAGS);
+
+ fflush(fp);
+ debug_gmx();
+
+ *fplog = fp;
+}
+
+void gmx_log_close(FILE *fp)
+{
+ if (fp) {
+ gmx_fatal_set_log_file(NULL);
+ gmx_fio_fclose(fp);
+ }
+}
+
+static void comm_args(const t_commrec *cr,int *argc,char ***argv)
+{
+ int i,len;
+
+ if (PAR(cr))
+ gmx_bcast(sizeof(*argc),argc,cr);
+
+ if (!MASTER(cr))
+ snew(*argv,*argc+1);
+ fprintf(stderr,"NODEID=%d argc=%d\n",cr->nodeid,*argc);
+ for(i=0; (i<*argc); i++) {
+ if (MASTER(cr))
+ len = strlen((*argv)[i])+1;
+ gmx_bcast(sizeof(len),&len,cr);
+ if (!MASTER(cr))
+ snew((*argv)[i],len);
+ /*gmx_bcast(len*sizeof((*argv)[i][0]),(*argv)[i],cr);*/
+ gmx_bcast(len*sizeof(char),(*argv)[i],cr);
+ }
+ debug_gmx();
+}
+
+void init_multisystem(t_commrec *cr,int nsim, char **multidirs,
+ int nfile, const t_filenm fnm[],gmx_bool bParFn)
+{
+ gmx_multisim_t *ms;
+ int nnodes,nnodpersim,sim,i,ftp;
+ char buf[256];
+#ifdef GMX_MPI
+ MPI_Group mpi_group_world;
+#endif
+ int *rank;
+
+#ifndef GMX_MPI
+ if (nsim > 1)
+ {
+ gmx_fatal(FARGS,"This binary is compiled without MPI support, can not do multiple simulations.");
+ }
+#endif
+
+ nnodes = cr->nnodes;
+ if (nnodes % nsim != 0)
+ {
+ gmx_fatal(FARGS,"The number of nodes (%d) is not a multiple of the number of simulations (%d)",nnodes,nsim);
+ }
+
+ nnodpersim = nnodes/nsim;
+ sim = cr->nodeid/nnodpersim;
+
+ if (debug)
+ {
+ fprintf(debug,"We have %d simulations, %d nodes per simulation, local simulation is %d\n",nsim,nnodpersim,sim);
+ }
+
+ snew(ms,1);
+ cr->ms = ms;
+ ms->nsim = nsim;
+ ms->sim = sim;
+#ifdef GMX_MPI
+ /* Create a communicator for the master nodes */
+ snew(rank,ms->nsim);
+ for(i=0; i<ms->nsim; i++)
+ {
+ rank[i] = i*nnodpersim;
+ }
+ MPI_Comm_group(MPI_COMM_WORLD,&mpi_group_world);
+ MPI_Group_incl(mpi_group_world,nsim,rank,&ms->mpi_group_masters);
+ sfree(rank);
+ MPI_Comm_create(MPI_COMM_WORLD,ms->mpi_group_masters,
+ &ms->mpi_comm_masters);
+
+#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
+ /* initialize the MPI_IN_PLACE replacement buffers */
+ snew(ms->mpb, 1);
+ ms->mpb->ibuf=NULL;
+ ms->mpb->libuf=NULL;
+ ms->mpb->fbuf=NULL;
+ ms->mpb->dbuf=NULL;
+ ms->mpb->ibuf_alloc=0;
+ ms->mpb->libuf_alloc=0;
+ ms->mpb->fbuf_alloc=0;
+ ms->mpb->dbuf_alloc=0;
+#endif
+
+#endif
+
+ /* Reduce the intra-simulation communication */
+ cr->sim_nodeid = cr->nodeid % nnodpersim;
+ cr->nnodes = nnodpersim;
+#ifdef GMX_MPI
+ MPI_Comm_split(MPI_COMM_WORLD,sim,cr->sim_nodeid,&cr->mpi_comm_mysim);
+ cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
+ cr->nodeid = cr->sim_nodeid;
+#endif
+
+ if (debug)
+ {
+ fprintf(debug,"This is simulation %d",cr->ms->sim);
+ if (PAR(cr))
+ {
+ fprintf(debug,", local number of nodes %d, local nodeid %d",
+ cr->nnodes,cr->sim_nodeid);
+ }
+ fprintf(debug,"\n\n");
+ }
+
+ if (multidirs)
+ {
+ int ret;
+ if (debug)
+ {
+ fprintf(debug,"Changing to directory %s\n",multidirs[cr->ms->sim]);
+ }
+ gmx_chdir(multidirs[cr->ms->sim]);
+ }
+ else if (bParFn)
+ {
+ /* Patch output and tpx, cpt and rerun input file names */
+ for(i=0; (i<nfile); i++)
+ {
+ /* Because of possible multiple extensions per type we must look
+ * at the actual file name
+ */
+ if (is_output(&fnm[i]) ||
+ fnm[i].ftp == efTPX || fnm[i].ftp == efCPT ||
+ strcmp(fnm[i].opt,"-rerun") == 0)
+ {
+ ftp = fn2ftp(fnm[i].fns[0]);
+ par_fn(fnm[i].fns[0],ftp,cr,TRUE,FALSE,buf,255);
+ sfree(fnm[i].fns[0]);
+ fnm[i].fns[0] = gmx_strdup(buf);
+ }
+ }
+ }
+}
+
+t_commrec *init_par(int *argc,char ***argv_ptr)
+{
+ t_commrec *cr;
+ char **argv;
+ int i;
+ gmx_bool pe=FALSE;
+
+ snew(cr,1);
+
+ argv = argv_ptr ? *argv_ptr : NULL;
+
+#if defined GMX_MPI && !defined GMX_THREAD_MPI
+ cr->sim_nodeid = gmx_setup(argc,argv,&cr->nnodes);
+
+ if (!PAR(cr) && (cr->sim_nodeid != 0))
+ {
+ gmx_comm("(!PAR(cr) && (cr->sim_nodeid != 0))");
+ }
+
+ cr->mpi_comm_mysim = MPI_COMM_WORLD;
+ cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
+#else
+ /* These should never be accessed */
+ cr->mpi_comm_mysim = NULL;
+ cr->mpi_comm_mygroup = NULL;
+ cr->nnodes = 1;
+ cr->sim_nodeid = 0;
+#endif
+
+ cr->nodeid = cr->sim_nodeid;
+
+ cr->duty = (DUTY_PP | DUTY_PME);
+
+ /* Communicate arguments if parallel */
+#ifndef GMX_THREAD_MPI
+ if (PAR(cr))
+ {
+ comm_args(cr,argc,argv_ptr);
+ }
+#endif /* GMX_THREAD_MPI */
+
+#ifdef GMX_MPI
+#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
+ /* initialize the MPI_IN_PLACE replacement buffers */
+ snew(cr->mpb, 1);
+ cr->mpb->ibuf=NULL;
+ cr->mpb->libuf=NULL;
+ cr->mpb->fbuf=NULL;
+ cr->mpb->dbuf=NULL;
+ cr->mpb->ibuf_alloc=0;
+ cr->mpb->libuf_alloc=0;
+ cr->mpb->fbuf_alloc=0;
+ cr->mpb->dbuf_alloc=0;
+#endif
+#endif
+
+ return cr;
+}
+
+t_commrec *init_par_threads(const t_commrec *cro)
+{
+#ifdef GMX_THREAD_MPI
+ int initialized;
+ t_commrec *cr;
+
+ /* make a thread-specific commrec */
+ snew(cr,1);
+ /* now copy the whole thing, so settings like the number of PME nodes
+ get propagated. */
+ *cr=*cro;
+
+ /* and we start setting our own thread-specific values for things */
+ MPI_Initialized(&initialized);
+ if (!initialized)
+ {
+ gmx_comm("Initializing threads without comm");
+ }
+ /* once threads will be used together with MPI, we'll
+ fill the cr structure with distinct data here. This might even work: */
+ cr->sim_nodeid = gmx_setup(0,NULL, &cr->nnodes);
+
+ cr->mpi_comm_mysim = MPI_COMM_WORLD;
+ cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
+ cr->nodeid = cr->sim_nodeid;
+ cr->duty = (DUTY_PP | DUTY_PME);
+
+ return cr;
+#else
+ return NULL;
+#endif
+}
--- /dev/null
- void gmx_mtop_atomnr_to_atom(const gmx_mtop_t *mtop,int atnr_global,
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This file is part of Gromacs Copyright (c) 1991-2008
+ * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include "smalloc.h"
+#include "typedefs.h"
+#include "mtop_util.h"
+#include "topsort.h"
+#include "symtab.h"
+#include "gmx_fatal.h"
+
+static int gmx_mtop_maxresnr(const gmx_mtop_t *mtop,int maxres_renum)
+{
+ int maxresnr,mt,r;
+ const t_atoms *atoms;
+
+ maxresnr = 0;
+
+ for(mt=0; mt<mtop->nmoltype; mt++)
+ {
+ atoms = &mtop->moltype[mt].atoms;
+ if (atoms->nres > maxres_renum)
+ {
+ for(r=0; r<atoms->nres; r++)
+ {
+ if (atoms->resinfo[r].nr > maxresnr)
+ {
+ maxresnr = atoms->resinfo[r].nr;
+ }
+ }
+ }
+ }
+
+ return maxresnr;
+}
+
+void gmx_mtop_finalize(gmx_mtop_t *mtop)
+{
+ char *env;
+
+ mtop->maxres_renum = 1;
+
+ env = getenv("GMX_MAXRESRENUM");
+ if (env != NULL)
+ {
+ sscanf(env,"%d",&mtop->maxres_renum);
+ }
+ if (mtop->maxres_renum == -1)
+ {
+ /* -1 signals renumber residues in all molecules */
+ mtop->maxres_renum = INT_MAX;
+ }
+
+ mtop->maxresnr = gmx_mtop_maxresnr(mtop,mtop->maxres_renum);
+}
+
+int ncg_mtop(const gmx_mtop_t *mtop)
+{
+ int ncg;
+ int mb;
+
+ ncg = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ ncg +=
+ mtop->molblock[mb].nmol*
+ mtop->moltype[mtop->molblock[mb].type].cgs.nr;
+ }
+
+ return ncg;
+}
+
- int mb,a_start,a_end,atnr_mol;
++void gmx_mtop_remove_chargegroups(gmx_mtop_t *mtop)
++{
++ int mt;
++ t_block *cgs;
++ int i;
++
++ for(mt=0; mt<mtop->nmoltype; mt++)
++ {
++ cgs = &mtop->moltype[mt].cgs;
++ if (cgs->nr < mtop->moltype[mt].atoms.nr)
++ {
++ cgs->nr = mtop->moltype[mt].atoms.nr;
++ srenew(cgs->index,cgs->nr+1);
++ for(i=0; i<cgs->nr+1; i++)
++ {
++ cgs->index[i] = i;
++ }
++ }
++ }
++}
++
++
++typedef struct
++{
++ int a_start;
++ int a_end;
++ int na_mol;
++} mb_at_t;
++
++typedef struct gmx_mtop_atomlookup
++{
++ const gmx_mtop_t *mtop;
++ int nmb;
++ int mb_start;
++ mb_at_t *mba;
++} t_gmx_mtop_atomlookup;
++
++
++gmx_mtop_atomlookup_t
++gmx_mtop_atomlookup_init(const gmx_mtop_t *mtop)
++{
++ t_gmx_mtop_atomlookup *alook;
++ int mb;
++ int a_start,a_end,na,na_start=-1;
++
++ snew(alook,1);
++
++ alook->mtop = mtop;
++ alook->nmb = mtop->nmolblock;
++ alook->mb_start = 0;
++ snew(alook->mba,alook->nmb);
++
++ a_start = 0;
++ for(mb=0; mb<mtop->nmolblock; mb++)
++ {
++ na = mtop->molblock[mb].nmol*mtop->molblock[mb].natoms_mol;
++ a_end = a_start + na;
++
++ alook->mba[mb].a_start = a_start;
++ alook->mba[mb].a_end = a_end;
++ alook->mba[mb].na_mol = mtop->molblock[mb].natoms_mol;
++
++ /* We start the binary search with the largest block */
++ if (mb == 0 || na > na_start)
++ {
++ alook->mb_start = mb;
++ na_start = na;
++ }
++
++ a_start = a_end;
++ }
++
++ return alook;
++}
++
++gmx_mtop_atomlookup_t
++gmx_mtop_atomlookup_settle_init(const gmx_mtop_t *mtop)
++{
++ t_gmx_mtop_atomlookup *alook;
++ int mb;
++ int na,na_start=-1;
++
++ alook = gmx_mtop_atomlookup_init(mtop);
++
++ /* Check if the starting molblock has settle */
++ if (mtop->moltype[mtop->molblock[alook->mb_start].type].ilist[F_SETTLE].nr == 0)
++ {
++ /* Search the largest molblock with settle */
++ alook->mb_start = -1;
++ for(mb=0; mb<mtop->nmolblock; mb++)
++ {
++ if (mtop->moltype[mtop->molblock[mb].type].ilist[F_SETTLE].nr > 0)
++ {
++ na = alook->mba[mb].a_end - alook->mba[mb].a_start;
++ if (alook->mb_start == -1 || na > na_start)
++ {
++ alook->mb_start = mb;
++ na_start = na;
++ }
++ }
++ }
++
++ if (alook->mb_start == -1)
++ {
++ gmx_incons("gmx_mtop_atomlookup_settle_init called without settles");
++ }
++ }
++
++ return alook;
++}
++
++void
++gmx_mtop_atomlookup_destroy(gmx_mtop_atomlookup_t alook)
++{
++ sfree(alook->mba);
++ sfree(alook);
++}
++
++void gmx_mtop_atomnr_to_atom(const gmx_mtop_atomlookup_t alook,
++ int atnr_global,
+ t_atom **atom)
+{
- gmx_fatal(FARGS,"gmx_mtop_atomnr_to_atom was called with atnr_global=%d which is not in the atom range of this system (%d-%d)",
++ int mb0,mb1,mb;
++ int a_start,atnr_mol;
+
++#ifdef DEBUG_MTOP
+    if (atnr_global < 0 || atnr_global >= alook->mtop->natoms)
+ {
-
- mb = -1;
- a_end = 0;
- do
++        gmx_fatal(FARGS,"gmx_mtop_atomnr_to_atom was called with atnr_global=%d which is not in the atom range of this system (%d-%d)",
+                  atnr_global,0,alook->mtop->natoms-1);
+ }
- mb++;
- a_start = a_end;
- a_end = a_start + mtop->molblock[mb].nmol*mtop->molblock[mb].natoms_mol;
++#endif
++
++ mb0 = -1;
++ mb1 = alook->nmb;
++ mb = alook->mb_start;
++
++ while (TRUE)
+ {
- while (atnr_global >= a_end);
++ a_start = alook->mba[mb].a_start;
++ if (atnr_global < a_start)
++ {
++ mb1 = mb;
++ }
++ else if (atnr_global >= alook->mba[mb].a_end)
++ {
++ mb0 = mb;
++ }
++ else
++ {
++ break;
++ }
++ mb = ((mb0 + mb1 + 1)>>1);
+ }
- atnr_mol = (atnr_global - a_start) % mtop->molblock[mb].natoms_mol;
+
- *atom = &mtop->moltype[mtop->molblock[mb].type].atoms.atom[atnr_mol];
++ atnr_mol = (atnr_global - a_start) % alook->mba[mb].na_mol;
+
- void gmx_mtop_atomnr_to_ilist(const gmx_mtop_t *mtop,int atnr_global,
++ *atom = &alook->mtop->moltype[alook->mtop->molblock[mb].type].atoms.atom[atnr_mol];
+}
+
- int mb,a_start,a_end,atnr_local;
++void gmx_mtop_atomnr_to_ilist(const gmx_mtop_atomlookup_t alook,
++ int atnr_global,
+ t_ilist **ilist_mol,int *atnr_offset)
+{
-
- mb = -1;
- a_end = 0;
- do
++ int mb0,mb1,mb;
++ int a_start,atnr_local;
+
++#ifdef DEBUG_MTOP
+    if (atnr_global < 0 || atnr_global >= alook->mtop->natoms)
+ {
+        gmx_fatal(FARGS,"gmx_mtop_atomnr_to_ilist was called with atnr_global=%d which is not in the atom range of this system (%d-%d)",
+                  atnr_global,0,alook->mtop->natoms-1);
+ }
- mb++;
- a_start = a_end;
- a_end = a_start + mtop->molblock[mb].nmol*mtop->molblock[mb].natoms_mol;
++#endif
++
++ mb0 = -1;
++ mb1 = alook->nmb;
++ mb = alook->mb_start;
++
++ while (TRUE)
+ {
- while (atnr_global >= a_end);
++ a_start = alook->mba[mb].a_start;
++ if (atnr_global < a_start)
++ {
++ mb1 = mb;
++ }
++ else if (atnr_global >= alook->mba[mb].a_end)
++ {
++ mb0 = mb;
++ }
++ else
++ {
++ break;
++ }
++ mb = ((mb0 + mb1 + 1)>>1);
+ }
- *ilist_mol = mtop->moltype[mtop->molblock[mb].type].ilist;
+
- atnr_local = (atnr_global - a_start) % mtop->molblock[mb].natoms_mol;
++ *ilist_mol = alook->mtop->moltype[alook->mtop->molblock[mb].type].ilist;
+
- void gmx_mtop_atomnr_to_molblock_ind(const gmx_mtop_t *mtop,int atnr_global,
++ atnr_local = (atnr_global - a_start) % alook->mba[mb].na_mol;
+
+ *atnr_offset = atnr_global - atnr_local;
+}
+
- int mb,a_start,a_end;
- t_atoms *atoms;
++void gmx_mtop_atomnr_to_molblock_ind(const gmx_mtop_atomlookup_t alook,
++ int atnr_global,
+ int *molb,int *molnr,int *atnr_mol)
+{
-
- mb = -1;
- a_end = 0;
- do
++ int mb0,mb1,mb;
++ int a_start;
+
++#ifdef DEBUG_MTOP
+    if (atnr_global < 0 || atnr_global >= alook->mtop->natoms)
+ {
+        gmx_fatal(FARGS,"gmx_mtop_atomnr_to_molblock_ind was called with atnr_global=%d which is not in the atom range of this system (%d-%d)",
+                  atnr_global,0,alook->mtop->natoms-1);
+ }
- mb++;
- a_start = a_end;
- a_end = a_start + mtop->molblock[mb].nmol*mtop->molblock[mb].natoms_mol;
++#endif
++
++ mb0 = -1;
++ mb1 = alook->nmb;
++ mb = alook->mb_start;
++
++ while (TRUE)
+ {
- while (atnr_global >= a_end);
++ a_start = alook->mba[mb].a_start;
++ if (atnr_global < a_start)
++ {
++ mb1 = mb;
++ }
++ else if (atnr_global >= alook->mba[mb].a_end)
++ {
++ mb0 = mb;
++ }
++ else
++ {
++ break;
++ }
++ mb = ((mb0 + mb1 + 1)>>1);
+ }
- *molnr = (atnr_global - a_start) / mtop->molblock[mb].natoms_mol;
- *atnr_mol = atnr_global - a_start - (*molnr)*mtop->molblock[mb].natoms_mol;
+
+ *molb = mb;
++ *molnr = (atnr_global - a_start) / alook->mba[mb].na_mol;
++ *atnr_mol = atnr_global - a_start - (*molnr)*alook->mba[mb].na_mol;
+}
+
+void gmx_mtop_atominfo_global(const gmx_mtop_t *mtop,int atnr_global,
+ char **atomname,int *resnr,char **resname)
+{
+ int mb,a_start,a_end,maxresnr,at_loc;
+ gmx_molblock_t *molb;
+ t_atoms *atoms=NULL;
+
+ if (atnr_global < 0 || atnr_global >= mtop->natoms)
+ {
+ gmx_fatal(FARGS,"gmx_mtop_atominfo_global was called with atnr_global=%d which is not in the atom range of this system (%d-%d)",
+ atnr_global,0,mtop->natoms-1);
+ }
+
+ mb = -1;
+ a_end = 0;
+ maxresnr = mtop->maxresnr;
+ do
+ {
+ if (mb >= 0)
+ {
+ if (atoms->nres <= mtop->maxres_renum)
+ {
+ /* Single residue molecule, keep counting */
+ maxresnr += mtop->molblock[mb].nmol*atoms->nres;
+ }
+ }
+ mb++;
+ atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+ a_start = a_end;
+ a_end = a_start + mtop->molblock[mb].nmol*atoms->nr;
+ }
+ while (atnr_global >= a_end);
+
+ at_loc = (atnr_global - a_start) % atoms->nr;
+ *atomname = *(atoms->atomname[at_loc]);
+ if (atoms->nres > mtop->maxres_renum)
+ {
+ *resnr = atoms->resinfo[atoms->atom[at_loc].resind].nr;
+ }
+ else
+ {
+ /* Single residue molecule, keep counting */
+ *resnr = maxresnr + 1 + (atnr_global - a_start)/atoms->nr*atoms->nres + atoms->atom[at_loc].resind;
+ }
+ *resname = *(atoms->resinfo[atoms->atom[at_loc].resind].name);
+}
+
+typedef struct gmx_mtop_atomloop_all
+{
+ const gmx_mtop_t *mtop;
+ int mblock;
+ t_atoms *atoms;
+ int mol;
+ int maxresnr;
+ int at_local;
+ int at_global;
+} t_gmx_mtop_atomloop_all;
+
+gmx_mtop_atomloop_all_t
+gmx_mtop_atomloop_all_init(const gmx_mtop_t *mtop)
+{
+ struct gmx_mtop_atomloop_all *aloop;
+
+ snew(aloop,1);
+
+ aloop->mtop = mtop;
+ aloop->mblock = 0;
+ aloop->atoms =
+ &mtop->moltype[mtop->molblock[aloop->mblock].type].atoms;
+ aloop->mol = 0;
+ aloop->maxresnr = mtop->maxresnr;
+ aloop->at_local = -1;
+ aloop->at_global = -1;
+
+ return aloop;
+}
+
+static void gmx_mtop_atomloop_all_destroy(gmx_mtop_atomloop_all_t aloop)
+{
+ sfree(aloop);
+}
+
+gmx_bool gmx_mtop_atomloop_all_next(gmx_mtop_atomloop_all_t aloop,
+ int *at_global,t_atom **atom)
+{
+ if (aloop == NULL)
+ {
+ gmx_incons("gmx_mtop_atomloop_all_next called without calling gmx_mtop_atomloop_all_init");
+ }
+
+ aloop->at_local++;
+ aloop->at_global++;
+
+ if (aloop->at_local >= aloop->atoms->nr)
+ {
+ if (aloop->atoms->nres <= aloop->mtop->maxres_renum)
+ {
+ /* Single residue molecule, increase the count with one */
+ aloop->maxresnr += aloop->atoms->nres;
+ }
+ aloop->mol++;
+ aloop->at_local = 0;
+ if (aloop->mol >= aloop->mtop->molblock[aloop->mblock].nmol)
+ {
+ aloop->mblock++;
+ if (aloop->mblock >= aloop->mtop->nmolblock)
+ {
+ gmx_mtop_atomloop_all_destroy(aloop);
+ return FALSE;
+ }
+ aloop->atoms = &aloop->mtop->moltype[aloop->mtop->molblock[aloop->mblock].type].atoms;
+ aloop->mol = 0;
+ }
+ }
+
+ *at_global = aloop->at_global;
+ *atom = &aloop->atoms->atom[aloop->at_local];
+
+ return TRUE;
+}
+
+void gmx_mtop_atomloop_all_names(gmx_mtop_atomloop_all_t aloop,
+ char **atomname,int *resnr,char **resname)
+{
+ int resind_mol;
+
+ *atomname = *(aloop->atoms->atomname[aloop->at_local]);
+ resind_mol = aloop->atoms->atom[aloop->at_local].resind;
+ *resnr = aloop->atoms->resinfo[resind_mol].nr;
+ if (aloop->atoms->nres <= aloop->mtop->maxres_renum)
+ {
+ *resnr = aloop->maxresnr + 1 + resind_mol;
+ }
+ *resname = *(aloop->atoms->resinfo[resind_mol].name);
+}
+
+void gmx_mtop_atomloop_all_moltype(gmx_mtop_atomloop_all_t aloop,
+ gmx_moltype_t **moltype,int *at_mol)
+{
+ *moltype = &aloop->mtop->moltype[aloop->mtop->molblock[aloop->mblock].type];
+ *at_mol = aloop->at_local;
+}
+
+typedef struct gmx_mtop_atomloop_block
+{
+ const gmx_mtop_t *mtop;
+ int mblock;
+ t_atoms *atoms;
+ int at_local;
+} t_gmx_mtop_atomloop_block;
+
+gmx_mtop_atomloop_block_t
+gmx_mtop_atomloop_block_init(const gmx_mtop_t *mtop)
+{
+ struct gmx_mtop_atomloop_block *aloop;
+
+ snew(aloop,1);
+
+ aloop->mtop = mtop;
+ aloop->mblock = 0;
+ aloop->atoms = &mtop->moltype[mtop->molblock[aloop->mblock].type].atoms;
+ aloop->at_local = -1;
+
+ return aloop;
+}
+
+static void gmx_mtop_atomloop_block_destroy(gmx_mtop_atomloop_block_t aloop)
+{
+ sfree(aloop);
+}
+
+gmx_bool gmx_mtop_atomloop_block_next(gmx_mtop_atomloop_block_t aloop,
+ t_atom **atom,int *nmol)
+{
+ if (aloop == NULL)
+ {
+ gmx_incons("gmx_mtop_atomloop_all_next called without calling gmx_mtop_atomloop_all_init");
+ }
+
+ aloop->at_local++;
+
+ if (aloop->at_local >= aloop->atoms->nr)
+ {
+ aloop->mblock++;
+ if (aloop->mblock >= aloop->mtop->nmolblock)
+ {
+ gmx_mtop_atomloop_block_destroy(aloop);
+ return FALSE;
+ }
+ aloop->atoms = &aloop->mtop->moltype[aloop->mtop->molblock[aloop->mblock].type].atoms;
+ aloop->at_local = 0;
+ }
+
+ *atom = &aloop->atoms->atom[aloop->at_local];
+ *nmol = aloop->mtop->molblock[aloop->mblock].nmol;
+
+ return TRUE;
+}
+
+typedef struct gmx_mtop_ilistloop
+{
+ const gmx_mtop_t *mtop;
+ int mblock;
+} t_gmx_mtop_ilist;
+
+gmx_mtop_ilistloop_t
+gmx_mtop_ilistloop_init(const gmx_mtop_t *mtop)
+{
+ struct gmx_mtop_ilistloop *iloop;
+
+ snew(iloop,1);
+
+ iloop->mtop = mtop;
+ iloop->mblock = -1;
+
+ return iloop;
+}
+
+static void gmx_mtop_ilistloop_destroy(gmx_mtop_ilistloop_t iloop)
+{
+ sfree(iloop);
+}
+
+gmx_bool gmx_mtop_ilistloop_next(gmx_mtop_ilistloop_t iloop,
+ t_ilist **ilist_mol,int *nmol)
+{
+ if (iloop == NULL)
+ {
+ gmx_incons("gmx_mtop_ilistloop_next called without calling gmx_mtop_ilistloop_init");
+ }
+
+ iloop->mblock++;
+ if (iloop->mblock == iloop->mtop->nmolblock)
+ {
+ gmx_mtop_ilistloop_destroy(iloop);
+ return FALSE;
+ }
+
+ *ilist_mol =
+ iloop->mtop->moltype[iloop->mtop->molblock[iloop->mblock].type].ilist;
+
+ *nmol = iloop->mtop->molblock[iloop->mblock].nmol;
+
+ return TRUE;
+}
+typedef struct gmx_mtop_ilistloop_all
+{
+ const gmx_mtop_t *mtop;
+ int mblock;
+ int mol;
+ int a_offset;
+} t_gmx_mtop_ilist_all;
+
+gmx_mtop_ilistloop_all_t
+gmx_mtop_ilistloop_all_init(const gmx_mtop_t *mtop)
+{
+ struct gmx_mtop_ilistloop_all *iloop;
+
+ snew(iloop,1);
+
+ iloop->mtop = mtop;
+ iloop->mblock = 0;
+ iloop->mol = -1;
+ iloop->a_offset = 0;
+
+ return iloop;
+}
+
+static void gmx_mtop_ilistloop_all_destroy(gmx_mtop_ilistloop_all_t iloop)
+{
+ sfree(iloop);
+}
+
+gmx_bool gmx_mtop_ilistloop_all_next(gmx_mtop_ilistloop_all_t iloop,
+ t_ilist **ilist_mol,int *atnr_offset)
+{
+ gmx_molblock_t *molb;
+
+ if (iloop == NULL)
+ {
+ gmx_incons("gmx_mtop_ilistloop_all_next called without calling gmx_mtop_ilistloop_all_init");
+ }
+
+ if (iloop->mol >= 0)
+ {
+ iloop->a_offset += iloop->mtop->molblock[iloop->mblock].natoms_mol;
+ }
+
+ iloop->mol++;
+
+ if (iloop->mol >= iloop->mtop->molblock[iloop->mblock].nmol) {
+ iloop->mblock++;
+ iloop->mol = 0;
+ if (iloop->mblock == iloop->mtop->nmolblock)
+ {
+ gmx_mtop_ilistloop_all_destroy(iloop);
+ return FALSE;
+ }
+ }
+
+ *ilist_mol =
+ iloop->mtop->moltype[iloop->mtop->molblock[iloop->mblock].type].ilist;
+
+ *atnr_offset = iloop->a_offset;
+
+ return TRUE;
+}
+
+int gmx_mtop_ftype_count(const gmx_mtop_t *mtop,int ftype)
+{
+ gmx_mtop_ilistloop_t iloop;
+ t_ilist *il;
+ int n,nmol;
+
+ n = 0;
+
+ iloop = gmx_mtop_ilistloop_init(mtop);
+ while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
+ {
+ n += nmol*il[ftype].nr/(1+NRAL(ftype));
+ }
+
+ return n;
+}
+
+t_block gmx_mtop_global_cgs(const gmx_mtop_t *mtop)
+{
+ t_block cgs_gl,*cgs_mol;
+ int mb,mol,cg;
+ gmx_molblock_t *molb;
+ t_atoms *atoms;
+
+ /* In most cases this is too much, but we realloc at the end */
+ snew(cgs_gl.index,mtop->natoms+1);
+
+ cgs_gl.nr = 0;
+ cgs_gl.index[0] = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ cgs_mol = &mtop->moltype[molb->type].cgs;
+ for(mol=0; mol<molb->nmol; mol++)
+ {
+ for(cg=0; cg<cgs_mol->nr; cg++)
+ {
+ cgs_gl.index[cgs_gl.nr+1] =
+ cgs_gl.index[cgs_gl.nr] +
+ cgs_mol->index[cg+1] - cgs_mol->index[cg];
+ cgs_gl.nr++;
+ }
+ }
+ }
+ cgs_gl.nalloc_index = cgs_gl.nr + 1;
+ srenew(cgs_gl.index,cgs_gl.nalloc_index);
+
+ return cgs_gl;
+}
+
+static void atomcat(t_atoms *dest, t_atoms *src, int copies,
+ int maxres_renum, int *maxresnr)
+{
+ int i,j,l,size;
+ int srcnr=src->nr;
+ int destnr=dest->nr;
+
+ if (srcnr)
+ {
+ size=destnr+copies*srcnr;
+ srenew(dest->atom,size);
+ srenew(dest->atomname,size);
+ srenew(dest->atomtype,size);
+ srenew(dest->atomtypeB,size);
+ }
+ if (src->nres)
+ {
+ size=dest->nres+copies*src->nres;
+ srenew(dest->resinfo,size);
+ }
+
+ /* residue information */
+ for (l=dest->nres,j=0; (j<copies); j++,l+=src->nres)
+ {
+ memcpy((char *) &(dest->resinfo[l]),(char *) &(src->resinfo[0]),
+ (size_t)(src->nres*sizeof(src->resinfo[0])));
+ }
+
+ for (l=destnr,j=0; (j<copies); j++,l+=srcnr)
+ {
+ memcpy((char *) &(dest->atomname[l]),(char *) &(src->atomname[0]),
+ (size_t)(srcnr*sizeof(src->atomname[0])));
+ memcpy((char *) &(dest->atomtype[l]),(char *) &(src->atomtype[0]),
+ (size_t)(srcnr*sizeof(src->atomtype[0])));
+ memcpy((char *) &(dest->atomtypeB[l]),(char *) &(src->atomtypeB[0]),
+ (size_t)(srcnr*sizeof(src->atomtypeB[0])));
+ memcpy((char *) &(dest->atom[l]),(char *) &(src->atom[0]),
+ (size_t)(srcnr*sizeof(src->atom[0])));
+ }
+
+ /* Increment residue indices */
+ for (l=destnr,j=0; (j<copies); j++)
+ {
+ for (i=0; (i<srcnr); i++,l++)
+ {
+ dest->atom[l].resind = dest->nres+j*src->nres+src->atom[i].resind;
+ }
+ }
+
+ if (src->nres <= maxres_renum)
+ {
+ /* Single residue molecule, continue counting residues */
+ for (j=0; (j<copies); j++)
+ {
+ for (l=0; l<src->nres; l++)
+ {
+ (*maxresnr)++;
+ dest->resinfo[dest->nres+j*src->nres+l].nr = *maxresnr;
+ }
+ }
+ }
+
+ dest->nres += copies*src->nres;
+ dest->nr += copies*src->nr;
+}
+
+t_atoms gmx_mtop_global_atoms(const gmx_mtop_t *mtop)
+{
+ t_atoms atoms;
+ int maxresnr,mb;
+ gmx_molblock_t *molb;
+
+ init_t_atoms(&atoms,0,FALSE);
+
+ maxresnr = mtop->maxresnr;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ atomcat(&atoms,&mtop->moltype[molb->type].atoms,molb->nmol,
+ mtop->maxres_renum,&maxresnr);
+ }
+
+ return atoms;
+}
+
+void gmx_mtop_make_atomic_charge_groups(gmx_mtop_t *mtop,
+ gmx_bool bKeepSingleMolCG)
+{
+ int mb,cg;
+ t_block *cgs_mol;
+
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ cgs_mol = &mtop->moltype[mtop->molblock[mb].type].cgs;
+ if (!(bKeepSingleMolCG && cgs_mol->nr == 1))
+ {
+ cgs_mol->nr = mtop->molblock[mb].natoms_mol;
+ cgs_mol->nalloc_index = cgs_mol->nr + 1;
+ srenew(cgs_mol->index,cgs_mol->nalloc_index);
+ for(cg=0; cg<cgs_mol->nr+1; cg++)
+ {
+ cgs_mol->index[cg] = cg;
+ }
+ }
+ }
+}
+
+/*
+ * The cat routines below are old code from src/kernel/topcat.c
+ */
+
+static void blockcat(t_block *dest,t_block *src,int copies,
+ int dnum,int snum)
+{
+ int i,j,l,nra,size;
+
+ if (src->nr)
+ {
+ size=(dest->nr+copies*src->nr+1);
+ srenew(dest->index,size);
+ }
+
+ nra = dest->index[dest->nr];
+ for (l=dest->nr,j=0; (j<copies); j++)
+ {
+ for (i=0; (i<src->nr); i++)
+ {
+ dest->index[l++] = nra + src->index[i];
+ }
+ nra += src->index[src->nr];
+ }
+ dest->nr += copies*src->nr;
+ dest->index[dest->nr] = nra;
+}
+
+static void blockacat(t_blocka *dest,t_blocka *src,int copies,
+ int dnum,int snum)
+{
+ int i,j,l,size;
+ int destnr = dest->nr;
+ int destnra = dest->nra;
+
+ if (src->nr)
+ {
+ size=(dest->nr+copies*src->nr+1);
+ srenew(dest->index,size);
+ }
+ if (src->nra)
+ {
+ size=(dest->nra+copies*src->nra);
+ srenew(dest->a,size);
+ }
+
+ for (l=destnr,j=0; (j<copies); j++)
+ {
+ for (i=0; (i<src->nr); i++)
+ {
+ dest->index[l++] = dest->nra+src->index[i];
+ }
+ dest->nra += src->nra;
+ }
+ for (l=destnra,j=0; (j<copies); j++)
+ {
+ for (i=0; (i<src->nra); i++)
+ {
+ dest->a[l++] = dnum+src->a[i];
+ }
+ dnum+=snum;
+ dest->nr += src->nr;
+ }
+ dest->index[dest->nr] = dest->nra;
+}
+
+static void ilistcat(int ftype,t_ilist *dest,t_ilist *src,int copies,
+ int dnum,int snum)
+{
+ int nral,c,i,a;
+
+ nral = NRAL(ftype);
+
+ dest->nalloc = dest->nr + copies*src->nr;
+ srenew(dest->iatoms,dest->nalloc);
+
+ for(c=0; c<copies; c++)
+ {
+ for(i=0; i<src->nr; )
+ {
+ dest->iatoms[dest->nr++] = src->iatoms[i++];
+ for(a=0; a<nral; a++)
+ {
+ dest->iatoms[dest->nr++] = dnum + src->iatoms[i++];
+ }
+ }
+ dnum += snum;
+ }
+}
+
+static void set_posres_params(t_idef *idef,gmx_molblock_t *molb,
+ int i0,int a_offset)
+{
+ t_ilist *il;
+ int i1,i,a_molb;
+ t_iparams *ip;
+
+ il = &idef->il[F_POSRES];
+ i1 = il->nr/2;
+ idef->iparams_posres_nalloc = i1;
+ srenew(idef->iparams_posres,idef->iparams_posres_nalloc);
+ for(i=i0; i<i1; i++)
+ {
+ ip = &idef->iparams_posres[i];
+ /* Copy the force constants */
+ *ip = idef->iparams[il->iatoms[i*2]];
+ a_molb = il->iatoms[i*2+1] - a_offset;
+ if (molb->nposres_xA == 0)
+ {
+ gmx_incons("Position restraint coordinates are missing");
+ }
+ ip->posres.pos0A[XX] = molb->posres_xA[a_molb][XX];
+ ip->posres.pos0A[YY] = molb->posres_xA[a_molb][YY];
+ ip->posres.pos0A[ZZ] = molb->posres_xA[a_molb][ZZ];
+ if (molb->nposres_xB > 0)
+ {
+ ip->posres.pos0B[XX] = molb->posres_xB[a_molb][XX];
+ ip->posres.pos0B[YY] = molb->posres_xB[a_molb][YY];
+ ip->posres.pos0B[ZZ] = molb->posres_xB[a_molb][ZZ];
+ }
+ else
+ {
+ ip->posres.pos0B[XX] = ip->posres.pos0A[XX];
+ ip->posres.pos0B[YY] = ip->posres.pos0A[YY];
+ ip->posres.pos0B[ZZ] = ip->posres.pos0A[ZZ];
+ }
+ /* Set the parameter index for idef->iparams_posre */
+ il->iatoms[i*2] = i;
+ }
+}
+
+static void set_fbposres_params(t_idef *idef,gmx_molblock_t *molb,
+ int i0,int a_offset)
+{
+ t_ilist *il;
+ int i1,i,a_molb;
+ t_iparams *ip;
+
+ il = &idef->il[F_FBPOSRES];
+ i1 = il->nr/2;
+ idef->iparams_fbposres_nalloc = i1;
+ srenew(idef->iparams_fbposres,idef->iparams_fbposres_nalloc);
+ for(i=i0; i<i1; i++)
+ {
+ ip = &idef->iparams_fbposres[i];
+ /* Copy the force constants */
+ *ip = idef->iparams[il->iatoms[i*2]];
+ a_molb = il->iatoms[i*2+1] - a_offset;
+ if (molb->nposres_xA == 0)
+ {
+ gmx_incons("Position restraint coordinates are missing");
+ }
+ /* Take flat-bottom posres reference from normal position restraints */
+ ip->fbposres.pos0[XX] = molb->posres_xA[a_molb][XX];
+ ip->fbposres.pos0[YY] = molb->posres_xA[a_molb][YY];
+ ip->fbposres.pos0[ZZ] = molb->posres_xA[a_molb][ZZ];
+ /* Note: no B-type for flat-bottom posres */
+
+ /* Set the parameter index for idef->iparams_posre */
+ il->iatoms[i*2] = i;
+ }
+}
+
+static void gen_local_top(const gmx_mtop_t *mtop,const t_inputrec *ir,
+ gmx_bool bMergeConstr,
+ gmx_localtop_t *top)
+{
+ int mb,srcnr,destnr,ftype,ftype_dest,mt,natoms,mol,nposre_old,nfbposre_old;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+ const gmx_ffparams_t *ffp;
+ t_idef *idef;
+ real *qA,*qB;
+ gmx_mtop_atomloop_all_t aloop;
+ int ag;
+ t_atom *atom;
+
+ top->atomtypes = mtop->atomtypes;
+
+ ffp = &mtop->ffparams;
+
+ idef = &top->idef;
+ idef->ntypes = ffp->ntypes;
+ idef->atnr = ffp->atnr;
+ idef->functype = ffp->functype;
+ idef->iparams = ffp->iparams;
+ idef->iparams_posres = NULL;
+ idef->iparams_posres_nalloc = 0;
+ idef->iparams_fbposres = NULL;
+ idef->iparams_fbposres_nalloc = 0;
+ idef->fudgeQQ = ffp->fudgeQQ;
+ idef->cmap_grid = ffp->cmap_grid;
+ idef->ilsort = ilsortUNKNOWN;
+
+ init_block(&top->cgs);
+ init_blocka(&top->excls);
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ idef->il[ftype].nr = 0;
+ idef->il[ftype].nalloc = 0;
+ idef->il[ftype].iatoms = NULL;
+ }
+
+ natoms = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+
+ srcnr = molt->atoms.nr;
+ destnr = natoms;
+
+ blockcat(&top->cgs,&molt->cgs,molb->nmol,destnr,srcnr);
+
+ blockacat(&top->excls,&molt->excls,molb->nmol,destnr,srcnr);
+
+ nposre_old = idef->il[F_POSRES].nr;
+ nfbposre_old = idef->il[F_FBPOSRES].nr;
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ if (bMergeConstr &&
+ ftype == F_CONSTR && molt->ilist[F_CONSTRNC].nr > 0)
+ {
+ /* Merge all constrains into one ilist.
+ * This simplifies the constraint code.
+ */
+ for(mol=0; mol<molb->nmol; mol++) {
+ ilistcat(ftype,&idef->il[F_CONSTR],&molt->ilist[F_CONSTR],
+ 1,destnr+mol*srcnr,srcnr);
+ ilistcat(ftype,&idef->il[F_CONSTR],&molt->ilist[F_CONSTRNC],
+ 1,destnr+mol*srcnr,srcnr);
+ }
+ }
+ else if (!(bMergeConstr && ftype == F_CONSTRNC))
+ {
+ ilistcat(ftype,&idef->il[ftype],&molt->ilist[ftype],
+ molb->nmol,destnr,srcnr);
+ }
+ }
+ if (idef->il[F_POSRES].nr > nposre_old)
+ {
+ set_posres_params(idef,molb,nposre_old/2,natoms);
+ }
+ if (idef->il[F_FBPOSRES].nr > nfbposre_old)
+ {
+ set_fbposres_params(idef,molb,nfbposre_old/2,natoms);
+ }
+
+ natoms += molb->nmol*srcnr;
+ }
+
+ if (ir == NULL)
+ {
+ top->idef.ilsort = ilsortUNKNOWN;
+ }
+ else
+ {
+ if (ir->efep != efepNO && gmx_mtop_bondeds_free_energy(mtop))
+ {
+ snew(qA,mtop->natoms);
+ snew(qB,mtop->natoms);
+ aloop = gmx_mtop_atomloop_all_init(mtop);
+ while (gmx_mtop_atomloop_all_next(aloop,&ag,&atom))
+ {
+ qA[ag] = atom->q;
+ qB[ag] = atom->qB;
+ }
+ gmx_sort_ilist_fe(&top->idef,qA,qB);
+ sfree(qA);
+ sfree(qB);
+ }
+ else
+ {
+ top->idef.ilsort = ilsortNO_FE;
+ }
+ }
+}
+
+gmx_localtop_t *gmx_mtop_generate_local_top(const gmx_mtop_t *mtop,
+ const t_inputrec *ir)
+{
+ gmx_localtop_t *top;
+
+ snew(top,1);
+
+ gen_local_top(mtop,ir,TRUE,top);
+
+ return top;
+}
+
+t_topology gmx_mtop_t_to_t_topology(gmx_mtop_t *mtop)
+{
+ int mt,mb;
+ gmx_localtop_t ltop;
+ t_topology top;
+
+    gen_local_top(mtop,NULL,FALSE,&ltop);
+
+ top.name = mtop->name;
+ top.idef = ltop.idef;
+ top.atomtypes = ltop.atomtypes;
+ top.cgs = ltop.cgs;
+ top.excls = ltop.excls;
+ top.atoms = gmx_mtop_global_atoms(mtop);
+ top.mols = mtop->mols;
+ top.symtab = mtop->symtab;
+
+ /* We only need to free the moltype and molblock data,
+ * all other pointers have been copied to top.
+ *
+ * Well, except for the group data, but we can't free those, because they
+ * are used somewhere even after a call to this function.
+ */
+ for(mt=0; mt<mtop->nmoltype; mt++)
+ {
+ done_moltype(&mtop->moltype[mt]);
+ }
+ sfree(mtop->moltype);
+
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ done_molblock(&mtop->molblock[mb]);
+ }
+ sfree(mtop->molblock);
+
+ return top;
+}
--- /dev/null
- /*
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include "gmx_fatal.h"
+#include "main.h"
+#include "smalloc.h"
+#include "network.h"
+#include "copyrite.h"
+#include "statutil.h"
+#include <ctype.h>
+#include "macros.h"
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+
+/* The source code in this file should be thread-safe.
+ Please keep it that way. */
+
+gmx_bool gmx_mpi_initialized(void)
+{
+ int n;
+#ifndef GMX_MPI
+ return 0;
+#else
+ MPI_Initialized(&n);
+
+ return n;
+#endif
+}
+
+int gmx_setup(int *argc,char **argv,int *nnodes)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_setup");
+ return 0;
+#else
+ char buf[256];
+ int resultlen; /* actual length of node name */
+ int i,flag;
+ int mpi_num_nodes;
+ int mpi_my_rank;
+ char mpi_hostname[MPI_MAX_PROCESSOR_NAME];
+
+ /* Call the MPI routines */
+#ifdef GMX_LIB_MPI
+#ifdef GMX_FAHCORE
+ (void) fah_MPI_Init(argc,&argv);
+#else
+ (void) MPI_Init(argc,&argv);
+#endif
+#endif
+ (void) MPI_Comm_size( MPI_COMM_WORLD, &mpi_num_nodes );
+ (void) MPI_Comm_rank( MPI_COMM_WORLD, &mpi_my_rank );
+ (void) MPI_Get_processor_name( mpi_hostname, &resultlen );
+
+#ifdef GMX_LIB_MPI
+ fprintf(stderr,"NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
+ mpi_num_nodes,mpi_my_rank,mpi_hostname);
+#endif
+
+ *nnodes=mpi_num_nodes;
+
+ return mpi_my_rank;
+#endif
+}
+
+int gmx_node_num(void)
+{
+#ifndef GMX_MPI
+ return 1;
+#else
+ int i;
+ (void) MPI_Comm_size(MPI_COMM_WORLD, &i);
+ return i;
+#endif
+}
+
+int gmx_node_rank(void)
+{
+#ifndef GMX_MPI
+ return 0;
+#else
+ int i;
+ (void) MPI_Comm_rank(MPI_COMM_WORLD, &i);
+ return i;
+#endif
+}
+
+
+int gmx_hostname_num()
+{
+#ifndef GMX_MPI
+ return 0;
++#else
++#ifdef GMX_THREAD_MPI
++ /* thread-MPI currently puts the thread number in the process name,
++ * we might want to change this, as this is inconsistent with what
++ * most MPI implementations would do when running on a single node.
++ */
++ return 0;
+#else
+ int resultlen,hostnum,i,j;
+ char mpi_hostname[MPI_MAX_PROCESSOR_NAME],hostnum_str[MPI_MAX_PROCESSOR_NAME];
+
+ MPI_Get_processor_name(mpi_hostname,&resultlen);
+ /* This procedure can only differentiate nodes with host names
+ * that end on unique numbers.
+ */
+ i = 0;
+ j = 0;
+ /* Only parse the host name up to the first dot */
+ while(i < resultlen && mpi_hostname[i] != '.') {
+ if (isdigit(mpi_hostname[i])) {
+ hostnum_str[j++] = mpi_hostname[i];
+ }
+ i++;
+ }
+ hostnum_str[j] = '\0';
+ if (j == 0) {
+ hostnum = 0;
+ } else {
+ /* Use only the last 9 decimals, so we don't overflow an int */
+ hostnum = strtol(hostnum_str + max(0,j-9), NULL, 10);
+ }
+
+ if (debug) {
+ fprintf(debug,"In gmx_setup_nodecomm: hostname '%s', hostnum %d\n",
+ mpi_hostname,hostnum);
+ }
+ return hostnum;
+#endif
++#endif
+}
+
+void gmx_setup_nodecomm(FILE *fplog,t_commrec *cr)
+{
- gmx_nodecomm_t *nc;
- int n,rank,hostnum,ng,ni;
-
- /* Many MPI implementations do not optimize MPI_Allreduce
- * (and probably also other global communication calls)
- * for multi-core nodes connected by a network.
- * We can optimize such communication by using one MPI call
- * within each node and one between the nodes.
- * For MVAPICH2 and Intel MPI this reduces the time for
- * the global_stat communication by 25%
- * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
- * B. Hess, November 2007
- */
++ gmx_nodecomm_t *nc;
++ int n,rank,hostnum,ng,ni;
++
++ /* Many MPI implementations do not optimize MPI_Allreduce
++ * (and probably also other global communication calls)
++ * for multi-core nodes connected by a network.
++ * We can optimize such communication by using one MPI call
++ * within each node and one between the nodes.
++ * For MVAPICH2 and Intel MPI this reduces the time for
++ * the global_stat communication by 25%
++ * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
++ * B. Hess, November 2007
++ */
+
- nc = &cr->nc;
++ nc = &cr->nc;
+
- nc->bUse = FALSE;
++ nc->bUse = FALSE;
+#ifndef GMX_THREAD_MPI
- if (getenv("GMX_NO_NODECOMM") == NULL) {
+#ifdef GMX_MPI
+ MPI_Comm_size(cr->mpi_comm_mygroup,&n);
+ MPI_Comm_rank(cr->mpi_comm_mygroup,&rank);
+
+ hostnum = gmx_hostname_num();
+
- if (debug) {
- fprintf(debug,
- "In gmx_setup_nodecomm: splitting communicator of size %d\n",
- n);
++ if (debug)
++ {
++ fprintf(debug,"In gmx_setup_nodecomm: splitting communicator of size %d\n",n);
+ }
+
+
+ /* The intra-node communicator, split on node number */
+ MPI_Comm_split(cr->mpi_comm_mygroup,hostnum,rank,&nc->comm_intra);
+ MPI_Comm_rank(nc->comm_intra,&nc->rank_intra);
- if (debug) {
- fprintf(debug,"In gmx_setup_nodecomm: node rank %d rank_intra %d\n",
- rank,nc->rank_intra);
++ if (debug)
++ {
++ fprintf(debug,"In gmx_setup_nodecomm: node rank %d rank_intra %d\n",
++ rank,nc->rank_intra);
+ }
+ /* The inter-node communicator, split on rank_intra.
+ * We actually only need the one for rank=0,
+ * but it is easier to create them all.
+ */
+ MPI_Comm_split(cr->mpi_comm_mygroup,nc->rank_intra,rank,&nc->comm_inter);
+ /* Check if this really created two step communication */
+ MPI_Comm_size(nc->comm_inter,&ng);
+ MPI_Comm_size(nc->comm_intra,&ni);
- if (debug) {
- fprintf(debug,"In gmx_setup_nodecomm: groups %d, my group size %d\n",
- ng,ni);
++ if (debug)
++ {
++ fprintf(debug,"In gmx_setup_nodecomm: groups %d, my group size %d\n",
++ ng,ni);
+ }
- if ((ng > 1 && ng < n) || (ni > 1 && ni < n)) {
- nc->bUse = TRUE;
- if (fplog)
- fprintf(fplog,"Using two step summing over %d groups of on average %.1f processes\n\n",ng,(real)n/(real)ng);
- if (nc->rank_intra > 0)
- MPI_Comm_free(&nc->comm_inter);
- } else {
- /* One group or all processes in a separate group, use normal summing */
- MPI_Comm_free(&nc->comm_inter);
- MPI_Comm_free(&nc->comm_intra);
++
++ if (getenv("GMX_NO_NODECOMM") == NULL &&
++ ((ng > 1 && ng < n) || (ni > 1 && ni < n)))
++ {
++ nc->bUse = TRUE;
++ if (fplog)
++ {
++ fprintf(fplog,"Using two step summing over %d groups of on average %.1f processes\n\n",
++ ng,(real)n/(real)ng);
++ }
++ if (nc->rank_intra > 0)
++ {
++ MPI_Comm_free(&nc->comm_inter);
++ }
++ }
++ else
++ {
++ /* One group or all processes in a separate group, use normal summing */
++ MPI_Comm_free(&nc->comm_inter);
++ MPI_Comm_free(&nc->comm_intra);
++ if (debug)
++ {
++ fprintf(debug,"In gmx_setup_nodecomm: not unsing separate inter- and intra-node communicators.\n");
++ }
+ }
+#endif
- }
++#else
++ /* tMPI runs only on a single node so just use the nodeid */
++ nc->rank_intra = cr->nodeid;
+#endif
+}
+
++void gmx_init_intra_counters(t_commrec *cr)
++{
++ /* counters for PP+PME and PP-only processes on my node */
++ int nnodes, nnodes_pp, id_mynode=-1, id_mynode_group=-1, nproc_mynode, nproc_mynode_pp;
++#if defined GMX_MPI && !defined GMX_THREAD_MPI
++ int i, mynum, *num, *num_s, *num_pp, *num_pp_s;
++#endif
++
++ nnodes = cr->nnodes;
++ nnodes_pp = nnodes - cr->npmenodes;
++
++#if defined GMX_MPI && !defined GMX_THREAD_MPI
++ /* We have MPI and can expect to have different compute nodes */
++ mynum = gmx_hostname_num();
++
++ /* We can't rely on MPI_IN_PLACE, so we need send and receive buffers */
++ snew(num, nnodes);
++ snew(num_s, nnodes);
++ snew(num_pp, nnodes_pp);
++ snew(num_pp_s, nnodes_pp);
++
++ num_s[cr->sim_nodeid] = mynum;
++ if (cr->duty & DUTY_PP)
++ {
++ num_pp_s[cr->nodeid] = mynum;
++ }
++
++ MPI_Allreduce(num_s, num, nnodes, MPI_INT, MPI_SUM, cr->mpi_comm_mysim);
++ MPI_Allreduce(num_pp_s, num_pp, nnodes_pp, MPI_INT, MPI_SUM, cr->mpi_comm_mygroup);
++
++ id_mynode = 0;
++ id_mynode_group = 0;
++ nproc_mynode = 0;
++ nproc_mynode_pp = 0;
++ for(i=0; i<nnodes; i++)
++ {
++ if (num[i] == mynum)
++ {
++ nproc_mynode++;
++ if (i < cr->sim_nodeid)
++ {
++ id_mynode++;
++ }
++ if (i < cr->nodeid)
++ {
++ id_mynode_group++;
++ }
++ }
++ }
++ for(i=0; i<nnodes_pp; i++)
++ {
++ if (num_pp[i] == mynum)
++ {
++ nproc_mynode_pp++;
++ }
++ }
++ sfree(num);
++ sfree(num_s);
++ sfree(num_pp);
++ sfree(num_pp_s);
++#else
++ /* Serial or thread-MPI code, we are running within a node */
++ id_mynode = cr->sim_nodeid;
++ id_mynode_group = cr->nodeid;
++ nproc_mynode = cr->nnodes;
++ nproc_mynode_pp = cr->nnodes - cr->npmenodes;
++#endif
++
++ if (debug)
++ {
++ char sbuf[STRLEN];
++ if (cr->duty & DUTY_PP && cr->duty & DUTY_PME)
++ {
++ sprintf(sbuf, "PP+PME");
++ }
++ else
++ {
++ sprintf(sbuf, "%s", cr->duty & DUTY_PP ? "PP" : "PME");
++ }
++ fprintf(debug, "On %3s node %d: nodeid_intra=%d, nodeid_group_intra=%d, "
++ "nnodes_intra=%d, nnodes_pp_intra=%d\n", sbuf, cr->sim_nodeid,
++ id_mynode, id_mynode_group, nproc_mynode, nproc_mynode_pp);
++ }
++
++ cr->nodeid_intra = id_mynode;
++ cr->nodeid_group_intra = id_mynode_group;
++ cr->nnodes_intra = nproc_mynode;
++ cr->nnodes_pp_intra = nproc_mynode_pp;
++}
++
++
+void gmx_barrier(const t_commrec *cr)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_barrier");
+#else
+ MPI_Barrier(cr->mpi_comm_mygroup);
+#endif
+}
+
+void gmx_abort(int noderank,int nnodes,int errorno)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_abort");
+#else
+#ifdef GMX_THREAD_MPI
+ fprintf(stderr,"Halting program %s\n",ShortProgram());
+ thanx(stderr);
+ exit(1);
+#else
+ if (nnodes > 1)
+ {
+ fprintf(stderr,"Halting parallel program %s on CPU %d out of %d\n",
+ ShortProgram(),noderank,nnodes);
+ }
+ else
+ {
+ fprintf(stderr,"Halting program %s\n",ShortProgram());
+ }
+
+ thanx(stderr);
+ MPI_Abort(MPI_COMM_WORLD,errorno);
+ exit(1);
+#endif
+#endif
+}
+
+void gmx_bcast(int nbytes,void *b,const t_commrec *cr)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_bast");
+#else
+ MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mygroup);
+#endif
+}
+
+void gmx_bcast_sim(int nbytes,void *b,const t_commrec *cr)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_bast");
+#else
+ MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mysim);
+#endif
+}
+
+void gmx_sumd(int nr,double r[],const t_commrec *cr)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumd");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ if (cr->nc.bUse) {
+ if (cr->nc.rank_intra == 0)
+ {
+ /* Use two step summing. */
+ MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,0,
+ cr->nc.comm_intra);
+ /* Sum the roots of the internal (intra) buffers. */
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,
+ cr->nc.comm_inter);
+ }
+ else
+ {
+ /* This is here because of the silly MPI specification
+ that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
+ MPI_Reduce(r,NULL,nr,MPI_DOUBLE,MPI_SUM,0,cr->nc.comm_intra);
+ }
+ MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
+ }
+ else
+ {
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,
+ cr->mpi_comm_mygroup);
+ }
+#else
+ int i;
+
+ if (nr > cr->mpb->dbuf_alloc) {
+ cr->mpb->dbuf_alloc = nr;
+ srenew(cr->mpb->dbuf,cr->mpb->dbuf_alloc);
+ }
+ if (cr->nc.bUse) {
+ /* Use two step summing */
+ MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,cr->nc.comm_intra);
+ if (cr->nc.rank_intra == 0) {
+ /* Sum with the buffers reversed */
+ MPI_Allreduce(cr->mpb->dbuf,r,nr,MPI_DOUBLE,MPI_SUM,
+ cr->nc.comm_inter);
+ }
+ MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
+ } else {
+ MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,
+ cr->mpi_comm_mygroup);
+ for(i=0; i<nr; i++)
+ r[i] = cr->mpb->dbuf[i];
+ }
+#endif
+#endif
+}
+
+void gmx_sumf(int nr,float r[],const t_commrec *cr)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumf");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ if (cr->nc.bUse) {
+ /* Use two step summing. */
+ if (cr->nc.rank_intra == 0)
+ {
+ MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,0,
+ cr->nc.comm_intra);
+ /* Sum the roots of the internal (intra) buffers */
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,
+ cr->nc.comm_inter);
+ }
+ else
+ {
+ /* This is here because of the silly MPI specification
+ that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
+ MPI_Reduce(r,NULL,nr,MPI_FLOAT,MPI_SUM,0,cr->nc.comm_intra);
+ }
+ MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
+ }
+ else
+ {
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,cr->mpi_comm_mygroup);
+ }
+#else
+ int i;
+
+ if (nr > cr->mpb->fbuf_alloc) {
+ cr->mpb->fbuf_alloc = nr;
+ srenew(cr->mpb->fbuf,cr->mpb->fbuf_alloc);
+ }
+ if (cr->nc.bUse) {
+ /* Use two step summing */
+ MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,cr->nc.comm_intra);
+ if (cr->nc.rank_intra == 0) {
+ /* Sum with the buffers reversed */
+ MPI_Allreduce(cr->mpb->fbuf,r,nr,MPI_FLOAT,MPI_SUM,
+ cr->nc.comm_inter);
+ }
+ MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
+ } else {
+ MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,
+ cr->mpi_comm_mygroup);
+ for(i=0; i<nr; i++)
+ r[i] = cr->mpb->fbuf[i];
+ }
+#endif
+#endif
+}
+
+void gmx_sumi(int nr,int r[],const t_commrec *cr)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumi");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ if (cr->nc.bUse) {
+ /* Use two step summing */
+ if (cr->nc.rank_intra == 0)
+ {
+ MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
+ /* Sum with the buffers reversed */
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
+ }
+ else
+ {
+ /* This is here because of the silly MPI specification
+ that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
+ MPI_Reduce(r,NULL,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
+ }
+ MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra);
+ }
+ else
+ {
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
+ }
+#else
+ int i;
+
+ if (nr > cr->mpb->ibuf_alloc) {
+ cr->mpb->ibuf_alloc = nr;
+ srenew(cr->mpb->ibuf,cr->mpb->ibuf_alloc);
+ }
+ if (cr->nc.bUse) {
+ /* Use two step summing */
+ MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->nc.comm_intra);
+ if (cr->nc.rank_intra == 0) {
+ /* Sum with the buffers reversed */
+ MPI_Allreduce(cr->mpb->ibuf,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
+ }
+ MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra);
+ } else {
+ MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
+ for(i=0; i<nr; i++)
+ r[i] = cr->mpb->ibuf[i];
+ }
+#endif
+#endif
+}
+
+void gmx_sumli(int nr,gmx_large_int_t r[],const t_commrec *cr)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumli");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ if (cr->nc.bUse) {
+ /* Use two step summing */
+ if (cr->nc.rank_intra == 0)
+ {
+ MPI_Reduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,
+ cr->nc.comm_intra);
+ /* Sum with the buffers reversed */
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+ cr->nc.comm_inter);
+ }
+ else
+ {
+ /* This is here because of the silly MPI specification
+ that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
+ MPI_Reduce(r,NULL,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,cr->nc.comm_intra);
+ }
+ MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
+ }
+ else
+ {
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,cr->mpi_comm_mygroup);
+ }
+#else
+ int i;
+
+ if (nr > cr->mpb->libuf_alloc) {
+ cr->mpb->libuf_alloc = nr;
+ srenew(cr->mpb->libuf,cr->mpb->libuf_alloc);
+ }
+ if (cr->nc.bUse) {
+ /* Use two step summing */
+ MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+ cr->nc.comm_intra);
+ if (cr->nc.rank_intra == 0) {
+ /* Sum with the buffers reversed */
+ MPI_Allreduce(cr->mpb->libuf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+ cr->nc.comm_inter);
+ }
+ MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
+ } else {
+ MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+ cr->mpi_comm_mygroup);
+ for(i=0; i<nr; i++)
+ r[i] = cr->mpb->libuf[i];
+ }
+#endif
+#endif
+}
+
+
+
+#ifdef GMX_MPI
+void gmx_sumd_comm(int nr,double r[],MPI_Comm mpi_comm)
+{
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
+#else
+ /* this function is only used in code that is not performance critical,
+ (during setup, when comm_rec is not the appropriate communication
+ structure), so this isn't as bad as it looks. */
+ double *buf;
+ int i;
+
+ snew(buf, nr);
+ MPI_Allreduce(r,buf,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
+ for(i=0; i<nr; i++)
+ r[i] = buf[i];
+ sfree(buf);
+#endif
+}
+#endif
+
+#ifdef GMX_MPI
+void gmx_sumf_comm(int nr,float r[],MPI_Comm mpi_comm)
+{
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
+#else
+ /* this function is only used in code that is not performance critical,
+ (during setup, when comm_rec is not the appropriate communication
+ structure), so this isn't as bad as it looks. */
+ float *buf;
+ int i;
+
+ snew(buf, nr);
+ MPI_Allreduce(r,buf,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
+ for(i=0; i<nr; i++)
+ r[i] = buf[i];
+ sfree(buf);
+#endif
+}
+#endif
+
+void gmx_sumd_sim(int nr,double r[],const gmx_multisim_t *ms)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumd_sim");
+#else
+ gmx_sumd_comm(nr,r,ms->mpi_comm_masters);
+#endif
+}
+
+void gmx_sumf_sim(int nr,float r[],const gmx_multisim_t *ms)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumf_sim");
+#else
+ gmx_sumf_comm(nr,r,ms->mpi_comm_masters);
+#endif
+}
+
+void gmx_sumi_sim(int nr,int r[], const gmx_multisim_t *ms)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumi_sim");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
+#else
+ /* this is thread-unsafe, but it will do for now: */
+ int i;
+
+ if (nr > ms->mpb->ibuf_alloc) {
+ ms->mpb->ibuf_alloc = nr;
+ srenew(ms->mpb->ibuf,ms->mpb->ibuf_alloc);
+ }
+ MPI_Allreduce(r,ms->mpb->ibuf,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
+ for(i=0; i<nr; i++)
+ r[i] = ms->mpb->ibuf[i];
+#endif
+#endif
+}
+
+void gmx_sumli_sim(int nr,gmx_large_int_t r[], const gmx_multisim_t *ms)
+{
+#ifndef GMX_MPI
+ gmx_call("gmx_sumli_sim");
+#else
+#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
+ MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+ ms->mpi_comm_masters);
+#else
+ /* this is thread-unsafe, but it will do for now: */
+ int i;
+
+ if (nr > ms->mpb->libuf_alloc) {
+ ms->mpb->libuf_alloc = nr;
+ srenew(ms->mpb->libuf,ms->mpb->libuf_alloc);
+ }
+ MPI_Allreduce(r,ms->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
+ ms->mpi_comm_masters);
+ for(i=0; i<nr; i++)
+ r[i] = ms->mpb->libuf[i];
+#endif
+#endif
+}
+
+
+void gmx_finalize_par(void)
+{
+#ifndef GMX_MPI
+ /* Compiled without MPI, no MPI finalizing needed */
+ return;
+#else
+ int initialized,finalized;
+ int ret;
+
+ MPI_Initialized(&initialized);
+ if (!initialized)
+ {
+ return;
+ }
+ /* just as a check; we don't want to finalize twice */
+ MPI_Finalized(&finalized);
+ if (finalized)
+ {
+ return;
+ }
+
+ /* We sync the processes here to try to avoid problems
+ * with buggy MPI implementations that could cause
+ * unfinished processes to terminate.
+ */
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ /*
+ if (DOMAINDECOMP(cr)) {
+ if (cr->npmenodes > 0 || cr->dd->bCartesian)
+ MPI_Comm_free(&cr->mpi_comm_mygroup);
+ if (cr->dd->bCartesian)
+ MPI_Comm_free(&cr->mpi_comm_mysim);
+ }
+ */
+
+ /* Apparently certain mpich implementations cause problems
+ * with MPI_Finalize. In that case comment out MPI_Finalize.
+ */
+ if (debug)
+ fprintf(debug,"Will call MPI_Finalize now\n");
+
+ ret = MPI_Finalize();
+ if (debug)
+ fprintf(debug,"Return code from MPI_Finalize = %d\n",ret);
+#endif
+}
+
--- /dev/null
- double nbfs,double mflop)
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
++#include "types/commrec.h"
+#include "sysstuff.h"
+#include "gmx_fatal.h"
+#include "names.h"
+#include "macros.h"
+#include "nrnb.h"
+#include "main.h"
+#include "smalloc.h"
+#include "copyrite.h"
+
+typedef struct {
+ const char *name;
+ int flop;
+} t_nrnb_data;
+
+
+static const t_nrnb_data nbdata[eNRNB] = {
+ { "LJ", 33 }, /* nb_kernel010 */
+ { "Buckingham", 61 }, /* nb_kernel020 */
+ { "VdW(T)", 54 }, /* nb_kernel030 */
+ { "Coulomb", 27 }, /* nb_kernel100 */
+ { "Coulomb [W3]", 80 }, /* nb_kernel101 */
+ { "Coulomb [W3-W3]", 234 }, /* nb_kernel102 */
+ { "Coulomb [W4]", 80 }, /* nb_kernel103 */
+ { "Coulomb [W4-W4]", 234 }, /* nb_kernel104 */
+ { "Coulomb + LJ", 38 }, /* nb_kernel110 */
+ { "Coulomb + LJ [W3]", 91 }, /* nb_kernel111 */
+ { "Coulomb + LJ [W3-W3]", 245 }, /* nb_kernel112 */
+ { "Coulomb + LJ [W4]", 113 }, /* nb_kernel113 */
+ { "Coulomb + LJ [W4-W4]", 267 }, /* nb_kernel114 */
+ { "Coulomb + Bham ", 64 }, /* nb_kernel120 */
+ { "Coulomb + Bham [W3]", 117 }, /* nb_kernel121 */
+ { "Coulomb + Bham [W3-W3]", 271 }, /* nb_kernel122 */
+ { "Coulomb + Bham [W4]", 141 }, /* nb_kernel123 */
+ { "Coulomb + Bham [W4-W4]", 295 }, /* nb_kernel124 */
+ { "Coulomb + VdW(T) ", 59 }, /* nb_kernel130 */
+ { "Coulomb + VdW(T) [W3]", 112 }, /* nb_kernel131 */
+ { "Coulomb + VdW(T) [W3-W3]", 266 }, /* nb_kernel132 */
+ { "Coulomb + VdW(T) [W4]", 134 }, /* nb_kernel133 */
+ { "Coulomb + VdW(T) [W4-W4]", 288 }, /* nb_kernel134 */
+ { "RF Coul", 33 }, /* nb_kernel200 */
+ { "RF Coul [W3]", 98 }, /* nb_kernel201 */
+ { "RF Coul [W3-W3]", 288 }, /* nb_kernel202 */
+ { "RF Coul [W4]", 98 }, /* nb_kernel203 */
+ { "RF Coul [W4-W4]", 288 }, /* nb_kernel204 */
+ { "RF Coul + LJ", 44 }, /* nb_kernel210 */
+ { "RF Coul + LJ [W3]", 109 }, /* nb_kernel211 */
+ { "RF Coul + LJ [W3-W3]", 299 }, /* nb_kernel212 */
+ { "RF Coul + LJ [W4]", 131 }, /* nb_kernel213 */
+ { "RF Coul + LJ [W4-W4]", 321 }, /* nb_kernel214 */
+ { "RF Coul + Bham ", 70 }, /* nb_kernel220 */
+ { "RF Coul + Bham [W3]", 135 }, /* nb_kernel221 */
+ { "RF Coul + Bham [W3-W3]", 325 }, /* nb_kernel222 */
+ { "RF Coul + Bham [W4]", 159 }, /* nb_kernel223 */
+ { "RF Coul + Bham [W4-W4]", 349 }, /* nb_kernel224 */
+ { "RF Coul + VdW(T) ", 65 }, /* nb_kernel230 */
+ { "RF Coul + VdW(T) [W3]", 130 }, /* nb_kernel231 */
+ { "RF Coul + VdW(T) [W3-W3]", 320 }, /* nb_kernel232 */
+ { "RF Coul + VdW(T) [W4]", 152 }, /* nb_kernel233 */
+ { "RF Coul + VdW(T) [W4-W4]", 342 }, /* nb_kernel234 */
+ { "Coul(T)", 42 }, /* nb_kernel300 */
+ { "Coul(T) [W3]", 125 }, /* nb_kernel301 */
+ { "Coul(T) [W3-W3]", 369 }, /* nb_kernel302 */
+ { "Coul(T) [W4]", 125 }, /* nb_kernel303 */
+ { "Coul(T) [W4-W4]", 369 }, /* nb_kernel304 */
+ { "Coul(T) + LJ", 55 }, /* nb_kernel310 */
+ { "Coul(T) + LJ [W3]", 138 }, /* nb_kernel311 */
+ { "Coul(T) + LJ [W3-W3]", 382 }, /* nb_kernel312 */
+ { "Coul(T) + LJ [W4]", 158 }, /* nb_kernel313 */
+ { "Coul(T) + LJ [W4-W4]", 402 }, /* nb_kernel314 */
+ { "Coul(T) + Bham", 81 }, /* nb_kernel320 */
+ { "Coul(T) + Bham [W3]", 164 }, /* nb_kernel321 */
+ { "Coul(T) + Bham [W3-W3]", 408 }, /* nb_kernel322 */
+ { "Coul(T) + Bham [W4]", 186 }, /* nb_kernel323 */
+ { "Coul(T) + Bham [W4-W4]", 430 }, /* nb_kernel324 */
+ { "Coul(T) + VdW(T)", 68 }, /* nb_kernel330 */
+ { "Coul(T) + VdW(T) [W3]", 151 }, /* nb_kernel331 */
+ { "Coul(T) + VdW(T) [W3-W3]", 395 }, /* nb_kernel332 */
+ { "Coul(T) + VdW(T) [W4]", 179 }, /* nb_kernel333 */
+ { "Coul(T) + VdW(T) [W4-W4]", 423 }, /* nb_kernel334 */
+ { "Generalized Born Coulomb", 48 }, /* nb_kernel400 */
+ { "GB Coulomb + LJ", 61 }, /* nb_kernel410 */
+ { "GB Coulomb + VdW(T)", 79 }, /* nb_kernel430 */
+ { "LJ NF", 19 }, /* nb_kernel010nf */
+ { "Buckingham NF", 48 }, /* nb_kernel020nf */
+ { "VdW(T) NF", 33 }, /* nb_kernel030nf */
+ { "Coulomb NF", 16 }, /* nb_kernel100nf */
+ { "Coulomb [W3] NF", 47 }, /* nb_kernel101nf */
+ { "Coulomb [W3-W3] NF", 135 }, /* nb_kernel102nf */
+ { "Coulomb [W4] NF", 47 }, /* nb_kernel103nf */
+ { "Coulomb [W4-W4] NF", 135 }, /* nb_kernel104nf */
+ { "Coulomb + LJ NF", 24 }, /* nb_kernel110nf */
+ { "Coulomb + LJ [W3] NF", 55 }, /* nb_kernel111nf */
+ { "Coulomb + LJ [W3-W3] NF", 143 }, /* nb_kernel112nf */
+ { "Coulomb + LJ [W4] NF", 66 }, /* nb_kernel113nf */
+ { "Coulomb + LJ [W4-W4] NF", 154 }, /* nb_kernel114nf */
+ { "Coulomb + Bham NF", 51 }, /* nb_kernel120nf */
+ { "Coulomb + Bham [W3] NF", 82 }, /* nb_kernel121nf */
+ { "Coulomb + Bham [W3-W3] NF", 170 }, /* nb_kernel122nf */
+ { "Coulomb + Bham [W4] NF", 95 }, /* nb_kernel123nf */
+ { "Coulomb + Bham [W4-W4] NF", 183 }, /* nb_kernel124nf */
+ { "Coulomb + VdW(T) NF", 36 }, /* nb_kernel130nf */
+ { "Coulomb + VdW(T) [W3] NF", 67 }, /* nb_kernel131nf */
+ { "Coulomb + VdW(T) [W3-W3] NF", 155 }, /* nb_kernel132nf */
+ { "Coulomb + VdW(T) [W4] NF", 80 }, /* nb_kernel133nf */
+ { "Coulomb + VdW(T) [W4-W4] NF", 168 }, /* nb_kernel134nf */
+ { "RF Coul NF", 19 }, /* nb_kernel200nf */
+ { "RF Coul [W3] NF", 56 }, /* nb_kernel201nf */
+ { "RF Coul [W3-W3] NF", 162 }, /* nb_kernel202nf */
+ { "RF Coul [W4] NF", 56 }, /* nb_kernel203nf */
+ { "RF Coul [W4-W4] NF", 162 }, /* nb_kernel204nf */
+ { "RF Coul + LJ NF", 27 }, /* nb_kernel210nf */
+ { "RF Coul + LJ [W3] NF", 64 }, /* nb_kernel211nf */
+ { "RF Coul + LJ [W3-W3] NF", 170 }, /* nb_kernel212nf */
+ { "RF Coul + LJ [W4] NF", 75 }, /* nb_kernel213nf */
+ { "RF Coul + LJ [W4-W4] NF", 181 }, /* nb_kernel214nf */
+ { "RF Coul + Bham NF", 54 }, /* nb_kernel220nf */
+ { "RF Coul + Bham [W3] NF", 91 }, /* nb_kernel221nf */
+ { "RF Coul + Bham [W3-W3] NF", 197 }, /* nb_kernel222nf */
+ { "RF Coul + Bham [W4] NF", 104 }, /* nb_kernel223nf */
+ { "RF Coul + Bham [W4-W4] NF", 210 }, /* nb_kernel224nf */
+ { "RF Coul + VdW(T) NF", 39 }, /* nb_kernel230nf */
+ { "RF Coul + VdW(T) [W3] NF", 76 }, /* nb_kernel231nf */
+ { "RF Coul + VdW(T) [W3-W3] NF", 182 }, /* nb_kernel232nf */
+ { "RF Coul + VdW(T) [W4] NF", 89 }, /* nb_kernel233nf */
+ { "RF Coul + VdW(T) [W4-W4] NF", 195 }, /* nb_kernel234nf */
+ { "Coul(T) NF", 26 }, /* nb_kernel300nf */
+ { "Coul(T) [W3] NF", 77 }, /* nb_kernel301nf */
+ { "Coul(T) [W3-W3] NF", 225 }, /* nb_kernel302nf */
+ { "Coul(T) [W4] NF", 77 }, /* nb_kernel303nf */
+ { "Coul(T) [W4-W4] NF", 225 }, /* nb_kernel304nf */
+ { "Coul(T) + LJ NF", 34 }, /* nb_kernel310nf */
+ { "Coul(T) + LJ [W3] NF", 85 }, /* nb_kernel311nf */
+ { "Coul(T) + LJ [W3-W3] NF", 233 }, /* nb_kernel312nf */
+ { "Coul(T) + LJ [W4] NF", 96 }, /* nb_kernel313nf */
+ { "Coul(T) + LJ [W4-W4] NF", 244 }, /* nb_kernel314nf */
+ { "Coul(T) + Bham NF", 61 }, /* nb_kernel320nf */
+ { "Coul(T) + Bham [W3] NF", 112 }, /* nb_kernel321nf */
+ { "Coul(T) + Bham [W3-W3] NF", 260 }, /* nb_kernel322nf */
+ { "Coul(T) + Bham [W4] NF", 125 }, /* nb_kernel323nf */
+ { "Coul(T) + Bham [W4-W4] NF", 273 }, /* nb_kernel324nf */
+ { "Coul(T) + VdW(T) NF", 42 }, /* nb_kernel330nf */
+ { "Coul(T) + VdW(T) [W3] NF", 93 }, /* nb_kernel331nf */
+ { "Coul(T) + VdW(T) [W3-W3] NF", 241 }, /* nb_kernel332nf */
+ { "Coul(T) + VdW(T) [W4] NF", 110 }, /* nb_kernel333nf */
+ { "Coul(T) + VdW(T) [W4-W4] NF", 258 }, /* nb_kernel334nf */
+ { "Generalized Born Coulomb NF", 29 }, /* nb_kernel400nf */
+ { "GB Coulomb + LJ NF", 37 }, /* nb_kernel410nf */
+ { "GB Coulomb + VdW(T) NF", 49 }, /* nb_kernel430nf */
+ { "Free energy innerloop", 150 }, /* free energy, estimate */
+ { "All-vs-All, Coul + LJ", 38 },
+ { "All-vs-All, GB + LJ", 61 },
+ { "Outer nonbonded loop", 10 },
++ { "Pair Search distance check", 9 }, /* nbnxn pair dist. check */
++ /* nbnxn kernel flops are based on inner-loops without exclusion checks.
++ * Plain Coulomb runs through the RF kernels, except with CUDA.
++ * invsqrt is counted as 6 flops: 1 for _mm_rsqt_ps + 5 for iteration.
++ * The flops are equal for plain-C, x86 SIMD and CUDA, except for:
++ * - plain-C kernel uses one flop more for Coulomb-only (F) than listed
++ * - x86 SIMD LJ geom-comb.rule kernels (fastest) use 2 more flops
++ * - x86 SIMD LJ LB-comb.rule kernels (fast) use 3 (8 for F+E) more flops
++ * - GPU always does exclusions, which requires 2-4 flops, but as invsqrt
++ * is always counted as 6 flops, this roughly compensates.
++ */
++ { "LJ + Coulomb RF (F)", 38 }, /* nbnxn kernel LJ+RF, no ener */
++ { "LJ + Coulomb RF (F+E)", 54 },
++ { "LJ + Coulomb tabulated (F)", 41 }, /* nbnxn kernel LJ+tab, no en */
++ { "LJ + Coulomb tabulated (F+E)", 59 },
++ { "LJ (F)", 33 }, /* nbnxn kernel LJ, no ener */
++ { "LJ (F+E)", 43 },
++ { "Coulomb RF (F)", 31 }, /* nbnxn kernel RF, no ener */
++ { "Coulomb RF (F+E)", 36 },
++ { "Coulomb tabulated (F)", 34 }, /* nbnxn kernel tab, no ener */
++ { "Coulomb tabulated (F+E)", 41 },
+ { "1,4 nonbonded interactions", 90 },
+ { "Born radii (Still)", 47 },
+ { "Born radii (HCT/OBC)", 183 },
+ { "Born force chain rule", 15 },
+ { "All-vs-All Still radii", 47 },
+ { "All-vs-All HCT/OBC radii", 183 },
+ { "All-vs-All Born chain rule", 15 },
+ { "Calc Weights", 36 },
+ { "Spread Q", 6 },
+ { "Spread Q Bspline", 2 },
+ { "Gather F", 23 },
+ { "Gather F Bspline", 6 },
+ { "3D-FFT", 8 },
+ { "Convolution", 4 },
+ { "Solve PME", 64 },
+ { "NS-Pairs", 21 },
+ { "Reset In Box", 3 },
+ { "Shift-X", 6 },
+ { "CG-CoM", 3 },
+ { "Sum Forces", 1 },
+ { "Bonds", 59 },
+ { "G96Bonds", 44 },
+ { "FENE Bonds", 58 },
+ { "Tab. Bonds", 62 },
+ { "Restraint Potential", 86 },
+ { "Linear Angles", 57 },
+ { "Angles", 168 },
+ { "G96Angles", 150 },
+ { "Quartic Angles", 160 },
+ { "Tab. Angles", 169 },
+ { "Propers", 229 },
+ { "Impropers", 208 },
+ { "RB-Dihedrals", 247 },
+ { "Four. Dihedrals", 247 },
+ { "Tab. Dihedrals", 227 },
+ { "Dist. Restr.", 200 },
+ { "Orient. Restr.", 200 },
+ { "Dihedral Restr.", 200 },
+ { "Pos. Restr.", 50 },
+ { "Flat-bottom posres", 50 },
+ { "Angle Restr.", 191 },
+ { "Angle Restr. Z", 164 },
+ { "Morse Potent.", 83 },
+ { "Cubic Bonds", 54 },
+ { "Walls", 31 },
+ { "Polarization", 59 },
+ { "Anharmonic Polarization", 72 },
+ { "Water Pol.", 62 },
+ { "Thole Pol.", 296 },
+ { "Virial", 18 },
+ { "Update", 31 },
+ { "Ext.ens. Update", 54 },
+ { "Stop-CM", 10 },
+ { "P-Coupling", 6 },
+ { "Calc-Ekin", 27 },
+ { "Lincs", 60 },
+ { "Lincs-Mat", 4 },
+ { "Shake", 30 },
+ { "Constraint-V", 8 },
+ { "Shake-Init", 10 },
+ { "Constraint-Vir", 24 },
+ { "Settle", 323 },
+ { "Virtual Site 2", 23 },
+ { "Virtual Site 3", 37 },
+ { "Virtual Site 3fd", 95 },
+ { "Virtual Site 3fad", 176 },
+ { "Virtual Site 3out", 87 },
+ { "Virtual Site 4fd", 110 },
+ { "Virtual Site 4fdn", 254 },
+ { "Virtual Site N", 15 },
+ { "Mixed Generalized Born stuff", 10 }
+};
+
+
+/* Set all eNRNB flop counters in *nrnb to zero. */
+void init_nrnb(t_nrnb *nrnb)
+{
+ int i;
+
+ for(i=0; (i<eNRNB); i++)
+ nrnb->n[i]=0.0;
+}
+
+/* Copy all flop counters from src to dest. */
+void cp_nrnb(t_nrnb *dest, t_nrnb *src)
+{
+ int i;
+
+ for(i=0; (i<eNRNB); i++)
+ dest->n[i]=src->n[i];
+}
+
+/* Element-wise sum of two counter sets: dest = s1 + s2. */
+void add_nrnb(t_nrnb *dest, t_nrnb *s1, t_nrnb *s2)
+{
+ int i;
+
+ for(i=0; (i<eNRNB); i++)
+ dest->n[i]=s1->n[i]+s2->n[i];
+}
+
+/* Print every counter with a positive count, together with its name,
+ * one per line to out. Counters that are zero are skipped.
+ */
+void print_nrnb(FILE *out, t_nrnb *nrnb)
+{
+ int i;
+
+ for(i=0; (i<eNRNB); i++)
+ if (nrnb->n[i] > 0)
+ fprintf(out," %-26s %10.0f.\n",nbdata[i].name,nrnb->n[i]);
+}
+
+/* Add inc to counter enr. The file/line arguments are only used for
+ * the trace output compiled in with DEBUG_NRNB (callers pass them via
+ * the inc_nrnb macro).
+ */
+void _inc_nrnb(t_nrnb *nrnb,int enr,int inc,char *file,int line)
+{
+ nrnb->n[enr]+=inc;
+#ifdef DEBUG_NRNB
+ printf("nrnb %15s(%2d) incremented with %8d from file %s line %d\n",
+ nbdata[enr].name,enr,inc,file,line);
+#endif
+}
+
+/* Print the MEGA-FLOPS accounting table.
+ * out may be NULL, in which case nothing is printed but the totals are
+ * still computed. On return *nbfs holds the number of nonbonded
+ * interactions in millions (water-kernel counters weighted by their
+ * atom-pair count) and *mflop the total M-flop count.
+ */
+void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
+{
+ int i;
+ double mni,frac,tfrac,tflop;
+ const char *myline = "-----------------------------------------------------------------------------";
+
+ *nbfs = 0.0;
+ /* Bug fix: define *mflop even when we take the early return below,
+ * so callers never read an uninitialized value.
+ */
+ *mflop = 0.0;
+ for(i=0; (i<eNR_NBKERNEL_NR); i++) {
+ /* Match the pair names first: "W3-W3" also contains "W3", etc. */
+ if (strstr(nbdata[i].name,"W3-W3") != NULL)
+ *nbfs += 9e-6*nrnb->n[i];
+ else if (strstr(nbdata[i].name,"W3") != NULL)
+ *nbfs += 3e-6*nrnb->n[i];
+ else if (strstr(nbdata[i].name,"W4-W4") != NULL)
+ *nbfs += 10e-6*nrnb->n[i];
+ else if (strstr(nbdata[i].name,"W4") != NULL)
+ *nbfs += 4e-6*nrnb->n[i];
+ else
+ *nbfs += 1e-6*nrnb->n[i];
+ }
+ tflop=0;
+ for(i=0; (i<eNRNB); i++)
+ tflop+=1e-6*nrnb->n[i]*nbdata[i].flop;
+
+ if (tflop == 0) {
+ /* Bug fix: guard against out==NULL here too, as every other
+ * write in this function already does.
+ */
+ if (out) {
+ fprintf(out,"No MEGA Flopsen this time\n");
+ }
+ return;
+ }
+ if (out) {
+ fprintf(out,"\n\tM E G A - F L O P S A C C O U N T I N G\n\n");
+ fprintf(out," RF=Reaction-Field FE=Free Energy SCFE=Soft-Core/Free Energy\n");
+ fprintf(out," T=Tabulated W3=SPC/TIP3p W4=TIP4p (single or pairs)\n");
+ fprintf(out," NF=No Forces\n\n");
+
+ fprintf(out," %-32s %16s %15s %7s\n",
+ "Computing:","M-Number","M-Flops","% Flops");
+ fprintf(out,"%s\n",myline);
+ }
+ tfrac=0.0;
+ for(i=0; (i<eNRNB); i++) {
+ mni = 1e-6*nrnb->n[i];
+ *mflop += mni*nbdata[i].flop;
+ frac = 100.0*mni*nbdata[i].flop/tflop;
+ tfrac += frac;
+ if (out && mni != 0)
+ fprintf(out," %-32s %16.6f %15.3f %6.1f\n",
+ nbdata[i].name,mni,mni*nbdata[i].flop,frac);
+ }
+ if (out) {
+ fprintf(out,"%s\n",myline);
+ fprintf(out," %-32s %16s %15.3f %6.1f\n",
+ "Total","",*mflop,tfrac);
+ fprintf(out,"%s\n\n",myline);
+ }
+}
+
+/* Print wall-clock timing and performance statistics (ns/day, hour/ns;
+ * with GMX_DETAILED_PERF_STATS also Mnbf/s and M/GFlops). This hunk is
+ * a nested diff ('-' removed, '++' newly added lines); the new code
+ * bases performance on realtime (wall clock) instead of nodetime.
+ */
+void print_perf(FILE *out,double nodetime,double realtime,int nprocs,
+ gmx_large_int_t nsteps,real delta_t,
- if (nodetime == 0.0) {
- fprintf(out,"nodetime = 0! Infinite Giga flopses!\n");
- }
- #ifdef GMX_OPENMM
- nodetime = realtime;
- fprintf(out,"\tOpenMM run - timing based on wallclock.\n\n");
- #else
- if (nprocs > 1)
++ double nbfs,double mflop,
++ int omp_nth_pp)
+{
+ real runtime;
+
+ fprintf(out,"\n");
+
- nodetime = realtime;
- fprintf(out,"\tParallel run - timing based on wallclock.\n\n");
- }
- #endif
-
- if ((nodetime > 0) && (realtime > 0)) {
- fprintf(out,"%12s %10s %10s %8s\n","","NODE (s)","Real (s)","(%)");
- fprintf(out,"%12s %10.3f %10.3f %8.1f\n","Time:",
++ if (realtime > 0)
+ {
- if (nodetime > 60) {
- fprintf(out,"%12s %10s","","");
- pr_difftime(out,nodetime);
++ fprintf(out,"%12s %12s %12s %10s\n","","Core t (s)","Wall t (s)","(%)");
++ fprintf(out,"%12s %12.3f %12.3f %10.1f\n","Time:",
+ nodetime, realtime, 100.0*nodetime/realtime);
- if (delta_t > 0) {
- mflop = mflop/nodetime;
++ /* only print day-hour-sec format if realtime is more than 30 min */
++ if (realtime > 30*60)
++ {
++ fprintf(out,"%12s %12s","","");
++ pr_difftime(out,realtime);
+ }
- fprintf(out,"%12s %10s %10s %10s %10s\n",
- "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
- "(ns/day)","(hour/ns)");
- fprintf(out,"%12s %10.3f %10.3f %10.3f %10.3f\n","Performance:",
- nbfs/nodetime,(mflop > 1000) ? (mflop/1000) : mflop,
- runtime*24*3.6/nodetime,1000*nodetime/(3600*runtime));
- } else {
- fprintf(out,"%12s %10s %10s %14s\n",
- "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
- "(steps/hour)");
- fprintf(out,"%12s %10.3f %10.3f %14.1f\n","Performance:",
- nbfs/nodetime,(mflop > 1000) ? (mflop/1000) : mflop,
- nsteps*3600.0/nodetime);
++ if (delta_t > 0)
++ {
++ mflop = mflop/realtime;
+ runtime = nsteps*delta_t;
++
++ if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
++ {
++ fprintf(out,"%12s %12s %12s\n",
++ "","(ns/day)","(hour/ns)");
++ fprintf(out,"%12s %12.3f %12.3f\n","Performance:",
++ runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
++ }
++ else
++ {
++ fprintf(out,"%12s %12s %12s %12s %12s\n",
++ "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
++ "(ns/day)","(hour/ns)");
++ fprintf(out,"%12s %12.3f %12.3f %12.3f %12.3f\n","Performance:",
++ nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
++ runtime*24*3.6/realtime,1000*realtime/(3600*runtime));
++ }
++ }
++ else
++ {
++ if (getenv("GMX_DETAILED_PERF_STATS") == NULL)
++ {
++ fprintf(out,"%12s %14s\n",
++ "","(steps/hour)");
++ fprintf(out,"%12s %14.1f\n","Performance:",
++ nsteps*3600.0/realtime);
++ }
++ else
++ {
++ fprintf(out,"%12s %12s %12s %14s\n",
++ "","(Mnbf/s)",(mflop > 1000) ? "(GFlops)" : "(MFlops)",
++ "(steps/hour)");
++ fprintf(out,"%12s %12.3f %12.3f %14.1f\n","Performance:",
++ nbfs/realtime,(mflop > 1000) ? (mflop/1000) : mflop,
++ nsteps*3600.0/realtime);
++ }
+ }
+ }
+}
+
+/* Return the flop cost per interaction for counter enr. */
+int cost_nrnb(int enr)
+{
+ return nbdata[enr].flop;
+}
+
+/* Return the human-readable name of counter enr. */
+const char *nrnb_str(int enr)
+{
+ return nbdata[enr].name;
+}
+
+/* Counters that contribute to the force-computation cost in pr_load(). */
+static const int force_index[]={
+ eNR_BONDS, eNR_ANGLES, eNR_PROPER, eNR_IMPROPER,
+ eNR_RB, eNR_DISRES, eNR_ORIRES, eNR_POSRES,
+ eNR_FBPOSRES, eNR_NS, eNR_NBKERNEL_OUTER
+};
+#define NFORCE_INDEX asize(force_index)
+
+/* Counters that contribute to the constraint/update cost in pr_load(). */
+static const int constr_index[]={
+ eNR_SHAKE, eNR_SHAKE_RIJ, eNR_SETTLE, eNR_UPDATE, eNR_PCOUPL,
+ eNR_CONSTR_VIR,eNR_CONSTR_V
+};
+#define NCONSTR_INDEX asize(constr_index)
+
+/* Print each node's cost ftot[i] as a percentage of the per-node average
+ * (fav divided by the number of non-PME nodes), followed by a scaling
+ * percentage (10000/max). Returns the largest per-node percentage, i.e.
+ * the load imbalance; prints nothing and returns 0 when fav <= 0.
+ */
+static double pr_av(FILE *log,t_commrec *cr,
+ double fav,double ftot[],const char *title)
+{
+ int i,perc;
+ double dperc,unb;
+
+ unb=0;
+ if (fav > 0) {
+ fav /= cr->nnodes - cr->npmenodes;
+ fprintf(log,"\n %-26s",title);
+ for(i=0; (i<cr->nnodes); i++) {
+ dperc=(100.0*ftot[i])/fav;
+ unb=max(unb,dperc);
+ perc=dperc;
+ fprintf(log,"%3d ",perc);
+ }
+ if (unb > 0) {
+ perc=10000.0/unb;
+ fprintf(log,"%6d%%\n\n",perc);
+ }
+ else
+ fprintf(log,"\n\n");
+ }
+ return unb;
+}
+
+/* Print a detailed per-node load-balancing table: for every counter,
+ * each node's count as a percentage of the average over the non-PME
+ * nodes, followed by force and constraint cost summaries (via pr_av)
+ * and an overall scaling estimate.
+ */
+void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
+{
+ int i,j,perc;
+ double dperc,unb,uf,us;
+ double *ftot,fav;
+ double *stot,sav;
+ t_nrnb *av;
+
+ snew(av,1);
+ snew(ftot,cr->nnodes);
+ snew(stot,cr->nnodes);
+ init_nrnb(av);
+ for(i=0; (i<cr->nnodes); i++) {
+ add_nrnb(av,av,&(nrnb[i]));
+ /* Cost due to forces */
+ for(j=0; (j<eNR_NBKERNEL_NR); j++)
+ ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
+ for(j=0; (j<NFORCE_INDEX); j++)
+ ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
+ /* Due to shake */
+ for(j=0; (j<NCONSTR_INDEX); j++) {
+ stot[i]+=nrnb[i].n[constr_index[j]]*cost_nrnb(constr_index[j]);
+ }
+ }
+ /* Average over the particle (non-PME) nodes only */
+ for(j=0; (j<eNRNB); j++)
+ av->n[j]=av->n[j]/(double)(cr->nnodes - cr->npmenodes);
+
+ fprintf(log,"\nDetailed load balancing info in percentage of average\n");
+
+ fprintf(log," Type NODE:");
+ for(i=0; (i<cr->nnodes); i++)
+ fprintf(log,"%3d ",i);
+ fprintf(log,"Scaling\n");
+ fprintf(log,"---------------------------");
+ for(i=0; (i<cr->nnodes); i++)
+ fprintf(log,"----");
+ fprintf(log,"-------\n");
+
+ for(j=0; (j<eNRNB); j++) {
+ unb=100.0;
+ if (av->n[j] > 0) {
+ fprintf(log," %-26s",nrnb_str(j));
+ for(i=0; (i<cr->nnodes); i++) {
+ dperc=(100.0*nrnb[i].n[j])/av->n[j];
+ unb=max(unb,dperc);
+ perc=dperc;
+ fprintf(log,"%3d ",perc);
+ }
+ if (unb > 0) {
+ perc=10000.0/unb;
+ fprintf(log,"%6d%%\n",perc);
+ }
+ else
+ fprintf(log,"\n");
+ }
+ }
+ fav=sav=0;
+ for(i=0; (i<cr->nnodes); i++) {
+ fav+=ftot[i];
+ sav+=stot[i];
+ }
+ uf=pr_av(log,cr,fav,ftot,"Total Force");
+ us=pr_av(log,cr,sav,stot,"Total Constr.");
+
+ /* Weight force and constraint imbalance by their total costs */
+ unb=(uf*fav+us*sav)/(fav+sav);
+ if (unb > 0) {
+ unb=10000.0/unb;
+ fprintf(log,"\nTotal Scaling: %.0f%% of max performance\n\n",unb);
+ }
+}
+
--- /dev/null
- void put_atom_in_box(matrix box,rvec x)
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "sysstuff.h"
+#include "typedefs.h"
+#include "vec.h"
+#include "maths.h"
+#include "main.h"
+#include "pbc.h"
+#include "smalloc.h"
+#include "txtdump.h"
+#include "gmx_fatal.h"
+#include "names.h"
+#include "macros.h"
++#include "gmx_omp_nthreads.h"
+
+/* Skip 0 so we have more chance of detecting if we forgot to call set_pbc. */
+enum { epbcdxRECTANGULAR=1, epbcdxTRICLINIC,
+ epbcdx2D_RECT, epbcdx2D_TRIC,
+ epbcdx1D_RECT, epbcdx1D_TRIC,
+ epbcdxSCREW_RECT, epbcdxSCREW_TRIC,
+ epbcdxNOPBC, epbcdxUNSUPPORTED };
+
+/* Margin factor for error message and correction if the box is too skewed */
+#define BOX_MARGIN 1.0010
+/* Slightly tighter margin used by correct_box_elem when shifting box vectors */
+#define BOX_MARGIN_CORRECT 1.0005
+
+/* Return the number of periodic dimensions for the given ePBC type
+ * (3 for xyz and screw, 2 for xy, 0 for none); fatal error otherwise.
+ */
+int ePBC2npbcdim(int ePBC)
+{
+ int npbcdim=0;
+
+ switch(ePBC) {
+ case epbcXYZ: npbcdim = 3; break;
+ case epbcXY: npbcdim = 2; break;
+ case epbcSCREW: npbcdim = 3; break;
+ case epbcNONE: npbcdim = 0; break;
+ default: gmx_fatal(FARGS,"Unknown ePBC=%d in ePBC2npbcdim",ePBC);
+ }
+
+ return npbcdim;
+}
+
+/* Return the number of dimensions in which particles are bounded:
+ * with two walls and xy pbc, z is bounded by the walls, so all three
+ * dimensions count; otherwise it equals the number of pbc dimensions.
+ */
+int inputrec2nboundeddim(t_inputrec *ir)
+{
+ if (ir->nwall == 2 && ir->ePBC == epbcXY)
+ {
+ return 3;
+ }
+ else
+ {
+ return ePBC2npbcdim(ir->ePBC);
+ }
+}
+
+/* Write a debug dump of all fields of *pbc to fp. */
+void dump_pbc(FILE *fp,t_pbc *pbc)
+{
+ rvec sum_box;
+
+ fprintf(fp,"ePBCDX = %d\n",pbc->ePBCDX);
+ pr_rvecs(fp,0,"box",pbc->box,DIM);
+ pr_rvecs(fp,0,"fbox_diag",&pbc->fbox_diag,1);
+ pr_rvecs(fp,0,"hbox_diag",&pbc->hbox_diag,1);
+ pr_rvecs(fp,0,"mhbox_diag",&pbc->mhbox_diag,1);
+ /* hbox_diag + mhbox_diag should be (numerically close to) zero */
+ rvec_add(pbc->hbox_diag,pbc->mhbox_diag,sum_box);
+ pr_rvecs(fp,0,"sum of the above two",&sum_box,1);
+ fprintf(fp,"max_cutoff2 = %g\n",pbc->max_cutoff2);
+ fprintf(fp,"bLimitDistance = %s\n",EBOOL(pbc->bLimitDistance));
+ fprintf(fp,"limit_distance2 = %g\n",pbc->limit_distance2);
+ fprintf(fp,"ntric_vec = %d\n",pbc->ntric_vec);
+ if (pbc->ntric_vec > 0) {
+ pr_ivecs(fp,0,"tric_shift",pbc->tric_shift,pbc->ntric_vec,FALSE);
+ pr_rvecs(fp,0,"tric_vec",pbc->tric_vec,pbc->ntric_vec);
+ }
+}
+
+/* Validate the box for the given pbc type (ePBC == -1 means guess it).
+ * Returns NULL when the box is acceptable, otherwise a static string
+ * describing why it is not (wrong triclinic form, screw restriction,
+ * or too skewed).
+ */
+const char *check_box(int ePBC,matrix box)
+{
+ const char *ptr;
+
+ if (ePBC == -1)
+ ePBC = guess_ePBC(box);
+
+ if (ePBC == epbcNONE)
+ return NULL;
+
+ if ((box[XX][YY] != 0) || (box[XX][ZZ] != 0) || (box[YY][ZZ] != 0)) {
+ ptr = "Only triclinic boxes with the first vector parallel to the x-axis and the second vector in the xy-plane are supported.";
+ } else if (ePBC == epbcSCREW && (box[YY][XX] != 0 || box[ZZ][XX] != 0)) {
+ ptr = "The unit cell can not have off-diagonal x-components with screw pbc";
+ } else if (fabs(box[YY][XX]) > BOX_MARGIN*0.5*box[XX][XX] ||
+ (ePBC != epbcXY &&
+ (fabs(box[ZZ][XX]) > BOX_MARGIN*0.5*box[XX][XX] ||
+ fabs(box[ZZ][YY]) > BOX_MARGIN*0.5*box[YY][YY]))) {
+ ptr = "Triclinic box is too skewed.";
+ } else {
+ ptr = NULL;
+ }
+
+ return ptr;
+}
+
+/* Return the square of the maximum cut-off allowed with this box and
+ * pbc type (limited both by half the shortest box vector and by the
+ * smallest diagonal element, see comments below).
+ */
+real max_cutoff2(int ePBC,matrix box)
+{
+ real min_hv2,min_ss;
+
+ /* Physical limitation of the cut-off
+ * by half the length of the shortest box vector.
+ */
+ min_hv2 = min(0.25*norm2(box[XX]),0.25*norm2(box[YY]));
+ if (ePBC != epbcXY)
+ min_hv2 = min(min_hv2,0.25*norm2(box[ZZ]));
+
+ /* Limitation to the smallest diagonal element due to optimizations:
+ * checking only linear combinations of single box-vectors (2 in x)
+ * in the grid search and pbc_dx is a lot faster
+ * than checking all possible combinations.
+ */
+ if (ePBC == epbcXY) {
+ min_ss = min(box[XX][XX],box[YY][YY]);
+ } else {
+ min_ss = min(box[XX][XX],min(box[YY][YY]-fabs(box[ZZ][YY]),box[ZZ][ZZ]));
+ }
+
+ return min(min_hv2,min_ss*min_ss);
+}
+
+/* this one is mostly harmless... */
+static gmx_bool bWarnedGuess=FALSE;
+
+/* Guess the pbc type from the box diagonal: all three elements positive
+ * means xyz, zero z means xy, all zero means none. Warns (once) and
+ * falls back to no pbc on any other diagonal.
+ */
+int guess_ePBC(matrix box)
+{
+ int ePBC;
+
+ if (box[XX][XX]>0 && box[YY][YY]>0 && box[ZZ][ZZ]>0) {
+ ePBC = epbcXYZ;
+ } else if (box[XX][XX]>0 && box[YY][YY]>0 && box[ZZ][ZZ]==0) {
+ ePBC = epbcXY;
+ } else if (box[XX][XX]==0 && box[YY][YY]==0 && box[ZZ][ZZ]==0) {
+ ePBC = epbcNONE;
+ } else {
+ if (!bWarnedGuess) {
+ fprintf(stderr,"WARNING: Unsupported box diagonal %f %f %f, "
+ "will not use periodic boundary conditions\n\n",
+ box[XX][XX],box[YY][YY],box[ZZ][ZZ]);
+ bWarnedGuess = TRUE;
+ }
+ ePBC = epbcNONE;
+ }
+
+ if (debug)
+ fprintf(debug,"Guessed pbc = %s from the box matrix\n",epbc_names[ePBC]);
+
+ return ePBC;
+}
+
+/* Shift box vector v by integer multiples of box vector d until element
+ * box[v][d] obeys the skew restriction (|box[v][d]| <= ~0.5*box[d][d]).
+ * Returns the signed number of shifts applied; fatal error after
+ * maxshift shifts in either direction.
+ */
+static int correct_box_elem(FILE *fplog,int step,tensor box,int v,int d)
+{
+ int shift,maxshift=10;
+
+ shift = 0;
+
+ /* correct elem d of vector v with vector d */
+ while (box[v][d] > BOX_MARGIN_CORRECT*0.5*box[d][d]) {
+ if (fplog) {
+ fprintf(fplog,"Step %d: correcting invalid box:\n",step);
+ pr_rvecs(fplog,0,"old box",box,DIM);
+ }
+ rvec_dec(box[v],box[d]);
+ shift--;
+ if (fplog) {
+ pr_rvecs(fplog,0,"new box",box,DIM);
+ }
+ if (shift <= -maxshift)
+ gmx_fatal(FARGS,
+ "Box was shifted at least %d times. Please see log-file.",
+ maxshift);
+ }
+ while (box[v][d] < -BOX_MARGIN_CORRECT*0.5*box[d][d]) {
+ if (fplog) {
+ fprintf(fplog,"Step %d: correcting invalid box:\n",step);
+ pr_rvecs(fplog,0,"old box",box,DIM);
+ }
+ rvec_inc(box[v],box[d]);
+ shift++;
+ if (fplog) {
+ pr_rvecs(fplog,0,"new box",box,DIM);
+ }
+ if (shift >= maxshift)
+ gmx_fatal(FARGS,
+ "Box was shifted at least %d times. Please see log-file.",
+ maxshift);
+ }
+
+ return shift;
+}
+
+/* Restore the box to the supported triclinic form by shifting its
+ * off-diagonal elements (zy, zx, yx); when graph is non-NULL, the atom
+ * shift indices are adjusted accordingly. Returns TRUE when the box
+ * was changed.
+ */
+gmx_bool correct_box(FILE *fplog,int step,tensor box,t_graph *graph)
+{
+ int zy,zx,yx,i;
+ gmx_bool bCorrected;
+
+ /* check if the box still obeys the restrictions, if not, correct it */
+ zy = correct_box_elem(fplog,step,box,ZZ,YY);
+ zx = correct_box_elem(fplog,step,box,ZZ,XX);
+ yx = correct_box_elem(fplog,step,box,YY,XX);
+
+ bCorrected = (zy || zx || yx);
+
+ if (bCorrected && graph) {
+ /* correct the graph */
+ for(i=graph->at_start; i<graph->at_end; i++) {
+ graph->ishift[i][YY] -= graph->ishift[i][ZZ]*zy;
+ graph->ishift[i][XX] -= graph->ishift[i][ZZ]*zx;
+ graph->ishift[i][XX] -= graph->ishift[i][YY]*yx;
+ }
+ }
+
+ return bCorrected;
+}
+
+/* Number of centre-of-mass degrees of freedom for this pbc setup:
+ * 3 for xyz/none, 2 or 3 for xy depending on walls, 1 for screw.
+ */
+int ndof_com(t_inputrec *ir)
+{
+ int n=0;
+
+ switch (ir->ePBC) {
+ case epbcXYZ:
+ case epbcNONE:
+ n = 3;
+ break;
+ case epbcXY:
+ /* walls remove the z COM degree of freedom */
+ n = (ir->nwall == 0 ? 3 : 2);
+ break;
+ case epbcSCREW:
+ n = 1;
+ break;
+ default:
+ gmx_incons("Unknown pbc in calc_nrdf");
+ }
+
+ return n;
+}
+
+/* Initialize *pbc from box for the given ePBC type. dd_nc, when non-NULL,
+ * gives the domain-decomposition grid: dimensions with more than one DD
+ * cell are handled by DD communication and excluded from pbc here.
+ * For triclinic boxes this also precomputes the single-shift correction
+ * vectors (tric_vec/tric_shift) used by pbc_dx.
+ */
+static void low_set_pbc(t_pbc *pbc,int ePBC,ivec *dd_nc,matrix box)
+{
+ int order[5]={0,-1,1,-2,2};
+ int ii,jj,kk,i,j,k,d,dd,jc,kc,npbcdim,shift;
+ ivec bPBC;
+ real d2old,d2new,d2new_c;
+ rvec trial,pos;
+ gmx_bool bXY,bUse;
+ const char *ptr;
+
+ pbc->ndim_ePBC = ePBC2npbcdim(ePBC);
+
+ copy_mat(box,pbc->box);
+ pbc->bLimitDistance = FALSE;
+ pbc->max_cutoff2 = 0;
+ pbc->dim = -1;
+
+ for(i=0; (i<DIM); i++) {
+ pbc->fbox_diag[i] = box[i][i];
+ pbc->hbox_diag[i] = pbc->fbox_diag[i]*0.5;
+ pbc->mhbox_diag[i] = -pbc->hbox_diag[i];
+ }
+
+ ptr = check_box(ePBC,box);
+ if (ePBC == epbcNONE) {
+ pbc->ePBCDX = epbcdxNOPBC;
+ } else if (ptr) {
+ fprintf(stderr, "Warning: %s\n",ptr);
+ pr_rvecs(stderr,0," Box",box,DIM);
+ fprintf(stderr, " Can not fix pbc.\n");
+ pbc->ePBCDX = epbcdxUNSUPPORTED;
+ pbc->bLimitDistance = TRUE;
+ pbc->limit_distance2 = 0;
+ } else {
+ if (ePBC == epbcSCREW && dd_nc) {
+ /* This combination should never appear here */
+ gmx_incons("low_set_pbc called with screw pbc and dd_nc != NULL");
+ }
+
+ npbcdim = 0;
+ for(i=0; i<DIM; i++) {
+ if ((dd_nc && (*dd_nc)[i] > 1) || (ePBC == epbcXY && i == ZZ)) {
+ bPBC[i] = 0;
+ } else {
+ bPBC[i] = 1;
+ npbcdim++;
+ }
+ }
+ switch (npbcdim) {
+ case 1:
+ /* 1D pbc is not an mdp option and it is therefore only used
+ * with single shifts.
+ */
+ pbc->ePBCDX = epbcdx1D_RECT;
+ for(i=0; i<DIM; i++)
+ if (bPBC[i])
+ pbc->dim = i;
+ for(i=0; i<pbc->dim; i++)
+ if (pbc->box[pbc->dim][i] != 0)
+ pbc->ePBCDX = epbcdx1D_TRIC;
+ break;
+ case 2:
+ pbc->ePBCDX = epbcdx2D_RECT;
+ for(i=0; i<DIM; i++)
+ if (!bPBC[i])
+ pbc->dim = i;
+ for(i=0; i<DIM; i++)
+ if (bPBC[i])
+ for(j=0; j<i; j++)
+ if (pbc->box[i][j] != 0)
+ pbc->ePBCDX = epbcdx2D_TRIC;
+ break;
+ case 3:
+ if (ePBC != epbcSCREW) {
+ if (TRICLINIC(box)) {
+ pbc->ePBCDX = epbcdxTRICLINIC;
+ } else {
+ pbc->ePBCDX = epbcdxRECTANGULAR;
+ }
+ } else {
+ pbc->ePBCDX = (box[ZZ][YY]==0 ? epbcdxSCREW_RECT : epbcdxSCREW_TRIC);
+ if (pbc->ePBCDX == epbcdxSCREW_TRIC) {
+ fprintf(stderr,
+ "Screw pbc is not yet implemented for triclinic boxes.\n"
+ "Can not fix pbc.\n");
+ pbc->ePBCDX = epbcdxUNSUPPORTED;
+ }
+ }
+ break;
+ default:
+ gmx_fatal(FARGS,"Incorrect number of pbc dimensions with DD: %d",
+ npbcdim);
+ }
+ pbc->max_cutoff2 = max_cutoff2(ePBC,box);
+
+ if (pbc->ePBCDX == epbcdxTRICLINIC ||
+ pbc->ePBCDX == epbcdx2D_TRIC ||
+ pbc->ePBCDX == epbcdxSCREW_TRIC) {
+ if (debug) {
+ pr_rvecs(debug,0,"Box",box,DIM);
+ fprintf(debug,"max cutoff %.3f\n",sqrt(pbc->max_cutoff2));
+ }
+ pbc->ntric_vec = 0;
+ /* We will only use single shifts, but we will check a few
+ * more shifts to see if there is a limiting distance
+ * above which we can not be sure of the correct distance.
+ */
+ for(kk=0; kk<5; kk++) {
+ k = order[kk];
+ if (!bPBC[ZZ] && k != 0)
+ continue;
+ for(jj=0; jj<5; jj++) {
+ j = order[jj];
+ if (!bPBC[YY] && j != 0)
+ continue;
+ for(ii=0; ii<3; ii++) {
+ i = order[ii];
+ if (!bPBC[XX] && i != 0)
+ continue;
+ /* A shift is only useful when it is triclinic */
+ if (j != 0 || k != 0) {
+ d2old = 0;
+ d2new = 0;
+ for(d=0; d<DIM; d++) {
+ trial[d] = i*box[XX][d] + j*box[YY][d] + k*box[ZZ][d];
+ /* Choose the vector within the brick around 0,0,0 that
+ * will become the shortest due to shift try.
+ */
+ if (d == pbc->dim) {
+ trial[d] = 0;
+ pos[d] = 0;
+ } else {
+ if (trial[d] < 0)
+ pos[d] = min( pbc->hbox_diag[d],-trial[d]);
+ else
+ pos[d] = max(-pbc->hbox_diag[d],-trial[d]);
+ }
+ d2old += sqr(pos[d]);
+ d2new += sqr(pos[d] + trial[d]);
+ }
+ if (BOX_MARGIN*d2new < d2old) {
+ if (j < -1 || j > 1 || k < -1 || k > 1) {
+ /* Check if there is a single shift vector
+ * that decreases this distance even more.
+ */
+ jc = 0;
+ kc = 0;
+ if (j < -1 || j > 1)
+ jc = j/2;
+ if (k < -1 || k > 1)
+ kc = k/2;
+ d2new_c = 0;
+ for(d=0; d<DIM; d++)
+ d2new_c += sqr(pos[d] + trial[d]
+ - jc*box[YY][d] - kc*box[ZZ][d]);
+ if (d2new_c > BOX_MARGIN*d2new) {
+ /* Reject this shift vector, as there is no a priori limit
+ * to the number of shifts that decrease distances.
+ */
+ if (!pbc->bLimitDistance || d2new < pbc->limit_distance2)
+ pbc->limit_distance2 = d2new;
+ pbc->bLimitDistance = TRUE;
+ }
+ } else {
+ /* Check if shifts with one box vector less do better */
+ bUse = TRUE;
+ for(dd=0; dd<DIM; dd++) {
+ shift = (dd==0 ? i : (dd==1 ? j : k));
+ if (shift) {
+ d2new_c = 0;
+ for(d=0; d<DIM; d++)
+ d2new_c += sqr(pos[d] + trial[d] - shift*box[dd][d]);
+ if (d2new_c <= BOX_MARGIN*d2new)
+ bUse = FALSE;
+ }
+ }
+ if (bUse) {
+ /* Accept this shift vector. */
+ if (pbc->ntric_vec >= MAX_NTRICVEC) {
+ fprintf(stderr,"\nWARNING: Found more than %d triclinic correction vectors, ignoring some.\n"
+ " There is probably something wrong with your box.\n",MAX_NTRICVEC);
+ pr_rvecs(stderr,0," Box",box,DIM);
+ } else {
+ copy_rvec(trial,pbc->tric_vec[pbc->ntric_vec]);
+ pbc->tric_shift[pbc->ntric_vec][XX] = i;
+ pbc->tric_shift[pbc->ntric_vec][YY] = j;
+ pbc->tric_shift[pbc->ntric_vec][ZZ] = k;
+ pbc->ntric_vec++;
+ }
+ }
+ }
+ if (debug) {
+ fprintf(debug," tricvec %2d = %2d %2d %2d %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f %5.2f\n",
+ pbc->ntric_vec,i,j,k,
+ sqrt(d2old),sqrt(d2new),
+ trial[XX],trial[YY],trial[ZZ],
+ pos[XX],pos[YY],pos[ZZ]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/* Initialize *pbc for a full (non domain-decomposed) system;
+ * ePBC == -1 means guess the pbc type from the box.
+ */
+void set_pbc(t_pbc *pbc,int ePBC,matrix box)
+{
+ if (ePBC == -1)
+ ePBC = guess_ePBC(box);
+
+ low_set_pbc(pbc,ePBC,NULL,box);
+}
+
+/* Initialize *pbc for use with domain decomposition: dimensions covered
+ * by enough DD cells (more than 1, or 2 with bSingleDir) are handled by
+ * DD communication and excluded from pbc. Returns pbc, or NULL when no
+ * dimension needs pbc treatment at all.
+ */
+t_pbc *set_pbc_dd(t_pbc *pbc,int ePBC,
+ gmx_domdec_t *dd,gmx_bool bSingleDir,matrix box)
+{
+ ivec nc2;
+ int npbcdim,i;
+
+ if (dd == NULL) {
+ npbcdim = DIM;
+ } else {
+ if (ePBC == epbcSCREW && dd->nc[XX] > 1) {
+ /* The rotation has been taken care of during coordinate communication */
+ ePBC = epbcXYZ;
+ }
+ npbcdim = 0;
+ for(i=0; i<DIM; i++) {
+ if (dd->nc[i] <= (bSingleDir ? 1 : 2)) {
+ nc2[i] = 1;
+ if (!(ePBC == epbcXY && i == ZZ))
+ npbcdim++;
+ } else {
+ nc2[i] = dd->nc[i];
+ }
+ }
+ }
+
+ if (npbcdim > 0)
+ low_set_pbc(pbc,ePBC,npbcdim<DIM ? &nc2 : NULL,box);
+
+ return (npbcdim > 0 ? pbc : NULL);
+}
+
+/* Compute dx = x1 - x2 under the pbc previously set up by set_pbc().
+ * The result is the shortest periodic image of the difference vector;
+ * for triclinic boxes the precomputed tric_vec shifts are also tried
+ * when the rectangular-wrapped distance exceeds max_cutoff.
+ */
+void pbc_dx(const t_pbc *pbc,const rvec x1, const rvec x2, rvec dx)
+{
+ int i,j;
+ rvec dx_start,trial;
+ real d2min,d2trial;
+ gmx_bool bRot;
+
+ rvec_sub(x1,x2,dx);
+
+ switch (pbc->ePBCDX) {
+ case epbcdxRECTANGULAR:
+ for(i=0; i<DIM; i++) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ }
+ }
+ break;
+ case epbcdxTRICLINIC:
+ /* Wrap from z down to x, since box[i][j] is zero for j > i */
+ for(i=DIM-1; i>=0; i--) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] -= pbc->box[i][j];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] += pbc->box[i][j];
+ }
+ }
+ /* dx is the distance in a rectangular box */
+ d2min = norm2(dx);
+ if (d2min > pbc->max_cutoff2) {
+ copy_rvec(dx,dx_start);
+ /* Now try all possible shifts, when the distance is within max_cutoff
+ * it must be the shortest possible distance.
+ */
+ i = 0;
+ while ((d2min > pbc->max_cutoff2) && (i < pbc->ntric_vec)) {
+ rvec_add(dx_start,pbc->tric_vec[i],trial);
+ d2trial = norm2(trial);
+ if (d2trial < d2min) {
+ copy_rvec(trial,dx);
+ d2min = d2trial;
+ }
+ i++;
+ }
+ }
+ break;
+ case epbcdx2D_RECT:
+ for(i=0; i<DIM; i++) {
+ if (i != pbc->dim) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ }
+ }
+ }
+ break;
+ case epbcdx2D_TRIC:
+ d2min = 0;
+ for(i=DIM-1; i>=0; i--) {
+ if (i != pbc->dim) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] -= pbc->box[i][j];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] += pbc->box[i][j];
+ }
+ d2min += dx[i]*dx[i];
+ }
+ }
+ if (d2min > pbc->max_cutoff2) {
+ copy_rvec(dx,dx_start);
+ d2min = norm2(dx);
+ /* Now try all possible shifts, when the distance is within max_cutoff
+ * it must be the shortest possible distance.
+ */
+ i = 0;
+ while ((d2min > pbc->max_cutoff2) && (i < pbc->ntric_vec)) {
+ rvec_add(dx_start,pbc->tric_vec[i],trial);
+ d2trial = 0;
+ for(j=0; j<DIM; j++) {
+ if (j != pbc->dim) {
+ d2trial += trial[j]*trial[j];
+ }
+ }
+ if (d2trial < d2min) {
+ copy_rvec(trial,dx);
+ d2min = d2trial;
+ }
+ i++;
+ }
+ }
+ break;
+ case epbcdxSCREW_RECT:
+ /* The shift definition requires x first */
+ bRot = FALSE;
+ while (dx[XX] > pbc->hbox_diag[XX]) {
+ dx[XX] -= pbc->fbox_diag[XX];
+ bRot = !bRot;
+ }
+ while (dx[XX] <= pbc->mhbox_diag[XX]) {
+ /* Bug fix: shift x by the box x-length; this added
+ * fbox_diag[YY] before, which wrapped x by the wrong amount.
+ */
+ dx[XX] += pbc->fbox_diag[XX];
+ bRot = !bRot;
+ }
+ if (bRot) {
+ /* Rotate around the x-axis in the middle of the box */
+ dx[YY] = pbc->box[YY][YY] - x1[YY] - x2[YY];
+ dx[ZZ] = pbc->box[ZZ][ZZ] - x1[ZZ] - x2[ZZ];
+ }
+ /* Normal pbc for y and z */
+ for(i=YY; i<=ZZ; i++) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ }
+ }
+ break;
+ case epbcdxNOPBC:
+ case epbcdxUNSUPPORTED:
+ break;
+ default:
+ gmx_fatal(FARGS,"Internal error in pbc_dx, set_pbc has not been called");
+ break;
+ }
+}
+
+int pbc_dx_aiuc(const t_pbc *pbc,const rvec x1, const rvec x2, rvec dx)
+{
+ int i,j,is;
+ rvec dx_start,trial;
+ real d2min,d2trial;
+ ivec ishift,ishift_start;
+
+ rvec_sub(x1,x2,dx);
+ clear_ivec(ishift);
+
+ switch (pbc->ePBCDX) {
+ case epbcdxRECTANGULAR:
+ for(i=0; i<DIM; i++) {
+ if (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ ishift[i]--;
+ } else if (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ ishift[i]++;
+ }
+ }
+ break;
+ case epbcdxTRICLINIC:
+ /* For triclinic boxes the performance difference between
+ * if/else and two while loops is negligible.
+ * However, the while version can cause extreme delays
+ * before a simulation crashes due to large forces which
+ * can cause unlimited displacements.
+ * Also allowing multiple shifts would index fshift beyond bounds.
+ */
+ for(i=DIM-1; i>=1; i--) {
+ if (dx[i] > pbc->hbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] -= pbc->box[i][j];
+ ishift[i]--;
+ } else if (dx[i] <= pbc->mhbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] += pbc->box[i][j];
+ ishift[i]++;
+ }
+ }
+ /* Allow 2 shifts in x */
+ if (dx[XX] > pbc->hbox_diag[XX]) {
+ dx[XX] -= pbc->fbox_diag[XX];
+ ishift[XX]--;
+ if (dx[XX] > pbc->hbox_diag[XX]) {
+ dx[XX] -= pbc->fbox_diag[XX];
+ ishift[XX]--;
+ }
+ } else if (dx[XX] <= pbc->mhbox_diag[XX]) {
+ dx[XX] += pbc->fbox_diag[XX];
+ ishift[XX]++;
+ if (dx[XX] <= pbc->mhbox_diag[XX]) {
+ dx[XX] += pbc->fbox_diag[XX];
+ ishift[XX]++;
+ }
+ }
+ /* dx is the distance in a rectangular box */
+ d2min = norm2(dx);
+ if (d2min > pbc->max_cutoff2) {
+ copy_rvec(dx,dx_start);
+ copy_ivec(ishift,ishift_start);
+ d2min = norm2(dx);
+ /* Now try all possible shifts, when the distance is within max_cutoff
+ * it must be the shortest possible distance.
+ */
+ i = 0;
+ while ((d2min > pbc->max_cutoff2) && (i < pbc->ntric_vec)) {
+ rvec_add(dx_start,pbc->tric_vec[i],trial);
+ d2trial = norm2(trial);
+ if (d2trial < d2min) {
+ copy_rvec(trial,dx);
+ ivec_add(ishift_start,pbc->tric_shift[i],ishift);
+ d2min = d2trial;
+ }
+ i++;
+ }
+ }
+ break;
+ case epbcdx2D_RECT:
+ for(i=0; i<DIM; i++) {
+ if (i != pbc->dim) {
+ if (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ ishift[i]--;
+ } else if (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ ishift[i]++;
+ }
+ }
+ }
+ break;
+ case epbcdx2D_TRIC:
+ d2min = 0;
+ for(i=DIM-1; i>=1; i--) {
+ if (i != pbc->dim) {
+ if (dx[i] > pbc->hbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] -= pbc->box[i][j];
+ ishift[i]--;
+ } else if (dx[i] <= pbc->mhbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] += pbc->box[i][j];
+ ishift[i]++;
+ }
+ d2min += dx[i]*dx[i];
+ }
+ }
+ if (pbc->dim != XX) {
+ /* Allow 2 shifts in x */
+ if (dx[XX] > pbc->hbox_diag[XX]) {
+ dx[XX] -= pbc->fbox_diag[XX];
+ ishift[XX]--;
+ if (dx[XX] > pbc->hbox_diag[XX]) {
+ dx[XX] -= pbc->fbox_diag[XX];
+ ishift[XX]--;
+ }
+ } else if (dx[XX] <= pbc->mhbox_diag[XX]) {
+ dx[XX] += pbc->fbox_diag[XX];
+ ishift[XX]++;
+ if (dx[XX] <= pbc->mhbox_diag[XX]) {
+ dx[XX] += pbc->fbox_diag[XX];
+ ishift[XX]++;
+ }
+ }
+ d2min += dx[XX]*dx[XX];
+ }
+ if (d2min > pbc->max_cutoff2) {
+ copy_rvec(dx,dx_start);
+ copy_ivec(ishift,ishift_start);
+ /* Now try all possible shifts, when the distance is within max_cutoff
+ * it must be the shortest possible distance.
+ */
+ i = 0;
+ while ((d2min > pbc->max_cutoff2) && (i < pbc->ntric_vec)) {
+ rvec_add(dx_start,pbc->tric_vec[i],trial);
+ d2trial = 0;
+ for(j=0; j<DIM; j++) {
+ if (j != pbc->dim) {
+ d2trial += trial[j]*trial[j];
+ }
+ }
+ if (d2trial < d2min) {
+ copy_rvec(trial,dx);
+ ivec_add(ishift_start,pbc->tric_shift[i],ishift);
+ d2min = d2trial;
+ }
+ i++;
+ }
+ }
+ break;
+ case epbcdx1D_RECT:
+ i = pbc->dim;
+ if (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ ishift[i]--;
+ } else if (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ ishift[i]++;
+ }
+ break;
+ case epbcdx1D_TRIC:
+ i = pbc->dim;
+ if (dx[i] > pbc->hbox_diag[i]) {
+ rvec_dec(dx,pbc->box[i]);
+ ishift[i]--;
+ } else if (dx[i] <= pbc->mhbox_diag[i]) {
+ rvec_inc(dx,pbc->box[i]);
+ ishift[i]++;
+ }
+ break;
+ case epbcdxSCREW_RECT:
+ /* The shift definition requires x first */
+ if (dx[XX] > pbc->hbox_diag[XX]) {
+ dx[XX] -= pbc->fbox_diag[XX];
+ ishift[XX]--;
+ } else if (dx[XX] <= pbc->mhbox_diag[XX]) {
+ dx[XX] += pbc->fbox_diag[XX];
+ ishift[XX]++;
+ }
+ if (ishift[XX] == 1 || ishift[XX] == -1) {
+ /* Rotate around the x-axis in the middle of the box */
+ dx[YY] = pbc->box[YY][YY] - x1[YY] - x2[YY];
+ dx[ZZ] = pbc->box[ZZ][ZZ] - x1[ZZ] - x2[ZZ];
+ }
+ /* Normal pbc for y and z */
+ for(i=YY; i<=ZZ; i++) {
+ if (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ ishift[i]--;
+ } else if (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ ishift[i]++;
+ }
+ }
+ break;
+ case epbcdxNOPBC:
+ case epbcdxUNSUPPORTED:
+ break;
+ default:
+ gmx_fatal(FARGS,"Internal error in pbc_dx_aiuc, set_pbc_dd or set_pbc has not been called");
+ break;
+ }
+
+ is = IVEC2IS(ishift);
+ if (debug)
+ {
+ range_check_mesg(is,0,SHIFTS,"PBC shift vector index range check.");
+ }
+
+ return is;
+}
+
+void pbc_dx_d(const t_pbc *pbc,const dvec x1, const dvec x2, dvec dx)
+{
+ int i,j;
+ dvec dx_start,trial;
+ double d2min,d2trial;
+ gmx_bool bRot;
+
+ dvec_sub(x1,x2,dx);
+
+ switch (pbc->ePBCDX) {
+ case epbcdxRECTANGULAR:
+ case epbcdx2D_RECT:
+ for(i=0; i<DIM; i++) {
+ if (i != pbc->dim) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ }
+ }
+ }
+ break;
+ case epbcdxTRICLINIC:
+ case epbcdx2D_TRIC:
+ d2min = 0;
+ for(i=DIM-1; i>=0; i--) {
+ if (i != pbc->dim) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] -= pbc->box[i][j];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ for (j=i; j>=0; j--)
+ dx[j] += pbc->box[i][j];
+ }
+ d2min += dx[i]*dx[i];
+ }
+ }
+ if (d2min > pbc->max_cutoff2) {
+ copy_dvec(dx,dx_start);
+ /* Now try all possible shifts, when the distance is within max_cutoff
+ * it must be the shortest possible distance.
+ */
+ i = 0;
+ while ((d2min > pbc->max_cutoff2) && (i < pbc->ntric_vec)) {
+ for(j=0; j<DIM; j++) {
+ trial[j] = dx_start[j] + pbc->tric_vec[i][j];
+ }
+ d2trial = 0;
+ for(j=0; j<DIM; j++) {
+ if (j != pbc->dim) {
+ d2trial += trial[j]*trial[j];
+ }
+ }
+ if (d2trial < d2min) {
+ copy_dvec(trial,dx);
+ d2min = d2trial;
+ }
+ i++;
+ }
+ }
+ break;
+ case epbcdxSCREW_RECT:
+ /* The shift definition requires x first */
+ bRot = FALSE;
+ while (dx[XX] > pbc->hbox_diag[XX]) {
+ dx[XX] -= pbc->fbox_diag[XX];
+ bRot = !bRot;
+ }
+ while (dx[XX] <= pbc->mhbox_diag[XX]) {
+ dx[XX] += pbc->fbox_diag[XX]; /* fixed: was fbox_diag[YY], wrong dimension for x-wrap */
+ bRot = !bRot;
+ }
+ if (bRot) {
+ /* Rotate around the x-axis in the middle of the box */
+ dx[YY] = pbc->box[YY][YY] - x1[YY] - x2[YY];
+ dx[ZZ] = pbc->box[ZZ][ZZ] - x1[ZZ] - x2[ZZ];
+ }
+ /* Normal pbc for y and z */
+ for(i=YY; i<=ZZ; i++) {
+ while (dx[i] > pbc->hbox_diag[i]) {
+ dx[i] -= pbc->fbox_diag[i];
+ }
+ while (dx[i] <= pbc->mhbox_diag[i]) {
+ dx[i] += pbc->fbox_diag[i];
+ }
+ }
+ break;
+ case epbcdxNOPBC:
+ case epbcdxUNSUPPORTED:
+ break;
+ default:
+ gmx_fatal(FARGS,"Internal error in pbc_dx, set_pbc has not been called");
+ break;
+ }
+}
+
+gmx_bool image_rect(ivec xi,ivec xj,ivec box_size,real rlong2,int *shift,real *r2)
+{
+ int m,t;
+ int dx,b,b_2;
+ real dxr,rij2;
+
+ rij2=0.0;
+ t=0;
+ for(m=0; (m<DIM); m++) {
+ dx=xi[m]-xj[m];
+ t*=DIM;
+ b=box_size[m];
+ b_2=b/2;
+ if (dx < -b_2) {
+ t+=2;
+ dx+=b;
+ }
+ else if (dx > b_2)
+ dx-=b;
+ else
+ t+=1;
+ dxr=dx;
+ rij2+=dxr*dxr;
+ if (rij2 >= rlong2)
+ return FALSE;
+ }
+
+ *shift = t;
+ *r2 = rij2;
+ return TRUE;
+}
+
+gmx_bool image_cylindric(ivec xi,ivec xj,ivec box_size,real rlong2,
+ int *shift,real *r2)
+{
+ int m,t;
+ int dx,b,b_2;
+ real dxr,rij2;
+
+ rij2=0.0;
+ t=0;
+ for(m=0; (m<DIM); m++) {
+ dx=xi[m]-xj[m];
+ t*=DIM;
+ b=box_size[m];
+ b_2=b/2;
+ if (dx < -b_2) {
+ t+=2;
+ dx+=b;
+ }
+ else if (dx > b_2)
+ dx-=b;
+ else
+ t+=1;
+
+ dxr=dx;
+ rij2+=dxr*dxr;
+ if (m < ZZ) {
+ if (rij2 >= rlong2)
+ return FALSE;
+ }
+ }
+
+ *shift = t;
+ *r2 = rij2;
+ return TRUE;
+}
+
+void calc_shifts(matrix box,rvec shift_vec[])
+{
+ int k,l,m,d,n,test;
+
+ n=0;
+ for(m = -D_BOX_Z; m <= D_BOX_Z; m++)
+ for(l = -D_BOX_Y; l <= D_BOX_Y; l++)
+ for(k = -D_BOX_X; k <= D_BOX_X; k++,n++) {
+ test = XYZ2IS(k,l,m);
+ if (n != test)
+ gmx_incons("inconsistent shift numbering");
+ for(d = 0; d < DIM; d++)
+ shift_vec[n][d] = k*box[XX][d] + l*box[YY][d] + m*box[ZZ][d];
+ }
+}
+
+void calc_box_center(int ecenter,matrix box,rvec box_center)
+{
+ int d,m;
+
+ clear_rvec(box_center);
+ switch (ecenter) {
+ case ecenterTRIC:
+ for(m=0; (m<DIM); m++)
+ for(d=0; d<DIM; d++)
+ box_center[d] += 0.5*box[m][d];
+ break;
+ case ecenterRECT:
+ for(d=0; d<DIM; d++)
+ box_center[d] = 0.5*box[d][d];
+ break;
+ case ecenterZERO:
+ break;
+ default:
+ gmx_fatal(FARGS,"Unsupported value %d for ecenter",ecenter);
+ }
+}
+
+void calc_triclinic_images(matrix box,rvec img[])
+{
+ int i;
+
+ /* Calculate 3 adjacent images in the xy-plane */
+ copy_rvec(box[0],img[0]);
+ copy_rvec(box[1],img[1]);
+ if (img[1][XX] < 0)
+ svmul(-1,img[1],img[1]);
+ rvec_sub(img[1],img[0],img[2]);
+
+ /* Get the next 3 in the xy-plane as mirror images */
+ for(i=0; i<3; i++)
+ svmul(-1,img[i],img[3+i]);
+
+ /* Calculate the first 4 out of xy-plane images */
+ copy_rvec(box[2],img[6]);
+ if (img[6][XX] < 0)
+ svmul(-1,img[6],img[6]);
+ for(i=0; i<3; i++)
+ rvec_add(img[6],img[i+1],img[7+i]);
+
+ /* Mirror the last 4 from the previous in opposite rotation */
+ for(i=0; i<4; i++)
+ svmul(-1,img[6 + (2+i) % 4],img[10+i]);
+}
+
+void calc_compact_unitcell_vertices(int ecenter,matrix box,rvec vert[])
+{
+ rvec img[NTRICIMG],box_center;
+ int n,i,j,tmp[4],d;
+
+ calc_triclinic_images(box,img);
+
+ n=0;
+ for(i=2; i<=5; i+=3) {
+ tmp[0] = i-1;
+ if (i==2)
+ tmp[1] = 8;
+ else
+ tmp[1] = 6;
+ tmp[2] = (i+1) % 6;
+ tmp[3] = tmp[1]+4;
+ for(j=0; j<4; j++) {
+ for(d=0; d<DIM; d++)
+ vert[n][d] = img[i][d]+img[tmp[j]][d]+img[tmp[(j+1)%4]][d];
+ n++;
+ }
+ }
+ for(i=7; i<=13; i+=6) {
+ tmp[0] = (i-7)/2;
+ tmp[1] = tmp[0]+1;
+ if (i==7)
+ tmp[2] = 8;
+ else
+ tmp[2] = 10;
+ tmp[3] = i-1;
+ for(j=0; j<4; j++) {
+ for(d=0; d<DIM; d++)
+ vert[n][d] = img[i][d]+img[tmp[j]][d]+img[tmp[(j+1)%4]][d];
+ n++;
+ }
+ }
+ for(i=9; i<=11; i+=2) {
+ if (i==9)
+ tmp[0] = 3;
+ else
+ tmp[0] = 0;
+ tmp[1] = tmp[0]+1;
+ if (i==9)
+ tmp[2] = 6;
+ else
+ tmp[2] = 12;
+ tmp[3] = i-1;
+ for(j=0; j<4; j++) {
+ for(d=0; d<DIM; d++)
+ vert[n][d] = img[i][d]+img[tmp[j]][d]+img[tmp[(j+1)%4]][d];
+ n++;
+ }
+ }
+
+ calc_box_center(ecenter,box,box_center);
+ for(i=0; i<NCUCVERT; i++)
+ for(d=0; d<DIM; d++)
+ vert[i][d] = vert[i][d]*0.25+box_center[d];
+}
+
+int *compact_unitcell_edges()
+{
+ /* this is an index in vert[] (see calc_box_vertices) */
+ /*static int edge[NCUCEDGE*2];*/
+ int *edge;
+ static const int hexcon[24] = { 0,9, 1,19, 2,15, 3,21,
+ 4,17, 5,11, 6,23, 7,13,
+ 8,20, 10,18, 12,16, 14,22 };
+ int e,i,j;
+ gmx_bool bFirst = TRUE;
+
+ snew(edge,NCUCEDGE*2);
+
+ if (bFirst) {
+ e = 0;
+ for(i=0; i<6; i++)
+ for(j=0; j<4; j++) {
+ edge[e++] = 4*i + j;
+ edge[e++] = 4*i + (j+1) % 4;
+ }
+ for(i=0; i<12*2; i++)
+ edge[e++] = hexcon[i];
+
+ bFirst = FALSE;
+ }
+
+ return edge;
+}
+
- int i,m,d;
-
- for(m=DIM-1; m>=0; m--) {
- while (x[m] < 0)
- for(d=0; d<=m; d++)
- x[d] += box[m][d];
- while (x[m] >= box[m][m])
- for(d=0; d<=m; d++)
- x[d] -= box[m][d];
++void put_atoms_in_box_omp(int ePBC,matrix box,int natoms,rvec x[])
+{
- void put_atoms_in_box(matrix box,int natoms,rvec x[])
++ int t, nth;
++ nth = gmx_omp_nthreads_get(emntDefault);
++
++#pragma omp parallel for num_threads(nth) schedule(static)
++ for(t=0; t<nth; t++)
++ {
++ int offset, len;
++
++ offset = (natoms*t )/nth;
++ len = (natoms*(t + 1))/nth - offset;
++ put_atoms_in_box(ePBC, box, len, x + offset);
+ }
+}
+
- int i,m,d;
++void put_atoms_in_box(int ePBC,matrix box,int natoms,rvec x[])
+{
- for(i=0; (i<natoms); i++)
- put_atom_in_box(box,x[i]);
++ int npbcdim,i,m,d;
+
++ if (ePBC == epbcSCREW)
++ {
++ gmx_fatal(FARGS,"Sorry, %s pbc is not yet supported",epbc_names[ePBC]);
++ }
++
++ if (ePBC == epbcXY)
++ {
++ npbcdim = 2;
++ }
++ else
++ {
++ npbcdim = 3;
++ }
++
++ if (TRICLINIC(box))
++ {
++ for(i=0; (i<natoms); i++)
++ {
++ for(m=npbcdim-1; m>=0; m--) {
++ while (x[i][m] < 0)
++ {
++ for(d=0; d<=m; d++)
++ {
++ x[i][d] += box[m][d];
++ }
++ }
++ while (x[i][m] >= box[m][m])
++ {
++ for(d=0; d<=m; d++)
++ {
++ x[i][d] -= box[m][d];
++ }
++ }
++ }
++ }
++ }
++ else
++ {
++ for(i=0; i<natoms; i++)
++ {
++ for(d=0; d<npbcdim; d++) {
++ while (x[i][d] < 0)
++ {
++ x[i][d] += box[d][d];
++ }
++ while (x[i][d] >= box[d][d])
++ {
++ x[i][d] -= box[d][d];
++ }
++ }
++ }
++ }
+}
+
+void put_atoms_in_triclinic_unitcell(int ecenter,matrix box,
+ int natoms,rvec x[])
+{
+ rvec box_center,shift_center;
+ real shm01,shm02,shm12,shift;
+ int i,m,d;
+
+ calc_box_center(ecenter,box,box_center);
+
+ /* The product of matrix shm with a coordinate gives the shift vector
+ which is required determine the periodic cell position */
+ shm01 = box[1][0]/box[1][1];
+ shm02 = (box[1][1]*box[2][0] - box[2][1]*box[1][0])/(box[1][1]*box[2][2]);
+ shm12 = box[2][1]/box[2][2];
+
+ clear_rvec(shift_center);
+ for(d=0; d<DIM; d++)
+ rvec_inc(shift_center,box[d]);
+ svmul(0.5,shift_center,shift_center);
+ rvec_sub(box_center,shift_center,shift_center);
+
+ shift_center[0] = shm01*shift_center[1] + shm02*shift_center[2];
+ shift_center[1] = shm12*shift_center[2];
+ shift_center[2] = 0;
+
+ for(i=0; (i<natoms); i++)
+ for(m=DIM-1; m>=0; m--) {
+ shift = shift_center[m];
+ if (m == 0) {
+ shift += shm01*x[i][1] + shm02*x[i][2];
+ } else if (m == 1) {
+ shift += shm12*x[i][2];
+ }
+ while (x[i][m]-shift < 0)
+ for(d=0; d<=m; d++)
+ x[i][d] += box[m][d];
+ while (x[i][m]-shift >= box[m][m])
+ for(d=0; d<=m; d++)
+ x[i][d] -= box[m][d];
+ }
+}
+
+const char *
+put_atoms_in_compact_unitcell(int ePBC,int ecenter,matrix box,
+ int natoms,rvec x[])
+ {
+ t_pbc pbc;
+ rvec box_center,dx;
+ int i;
+
+ set_pbc(&pbc,ePBC,box);
+ calc_box_center(ecenter,box,box_center);
+ for(i=0; i<natoms; i++) {
+ pbc_dx(&pbc,x[i],box_center,dx);
+ rvec_add(box_center,dx,x[i]);
+ }
+
+ return pbc.bLimitDistance ?
+ "WARNING: Could not put all atoms in the compact unitcell\n"
+ : NULL;
+ }
+
--- /dev/null
- static const int tpx_version = 80;
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/* This file is completely threadsafe - keep it that way! */
+#ifdef GMX_THREAD_MPI
+#include <thread_mpi.h>
+#endif
+
+
+#include <ctype.h>
+#include "sysstuff.h"
+#include "smalloc.h"
+#include "string2.h"
+#include "gmx_fatal.h"
+#include "macros.h"
+#include "names.h"
+#include "symtab.h"
+#include "futil.h"
+#include "filenm.h"
+#include "gmxfio.h"
+#include "topsort.h"
+#include "tpxio.h"
+#include "txtdump.h"
+#include "confio.h"
+#include "atomprop.h"
+#include "copyrite.h"
+#include "vec.h"
+#include "mtop_util.h"
+
+#define TPX_TAG_RELEASE "release"
+
+/* This is the tag string which is stored in the tpx file.
+ * Change this if you want to change the tpx format in a feature branch.
+ * This ensures that there will not be different tpx formats around which
+ * can not be distinguished.
+ */
+static const char *tpx_tag = TPX_TAG_RELEASE;
+
+/* This number should be increased whenever the file format changes! */
- * of the tpx format. This way we can maintain forward compatibility too
- * for all analysis tools and/or external programs that only need to
- * know the atom/residue names, charges, and bond connectivity.
++static const int tpx_version = 90;
+
+/* This number should only be increased when you edit the TOPOLOGY section
- static const int tpx_generation = 24;
++ * or the HEADER of the tpx format.
++ * This way we can maintain forward compatibility too for all analysis tools
++ * and/or external programs that only need to know the atom/residue names,
++ * charges, and bond connectivity.
+ *
+ * It first appeared in tpx version 26, when I also moved the inputrecord
+ * to the end of the tpx file, so we can just skip it if we only
+ * want the topology.
+ */
-
++static const int tpx_generation = 25;
+
+/* This number should be the most recent backwards incompatible version
+ * I.e., if this number is 9, we cannot read tpx version 9 with this code.
+ */
+static const int tpx_incompatible_version = 9;
+
+
+
+/* Struct used to maintain tpx compatibility when function types are added */
+typedef struct {
+ int fvnr; /* file version number in which the function type first appeared */
+ int ftype; /* function type */
+} t_ftupd;
+
+/*
+ *The entries should be ordered in:
+ * 1. ascending file version number
+ * 2. ascending function type number
+ */
+/*static const t_ftupd ftupd[] = {
+ { 20, F_CUBICBONDS },
+ { 20, F_CONNBONDS },
+ { 20, F_HARMONIC },
+ { 20, F_EQM, },
+ { 22, F_DISRESVIOL },
+ { 22, F_ORIRES },
+ { 22, F_ORIRESDEV },
+ { 26, F_FOURDIHS },
+ { 26, F_PIDIHS },
+ { 26, F_DIHRES },
+ { 26, F_DIHRESVIOL },
+ { 30, F_CROSS_BOND_BONDS },
+ { 30, F_CROSS_BOND_ANGLES },
+ { 30, F_UREY_BRADLEY },
+ { 30, F_POLARIZATION },
+ { 54, F_DHDL_CON },
+ };*/
+/*
+ *The entries should be ordered in:
+ * 1. ascending function type number
+ * 2. ascending file version number
+ */
+/* question; what is the purpose of the commented code above? */
+static const t_ftupd ftupd[] = {
+ { 20, F_CUBICBONDS },
+ { 20, F_CONNBONDS },
+ { 20, F_HARMONIC },
+ { 34, F_FENEBONDS },
+ { 43, F_TABBONDS },
+ { 43, F_TABBONDSNC },
+ { 70, F_RESTRBONDS },
+ { 76, F_LINEAR_ANGLES },
+ { 30, F_CROSS_BOND_BONDS },
+ { 30, F_CROSS_BOND_ANGLES },
+ { 30, F_UREY_BRADLEY },
+ { 34, F_QUARTIC_ANGLES },
+ { 43, F_TABANGLES },
+ { 26, F_FOURDIHS },
+ { 26, F_PIDIHS },
+ { 43, F_TABDIHS },
+ { 65, F_CMAP },
+ { 60, F_GB12 },
+ { 61, F_GB13 },
+ { 61, F_GB14 },
+ { 72, F_GBPOL },
+ { 72, F_NPSOLVATION },
+ { 41, F_LJC14_Q },
+ { 41, F_LJC_PAIRS_NB },
+ { 32, F_BHAM_LR },
+ { 32, F_RF_EXCL },
+ { 32, F_COUL_RECIP },
+ { 46, F_DPD },
+ { 30, F_POLARIZATION },
+ { 36, F_THOLE_POL },
+ { 80, F_FBPOSRES },
+ { 22, F_DISRESVIOL },
+ { 22, F_ORIRES },
+ { 22, F_ORIRESDEV },
+ { 26, F_DIHRES },
+ { 26, F_DIHRESVIOL },
+ { 49, F_VSITE4FDN },
+ { 50, F_VSITEN },
+ { 46, F_COM_PULL },
+ { 20, F_EQM },
+ { 46, F_ECONSERVED },
+ { 69, F_VTEMP },
+ { 66, F_PDISPCORR },
+ { 54, F_DHDL_CON },
+ { 76, F_ANHARM_POL },
+ { 79, F_DVDL_COUL },
+ { 79, F_DVDL_VDW, },
+ { 79, F_DVDL_BONDED, },
+ { 79, F_DVDL_RESTRAINT },
+ { 79, F_DVDL_TEMPERATURE },
+ { 54, F_DHDL_CON }
+};
+#define NFTUPD asize(ftupd)
+
+/* Needed for backward compatibility */
+#define MAXNODES 256
+
+static void _do_section(t_fileio *fio,int key,gmx_bool bRead,const char *src,
+ int line)
+{
+ char buf[STRLEN];
+ gmx_bool bDbg;
+
+ if (gmx_fio_getftp(fio) == efTPA) {
+ if (!bRead) {
+ gmx_fio_write_string(fio,itemstr[key]);
+ bDbg = gmx_fio_getdebug(fio);
+ gmx_fio_setdebug(fio,FALSE);
+ gmx_fio_write_string(fio,comment_str[key]);
+ gmx_fio_setdebug(fio,bDbg);
+ }
+ else {
+ if (gmx_fio_getdebug(fio))
+ fprintf(stderr,"Looking for section %s (%s, %d)",
+ itemstr[key],src,line);
+
+ do {
+ gmx_fio_do_string(fio,buf);
+ } while ((gmx_strcasecmp(buf,itemstr[key]) != 0));
+
+ if (gmx_strcasecmp(buf,itemstr[key]) != 0)
+ gmx_fatal(FARGS,"\nCould not find section heading %s",itemstr[key]);
+ else if (gmx_fio_getdebug(fio))
+ fprintf(stderr," and found it\n");
+ }
+ }
+}
+
+#define do_section(fio,key,bRead) _do_section(fio,key,bRead,__FILE__,__LINE__)
+
+/**************************************************************
+ *
+ * Now the higer level routines that do io of the structures and arrays
+ *
+ **************************************************************/
+static void do_pullgrp(t_fileio *fio, t_pullgrp *pgrp, gmx_bool bRead,
+ int file_version)
+{
+ gmx_bool bDum=TRUE;
+ int i;
+
+ gmx_fio_do_int(fio,pgrp->nat);
+ if (bRead)
+ snew(pgrp->ind,pgrp->nat);
+ bDum=gmx_fio_ndo_int(fio,pgrp->ind,pgrp->nat);
+ gmx_fio_do_int(fio,pgrp->nweight);
+ if (bRead)
+ snew(pgrp->weight,pgrp->nweight);
+ bDum=gmx_fio_ndo_real(fio,pgrp->weight,pgrp->nweight);
+ gmx_fio_do_int(fio,pgrp->pbcatom);
+ gmx_fio_do_rvec(fio,pgrp->vec);
+ gmx_fio_do_rvec(fio,pgrp->init);
+ gmx_fio_do_real(fio,pgrp->rate);
+ gmx_fio_do_real(fio,pgrp->k);
+ if (file_version >= 56) {
+ gmx_fio_do_real(fio,pgrp->kB);
+ } else {
+ pgrp->kB = pgrp->k;
+ }
+}
+
+static void do_expandedvals(t_fileio *fio,t_expanded *expand,int n_lambda, gmx_bool bRead, int file_version)
+{
+ /* i is used in the ndo_double macro*/
+ int i;
+ real fv;
+ gmx_bool bDum=TRUE;
+ real rdum;
+
+ if (file_version >= 79)
+ {
+ if (n_lambda>0)
+ {
+ if (bRead)
+ {
+ snew(expand->init_lambda_weights,n_lambda);
+ }
+ bDum=gmx_fio_ndo_real(fio,expand->init_lambda_weights,n_lambda);
+ gmx_fio_do_gmx_bool(fio,expand->bInit_weights);
+ }
+
+ gmx_fio_do_int(fio,expand->nstexpanded);
+ gmx_fio_do_int(fio,expand->elmcmove);
+ gmx_fio_do_int(fio,expand->elamstats);
+ gmx_fio_do_int(fio,expand->lmc_repeats);
+ gmx_fio_do_int(fio,expand->gibbsdeltalam);
+ gmx_fio_do_int(fio,expand->lmc_forced_nstart);
+ gmx_fio_do_int(fio,expand->lmc_seed);
+ gmx_fio_do_real(fio,expand->mc_temp);
+ gmx_fio_do_int(fio,expand->bSymmetrizedTMatrix);
+ gmx_fio_do_int(fio,expand->nstTij);
+ gmx_fio_do_int(fio,expand->minvarmin);
+ gmx_fio_do_int(fio,expand->c_range);
+ gmx_fio_do_real(fio,expand->wl_scale);
+ gmx_fio_do_real(fio,expand->wl_ratio);
+ gmx_fio_do_real(fio,expand->init_wl_delta);
+ gmx_fio_do_gmx_bool(fio,expand->bWLoneovert);
+ gmx_fio_do_int(fio,expand->elmceq);
+ gmx_fio_do_int(fio,expand->equil_steps);
+ gmx_fio_do_int(fio,expand->equil_samples);
+ gmx_fio_do_int(fio,expand->equil_n_at_lam);
+ gmx_fio_do_real(fio,expand->equil_wl_delta);
+ gmx_fio_do_real(fio,expand->equil_ratio);
+ }
+}
+
+static void do_simtempvals(t_fileio *fio,t_simtemp *simtemp, int n_lambda, gmx_bool bRead,
+ int file_version)
+{
+ gmx_bool bDum=TRUE;
+
+ if (file_version >= 79)
+ {
+ gmx_fio_do_int(fio,simtemp->eSimTempScale);
+ gmx_fio_do_real(fio,simtemp->simtemp_high);
+ gmx_fio_do_real(fio,simtemp->simtemp_low);
+ if (n_lambda>0)
+ {
+ if (bRead)
+ {
+ snew(simtemp->temperatures,n_lambda);
+ }
+ bDum=gmx_fio_ndo_real(fio,simtemp->temperatures,n_lambda);
+ }
+ }
+}
+
+static void do_fepvals(t_fileio *fio,t_lambda *fepvals,gmx_bool bRead, int file_version)
+{
+ /* i is defined in the ndo_double macro; use g to iterate. */
+ int i,g;
+ real fv;
+ gmx_bool bDum=TRUE;
+ real rdum;
+
+ /* free energy values */
+ if (file_version >= 79)
+ {
+ gmx_fio_do_int(fio,fepvals->init_fep_state);
+ gmx_fio_do_double(fio,fepvals->init_lambda);
+ gmx_fio_do_double(fio,fepvals->delta_lambda);
+ }
+ else if (file_version >= 59) {
+ gmx_fio_do_double(fio,fepvals->init_lambda);
+ gmx_fio_do_double(fio,fepvals->delta_lambda);
+ } else {
+ gmx_fio_do_real(fio,rdum);
+ fepvals->init_lambda = rdum;
+ gmx_fio_do_real(fio,rdum);
+ fepvals->delta_lambda = rdum;
+ }
+ if (file_version >= 79)
+ {
+ gmx_fio_do_int(fio,fepvals->n_lambda);
+ if (bRead)
+ {
+ snew(fepvals->all_lambda,efptNR);
+ }
+ for (g=0;g<efptNR;g++)
+ {
+ if (fepvals->n_lambda > 0) {
+ if (bRead)
+ {
+ snew(fepvals->all_lambda[g],fepvals->n_lambda);
+ }
+ bDum=gmx_fio_ndo_double(fio,fepvals->all_lambda[g],fepvals->n_lambda);
+ bDum=gmx_fio_ndo_int(fio,fepvals->separate_dvdl,efptNR);
+ }
+ else if (fepvals->init_lambda >= 0)
+ {
+ fepvals->separate_dvdl[efptFEP] = TRUE;
+ }
+ }
+ }
+ else if (file_version >= 64)
+ {
+ gmx_fio_do_int(fio,fepvals->n_lambda);
+ snew(fepvals->all_lambda,efptNR);
+ if (bRead)
+ {
+ snew(fepvals->all_lambda[efptFEP],fepvals->n_lambda);
+ }
+ bDum=gmx_fio_ndo_double(fio,fepvals->all_lambda[efptFEP],fepvals->n_lambda);
+ if (fepvals->init_lambda >= 0)
+ {
+ fepvals->separate_dvdl[efptFEP] = TRUE;
+ }
+ /* still allocate the all_lambda array's contents. */
+ for (g=0;g<efptNR;g++)
+ {
+ if (fepvals->n_lambda > 0) {
+ if (bRead)
+ {
+ snew(fepvals->all_lambda[g],fepvals->n_lambda);
+ }
+ }
+ }
+ }
+ else
+ {
+ fepvals->n_lambda = 0;
+ fepvals->all_lambda = NULL;
+ if (fepvals->init_lambda >= 0)
+ {
+ fepvals->separate_dvdl[efptFEP] = TRUE;
+ }
+ }
+ if (file_version >= 13)
+ {
+ gmx_fio_do_real(fio,fepvals->sc_alpha);
+ }
+ else
+ {
+ fepvals->sc_alpha = 0;
+ }
+ if (file_version >= 38)
+ {
+ gmx_fio_do_int(fio,fepvals->sc_power);
+ }
+ else
+ {
+ fepvals->sc_power = 2;
+ }
+ if (file_version >= 79)
+ {
+ gmx_fio_do_real(fio,fepvals->sc_r_power);
+ }
+ else
+ {
+ fepvals->sc_r_power = 6.0;
+ }
+ if (file_version >= 15)
+ {
+ gmx_fio_do_real(fio,fepvals->sc_sigma);
+ }
+ else
+ {
+ fepvals->sc_sigma = 0.3;
+ }
+ if (bRead)
+ {
+ if (file_version >= 71)
+ {
+ fepvals->sc_sigma_min = fepvals->sc_sigma;
+ }
+ else
+ {
+ fepvals->sc_sigma_min = 0;
+ }
+ }
+ if (file_version >= 79)
+ {
+ gmx_fio_do_int(fio,fepvals->bScCoul);
+ }
+ else
+ {
+ fepvals->bScCoul = TRUE;
+ }
+ if (file_version >= 64) {
+ gmx_fio_do_int(fio,fepvals->nstdhdl);
+ } else {
+ fepvals->nstdhdl = 1;
+ }
+
+ if (file_version >= 73)
+ {
+ gmx_fio_do_int(fio, fepvals->separate_dhdl_file);
+ gmx_fio_do_int(fio, fepvals->dhdl_derivatives);
+ }
+ else
+ {
+ fepvals->separate_dhdl_file = esepdhdlfileYES;
+ fepvals->dhdl_derivatives = edhdlderivativesYES;
+ }
+ if (file_version >= 71)
+ {
+ gmx_fio_do_int(fio,fepvals->dh_hist_size);
+ gmx_fio_do_double(fio,fepvals->dh_hist_spacing);
+ }
+ else
+ {
+ fepvals->dh_hist_size = 0;
+ fepvals->dh_hist_spacing = 0.1;
+ }
+ if (file_version >= 79)
+ {
+ gmx_fio_do_int(fio,fepvals->bPrintEnergy);
+ }
+ else
+ {
+ fepvals->bPrintEnergy = FALSE;
+ }
+}
+
+static void do_pull(t_fileio *fio, t_pull *pull,gmx_bool bRead, int file_version)
+{
+ int g;
+
+ gmx_fio_do_int(fio,pull->ngrp);
+ gmx_fio_do_int(fio,pull->eGeom);
+ gmx_fio_do_ivec(fio,pull->dim);
+ gmx_fio_do_real(fio,pull->cyl_r1);
+ gmx_fio_do_real(fio,pull->cyl_r0);
+ gmx_fio_do_real(fio,pull->constr_tol);
+ gmx_fio_do_int(fio,pull->nstxout);
+ gmx_fio_do_int(fio,pull->nstfout);
+ if (bRead)
+ snew(pull->grp,pull->ngrp+1);
+ for(g=0; g<pull->ngrp+1; g++)
+ do_pullgrp(fio,&pull->grp[g],bRead,file_version);
+}
+
+
+static void do_rotgrp(t_fileio *fio, t_rotgrp *rotg,gmx_bool bRead, int file_version)
+{
+ gmx_bool bDum=TRUE;
+ int i;
+
+ gmx_fio_do_int(fio,rotg->eType);
+ gmx_fio_do_int(fio,rotg->bMassW);
+ gmx_fio_do_int(fio,rotg->nat);
+ if (bRead)
+ snew(rotg->ind,rotg->nat);
+ gmx_fio_ndo_int(fio,rotg->ind,rotg->nat);
+ if (bRead)
+ snew(rotg->x_ref,rotg->nat);
+ gmx_fio_ndo_rvec(fio,rotg->x_ref,rotg->nat);
+ gmx_fio_do_rvec(fio,rotg->vec);
+ gmx_fio_do_rvec(fio,rotg->pivot);
+ gmx_fio_do_real(fio,rotg->rate);
+ gmx_fio_do_real(fio,rotg->k);
+ gmx_fio_do_real(fio,rotg->slab_dist);
+ gmx_fio_do_real(fio,rotg->min_gaussian);
+ gmx_fio_do_real(fio,rotg->eps);
+ gmx_fio_do_int(fio,rotg->eFittype);
+ gmx_fio_do_int(fio,rotg->PotAngle_nstep);
+ gmx_fio_do_real(fio,rotg->PotAngle_step);
+}
+
+static void do_rot(t_fileio *fio, t_rot *rot,gmx_bool bRead, int file_version)
+{
+ int g;
+
+ gmx_fio_do_int(fio,rot->ngrp);
+ gmx_fio_do_int(fio,rot->nstrout);
+ gmx_fio_do_int(fio,rot->nstsout);
+ if (bRead)
+ snew(rot->grp,rot->ngrp);
+ for(g=0; g<rot->ngrp; g++)
+ do_rotgrp(fio, &rot->grp[g],bRead,file_version);
+}
+
+
+static void do_inputrec(t_fileio *fio, t_inputrec *ir,gmx_bool bRead,
+ int file_version, real *fudgeQQ)
+{
+ int i,j,k,*tmp,idum=0;
+ gmx_bool bDum=TRUE;
+ real rdum,bd_temp;
+ rvec vdum;
+ gmx_bool bSimAnn;
+ real zerotemptime,finish_t,init_temp,finish_temp;
+
+ if (file_version != tpx_version)
+ {
+ /* Give a warning about features that are not accessible */
+ fprintf(stderr,"Note: file tpx version %d, software tpx version %d\n",
+ file_version,tpx_version);
+ }
+
+ if (bRead)
+ {
+ init_inputrec(ir);
+ }
+
+ if (file_version == 0)
+ {
+ return;
+ }
+
+ /* Basic inputrec stuff */
+ gmx_fio_do_int(fio,ir->eI);
+ if (file_version >= 62) {
+ gmx_fio_do_gmx_large_int(fio, ir->nsteps);
+ } else {
+ gmx_fio_do_int(fio,idum);
+ ir->nsteps = idum;
+ }
+ if(file_version > 25) {
+ if (file_version >= 62) {
+ gmx_fio_do_gmx_large_int(fio, ir->init_step);
+ } else {
+ gmx_fio_do_int(fio,idum);
+ ir->init_step = idum;
+ }
+ } else {
+ ir->init_step=0;
+ }
+
+ if(file_version >= 58)
+ gmx_fio_do_int(fio,ir->simulation_part);
+ else
+ ir->simulation_part=1;
+
+ if (file_version >= 67) {
+ gmx_fio_do_int(fio,ir->nstcalcenergy);
+ } else {
+ ir->nstcalcenergy = 1;
+ }
+ if (file_version < 53) {
+ /* The pbc info has been moved out of do_inputrec,
+ * since we always want it, also without reading the inputrec.
+ */
+ gmx_fio_do_int(fio,ir->ePBC);
+ if ((file_version <= 15) && (ir->ePBC == 2))
+ ir->ePBC = epbcNONE;
+ if (file_version >= 45) {
+ gmx_fio_do_int(fio,ir->bPeriodicMols);
+ } else {
+ if (ir->ePBC == 2) {
+ ir->ePBC = epbcXYZ;
+ ir->bPeriodicMols = TRUE;
+ } else {
+ ir->bPeriodicMols = FALSE;
+ }
+ }
+ }
++ if (file_version >= 81)
++ {
++ gmx_fio_do_int(fio,ir->cutoff_scheme);
++ }
++ else
++ {
++ ir->cutoff_scheme = ecutsGROUP;
++ }
+ gmx_fio_do_int(fio,ir->ns_type);
+ gmx_fio_do_int(fio,ir->nstlist);
+ gmx_fio_do_int(fio,ir->ndelta);
+ if (file_version < 41) {
+ gmx_fio_do_int(fio,idum);
+ gmx_fio_do_int(fio,idum);
+ }
+ if (file_version >= 45)
+ gmx_fio_do_real(fio,ir->rtpi);
+ else
+ ir->rtpi = 0.05;
+ gmx_fio_do_int(fio,ir->nstcomm);
+ if (file_version > 34)
+ gmx_fio_do_int(fio,ir->comm_mode);
+ else if (ir->nstcomm < 0)
+ ir->comm_mode = ecmANGULAR;
+ else
+ ir->comm_mode = ecmLINEAR;
+ ir->nstcomm = abs(ir->nstcomm);
+
+ if(file_version > 25)
+ gmx_fio_do_int(fio,ir->nstcheckpoint);
+ else
+ ir->nstcheckpoint=0;
+
+ gmx_fio_do_int(fio,ir->nstcgsteep);
+
+ if(file_version>=30)
+ gmx_fio_do_int(fio,ir->nbfgscorr);
+ else if (bRead)
+ ir->nbfgscorr = 10;
+
+ gmx_fio_do_int(fio,ir->nstlog);
+ gmx_fio_do_int(fio,ir->nstxout);
+ gmx_fio_do_int(fio,ir->nstvout);
+ gmx_fio_do_int(fio,ir->nstfout);
+ gmx_fio_do_int(fio,ir->nstenergy);
+ gmx_fio_do_int(fio,ir->nstxtcout);
+ if (file_version >= 59) {
+ gmx_fio_do_double(fio,ir->init_t);
+ gmx_fio_do_double(fio,ir->delta_t);
+ } else {
+ gmx_fio_do_real(fio,rdum);
+ ir->init_t = rdum;
+ gmx_fio_do_real(fio,rdum);
+ ir->delta_t = rdum;
+ }
+ gmx_fio_do_real(fio,ir->xtcprec);
+ if (file_version < 19) {
+ gmx_fio_do_int(fio,idum);
+ gmx_fio_do_int(fio,idum);
+ }
+ if(file_version < 18)
+ gmx_fio_do_int(fio,idum);
++ if (file_version >= 81) {
++ gmx_fio_do_real(fio,ir->verletbuf_drift);
++ } else {
++ ir->verletbuf_drift = 0;
++ }
+ gmx_fio_do_real(fio,ir->rlist);
+ if (file_version >= 67) {
+ gmx_fio_do_real(fio,ir->rlistlong);
+ }
+ gmx_fio_do_int(fio,ir->coulombtype);
+ if (file_version < 32 && ir->coulombtype == eelRF)
+ ir->coulombtype = eelRF_NEC;
++ if (file_version >= 81)
++ {
++ gmx_fio_do_int(fio,ir->coulomb_modifier);
++ }
++ else
++ {
++ ir->coulomb_modifier = (ir->cutoff_scheme == ecutsVERLET ? eintmodPOTSHIFT : eintmodNONE);
++ }
+ gmx_fio_do_real(fio,ir->rcoulomb_switch);
+ gmx_fio_do_real(fio,ir->rcoulomb);
+ gmx_fio_do_int(fio,ir->vdwtype);
++ if (file_version >= 81)
++ {
++ gmx_fio_do_int(fio,ir->vdw_modifier);
++ }
++ else
++ {
++ ir->vdw_modifier = (ir->cutoff_scheme == ecutsVERLET ? eintmodPOTSHIFT : eintmodNONE);
++ }
+ gmx_fio_do_real(fio,ir->rvdw_switch);
+ gmx_fio_do_real(fio,ir->rvdw);
+ if (file_version < 67) {
+ ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
+ }
+ gmx_fio_do_int(fio,ir->eDispCorr);
+ gmx_fio_do_real(fio,ir->epsilon_r);
+ if (file_version >= 37) {
+ gmx_fio_do_real(fio,ir->epsilon_rf);
+ } else {
+ if (EEL_RF(ir->coulombtype)) {
+ ir->epsilon_rf = ir->epsilon_r;
+ ir->epsilon_r = 1.0;
+ } else {
+ ir->epsilon_rf = 1.0;
+ }
+ }
+ if (file_version >= 29)
+ gmx_fio_do_real(fio,ir->tabext);
+ else
+ ir->tabext=1.0;
+
+ if(file_version > 25) {
+ gmx_fio_do_int(fio,ir->gb_algorithm);
+ gmx_fio_do_int(fio,ir->nstgbradii);
+ gmx_fio_do_real(fio,ir->rgbradii);
+ gmx_fio_do_real(fio,ir->gb_saltconc);
+ gmx_fio_do_int(fio,ir->implicit_solvent);
+ } else {
+ ir->gb_algorithm=egbSTILL;
+ ir->nstgbradii=1;
+ ir->rgbradii=1.0;
+ ir->gb_saltconc=0;
+ ir->implicit_solvent=eisNO;
+ }
+ if(file_version>=55)
+ {
+ gmx_fio_do_real(fio,ir->gb_epsilon_solvent);
+ gmx_fio_do_real(fio,ir->gb_obc_alpha);
+ gmx_fio_do_real(fio,ir->gb_obc_beta);
+ gmx_fio_do_real(fio,ir->gb_obc_gamma);
+ if(file_version>=60)
+ {
+ gmx_fio_do_real(fio,ir->gb_dielectric_offset);
+ gmx_fio_do_int(fio,ir->sa_algorithm);
+ }
+ else
+ {
+ ir->gb_dielectric_offset = 0.009;
+ ir->sa_algorithm = esaAPPROX;
+ }
+ gmx_fio_do_real(fio,ir->sa_surface_tension);
+
+ /* Override sa_surface_tension if it is not changed in the mpd-file */
+ if(ir->sa_surface_tension<0)
+ {
+ if(ir->gb_algorithm==egbSTILL)
+ {
+ ir->sa_surface_tension = 0.0049 * 100 * CAL2JOULE;
+ }
+ else if(ir->gb_algorithm==egbHCT || ir->gb_algorithm==egbOBC)
+ {
+ ir->sa_surface_tension = 0.0054 * 100 * CAL2JOULE;
+ }
+ }
+
+ }
+ else
+ {
+ /* Better use sensible values than insane (0.0) ones... */
+ ir->gb_epsilon_solvent = 80;
+ ir->gb_obc_alpha = 1.0;
+ ir->gb_obc_beta = 0.8;
+ ir->gb_obc_gamma = 4.85;
+ ir->sa_surface_tension = 2.092;
+ }
+
- int i;
++
++ if (file_version >= 81)
++ {
++ gmx_fio_do_real(fio,ir->fourier_spacing);
++ }
++ else
++ {
++ ir->fourier_spacing = 0.0;
++ }
+ gmx_fio_do_int(fio,ir->nkx);
+ gmx_fio_do_int(fio,ir->nky);
+ gmx_fio_do_int(fio,ir->nkz);
+ gmx_fio_do_int(fio,ir->pme_order);
+ gmx_fio_do_real(fio,ir->ewald_rtol);
+
+ if (file_version >=24)
+ gmx_fio_do_int(fio,ir->ewald_geometry);
+ else
+ ir->ewald_geometry=eewg3D;
+
+ if (file_version <=17) {
+ ir->epsilon_surface=0;
+ if (file_version==17)
+ gmx_fio_do_int(fio,idum);
+ }
+ else
+ gmx_fio_do_real(fio,ir->epsilon_surface);
+
+ gmx_fio_do_gmx_bool(fio,ir->bOptFFT);
+
+ gmx_fio_do_gmx_bool(fio,ir->bContinuation);
+ gmx_fio_do_int(fio,ir->etc);
+ /* before version 18, ir->etc was a gmx_bool (ir->btc),
+ * but the values 0 and 1 still mean no and
+ * berendsen temperature coupling, respectively.
+ */
+ if (file_version >= 79) {
+ gmx_fio_do_gmx_bool(fio,ir->bPrintNHChains);
+ }
+ if (file_version >= 71)
+ {
+ gmx_fio_do_int(fio,ir->nsttcouple);
+ }
+ else
+ {
+ ir->nsttcouple = ir->nstcalcenergy;
+ }
+ if (file_version <= 15)
+ {
+ gmx_fio_do_int(fio,idum);
+ }
+ if (file_version <=17)
+ {
+ gmx_fio_do_int(fio,ir->epct);
+ if (file_version <= 15)
+ {
+ if (ir->epct == 5)
+ {
+ ir->epct = epctSURFACETENSION;
+ }
+ gmx_fio_do_int(fio,idum);
+ }
+ ir->epct -= 1;
+ /* we have removed the NO alternative at the beginning */
+ if(ir->epct==-1)
+ {
+ ir->epc=epcNO;
+ ir->epct=epctISOTROPIC;
+ }
+ else
+ {
+ ir->epc=epcBERENDSEN;
+ }
+ }
+ else
+ {
+ gmx_fio_do_int(fio,ir->epc);
+ gmx_fio_do_int(fio,ir->epct);
+ }
+ if (file_version >= 71)
+ {
+ gmx_fio_do_int(fio,ir->nstpcouple);
+ }
+ else
+ {
+ ir->nstpcouple = ir->nstcalcenergy;
+ }
+ gmx_fio_do_real(fio,ir->tau_p);
+ if (file_version <= 15) {
+ gmx_fio_do_rvec(fio,vdum);
+ clear_mat(ir->ref_p);
+ for(i=0; i<DIM; i++)
+ ir->ref_p[i][i] = vdum[i];
+ } else {
+ gmx_fio_do_rvec(fio,ir->ref_p[XX]);
+ gmx_fio_do_rvec(fio,ir->ref_p[YY]);
+ gmx_fio_do_rvec(fio,ir->ref_p[ZZ]);
+ }
+ if (file_version <= 15) {
+ gmx_fio_do_rvec(fio,vdum);
+ clear_mat(ir->compress);
+ for(i=0; i<DIM; i++)
+ ir->compress[i][i] = vdum[i];
+ }
+ else {
+ gmx_fio_do_rvec(fio,ir->compress[XX]);
+ gmx_fio_do_rvec(fio,ir->compress[YY]);
+ gmx_fio_do_rvec(fio,ir->compress[ZZ]);
+ }
+ if (file_version >= 47) {
+ gmx_fio_do_int(fio,ir->refcoord_scaling);
+ gmx_fio_do_rvec(fio,ir->posres_com);
+ gmx_fio_do_rvec(fio,ir->posres_comB);
+ } else {
+ ir->refcoord_scaling = erscNO;
+ clear_rvec(ir->posres_com);
+ clear_rvec(ir->posres_comB);
+ }
+ if((file_version > 25) && (file_version < 79))
+ gmx_fio_do_int(fio,ir->andersen_seed);
+ else
+ ir->andersen_seed=0;
+ if(file_version < 26) {
+ gmx_fio_do_gmx_bool(fio,bSimAnn);
+ gmx_fio_do_real(fio,zerotemptime);
+ }
+
+ if (file_version < 37)
+ gmx_fio_do_real(fio,rdum);
+
+ gmx_fio_do_real(fio,ir->shake_tol);
+ if (file_version < 54)
+ gmx_fio_do_real(fio,*fudgeQQ);
+
+ gmx_fio_do_int(fio,ir->efep);
+ if (file_version <= 14 && ir->efep != efepNO)
+ {
+ ir->efep = efepYES;
+ }
+ do_fepvals(fio,ir->fepvals,bRead,file_version);
+
+ if (file_version >= 79)
+ {
+ gmx_fio_do_gmx_bool(fio,ir->bSimTemp);
+ if (ir->bSimTemp)
+ {
+ ir->bSimTemp = TRUE;
+ }
+ }
+ else
+ {
+ ir->bSimTemp = FALSE;
+ }
+ if (ir->bSimTemp)
+ {
+ do_simtempvals(fio,ir->simtempvals,ir->fepvals->n_lambda,bRead,file_version);
+ }
+
+ if (file_version >= 79)
+ {
+ gmx_fio_do_gmx_bool(fio,ir->bExpanded);
+ if (ir->bExpanded)
+ {
+ ir->bExpanded = TRUE;
+ }
+ else
+ {
+ ir->bExpanded = FALSE;
+ }
+ }
+ if (ir->bExpanded)
+ {
+ do_expandedvals(fio,ir->expandedvals,ir->fepvals->n_lambda,bRead,file_version);
+ }
+ if (file_version >= 57) {
+ gmx_fio_do_int(fio,ir->eDisre);
+ }
+ gmx_fio_do_int(fio,ir->eDisreWeighting);
+ if (file_version < 22) {
+ if (ir->eDisreWeighting == 0)
+ ir->eDisreWeighting = edrwEqual;
+ else
+ ir->eDisreWeighting = edrwConservative;
+ }
+ gmx_fio_do_gmx_bool(fio,ir->bDisreMixed);
+ gmx_fio_do_real(fio,ir->dr_fc);
+ gmx_fio_do_real(fio,ir->dr_tau);
+ gmx_fio_do_int(fio,ir->nstdisreout);
+ if (file_version >= 22) {
+ gmx_fio_do_real(fio,ir->orires_fc);
+ gmx_fio_do_real(fio,ir->orires_tau);
+ gmx_fio_do_int(fio,ir->nstorireout);
+ } else {
+ ir->orires_fc = 0;
+ ir->orires_tau = 0;
+ ir->nstorireout = 0;
+ }
+ if(file_version >= 26 && file_version < 79) {
+ gmx_fio_do_real(fio,ir->dihre_fc);
+ if (file_version < 56)
+ {
+ gmx_fio_do_real(fio,rdum);
+ gmx_fio_do_int(fio,idum);
+ }
+ } else {
+ ir->dihre_fc=0;
+ }
+
+ gmx_fio_do_real(fio,ir->em_stepsize);
+ gmx_fio_do_real(fio,ir->em_tol);
+ if (file_version >= 22)
+ gmx_fio_do_gmx_bool(fio,ir->bShakeSOR);
+ else if (bRead)
+ ir->bShakeSOR = TRUE;
+ if (file_version >= 11)
+ gmx_fio_do_int(fio,ir->niter);
+ else if (bRead) {
+ ir->niter = 25;
+ fprintf(stderr,"Note: niter not in run input file, setting it to %d\n",
+ ir->niter);
+ }
+ if (file_version >= 21)
+ gmx_fio_do_real(fio,ir->fc_stepsize);
+ else
+ ir->fc_stepsize = 0;
+ gmx_fio_do_int(fio,ir->eConstrAlg);
+ gmx_fio_do_int(fio,ir->nProjOrder);
+ gmx_fio_do_real(fio,ir->LincsWarnAngle);
+ if (file_version <= 14)
+ gmx_fio_do_int(fio,idum);
+ if (file_version >=26)
+ gmx_fio_do_int(fio,ir->nLincsIter);
+ else if (bRead) {
+ ir->nLincsIter = 1;
+ fprintf(stderr,"Note: nLincsIter not in run input file, setting it to %d\n",
+ ir->nLincsIter);
+ }
+ if (file_version < 33)
+ gmx_fio_do_real(fio,bd_temp);
+ gmx_fio_do_real(fio,ir->bd_fric);
+ gmx_fio_do_int(fio,ir->ld_seed);
+ if (file_version >= 33) {
+ for(i=0; i<DIM; i++)
+ gmx_fio_do_rvec(fio,ir->deform[i]);
+ } else {
+ for(i=0; i<DIM; i++)
+ clear_rvec(ir->deform[i]);
+ }
+ if (file_version >= 14)
+ gmx_fio_do_real(fio,ir->cos_accel);
+ else if (bRead)
+ ir->cos_accel = 0;
+ gmx_fio_do_int(fio,ir->userint1);
+ gmx_fio_do_int(fio,ir->userint2);
+ gmx_fio_do_int(fio,ir->userint3);
+ gmx_fio_do_int(fio,ir->userint4);
+ gmx_fio_do_real(fio,ir->userreal1);
+ gmx_fio_do_real(fio,ir->userreal2);
+ gmx_fio_do_real(fio,ir->userreal3);
+ gmx_fio_do_real(fio,ir->userreal4);
+
+ /* AdResS stuff */
+ if (file_version >= 77) {
+ gmx_fio_do_gmx_bool(fio,ir->bAdress);
+ if(ir->bAdress){
+ if (bRead) snew(ir->adress, 1);
+ gmx_fio_do_int(fio,ir->adress->type);
+ gmx_fio_do_real(fio,ir->adress->const_wf);
+ gmx_fio_do_real(fio,ir->adress->ex_width);
+ gmx_fio_do_real(fio,ir->adress->hy_width);
+ gmx_fio_do_int(fio,ir->adress->icor);
+ gmx_fio_do_int(fio,ir->adress->site);
+ gmx_fio_do_rvec(fio,ir->adress->refs);
+ gmx_fio_do_int(fio,ir->adress->n_tf_grps);
+ gmx_fio_do_real(fio, ir->adress->ex_forcecap);
+ gmx_fio_do_int(fio, ir->adress->n_energy_grps);
+ gmx_fio_do_int(fio,ir->adress->do_hybridpairs);
+
+ if (bRead)snew(ir->adress->tf_table_index,ir->adress->n_tf_grps);
+ if (ir->adress->n_tf_grps > 0) {
+ bDum=gmx_fio_ndo_int(fio,ir->adress->tf_table_index,ir->adress->n_tf_grps);
+ }
+ if (bRead)snew(ir->adress->group_explicit,ir->adress->n_energy_grps);
+ if (ir->adress->n_energy_grps > 0) {
+ bDum=gmx_fio_ndo_int(fio, ir->adress->group_explicit,ir->adress->n_energy_grps);
+ }
+ }
+ } else {
+ ir->bAdress = FALSE;
+ }
+
+ /* pull stuff */
+ if (file_version >= 48) {
+ gmx_fio_do_int(fio,ir->ePull);
+ if (ir->ePull != epullNO) {
+ if (bRead)
+ snew(ir->pull,1);
+ do_pull(fio, ir->pull,bRead,file_version);
+ }
+ } else {
+ ir->ePull = epullNO;
+ }
+
+ /* Enforced rotation */
+ if (file_version >= 74) {
+ gmx_fio_do_int(fio,ir->bRot);
+ if (ir->bRot == TRUE) {
+ if (bRead)
+ snew(ir->rot,1);
+ do_rot(fio, ir->rot,bRead,file_version);
+ }
+ } else {
+ ir->bRot = FALSE;
+ }
+
+ /* grpopts stuff */
+ gmx_fio_do_int(fio,ir->opts.ngtc);
+ if (file_version >= 69) {
+ gmx_fio_do_int(fio,ir->opts.nhchainlength);
+ } else {
+ ir->opts.nhchainlength = 1;
+ }
+ gmx_fio_do_int(fio,ir->opts.ngacc);
+ gmx_fio_do_int(fio,ir->opts.ngfrz);
+ gmx_fio_do_int(fio,ir->opts.ngener);
+
+ if (bRead) {
+ snew(ir->opts.nrdf, ir->opts.ngtc);
+ snew(ir->opts.ref_t, ir->opts.ngtc);
+ snew(ir->opts.annealing, ir->opts.ngtc);
+ snew(ir->opts.anneal_npoints, ir->opts.ngtc);
+ snew(ir->opts.anneal_time, ir->opts.ngtc);
+ snew(ir->opts.anneal_temp, ir->opts.ngtc);
+ snew(ir->opts.tau_t, ir->opts.ngtc);
+ snew(ir->opts.nFreeze,ir->opts.ngfrz);
+ snew(ir->opts.acc, ir->opts.ngacc);
+ snew(ir->opts.egp_flags,ir->opts.ngener*ir->opts.ngener);
+ }
+ if (ir->opts.ngtc > 0) {
+ if (bRead && file_version<13) {
+ snew(tmp,ir->opts.ngtc);
+ bDum=gmx_fio_ndo_int(fio,tmp, ir->opts.ngtc);
+ for(i=0; i<ir->opts.ngtc; i++)
+ ir->opts.nrdf[i] = tmp[i];
+ sfree(tmp);
+ } else {
+ bDum=gmx_fio_ndo_real(fio,ir->opts.nrdf, ir->opts.ngtc);
+ }
+ bDum=gmx_fio_ndo_real(fio,ir->opts.ref_t,ir->opts.ngtc);
+ bDum=gmx_fio_ndo_real(fio,ir->opts.tau_t,ir->opts.ngtc);
+ if (file_version<33 && ir->eI==eiBD) {
+ for(i=0; i<ir->opts.ngtc; i++)
+ ir->opts.tau_t[i] = bd_temp;
+ }
+ }
+ if (ir->opts.ngfrz > 0)
+ bDum=gmx_fio_ndo_ivec(fio,ir->opts.nFreeze,ir->opts.ngfrz);
+ if (ir->opts.ngacc > 0)
+ gmx_fio_ndo_rvec(fio,ir->opts.acc,ir->opts.ngacc);
+ if (file_version >= 12)
+ bDum=gmx_fio_ndo_int(fio,ir->opts.egp_flags,
+ ir->opts.ngener*ir->opts.ngener);
+
+ if(bRead && file_version < 26) {
+ for(i=0;i<ir->opts.ngtc;i++) {
+ if(bSimAnn) {
+ ir->opts.annealing[i] = eannSINGLE;
+ ir->opts.anneal_npoints[i] = 2;
+ snew(ir->opts.anneal_time[i],2);
+ snew(ir->opts.anneal_temp[i],2);
+ /* calculate the starting/ending temperatures from reft, zerotemptime, and nsteps */
+ finish_t = ir->init_t + ir->nsteps * ir->delta_t;
+ init_temp = ir->opts.ref_t[i]*(1-ir->init_t/zerotemptime);
+ finish_temp = ir->opts.ref_t[i]*(1-finish_t/zerotemptime);
+ ir->opts.anneal_time[i][0] = ir->init_t;
+ ir->opts.anneal_time[i][1] = finish_t;
+ ir->opts.anneal_temp[i][0] = init_temp;
+ ir->opts.anneal_temp[i][1] = finish_temp;
+ } else {
+ ir->opts.annealing[i] = eannNO;
+ ir->opts.anneal_npoints[i] = 0;
+ }
+ }
+ } else {
+ /* file version 26 or later */
+ /* First read the lists with annealing and npoints for each group */
+ bDum=gmx_fio_ndo_int(fio,ir->opts.annealing,ir->opts.ngtc);
+ bDum=gmx_fio_ndo_int(fio,ir->opts.anneal_npoints,ir->opts.ngtc);
+ for(j=0;j<(ir->opts.ngtc);j++) {
+ k=ir->opts.anneal_npoints[j];
+ if(bRead) {
+ snew(ir->opts.anneal_time[j],k);
+ snew(ir->opts.anneal_temp[j],k);
+ }
+ bDum=gmx_fio_ndo_real(fio,ir->opts.anneal_time[j],k);
+ bDum=gmx_fio_ndo_real(fio,ir->opts.anneal_temp[j],k);
+ }
+ }
+ /* Walls */
+ if (file_version >= 45) {
+ gmx_fio_do_int(fio,ir->nwall);
+ gmx_fio_do_int(fio,ir->wall_type);
+ if (file_version >= 50)
+ gmx_fio_do_real(fio,ir->wall_r_linpot);
+ else
+ ir->wall_r_linpot = -1;
+ gmx_fio_do_int(fio,ir->wall_atomtype[0]);
+ gmx_fio_do_int(fio,ir->wall_atomtype[1]);
+ gmx_fio_do_real(fio,ir->wall_density[0]);
+ gmx_fio_do_real(fio,ir->wall_density[1]);
+ gmx_fio_do_real(fio,ir->wall_ewald_zfac);
+ } else {
+ ir->nwall = 0;
+ ir->wall_type = 0;
+ ir->wall_atomtype[0] = -1;
+ ir->wall_atomtype[1] = -1;
+ ir->wall_density[0] = 0;
+ ir->wall_density[1] = 0;
+ ir->wall_ewald_zfac = 3;
+ }
+ /* Cosine stuff for electric fields */
+ for(j=0; (j<DIM); j++) {
+ gmx_fio_do_int(fio,ir->ex[j].n);
+ gmx_fio_do_int(fio,ir->et[j].n);
+ if (bRead) {
+ snew(ir->ex[j].a, ir->ex[j].n);
+ snew(ir->ex[j].phi,ir->ex[j].n);
+ snew(ir->et[j].a, ir->et[j].n);
+ snew(ir->et[j].phi,ir->et[j].n);
+ }
+ bDum=gmx_fio_ndo_real(fio,ir->ex[j].a, ir->ex[j].n);
+ bDum=gmx_fio_ndo_real(fio,ir->ex[j].phi,ir->ex[j].n);
+ bDum=gmx_fio_ndo_real(fio,ir->et[j].a, ir->et[j].n);
+ bDum=gmx_fio_ndo_real(fio,ir->et[j].phi,ir->et[j].n);
+ }
+
+ /* QMMM stuff */
+ if(file_version>=39){
+ gmx_fio_do_gmx_bool(fio,ir->bQMMM);
+ gmx_fio_do_int(fio,ir->QMMMscheme);
+ gmx_fio_do_real(fio,ir->scalefactor);
+ gmx_fio_do_int(fio,ir->opts.ngQM);
+ if (bRead) {
+ snew(ir->opts.QMmethod, ir->opts.ngQM);
+ snew(ir->opts.QMbasis, ir->opts.ngQM);
+ snew(ir->opts.QMcharge, ir->opts.ngQM);
+ snew(ir->opts.QMmult, ir->opts.ngQM);
+ snew(ir->opts.bSH, ir->opts.ngQM);
+ snew(ir->opts.CASorbitals, ir->opts.ngQM);
+ snew(ir->opts.CASelectrons,ir->opts.ngQM);
+ snew(ir->opts.SAon, ir->opts.ngQM);
+ snew(ir->opts.SAoff, ir->opts.ngQM);
+ snew(ir->opts.SAsteps, ir->opts.ngQM);
+ snew(ir->opts.bOPT, ir->opts.ngQM);
+ snew(ir->opts.bTS, ir->opts.ngQM);
+ }
+ if (ir->opts.ngQM > 0) {
+ bDum=gmx_fio_ndo_int(fio,ir->opts.QMmethod,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_int(fio,ir->opts.QMbasis,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_int(fio,ir->opts.QMcharge,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_int(fio,ir->opts.QMmult,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_gmx_bool(fio,ir->opts.bSH,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_int(fio,ir->opts.CASorbitals,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_int(fio,ir->opts.CASelectrons,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_real(fio,ir->opts.SAon,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_real(fio,ir->opts.SAoff,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_int(fio,ir->opts.SAsteps,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_gmx_bool(fio,ir->opts.bOPT,ir->opts.ngQM);
+ bDum=gmx_fio_ndo_gmx_bool(fio,ir->opts.bTS,ir->opts.ngQM);
+ }
+ /* end of QMMM stuff */
+ }
+}
+
+
+static void do_harm(t_fileio *fio, t_iparams *iparams,gmx_bool bRead)
+{
+ gmx_fio_do_real(fio,iparams->harmonic.rA);
+ gmx_fio_do_real(fio,iparams->harmonic.krA);
+ gmx_fio_do_real(fio,iparams->harmonic.rB);
+ gmx_fio_do_real(fio,iparams->harmonic.krB);
+}
+
+void do_iparams(t_fileio *fio, t_functype ftype,t_iparams *iparams,
+ gmx_bool bRead, int file_version)
+{
- if (fver >= 77)
++ int idum;
+ gmx_bool bDum;
+ real rdum;
+
+ if (!bRead)
+ gmx_fio_set_comment(fio, interaction_function[ftype].name);
+ switch (ftype) {
+ case F_ANGLES:
+ case F_G96ANGLES:
+ case F_BONDS:
+ case F_G96BONDS:
+ case F_HARMONIC:
+ case F_IDIHS:
+ do_harm(fio, iparams,bRead);
+ if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && bRead) {
+ /* Correct incorrect storage of parameters */
+ iparams->pdihs.phiB = iparams->pdihs.phiA;
+ iparams->pdihs.cpB = iparams->pdihs.cpA;
+ }
+ break;
+ case F_LINEAR_ANGLES:
+ gmx_fio_do_real(fio,iparams->linangle.klinA);
+ gmx_fio_do_real(fio,iparams->linangle.aA);
+ gmx_fio_do_real(fio,iparams->linangle.klinB);
+ gmx_fio_do_real(fio,iparams->linangle.aB);
+ break;
+ case F_FENEBONDS:
+ gmx_fio_do_real(fio,iparams->fene.bm);
+ gmx_fio_do_real(fio,iparams->fene.kb);
+ break;
+ case F_RESTRBONDS:
+ gmx_fio_do_real(fio,iparams->restraint.lowA);
+ gmx_fio_do_real(fio,iparams->restraint.up1A);
+ gmx_fio_do_real(fio,iparams->restraint.up2A);
+ gmx_fio_do_real(fio,iparams->restraint.kA);
+ gmx_fio_do_real(fio,iparams->restraint.lowB);
+ gmx_fio_do_real(fio,iparams->restraint.up1B);
+ gmx_fio_do_real(fio,iparams->restraint.up2B);
+ gmx_fio_do_real(fio,iparams->restraint.kB);
+ break;
+ case F_TABBONDS:
+ case F_TABBONDSNC:
+ case F_TABANGLES:
+ case F_TABDIHS:
+ gmx_fio_do_real(fio,iparams->tab.kA);
+ gmx_fio_do_int(fio,iparams->tab.table);
+ gmx_fio_do_real(fio,iparams->tab.kB);
+ break;
+ case F_CROSS_BOND_BONDS:
+ gmx_fio_do_real(fio,iparams->cross_bb.r1e);
+ gmx_fio_do_real(fio,iparams->cross_bb.r2e);
+ gmx_fio_do_real(fio,iparams->cross_bb.krr);
+ break;
+ case F_CROSS_BOND_ANGLES:
+ gmx_fio_do_real(fio,iparams->cross_ba.r1e);
+ gmx_fio_do_real(fio,iparams->cross_ba.r2e);
+ gmx_fio_do_real(fio,iparams->cross_ba.r3e);
+ gmx_fio_do_real(fio,iparams->cross_ba.krt);
+ break;
+ case F_UREY_BRADLEY:
+ gmx_fio_do_real(fio,iparams->u_b.thetaA);
+ gmx_fio_do_real(fio,iparams->u_b.kthetaA);
+ gmx_fio_do_real(fio,iparams->u_b.r13A);
+ gmx_fio_do_real(fio,iparams->u_b.kUBA);
+ if (file_version >= 79) {
+ gmx_fio_do_real(fio,iparams->u_b.thetaB);
+ gmx_fio_do_real(fio,iparams->u_b.kthetaB);
+ gmx_fio_do_real(fio,iparams->u_b.r13B);
+ gmx_fio_do_real(fio,iparams->u_b.kUBB);
+ } else {
+ iparams->u_b.thetaB=iparams->u_b.thetaA;
+ iparams->u_b.kthetaB=iparams->u_b.kthetaA;
+ iparams->u_b.r13B=iparams->u_b.r13A;
+ iparams->u_b.kUBB=iparams->u_b.kUBA;
+ }
+ break;
+ case F_QUARTIC_ANGLES:
+ gmx_fio_do_real(fio,iparams->qangle.theta);
+ bDum=gmx_fio_ndo_real(fio,iparams->qangle.c,5);
+ break;
+ case F_BHAM:
+ gmx_fio_do_real(fio,iparams->bham.a);
+ gmx_fio_do_real(fio,iparams->bham.b);
+ gmx_fio_do_real(fio,iparams->bham.c);
+ break;
+ case F_MORSE:
+ gmx_fio_do_real(fio,iparams->morse.b0A);
+ gmx_fio_do_real(fio,iparams->morse.cbA);
+ gmx_fio_do_real(fio,iparams->morse.betaA);
+ if (file_version >= 79) {
+ gmx_fio_do_real(fio,iparams->morse.b0B);
+ gmx_fio_do_real(fio,iparams->morse.cbB);
+ gmx_fio_do_real(fio,iparams->morse.betaB);
+ } else {
+ iparams->morse.b0B = iparams->morse.b0A;
+ iparams->morse.cbB = iparams->morse.cbA;
+ iparams->morse.betaB = iparams->morse.betaA;
+ }
+ break;
+ case F_CUBICBONDS:
+ gmx_fio_do_real(fio,iparams->cubic.b0);
+ gmx_fio_do_real(fio,iparams->cubic.kb);
+ gmx_fio_do_real(fio,iparams->cubic.kcub);
+ break;
+ case F_CONNBONDS:
+ break;
+ case F_POLARIZATION:
+ gmx_fio_do_real(fio,iparams->polarize.alpha);
+ break;
+ case F_ANHARM_POL:
+ gmx_fio_do_real(fio,iparams->anharm_polarize.alpha);
+ gmx_fio_do_real(fio,iparams->anharm_polarize.drcut);
+ gmx_fio_do_real(fio,iparams->anharm_polarize.khyp);
+ break;
+ case F_WATER_POL:
+ if (file_version < 31)
+ gmx_fatal(FARGS,"Old tpr files with water_polarization not supported. Make a new.");
+ gmx_fio_do_real(fio,iparams->wpol.al_x);
+ gmx_fio_do_real(fio,iparams->wpol.al_y);
+ gmx_fio_do_real(fio,iparams->wpol.al_z);
+ gmx_fio_do_real(fio,iparams->wpol.rOH);
+ gmx_fio_do_real(fio,iparams->wpol.rHH);
+ gmx_fio_do_real(fio,iparams->wpol.rOD);
+ break;
+ case F_THOLE_POL:
+ gmx_fio_do_real(fio,iparams->thole.a);
+ gmx_fio_do_real(fio,iparams->thole.alpha1);
+ gmx_fio_do_real(fio,iparams->thole.alpha2);
+ gmx_fio_do_real(fio,iparams->thole.rfac);
+ break;
+ case F_LJ:
+ gmx_fio_do_real(fio,iparams->lj.c6);
+ gmx_fio_do_real(fio,iparams->lj.c12);
+ break;
+ case F_LJ14:
+ gmx_fio_do_real(fio,iparams->lj14.c6A);
+ gmx_fio_do_real(fio,iparams->lj14.c12A);
+ gmx_fio_do_real(fio,iparams->lj14.c6B);
+ gmx_fio_do_real(fio,iparams->lj14.c12B);
+ break;
+ case F_LJC14_Q:
+ gmx_fio_do_real(fio,iparams->ljc14.fqq);
+ gmx_fio_do_real(fio,iparams->ljc14.qi);
+ gmx_fio_do_real(fio,iparams->ljc14.qj);
+ gmx_fio_do_real(fio,iparams->ljc14.c6);
+ gmx_fio_do_real(fio,iparams->ljc14.c12);
+ break;
+ case F_LJC_PAIRS_NB:
+ gmx_fio_do_real(fio,iparams->ljcnb.qi);
+ gmx_fio_do_real(fio,iparams->ljcnb.qj);
+ gmx_fio_do_real(fio,iparams->ljcnb.c6);
+ gmx_fio_do_real(fio,iparams->ljcnb.c12);
+ break;
+ case F_PDIHS:
+ case F_PIDIHS:
+ case F_ANGRES:
+ case F_ANGRESZ:
+ gmx_fio_do_real(fio,iparams->pdihs.phiA);
+ gmx_fio_do_real(fio,iparams->pdihs.cpA);
+ if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && file_version < 42) {
+ /* Read the incorrectly stored multiplicity */
+ gmx_fio_do_real(fio,iparams->harmonic.rB);
+ gmx_fio_do_real(fio,iparams->harmonic.krB);
+ iparams->pdihs.phiB = iparams->pdihs.phiA;
+ iparams->pdihs.cpB = iparams->pdihs.cpA;
+ } else {
+ gmx_fio_do_real(fio,iparams->pdihs.phiB);
+ gmx_fio_do_real(fio,iparams->pdihs.cpB);
+ gmx_fio_do_int(fio,iparams->pdihs.mult);
+ }
+ break;
+ case F_DISRES:
+ gmx_fio_do_int(fio,iparams->disres.label);
+ gmx_fio_do_int(fio,iparams->disres.type);
+ gmx_fio_do_real(fio,iparams->disres.low);
+ gmx_fio_do_real(fio,iparams->disres.up1);
+ gmx_fio_do_real(fio,iparams->disres.up2);
+ gmx_fio_do_real(fio,iparams->disres.kfac);
+ break;
+ case F_ORIRES:
+ gmx_fio_do_int(fio,iparams->orires.ex);
+ gmx_fio_do_int(fio,iparams->orires.label);
+ gmx_fio_do_int(fio,iparams->orires.power);
+ gmx_fio_do_real(fio,iparams->orires.c);
+ gmx_fio_do_real(fio,iparams->orires.obs);
+ gmx_fio_do_real(fio,iparams->orires.kfac);
+ break;
+ case F_DIHRES:
++ if ( file_version < 72) {
++ gmx_fio_do_int(fio,idum);
++ gmx_fio_do_int(fio,idum);
++ }
+ gmx_fio_do_real(fio,iparams->dihres.phiA);
+ gmx_fio_do_real(fio,iparams->dihres.dphiA);
+ gmx_fio_do_real(fio,iparams->dihres.kfacA);
+ if (file_version >= 72) {
+ gmx_fio_do_real(fio,iparams->dihres.phiB);
+ gmx_fio_do_real(fio,iparams->dihres.dphiB);
+ gmx_fio_do_real(fio,iparams->dihres.kfacB);
+ } else {
+ iparams->dihres.phiB=iparams->dihres.phiA;
+ iparams->dihres.dphiB=iparams->dihres.dphiA;
+ iparams->dihres.kfacB=iparams->dihres.kfacA;
+ }
+ break;
+ case F_POSRES:
+ gmx_fio_do_rvec(fio,iparams->posres.pos0A);
+ gmx_fio_do_rvec(fio,iparams->posres.fcA);
+ if (bRead && file_version < 27) {
+ copy_rvec(iparams->posres.pos0A,iparams->posres.pos0B);
+ copy_rvec(iparams->posres.fcA,iparams->posres.fcB);
+ } else {
+ gmx_fio_do_rvec(fio,iparams->posres.pos0B);
+ gmx_fio_do_rvec(fio,iparams->posres.fcB);
+ }
+ break;
+ case F_FBPOSRES:
+ gmx_fio_do_int(fio,iparams->fbposres.geom);
+ gmx_fio_do_rvec(fio,iparams->fbposres.pos0);
+ gmx_fio_do_real(fio,iparams->fbposres.r);
+ gmx_fio_do_real(fio,iparams->fbposres.k);
+ break;
+ case F_RBDIHS:
+ bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcA,NR_RBDIHS);
+ if(file_version>=25)
+ bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcB,NR_RBDIHS);
+ break;
+ case F_FOURDIHS:
+ /* Fourier dihedrals are internally represented
+ * as Ryckaert-Bellemans since those are faster to compute.
+ */
+ bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcA, NR_RBDIHS);
+ bDum=gmx_fio_ndo_real(fio,iparams->rbdihs.rbcB, NR_RBDIHS);
+ break;
+ case F_CONSTR:
+ case F_CONSTRNC:
+ gmx_fio_do_real(fio,iparams->constr.dA);
+ gmx_fio_do_real(fio,iparams->constr.dB);
+ break;
+ case F_SETTLE:
+ gmx_fio_do_real(fio,iparams->settle.doh);
+ gmx_fio_do_real(fio,iparams->settle.dhh);
+ break;
+ case F_VSITE2:
+ gmx_fio_do_real(fio,iparams->vsite.a);
+ break;
+ case F_VSITE3:
+ case F_VSITE3FD:
+ case F_VSITE3FAD:
+ gmx_fio_do_real(fio,iparams->vsite.a);
+ gmx_fio_do_real(fio,iparams->vsite.b);
+ break;
+ case F_VSITE3OUT:
+ case F_VSITE4FD:
+ case F_VSITE4FDN:
+ gmx_fio_do_real(fio,iparams->vsite.a);
+ gmx_fio_do_real(fio,iparams->vsite.b);
+ gmx_fio_do_real(fio,iparams->vsite.c);
+ break;
+ case F_VSITEN:
+ gmx_fio_do_int(fio,iparams->vsiten.n);
+ gmx_fio_do_real(fio,iparams->vsiten.a);
+ break;
+ case F_GB12:
+ case F_GB13:
+ case F_GB14:
+ /* We got rid of some parameters in version 68 */
+ if(bRead && file_version<68)
+ {
+ gmx_fio_do_real(fio,rdum);
+ gmx_fio_do_real(fio,rdum);
+ gmx_fio_do_real(fio,rdum);
+ gmx_fio_do_real(fio,rdum);
+ }
+ gmx_fio_do_real(fio,iparams->gb.sar);
+ gmx_fio_do_real(fio,iparams->gb.st);
+ gmx_fio_do_real(fio,iparams->gb.pi);
+ gmx_fio_do_real(fio,iparams->gb.gbr);
+ gmx_fio_do_real(fio,iparams->gb.bmlt);
+ break;
+ case F_CMAP:
+ gmx_fio_do_int(fio,iparams->cmap.cmapA);
+ gmx_fio_do_int(fio,iparams->cmap.cmapB);
+ break;
+ default:
+ gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
+ ftype,interaction_function[ftype].name,__FILE__,__LINE__);
+ }
+ if (!bRead)
+ gmx_fio_unset_comment(fio);
+}
+
+static void do_ilist(t_fileio *fio, t_ilist *ilist,gmx_bool bRead,int file_version,
+ int ftype)
+{
+ int i,k,idum;
+ gmx_bool bDum=TRUE;
+
+ if (!bRead) {
+ gmx_fio_set_comment(fio, interaction_function[ftype].name);
+ }
+ if (file_version < 44) {
+ for(i=0; i<MAXNODES; i++)
+ gmx_fio_do_int(fio,idum);
+ }
+ gmx_fio_do_int(fio,ilist->nr);
+ if (bRead)
+ snew(ilist->iatoms,ilist->nr);
+ bDum=gmx_fio_ndo_int(fio,ilist->iatoms,ilist->nr);
+ if (!bRead)
+ gmx_fio_unset_comment(fio);
+}
+
+static void do_ffparams(t_fileio *fio, gmx_ffparams_t *ffparams,
+ gmx_bool bRead, int file_version)
+{
+ int idum,i,j;
+ gmx_bool bDum=TRUE;
+ unsigned int k;
+
+ gmx_fio_do_int(fio,ffparams->atnr);
+ if (file_version < 57) {
+ gmx_fio_do_int(fio,idum);
+ }
+ gmx_fio_do_int(fio,ffparams->ntypes);
+ if (bRead && debug)
+ fprintf(debug,"ffparams->atnr = %d, ntypes = %d\n",
+ ffparams->atnr,ffparams->ntypes);
+ if (bRead) {
+ snew(ffparams->functype,ffparams->ntypes);
+ snew(ffparams->iparams,ffparams->ntypes);
+ }
+ /* Read/write all the function types */
+ bDum=gmx_fio_ndo_int(fio,ffparams->functype,ffparams->ntypes);
+ if (bRead && debug)
+ pr_ivec(debug,0,"functype",ffparams->functype,ffparams->ntypes,TRUE);
+
+ if (file_version >= 66) {
+ gmx_fio_do_double(fio,ffparams->reppow);
+ } else {
+ ffparams->reppow = 12.0;
+ }
+
+ if (file_version >= 57) {
+ gmx_fio_do_real(fio,ffparams->fudgeQQ);
+ }
+
+ /* Check whether all these function types are supported by the code.
+ * In practice the code is backwards compatible, which means that the
+ * numbering may have to be altered from old numbering to new numbering
+ */
+ for (i=0; (i<ffparams->ntypes); i++) {
+ if (bRead)
+ /* Loop over file versions */
+ for (k=0; (k<NFTUPD); k++)
+ /* Compare the read file_version to the update table */
+ if ((file_version < ftupd[k].fvnr) &&
+ (ffparams->functype[i] >= ftupd[k].ftype)) {
+ ffparams->functype[i] += 1;
+ if (debug) {
+ fprintf(debug,"Incrementing function type %d to %d (due to %s)\n",
+ i,ffparams->functype[i],
+ interaction_function[ftupd[k].ftype].longname);
+ fflush(debug);
+ }
+ }
+
+ do_iparams(fio, ffparams->functype[i],&ffparams->iparams[i],bRead,
+ file_version);
+ if (bRead && debug)
+ pr_iparams(debug,ffparams->functype[i],&ffparams->iparams[i]);
+ }
+}
+
+static void add_settle_atoms(t_ilist *ilist)
+{
+ int i;
+
+ /* Settle used to only store the first atom: add the other two */
+ srenew(ilist->iatoms,2*ilist->nr);
+ for(i=ilist->nr/2-1; i>=0; i--)
+ {
+ ilist->iatoms[4*i+0] = ilist->iatoms[2*i+0];
+ ilist->iatoms[4*i+1] = ilist->iatoms[2*i+1];
+ ilist->iatoms[4*i+2] = ilist->iatoms[2*i+1] + 1;
+ ilist->iatoms[4*i+3] = ilist->iatoms[2*i+1] + 2;
+ }
+ ilist->nr = 2*ilist->nr;
+}
+
+static void do_ilists(t_fileio *fio, t_ilist *ilist,gmx_bool bRead,
+ int file_version)
+{
+ int i,j,renum[F_NRE];
+ gmx_bool bDum=TRUE,bClear;
+ unsigned int k;
+
+ for(j=0; (j<F_NRE); j++) {
+ bClear = FALSE;
+ if (bRead)
+ for (k=0; k<NFTUPD; k++)
+ if ((file_version < ftupd[k].fvnr) && (j == ftupd[k].ftype))
+ bClear = TRUE;
+ if (bClear) {
+ ilist[j].nr = 0;
+ ilist[j].iatoms = NULL;
+ } else {
+ do_ilist(fio, &ilist[j],bRead,file_version,j);
+ if (file_version < 78 && j == F_SETTLE && ilist[j].nr > 0)
+ {
+ add_settle_atoms(&ilist[j]);
+ }
+ }
+ /*
+ if (bRead && gmx_debug_at)
+ pr_ilist(debug,0,interaction_function[j].longname,
+ functype,&ilist[j],TRUE);
+ */
+ }
+}
+
+static void do_idef(t_fileio *fio, gmx_ffparams_t *ffparams,gmx_moltype_t *molt,
+ gmx_bool bRead, int file_version)
+{
+ do_ffparams(fio, ffparams,bRead,file_version);
+
+ if (file_version >= 54) {
+ gmx_fio_do_real(fio,ffparams->fudgeQQ);
+ }
+
+ do_ilists(fio, molt->ilist,bRead,file_version);
+}
+
+static void do_block(t_fileio *fio, t_block *block,gmx_bool bRead,int file_version)
+{
+ int i,idum,dum_nra,*dum_a;
+ gmx_bool bDum=TRUE;
+
+ if (file_version < 44)
+ for(i=0; i<MAXNODES; i++)
+ gmx_fio_do_int(fio,idum);
+ gmx_fio_do_int(fio,block->nr);
+ if (file_version < 51)
+ gmx_fio_do_int(fio,dum_nra);
+ if (bRead) {
+ block->nalloc_index = block->nr+1;
+ snew(block->index,block->nalloc_index);
+ }
+ bDum=gmx_fio_ndo_int(fio,block->index,block->nr+1);
+
+ if (file_version < 51 && dum_nra > 0) {
+ snew(dum_a,dum_nra);
+ bDum=gmx_fio_ndo_int(fio,dum_a,dum_nra);
+ sfree(dum_a);
+ }
+}
+
+static void do_blocka(t_fileio *fio, t_blocka *block,gmx_bool bRead,
+ int file_version)
+{
+ int i,idum;
+ gmx_bool bDum=TRUE;
+
+ if (file_version < 44)
+ for(i=0; i<MAXNODES; i++)
+ gmx_fio_do_int(fio,idum);
+ gmx_fio_do_int(fio,block->nr);
+ gmx_fio_do_int(fio,block->nra);
+ if (bRead) {
+ block->nalloc_index = block->nr+1;
+ snew(block->index,block->nalloc_index);
+ block->nalloc_a = block->nra;
+ snew(block->a,block->nalloc_a);
+ }
+ bDum=gmx_fio_ndo_int(fio,block->index,block->nr+1);
+ bDum=gmx_fio_ndo_int(fio,block->a,block->nra);
+}
+
+static void do_atom(t_fileio *fio, t_atom *atom,int ngrp,gmx_bool bRead,
+ int file_version, gmx_groups_t *groups,int atnr)
+{
+ int i,myngrp;
+
+ gmx_fio_do_real(fio,atom->m);
+ gmx_fio_do_real(fio,atom->q);
+ gmx_fio_do_real(fio,atom->mB);
+ gmx_fio_do_real(fio,atom->qB);
+ gmx_fio_do_ushort(fio, atom->type);
+ gmx_fio_do_ushort(fio, atom->typeB);
+ gmx_fio_do_int(fio,atom->ptype);
+ gmx_fio_do_int(fio,atom->resind);
+ if (file_version >= 52)
+ gmx_fio_do_int(fio,atom->atomnumber);
+ else if (bRead)
+ atom->atomnumber = NOTSET;
+ if (file_version < 23)
+ myngrp = 8;
+ else if (file_version < 39)
+ myngrp = 9;
+ else
+ myngrp = ngrp;
+
+ if (file_version < 57) {
+ unsigned char uchar[egcNR];
+ gmx_fio_ndo_uchar(fio,uchar,myngrp);
+ for(i=myngrp; (i<ngrp); i++) {
+ uchar[i] = 0;
+ }
+ /* Copy the old data format to the groups struct */
+ for(i=0; i<ngrp; i++) {
+ groups->grpnr[i][atnr] = uchar[i];
+ }
+ }
+}
+
+/* Serialize ngrp group-index arrays; groups beyond what the file version
+ * stored are given a single default member on read. */
+static void do_grps(t_fileio *fio, int ngrp,t_grps grps[],gmx_bool bRead,
+ int file_version)
+{
+ int i,j,myngrp;
+ gmx_bool bDum=TRUE;
+
+ /* Older formats stored fewer group categories (see do_atom) */
+ if (file_version < 23)
+ myngrp = 8;
+ else if (file_version < 39)
+ myngrp = 9;
+ else
+ myngrp = ngrp;
+
+ for(j=0; (j<ngrp); j++) {
+ if (j<myngrp) {
+ gmx_fio_do_int(fio,grps[j].nr);
+ if (bRead)
+ snew(grps[j].nm_ind,grps[j].nr);
+ bDum=gmx_fio_ndo_int(fio,grps[j].nm_ind,grps[j].nr);
+ }
+ else {
+ grps[j].nr = 1;
+ snew(grps[j].nm_ind,grps[j].nr);
+ }
+ }
+}
+
+/* Serialize a symbol-table string handle: on disk it is the integer
+ * index of the string in the symtab, in memory a char** handle. */
+static void do_symstr(t_fileio *fio, char ***nm,gmx_bool bRead,t_symtab *symtab)
+{
+ int ls;
+
+ if (bRead) {
+ gmx_fio_do_int(fio,ls);
+ *nm = get_symtab_handle(symtab,ls);
+ }
+ else {
+ ls = lookup_symtab(symtab,*nm);
+ gmx_fio_do_int(fio,ls);
+ }
+}
+
+/* Serialize an array of nstr symtab string handles (see do_symstr). */
+static void do_strstr(t_fileio *fio, int nstr,char ***nm,gmx_bool bRead,
+ t_symtab *symtab)
+{
+ int j;
+
+ for (j=0; (j<nstr); j++)
+ do_symstr(fio, &(nm[j]),bRead,symtab);
+}
+
+/* Serialize n residue-info entries. Files older than v63 lack residue
+ * number / insertion code, so defaults (1-based number, blank ic) are set. */
+static void do_resinfo(t_fileio *fio, int n,t_resinfo *ri,gmx_bool bRead,
+ t_symtab *symtab, int file_version)
+{
+ int j;
+
+ for (j=0; (j<n); j++) {
+ do_symstr(fio, &(ri[j].name),bRead,symtab);
+ if (file_version >= 63) {
+ gmx_fio_do_int(fio,ri[j].nr);
+ gmx_fio_do_uchar(fio, ri[j].ic);
+ } else {
+ ri[j].nr = j + 1;
+ ri[j].ic = ' ';
+ }
+ }
+}
+
+/* Serialize a t_atoms struct. For files older than v57 the group
+ * bookkeeping (names and per-atom group numbers) lived inside the atoms
+ * section, so it is read into/written from the groups struct here. */
+static void do_atoms(t_fileio *fio, t_atoms *atoms,gmx_bool bRead,t_symtab *symtab,
+ int file_version,
+ gmx_groups_t *groups)
+{
+ int i;
+
+ gmx_fio_do_int(fio,atoms->nr);
+ gmx_fio_do_int(fio,atoms->nres);
+ if (file_version < 57) {
+ gmx_fio_do_int(fio,groups->ngrpname);
+ for(i=0; i<egcNR; i++) {
+ groups->ngrpnr[i] = atoms->nr;
+ snew(groups->grpnr[i],groups->ngrpnr[i]);
+ }
+ }
+ if (bRead) {
+ snew(atoms->atom,atoms->nr);
+ snew(atoms->atomname,atoms->nr);
+ snew(atoms->atomtype,atoms->nr);
+ snew(atoms->atomtypeB,atoms->nr);
+ snew(atoms->resinfo,atoms->nres);
+ if (file_version < 57) {
+ snew(groups->grpname,groups->ngrpname);
+ }
+ atoms->pdbinfo = NULL;
+ }
+ for(i=0; (i<atoms->nr); i++) {
+ do_atom(fio, &atoms->atom[i],egcNR,bRead, file_version,groups,i);
+ }
+ do_strstr(fio, atoms->nr,atoms->atomname,bRead,symtab);
+ if (bRead && (file_version <= 20)) {
+ /* Very old files lack atom type names: substitute placeholders */
+ for(i=0; i<atoms->nr; i++) {
+ atoms->atomtype[i] = put_symtab(symtab,"?");
+ atoms->atomtypeB[i] = put_symtab(symtab,"?");
+ }
+ } else {
+ do_strstr(fio, atoms->nr,atoms->atomtype,bRead,symtab);
+ do_strstr(fio, atoms->nr,atoms->atomtypeB,bRead,symtab);
+ }
+ do_resinfo(fio, atoms->nres,atoms->resinfo,bRead,symtab,file_version);
+
+ if (file_version < 57) {
+ do_strstr(fio, groups->ngrpname,groups->grpname,bRead,symtab);
+
+ do_grps(fio, egcNR,groups->grps,bRead,file_version);
+ }
+}
+
+/* Serialize the gmx_groups_t struct (v57+ layout): group definitions,
+ * group names, and per-category per-atom group-number arrays. An empty
+ * grpnr array is stored as count 0 / NULL pointer. */
+static void do_groups(t_fileio *fio, gmx_groups_t *groups,
+ gmx_bool bRead,t_symtab *symtab,
+ int file_version)
+{
+ int g,n,i;
+ gmx_bool bDum=TRUE;
+
+ do_grps(fio, egcNR,groups->grps,bRead,file_version);
+ gmx_fio_do_int(fio,groups->ngrpname);
+ if (bRead) {
+ snew(groups->grpname,groups->ngrpname);
+ }
+ do_strstr(fio, groups->ngrpname,groups->grpname,bRead,symtab);
+ for(g=0; g<egcNR; g++) {
+ gmx_fio_do_int(fio,groups->ngrpnr[g]);
+ if (groups->ngrpnr[g] == 0) {
+ if (bRead) {
+ groups->grpnr[g] = NULL;
+ }
+ } else {
+ if (bRead) {
+ snew(groups->grpnr[g],groups->ngrpnr[g]);
+ }
+ bDum=gmx_fio_ndo_uchar(fio, groups->grpnr[g],groups->ngrpnr[g]);
+ }
+ }
+}
+
+/* Serialize t_atomtypes (implicit-solvent parameters). Field presence is
+ * version-gated: atomnumber from v40, GB radius and S_hct from v60;
+ * files older than v26 have no atomtypes section at all. */
+static void do_atomtypes(t_fileio *fio, t_atomtypes *atomtypes,gmx_bool bRead,
+ t_symtab *symtab,int file_version)
+{
+ int i,j;
+ gmx_bool bDum = TRUE;
+
+ if (file_version > 25) {
+ gmx_fio_do_int(fio,atomtypes->nr);
+ j=atomtypes->nr;
+ if (bRead) {
+ snew(atomtypes->radius,j);
+ snew(atomtypes->vol,j);
+ snew(atomtypes->surftens,j);
+ snew(atomtypes->atomnumber,j);
+ snew(atomtypes->gb_radius,j);
+ snew(atomtypes->S_hct,j);
+ }
+ bDum=gmx_fio_ndo_real(fio,atomtypes->radius,j);
+ bDum=gmx_fio_ndo_real(fio,atomtypes->vol,j);
+ bDum=gmx_fio_ndo_real(fio,atomtypes->surftens,j);
+ if(file_version >= 40)
+ {
+ bDum=gmx_fio_ndo_int(fio,atomtypes->atomnumber,j);
+ }
+ if(file_version >= 60)
+ {
+ bDum=gmx_fio_ndo_real(fio,atomtypes->gb_radius,j);
+ bDum=gmx_fio_ndo_real(fio,atomtypes->S_hct,j);
+ }
+ } else {
+ /* File versions prior to 26 cannot do GBSA,
+ * so they don't use this structure
+ */
+ atomtypes->nr = 0;
+ atomtypes->radius = NULL;
+ atomtypes->vol = NULL;
+ atomtypes->surftens = NULL;
+ atomtypes->atomnumber = NULL;
+ atomtypes->gb_radius = NULL;
+ atomtypes->S_hct = NULL;
+ }
+}
+
+/* Serialize the symbol table. On read all strings land in one buffer;
+ * on write the (possibly chained) buffers are walked, and a mismatch
+ * between symtab->nr and the actual string count is fatal. */
+static void do_symtab(t_fileio *fio, t_symtab *symtab,gmx_bool bRead)
+{
+ int i,nr;
+ t_symbuf *symbuf;
+ char buf[STRLEN];
+
+ gmx_fio_do_int(fio,symtab->nr);
+ nr = symtab->nr;
+ if (bRead) {
+ snew(symtab->symbuf,1);
+ symbuf = symtab->symbuf;
+ symbuf->bufsize = nr;
+ snew(symbuf->buf,nr);
+ for (i=0; (i<nr); i++) {
+ gmx_fio_do_string(fio,buf);
+ symbuf->buf[i]=strdup(buf);
+ }
+ }
+ else {
+ symbuf = symtab->symbuf;
+ while (symbuf!=NULL) {
+ for (i=0; (i<symbuf->bufsize) && (i<nr); i++)
+ gmx_fio_do_string(fio,symbuf->buf[i]);
+ nr-=i;
+ symbuf=symbuf->next;
+ }
+ if (nr != 0)
+ gmx_fatal(FARGS,"nr of symtab strings left: %d",nr);
+ }
+}
+
+/* Serialize the CMAP correction grids: each of ngrid grids stores 4 reals
+ * (value + derivatives) per grid point on a spacing x spacing mesh. */
+static void do_cmap(t_fileio *fio, gmx_cmap_t *cmap_grid, gmx_bool bRead)
+{
+ int i,j,ngrid,gs,nelem;
+
+ gmx_fio_do_int(fio,cmap_grid->ngrid);
+ gmx_fio_do_int(fio,cmap_grid->grid_spacing);
+
+ ngrid = cmap_grid->ngrid;
+ gs = cmap_grid->grid_spacing;
+ nelem = gs * gs;
+
+ if(bRead)
+ {
+ snew(cmap_grid->cmapdata,ngrid);
+
+ for(i=0;i<cmap_grid->ngrid;i++)
+ {
+ snew(cmap_grid->cmapdata[i].cmap,4*nelem);
+ }
+ }
+
+ for(i=0;i<cmap_grid->ngrid;i++)
+ {
+ for(j=0;j<nelem;j++)
+ {
+ gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4]);
+ gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4+1]);
+ gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4+2]);
+ gmx_fio_do_real(fio,cmap_grid->cmapdata[i].cmap[j*4+3]);
+ }
+ }
+}
+
+
+/* Assign chain numbers/ids per molecule: every molecule gets a fresh
+ * chain number; molecules of CHAIN_MIN_ATOMS or more atoms also get a
+ * letter id 'A'..'Z'. If only one lettered chain exists, ids are blanked. */
+void tpx_make_chain_identifiers(t_atoms *atoms,t_block *mols)
+{
+ int m,a,a0,a1,r;
+ char c,chainid;
+ int chainnum;
+
+ /* We always assign a new chain number, but save the chain id characters
+ * for larger molecules.
+ */
+#define CHAIN_MIN_ATOMS 15
+
+ chainnum=0;
+ chainid='A';
+ for(m=0; m<mols->nr; m++)
+ {
+ a0=mols->index[m];
+ a1=mols->index[m+1];
+ if ((a1-a0 >= CHAIN_MIN_ATOMS) && (chainid <= 'Z'))
+ {
+ c=chainid;
+ chainid++;
+ }
+ else
+ {
+ c=' ';
+ }
+ for(a=a0; a<a1; a++)
+ {
+ atoms->resinfo[atoms->atom[a].resind].chainnum = chainnum;
+ atoms->resinfo[atoms->atom[a].resind].chainid = c;
+ }
+ chainnum++;
+ }
+
+ /* Blank out the chain id if there was only one chain */
+ if (chainid == 'B')
+ {
+ for(r=0; r<atoms->nres; r++)
+ {
+ atoms->resinfo[r].chainid = ' ';
+ }
+ }
+}
+
+/* Serialize one molecule type: name (v57+), atoms, interaction lists and
+ * charge groups (v57+), and exclusions. */
+static void do_moltype(t_fileio *fio, gmx_moltype_t *molt,gmx_bool bRead,
+ t_symtab *symtab, int file_version,
+ gmx_groups_t *groups)
+{
+ int i;
+
+ if (file_version >= 57) {
+ do_symstr(fio, &(molt->name),bRead,symtab);
+ }
+
+ do_atoms(fio, &molt->atoms, bRead, symtab, file_version, groups);
+
+ if (bRead && gmx_debug_at) {
+ pr_atoms(debug,0,"atoms",&molt->atoms,TRUE);
+ }
+
+ if (file_version >= 57) {
+ do_ilists(fio, molt->ilist,bRead,file_version);
+
+ do_block(fio, &molt->cgs,bRead,file_version);
+ if (bRead && gmx_debug_at) {
+ pr_block(debug,0,"cgs",&molt->cgs,TRUE);
+ }
+ }
+
+ /* This used to be in the atoms struct */
+ do_blocka(fio, &molt->excls, bRead, file_version);
+}
+
+/* Serialize one molecule block (type, copy count, atoms per molecule)
+ * plus optional A/B-state position-restraint reference coordinates. */
+static void do_molblock(t_fileio *fio, gmx_molblock_t *molb,gmx_bool bRead,
+ int file_version)
+{
+ int i;
+
+ gmx_fio_do_int(fio,molb->type);
+ gmx_fio_do_int(fio,molb->nmol);
+ gmx_fio_do_int(fio,molb->natoms_mol);
+ /* Position restraint coordinates */
+ gmx_fio_do_int(fio,molb->nposres_xA);
+ if (molb->nposres_xA > 0) {
+ if (bRead) {
+ snew(molb->posres_xA,molb->nposres_xA);
+ }
+ gmx_fio_ndo_rvec(fio,molb->posres_xA,molb->nposres_xA);
+ }
+ gmx_fio_do_int(fio,molb->nposres_xB);
+ if (molb->nposres_xB > 0) {
+ if (bRead) {
+ snew(molb->posres_xB,molb->nposres_xB);
+ }
+ gmx_fio_ndo_rvec(fio,molb->posres_xB,molb->nposres_xB);
+ }
+
+}
+
+/* Build the global molecule index block from the molblock list: one
+ * index entry per molecule instance, fence-post style (nr+1 entries). */
+static t_block mtop_mols(gmx_mtop_t *mtop)
+{
+ int mb,m,a,mol;
+ t_block mols;
+
+ mols.nr = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ mols.nr += mtop->molblock[mb].nmol;
+ }
+ mols.nalloc_index = mols.nr + 1;
+ snew(mols.index,mols.nalloc_index);
+
+ a = 0;
+ m = 0;
+ mols.index[m] = a;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ for(mol=0; mol<mtop->molblock[mb].nmol; mol++) {
+ a += mtop->molblock[mb].natoms_mol;
+ m++;
+ mols.index[m] = a;
+ }
+ }
+
+ return mols;
+}
+
+/* For pre-57 files: reconstruct position-restraint reference coordinates
+ * in the first molblock from the interaction parameters. Handles both
+ * normal (F_POSRES) and flat-bottomed (F_FBPOSRES) restraints, and a
+ * B-state copy when any A/B reference positions differ. */
+static void add_posres_molblock(gmx_mtop_t *mtop)
+{
+ t_ilist *il,*ilfb;
+ int am,i,mol,a;
+ gmx_bool bFE;
+ gmx_molblock_t *molb;
+ t_iparams *ip;
+
+ /* posres reference positions are stored in ip->posres (if present) and
+ in ip->fbposres (if present). If normal and flat-bottomed posres are present,
+ posres.pos0A are identical to fbposres.pos0. */
+ il = &mtop->moltype[0].ilist[F_POSRES];
+ ilfb = &mtop->moltype[0].ilist[F_FBPOSRES];
+ if (il->nr == 0 && ilfb->nr == 0) {
+ return;
+ }
+ am = 0;
+ bFE = FALSE;
+ /* Find the highest restrained atom and whether a B state exists */
+ for(i=0; i<il->nr; i+=2) {
+ ip = &mtop->ffparams.iparams[il->iatoms[i]];
+ am = max(am,il->iatoms[i+1]);
+ if (ip->posres.pos0B[XX] != ip->posres.pos0A[XX] ||
+ ip->posres.pos0B[YY] != ip->posres.pos0A[YY] ||
+ ip->posres.pos0B[ZZ] != ip->posres.pos0A[ZZ]) {
+ bFE = TRUE;
+ }
+ }
+ /* This loop is required if we have only flat-bottomed posres:
+ - set am
+ - bFE == FALSE (no B-state for flat-bottomed posres) */
+ if (il->nr == 0)
+ {
+ for(i=0; i<ilfb->nr; i+=2) {
+ ip = &mtop->ffparams.iparams[ilfb->iatoms[i]];
+ am = max(am,ilfb->iatoms[i+1]);
+ }
+ }
+ /* Make the posres coordinate block end at a molecule end */
+ mol = 0;
+ while(am >= mtop->mols.index[mol+1]) {
+ mol++;
+ }
+ molb = &mtop->molblock[0];
+ molb->nposres_xA = mtop->mols.index[mol+1];
+ snew(molb->posres_xA,molb->nposres_xA);
+ if (bFE) {
+ molb->nposres_xB = molb->nposres_xA;
+ snew(molb->posres_xB,molb->nposres_xB);
+ } else {
+ molb->nposres_xB = 0;
+ }
+ for(i=0; i<il->nr; i+=2) {
+ ip = &mtop->ffparams.iparams[il->iatoms[i]];
+ a = il->iatoms[i+1];
+ molb->posres_xA[a][XX] = ip->posres.pos0A[XX];
+ molb->posres_xA[a][YY] = ip->posres.pos0A[YY];
+ molb->posres_xA[a][ZZ] = ip->posres.pos0A[ZZ];
+ if (bFE) {
+ molb->posres_xB[a][XX] = ip->posres.pos0B[XX];
+ molb->posres_xB[a][YY] = ip->posres.pos0B[YY];
+ molb->posres_xB[a][ZZ] = ip->posres.pos0B[ZZ];
+ }
+ }
+ if (il->nr == 0)
+ {
+ /* If only flat-bottomed posres are present, take reference pos from them.
+ Here: bFE == FALSE */
+ for(i=0; i<ilfb->nr; i+=2)
+ {
+ ip = &mtop->ffparams.iparams[ilfb->iatoms[i]];
+ a = ilfb->iatoms[i+1];
+ molb->posres_xA[a][XX] = ip->fbposres.pos0[XX];
+ molb->posres_xA[a][YY] = ip->fbposres.pos0[YY];
+ molb->posres_xA[a][ZZ] = ip->fbposres.pos0[ZZ];
+ }
+ }
+}
+
+/* Count, per distance-restraint label, how many atom pairs share that
+ * label (consecutive entries with equal label) and store the count in
+ * the first entry's parameters. F_DISRES iatoms come in triplets. */
+static void set_disres_npair(gmx_mtop_t *mtop)
+{
+ int mt,i,npair;
+ t_iparams *ip;
+ t_ilist *il;
+ t_iatom *a;
+
+ ip = mtop->ffparams.iparams;
+
+ for(mt=0; mt<mtop->nmoltype; mt++) {
+ il = &mtop->moltype[mt].ilist[F_DISRES];
+ if (il->nr > 0) {
+ a = il->iatoms;
+ npair = 0;
+ for(i=0; i<il->nr; i+=3) {
+ npair++;
+ if (i+3 == il->nr || ip[a[i]].disres.label != ip[a[i+3]].disres.label) {
+ ip[a[i]].disres.npair = npair;
+ npair = 0;
+ }
+ }
+ }
+ }
+}
+
+/* Serialize the full molecular topology. Handles two on-disk layouts:
+ * v57+ (explicit moltypes/molblocks/groups) and older files, which store
+ * a single implicit moltype and need several fields reconstructed
+ * (molblock, posres coordinates, disres setup) on read. */
+static void do_mtop(t_fileio *fio, gmx_mtop_t *mtop,gmx_bool bRead,
+ int file_version)
+{
+ int mt,mb,i;
+ t_blocka dumb;
+
+ if (bRead)
+ init_mtop(mtop);
+ do_symtab(fio, &(mtop->symtab),bRead);
+ if (bRead && debug)
+ pr_symtab(debug,0,"symtab",&mtop->symtab);
+
+ do_symstr(fio, &(mtop->name),bRead,&(mtop->symtab));
+
+ if (file_version >= 57) {
+ do_ffparams(fio, &mtop->ffparams,bRead,file_version);
+
+ gmx_fio_do_int(fio,mtop->nmoltype);
+ } else {
+ mtop->nmoltype = 1;
+ }
+ if (bRead) {
+ snew(mtop->moltype,mtop->nmoltype);
+ if (file_version < 57) {
+ mtop->moltype[0].name = mtop->name;
+ }
+ }
+ for(mt=0; mt<mtop->nmoltype; mt++) {
+ do_moltype(fio, &mtop->moltype[mt],bRead,&mtop->symtab,file_version,
+ &mtop->groups);
+ }
+
+ if (file_version >= 57) {
+ gmx_fio_do_int(fio,mtop->nmolblock);
+ } else {
+ mtop->nmolblock = 1;
+ }
+ if (bRead) {
+ snew(mtop->molblock,mtop->nmolblock);
+ }
+ if (file_version >= 57) {
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ do_molblock(fio, &mtop->molblock[mb],bRead,file_version);
+ }
+ gmx_fio_do_int(fio,mtop->natoms);
+ } else {
+ /* Pre-57: synthesize the single molblock from moltype 0 */
+ mtop->molblock[0].type = 0;
+ mtop->molblock[0].nmol = 1;
+ mtop->molblock[0].natoms_mol = mtop->moltype[0].atoms.nr;
+ mtop->molblock[0].nposres_xA = 0;
+ mtop->molblock[0].nposres_xB = 0;
+ }
+
+ do_atomtypes (fio, &(mtop->atomtypes),bRead,&(mtop->symtab), file_version);
+ if (bRead && debug)
+ pr_atomtypes(debug,0,"atomtypes",&mtop->atomtypes,TRUE);
+
+ if (file_version < 57) {
+ /* Debug statements are inside do_idef */
+ do_idef (fio, &mtop->ffparams,&mtop->moltype[0],bRead,file_version);
+ mtop->natoms = mtop->moltype[0].atoms.nr;
+ }
+
+ if(file_version >= 65)
+ {
+ do_cmap(fio, &mtop->ffparams.cmap_grid,bRead);
+ }
+ else
+ {
+ mtop->ffparams.cmap_grid.ngrid = 0;
+ mtop->ffparams.cmap_grid.grid_spacing = 0;
+ mtop->ffparams.cmap_grid.cmapdata = NULL;
+ }
+
+ if (file_version >= 57) {
+ do_groups(fio, &mtop->groups,bRead,&(mtop->symtab),file_version);
+ }
+
+ if (file_version < 57) {
+ do_block(fio, &mtop->moltype[0].cgs,bRead,file_version);
+ if (bRead && gmx_debug_at) {
+ pr_block(debug,0,"cgs",&mtop->moltype[0].cgs,TRUE);
+ }
+ do_block(fio, &mtop->mols,bRead,file_version);
+ /* Add the posres coordinates to the molblock */
+ add_posres_molblock(mtop);
+ }
+ if (bRead) {
+ if (file_version >= 57) {
+ mtop->mols = mtop_mols(mtop);
+ }
+ if (gmx_debug_at) {
+ pr_block(debug,0,"mols",&mtop->mols,TRUE);
+ }
+ }
+
+ if (file_version < 51) {
+ /* Here used to be the shake blocks */
+ do_blocka(fio, &dumb,bRead,file_version);
+ if (dumb.nr > 0)
+ sfree(dumb.index);
+ if (dumb.nra > 0)
+ sfree(dumb.a);
+ }
+
+ if (bRead) {
+ close_symtab(&(mtop->symtab));
+ }
+}
+
+/* If TopOnlyOK is TRUE then we can read even future versions
+ * of tpx files, provided the file_generation hasn't changed.
+ * If it is FALSE, we need the inputrecord too, and bail out
+ * if the file is newer than the program.
+ *
+ * The version and generation of the topology (see top of this file)
+ * are returned in the two last arguments.
+ *
+ * If possible, we will read the inputrec even when TopOnlyOK is TRUE.
+ */
+static void do_tpxheader(t_fileio *fio,gmx_bool bRead,t_tpxheader *tpx,
+ gmx_bool TopOnlyOK, int *file_version,
+ int *file_generation)
+{
+ char buf[STRLEN];
+ char file_tag[STRLEN];
+ gmx_bool bDouble;
+ int precision;
+ int fver,fgen;
+ int idum=0;
+ real rdum=0;
+
+ gmx_fio_checktype(fio);
+ gmx_fio_setdebug(fio,bDebugMode());
+
+ /* NEW! XDR tpb file */
+ precision = sizeof(real);
+ if (bRead) {
+ gmx_fio_do_string(fio,buf);
+ if (strncmp(buf,"VERSION",7))
+ gmx_fatal(FARGS,"Can not read file %s,\n"
+ " this file is from a Gromacs version which is older than 2.0\n"
+ " Make a new one with grompp or use a gro or pdb file, if possible",
+ gmx_fio_getname(fio));
+ gmx_fio_do_int(fio,precision);
+ bDouble = (precision == sizeof(double));
+ if ((precision != sizeof(float)) && !bDouble)
+ gmx_fatal(FARGS,"Unknown precision in file %s: real is %d bytes "
+ "instead of %d or %d",
+ gmx_fio_getname(fio),precision,sizeof(float),sizeof(double));
+ gmx_fio_setprecision(fio,bDouble);
+ fprintf(stderr,"Reading file %s, %s (%s precision)\n",
+ gmx_fio_getname(fio),buf,bDouble ? "double" : "single");
+ }
+ else {
+ gmx_fio_write_string(fio,GromacsVersion());
+ bDouble = (precision == sizeof(double));
+ gmx_fio_setprecision(fio,bDouble);
+ gmx_fio_do_int(fio,precision);
+ fver = tpx_version;
+ sprintf(file_tag,"%s",tpx_tag);
+ fgen = tpx_generation;
+ }
+
+ /* Check versions! */
+ gmx_fio_do_int(fio,fver);
++
++ /* This is for backward compatibility with development versions 77-79
++ * where the tag was, mistakenly, placed before the generation,
++ * which would cause a segv instead of a proper error message
++ * when reading the topology only from tpx with <77 code.
++ */
++ if (fver >= 77 && fver <= 79)
++ {
++ gmx_fio_do_string(fio,file_tag);
++ }
+
++ if (fver >= 26)
++ {
++ gmx_fio_do_int(fio,fgen);
++ }
++ else
++ {
++ fgen = 0;
++ }
++
++ if (fver >= 81)
+ {
+ gmx_fio_do_string(fio,file_tag);
+ }
+ if (bRead)
+ {
+ if (fver < 77)
+ {
+ /* Versions before 77 don't have the tag, set it to release */
+ sprintf(file_tag,"%s",TPX_TAG_RELEASE);
+ }
+
+ if (strcmp(file_tag,tpx_tag) != 0)
+ {
+ fprintf(stderr,"Note: file tpx tag '%s', software tpx tag '%s'\n",
+ file_tag,tpx_tag);
+
+ /* We only support reading tpx files with the same tag as the code
+ * or tpx files with the release tag and with lower version number.
+ * Note the outer negation: we bail out UNLESS the file has the
+ * release tag AND an older version.
+ */
++ if (!(strcmp(file_tag,TPX_TAG_RELEASE) == 0 && fver < tpx_version))
+ {
+ gmx_fatal(FARGS,"tpx tag/version mismatch: reading tpx file (%s) version %d, tag '%s' with program for tpx version %d, tag '%s'",
+ gmx_fio_getname(fio),fver,file_tag,
+ tpx_version,tpx_tag);
+ }
+ }
+ }
+
+ if (file_version != NULL)
+ {
+ *file_version = fver;
+ }
+ if (file_generation != NULL)
+ {
+ *file_generation = fgen;
+ }
+
+
+ if ((fver <= tpx_incompatible_version) ||
+ ((fver > tpx_version) && !TopOnlyOK) ||
+ (fgen > tpx_generation))
+ gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",
+ gmx_fio_getname(fio),fver,tpx_version);
+
+ do_section(fio,eitemHEADER,bRead);
+ gmx_fio_do_int(fio,tpx->natoms);
+ if (fver >= 28)
+ gmx_fio_do_int(fio,tpx->ngtc);
+ else
+ tpx->ngtc = 0;
+ if (fver < 62) {
+ gmx_fio_do_int(fio,idum);
+ gmx_fio_do_real(fio,rdum);
+ }
+ /*a better decision will eventually (5.0 or later) need to be made
+ on how to treat the alchemical state of the system, which can now
+ vary through a simulation, and cannot be completely described
+ though a single lambda variable, or even a single state
+ index. Eventually, should probably be a vector. MRS*/
+ if (fver >= 79)
+ {
+ gmx_fio_do_int(fio,tpx->fep_state);
+ }
+ gmx_fio_do_real(fio,tpx->lambda);
+ gmx_fio_do_int(fio,tpx->bIr);
+ gmx_fio_do_int(fio,tpx->bTop);
+ gmx_fio_do_int(fio,tpx->bX);
+ gmx_fio_do_int(fio,tpx->bV);
+ gmx_fio_do_int(fio,tpx->bF);
+ gmx_fio_do_int(fio,tpx->bBox);
+
+ if((fgen > tpx_generation)) {
+ /* This can only happen if TopOnlyOK=TRUE */
+ tpx->bIr=FALSE;
+ }
+}
+
+/* Serialize a complete tpx file body: header, box, thermostat state,
+ * (pre-26: inputrec), topology, coordinates/velocities/forces, and
+ * (26+: trailing inputrec). Returns the ePBC read from the file, or -1
+ * when no inputrec/pbc information is available. */
+static int do_tpx(t_fileio *fio, gmx_bool bRead,
+ t_inputrec *ir,t_state *state,rvec *f,gmx_mtop_t *mtop,
+ gmx_bool bXVallocated)
+{
+ t_tpxheader tpx;
+ t_inputrec dum_ir;
+ gmx_mtop_t dum_top;
+ gmx_bool TopOnlyOK,bDum=TRUE;
+ int file_version,file_generation;
+ int i;
+ rvec *xptr,*vptr;
+ int ePBC;
+ gmx_bool bPeriodicMols;
+
+ if (!bRead) {
+ tpx.natoms = state->natoms;
+ tpx.ngtc = state->ngtc; /* need to add nnhpres here? */
+ tpx.fep_state = state->fep_state;
+ tpx.lambda = state->lambda[efptFEP];
+ tpx.bIr = (ir != NULL);
+ tpx.bTop = (mtop != NULL);
+ tpx.bX = (state->x != NULL);
+ tpx.bV = (state->v != NULL);
+ tpx.bF = (f != NULL);
+ tpx.bBox = TRUE;
+ }
+
+ TopOnlyOK = (ir==NULL);
+
+ do_tpxheader(fio,bRead,&tpx,TopOnlyOK,&file_version,&file_generation);
+
+ if (bRead) {
+ state->flags = 0;
+ /* state->lambda = tpx.lambda;*/ /*remove this eventually? */
+ /* The init_state calls initialize the Nose-Hoover xi integrals to zero */
+ if (bXVallocated) {
+ /* Caller owns x/v buffers: preserve them across init_state */
+ xptr = state->x;
+ vptr = state->v;
+ init_state(state,0,tpx.ngtc,0,0,0); /* nose-hoover chains */ /* eventually, need to add nnhpres here? */
+ state->natoms = tpx.natoms;
+ state->nalloc = tpx.natoms;
+ state->x = xptr;
+ state->v = vptr;
+ } else {
+ init_state(state,tpx.natoms,tpx.ngtc,0,0,0); /* nose-hoover chains */
+ }
+ }
+
+/* Fatal error if the file lacks a section the caller asked for */
+#define do_test(fio,b,p) if (bRead && (p!=NULL) && !b) gmx_fatal(FARGS,"No %s in %s",#p,gmx_fio_getname(fio))
+
+ do_test(fio,tpx.bBox,state->box);
+ do_section(fio,eitemBOX,bRead);
+ if (tpx.bBox) {
+ gmx_fio_ndo_rvec(fio,state->box,DIM);
+ if (file_version >= 51) {
+ gmx_fio_ndo_rvec(fio,state->box_rel,DIM);
+ } else {
+ /* We initialize box_rel after reading the inputrec */
+ clear_mat(state->box_rel);
+ }
+ if (file_version >= 28) {
+ gmx_fio_ndo_rvec(fio,state->boxv,DIM);
+ if (file_version < 56) {
+ matrix mdum;
+ gmx_fio_ndo_rvec(fio,mdum,DIM);
+ }
+ }
+ }
+
+ if (state->ngtc > 0 && file_version >= 28) {
+ real *dumv;
+ /*ndo_double(state->nosehoover_xi,state->ngtc,bDum);*/
+ /*ndo_double(state->nosehoover_vxi,state->ngtc,bDum);*/
+ /*ndo_double(state->therm_integral,state->ngtc,bDum);*/
+ snew(dumv,state->ngtc);
+ if (file_version < 69) {
+ bDum=gmx_fio_ndo_real(fio,dumv,state->ngtc);
+ }
+ /* These used to be the Berendsen tcoupl_lambda's */
+ bDum=gmx_fio_ndo_real(fio,dumv,state->ngtc);
+ sfree(dumv);
+ }
+
+ /* Prior to tpx version 26, the inputrec was here.
+ * I moved it to enable partial forward-compatibility
+ * for analysis/viewer programs.
+ */
+ if(file_version<26) {
+ do_test(fio,tpx.bIr,ir);
+ do_section(fio,eitemIR,bRead);
+ if (tpx.bIr) {
+ if (ir) {
+ do_inputrec(fio, ir,bRead,file_version,
+ mtop ? &mtop->ffparams.fudgeQQ : NULL);
+ if (bRead && debug)
+ pr_inputrec(debug,0,"inputrec",ir,FALSE);
+ }
+ else {
+ do_inputrec(fio, &dum_ir,bRead,file_version,
+ mtop ? &mtop->ffparams.fudgeQQ :NULL);
+ if (bRead && debug)
+ pr_inputrec(debug,0,"inputrec",&dum_ir,FALSE);
+ done_inputrec(&dum_ir);
+ }
+
+ }
+ }
+
+ do_test(fio,tpx.bTop,mtop);
+ do_section(fio,eitemTOP,bRead);
+ if (tpx.bTop) {
++ int mtop_file_version = file_version;
++ /*allow reading of Gromacs 4.6 files*/
++ if (mtop_file_version>80 && mtop_file_version<90)
++ {
++ mtop_file_version = 79;
++ }
+ if (mtop) {
++ do_mtop(fio,mtop,bRead, mtop_file_version);
+ } else {
++ do_mtop(fio,&dum_top,bRead,mtop_file_version);
+ done_mtop(&dum_top,TRUE);
+ }
+ }
+ do_test(fio,tpx.bX,state->x);
+ do_section(fio,eitemX,bRead);
+ if (tpx.bX) {
+ if (bRead) {
+ state->flags |= (1<<estX);
+ }
+ gmx_fio_ndo_rvec(fio,state->x,state->natoms);
+ }
+
+ do_test(fio,tpx.bV,state->v);
+ do_section(fio,eitemV,bRead);
+ if (tpx.bV) {
+ if (bRead) {
+ state->flags |= (1<<estV);
+ }
+ gmx_fio_ndo_rvec(fio,state->v,state->natoms);
+ }
+
+ do_test(fio,tpx.bF,f);
+ do_section(fio,eitemF,bRead);
+ if (tpx.bF) gmx_fio_ndo_rvec(fio,f,state->natoms);
+
+ /* Starting with tpx version 26, we have the inputrec
+ * at the end of the file, so we can ignore it
+ * if the file is newer than the software (but still the
+ * same generation - see comments at the top of this file.
+ *
+ *
+ */
+ ePBC = -1;
+ bPeriodicMols = FALSE;
+ if (file_version >= 26) {
+ do_test(fio,tpx.bIr,ir);
+ do_section(fio,eitemIR,bRead);
+ if (tpx.bIr) {
+ if (file_version >= 53) {
+ /* Removed the pbc info from do_inputrec, since we always want it */
+ if (!bRead) {
+ ePBC = ir->ePBC;
+ bPeriodicMols = ir->bPeriodicMols;
+ }
+ gmx_fio_do_int(fio,ePBC);
+ gmx_fio_do_gmx_bool(fio,bPeriodicMols);
+ }
+ if (file_generation <= tpx_generation && ir) {
+ do_inputrec(fio, ir,bRead,file_version,mtop ? &mtop->ffparams.fudgeQQ : NULL);
+ if (bRead && debug)
+ pr_inputrec(debug,0,"inputrec",ir,FALSE);
+ if (file_version < 51)
+ set_box_rel(ir,state);
+ if (file_version < 53) {
+ ePBC = ir->ePBC;
+ bPeriodicMols = ir->bPeriodicMols;
+ }
+ }
+ if (bRead && ir && file_version >= 53) {
+ /* We need to do this after do_inputrec, since that initializes ir */
+ ir->ePBC = ePBC;
+ ir->bPeriodicMols = bPeriodicMols;
+ }
+ }
+ }
+
+ if (bRead)
+ {
+ if (tpx.bIr && ir)
+ {
+ if (state->ngtc == 0)
+ {
+ /* Reading old version without tcoupl state data: set it */
+ init_gtc_state(state,ir->opts.ngtc,0,ir->opts.nhchainlength);
+ }
+ if (tpx.bTop && mtop)
+ {
+ if (file_version < 57)
+ {
+ if (mtop->moltype[0].ilist[F_DISRES].nr > 0)
+ {
+ ir->eDisre = edrSimple;
+ }
+ else
+ {
+ ir->eDisre = edrNone;
+ }
+ }
+ set_disres_npair(mtop);
+ }
+ }
+
+ if (tpx.bTop && mtop)
+ {
+ gmx_mtop_finalize(mtop);
+ }
+
+ if (file_version >= 57)
+ {
+ char *env;
+ int ienv;
+ env = getenv("GMX_NOCHARGEGROUPS");
+ if (env != NULL)
+ {
+ sscanf(env,"%d",&ienv);
+ fprintf(stderr,"\nFound env.var. GMX_NOCHARGEGROUPS = %d\n",
+ ienv);
+ if (ienv > 0)
+ {
+ fprintf(stderr,
+ "Will make single atomic charge groups in non-solvent%s\n",
+ ienv > 1 ? " and solvent" : "");
+ gmx_mtop_make_atomic_charge_groups(mtop,ienv==1);
+ }
+ fprintf(stderr,"\n");
+ }
+ }
+ }
+
+ return ePBC;
+}
+
+/************************************************************
+ *
+ * The following routines are the exported ones
+ *
+ ************************************************************/
+
+/* Open a tpx file; thin wrapper over gmx_fio_open. */
+t_fileio *open_tpx(const char *fn,const char *mode)
+{
+ return gmx_fio_open(fn,mode);
+}
+
+/* Close a tpx file opened with open_tpx. */
+void close_tpx(t_fileio *fio)
+{
+ gmx_fio_close(fio);
+}
+
+/* Read only the tpx header from fn (see do_tpxheader for semantics). */
+void read_tpxheader(const char *fn, t_tpxheader *tpx, gmx_bool TopOnlyOK,
+ int *file_version, int *file_generation)
+{
+ t_fileio *fio;
+
+ fio = open_tpx(fn,"r");
+ do_tpxheader(fio,TRUE,tpx,TopOnlyOK,file_version,file_generation);
+ close_tpx(fio);
+}
+
+/* Write inputrec, state and topology to a new tpx file fn. */
+void write_tpx_state(const char *fn,
+ t_inputrec *ir,t_state *state,gmx_mtop_t *mtop)
+{
+ t_fileio *fio;
+
+ fio = open_tpx(fn,"w");
+ do_tpx(fio,FALSE,ir,state,NULL,mtop,FALSE);
+ close_tpx(fio);
+}
+
+/* Read inputrec, state, optional forces and topology from tpx file fn. */
+void read_tpx_state(const char *fn,
+ t_inputrec *ir,t_state *state,rvec *f,gmx_mtop_t *mtop)
+{
+ t_fileio *fio;
+
+ fio = open_tpx(fn,"r");
+ do_tpx(fio,TRUE,ir,state,f,mtop,FALSE);
+ close_tpx(fio);
+}
+
+/* Read a tpx file into caller-provided x/v arrays (bXVallocated path).
+ * Returns the ePBC from the file; box is copied out when non-NULL. */
+int read_tpx(const char *fn,
+ t_inputrec *ir, matrix box,int *natoms,
+ rvec *x,rvec *v,rvec *f,gmx_mtop_t *mtop)
+{
+ t_fileio *fio;
+ t_state state;
+ int ePBC;
+
+ state.x = x;
+ state.v = v;
+ fio = open_tpx(fn,"r");
+ ePBC = do_tpx(fio,TRUE,ir,&state,f,mtop,TRUE);
+ close_tpx(fio);
+ *natoms = state.natoms;
+ if (box)
+ copy_mat(state.box,box);
+ /* x/v belong to the caller: detach before freeing the state */
+ state.x = NULL;
+ state.v = NULL;
+ done_state(&state);
+
+ return ePBC;
+}
+
+/* As read_tpx, but converts the mtop to a flat t_topology for callers
+ * that use the legacy topology representation. */
+int read_tpx_top(const char *fn,
+ t_inputrec *ir, matrix box,int *natoms,
+ rvec *x,rvec *v,rvec *f,t_topology *top)
+{
+ gmx_mtop_t mtop;
+ t_topology *ltop;
+ int ePBC;
+
+ ePBC = read_tpx(fn,ir,box,natoms,x,v,f,&mtop);
+
+ *top = gmx_mtop_t_to_t_topology(&mtop);
+
+ return ePBC;
+}
+
+/* Return TRUE when the file name has one of the tpx-family extensions. */
+gmx_bool fn2bTPX(const char *file)
+{
+ switch (fn2ftp(file)) {
+ case efTPR:
+ case efTPB:
+ case efTPA:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+/* Read a structure+topology from either a tpx-family file or a generic
+ * coordinate file (gro/pdb/...). Returns TRUE when a real topology was
+ * read. With bMass, masses are looked up from the atom-property database
+ * for non-tpx input (missing masses are left as-is, logged to debug). */
+gmx_bool read_tps_conf(const char *infile,char *title,t_topology *top,int *ePBC,
+ rvec **x,rvec **v,matrix box,gmx_bool bMass)
+{
+ t_tpxheader header;
+ int natoms,i,version,generation;
+ gmx_bool bTop,bXNULL=FALSE;
+ gmx_mtop_t *mtop;
+ t_topology *topconv;
+ gmx_atomprop_t aps;
+
+ bTop = fn2bTPX(infile);
+ *ePBC = -1;
+ if (bTop) {
+ read_tpxheader(infile,&header,TRUE,&version,&generation);
+ if (x)
+ snew(*x,header.natoms);
+ if (v)
+ snew(*v,header.natoms);
+ snew(mtop,1);
+ *ePBC = read_tpx(infile,NULL,box,&natoms,
+ (x==NULL) ? NULL : *x,(v==NULL) ? NULL : *v,NULL,mtop);
+ *top = gmx_mtop_t_to_t_topology(mtop);
+ sfree(mtop);
+ strcpy(title,*top->name);
+ tpx_make_chain_identifiers(&top->atoms,&top->mols);
+ }
+ else {
+ get_stx_coordnum(infile,&natoms);
+ init_t_atoms(&top->atoms,natoms,(fn2ftp(infile) == efPDB));
+ if (x == NULL)
+ {
+ /* Caller did not want coordinates: allocate a throwaway buffer,
+ * since read_stx_conf requires one */
+ snew(x,1);
+ bXNULL = TRUE;
+ }
+ snew(*x,natoms);
+ if (v)
+ snew(*v,natoms);
+ read_stx_conf(infile,title,&top->atoms,*x,(v==NULL) ? NULL : *v,ePBC,box);
+ if (bXNULL)
+ {
+ sfree(*x);
+ sfree(x);
+ }
+ if (bMass) {
+ aps = gmx_atomprop_init();
+ for(i=0; (i<natoms); i++)
+ if (!gmx_atomprop_query(aps,epropMass,
+ *top->atoms.resinfo[top->atoms.atom[i].resind].name,
+ *top->atoms.atomname[i],
+ &(top->atoms.atom[i].m))) {
+ if (debug)
+ fprintf(debug,"Can not find mass for atom %s %d %s, setting to 1\n",
+ *top->atoms.resinfo[top->atoms.atom[i].resind].name,
+ top->atoms.resinfo[top->atoms.atom[i].resind].nr,
+ *top->atoms.atomname[i]);
+ }
+ gmx_atomprop_destroy(aps);
+ }
+ top->idef.ntypes=-1;
+ }
+
+ return bTop;
+}
--- /dev/null
- PS("ns-type",ENS(ir->ns_type));
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/* This file is completely threadsafe - please keep it that way! */
+#ifdef GMX_THREAD_MPI
+#include <thread_mpi.h>
+#endif
+
+
+#include <stdio.h>
+#include "smalloc.h"
+#include "typedefs.h"
+#include "names.h"
+#include "txtdump.h"
+#include "string2.h"
+#include "vec.h"
+#include "macros.h"
+
+
+/* Print n spaces to fp; returns n so callers can track the indent level. */
+int pr_indent(FILE *fp,int n)
+{
+ int i;
+
+ for (i=0; i<n; i++) (void) fprintf(fp," ");
+ return n;
+}
+
+/* Return nonzero when p is non-NULL; otherwise print "<title>: not
+ * available" at the given indent and return 0. */
+int available(FILE *fp,void *p,int indent,const char *title)
+{
+ if (!p) {
+ if (indent > 0)
+ pr_indent(fp,indent);
+ (void) fprintf(fp,"%s: not available\n",title);
+ }
+ return (p!=NULL);
+}
+
+/* Print a section title and return the indent for its contents. */
+int pr_title(FILE *fp,int indent,const char *title)
+{
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s:\n",title);
+ return (indent+INDENT);
+}
+
+/* Print a section title with element count; returns the child indent. */
+int pr_title_n(FILE *fp,int indent,const char *title,int n)
+{
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s (%d):\n",title,n);
+ return (indent+INDENT);
+}
+
+/* Print a section title with n1 x n2 dimensions; returns the child indent. */
+int pr_title_nxn(FILE *fp,int indent,const char *title,int n1,int n2)
+{
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s (%dx%d):\n",title,n1,n2);
+ return (indent+INDENT);
+}
+
+/* Pretty-print an int array, one element per line; with bShowNumbers
+ * off the index is printed as -1 (keeps diffs index-insensitive). */
+void pr_ivec(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
+{
+ int i;
+
+ if (available(fp,vec,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++)
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d]=%d\n",title,bShowNumbers?i:-1,vec[i]);
+ }
+ }
+}
+
+/* Pretty-print an int array, compressing runs of 3+ consecutive values
+ * into a single "{a,...,b}" block line. */
+void pr_ivec_block(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
+{
+ int i,j;
+
+ if (available(fp,vec,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ i = 0;
+ while (i < n)
+ {
+ /* Extend j to the end of the consecutive run starting at i */
+ j = i+1;
+ while (j < n && vec[j] == vec[j-1]+1)
+ {
+ j++;
+ }
+ /* Print consecutive groups of 3 or more as blocks */
+ if (j - i < 3)
+ {
+ while(i < j)
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d]=%d\n",
+ title,bShowNumbers?i:-1,vec[i]);
+ i++;
+ }
+ }
+ else
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d,...,%d] = {%d,...,%d}\n",
+ title,
+ bShowNumbers?i:-1,
+ bShowNumbers?j-1:-1,
+ vec[i],vec[j-1]);
+ i = j;
+ }
+ }
+ }
+}
+
+/* Pretty-print a boolean array as TRUE/FALSE strings, one per line. */
+void pr_bvec(FILE *fp,int indent,const char *title,gmx_bool vec[],int n, gmx_bool bShowNumbers)
+{
+ int i;
+
+ if (available(fp,vec,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++)
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d]=%s\n",title,bShowNumbers?i:-1,
+ EBOOL(vec[i]));
+ }
+ }
+}
+
+/* Pretty-print an array of integer triplets (ivec), one "{x, y, z}" per line. */
+void pr_ivecs(FILE *fp,int indent,const char *title,ivec vec[],int n, gmx_bool bShowNumbers)
+{
+ int i,j;
+
+ if (available(fp,vec,indent,title))
+ {
+ indent=pr_title_nxn(fp,indent,title,n,DIM);
+ for (i=0; i<n; i++)
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d]={",title,bShowNumbers?i:-1);
+ for (j=0; j<DIM; j++)
+ {
+ if (j!=0) (void) fprintf(fp,", ");
+ fprintf(fp,"%d",vec[i][j]);
+ }
+ (void) fprintf(fp,"}\n");
+ }
+ }
+}
+
+/* Pretty-print a real array in %12.5e format, one element per line. */
+void pr_rvec(FILE *fp,int indent,const char *title,real vec[],int n, gmx_bool bShowNumbers)
+{
+ int i;
+
+ if (available(fp,vec,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++)
+ {
+ pr_indent(fp,indent);
+ fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
+ }
+ }
+}
+
+void pr_dvec(FILE *fp,int indent,const char *title,double vec[],int n, gmx_bool bShowNumbers)
+{
+ int i;
+
+ if (available(fp,vec,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++)
+ {
+ pr_indent(fp,indent);
+ fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
+ }
+ }
+}
+
+
+/*
+void pr_mat(FILE *fp,int indent,char *title,matrix m)
+{
+ int i,j;
+
+ if (available(fp,m,indent,title)) {
+ indent=pr_title_n(fp,indent,title,n);
+ for(i=0; i<n; i++) {
+ pr_indent(fp,indent);
+ fprintf(fp,"%s[%d]=%12.5e %12.5e %12.5e\n",
+ title,bShowNumbers?i:-1,m[i][XX],m[i][YY],m[i][ZZ]);
+ }
+ }
+}
+*/
+
+void pr_rvecs_len(FILE *fp,int indent,const char *title,rvec vec[],int n)
+{
+ int i,j;
+
+ if (available(fp,vec,indent,title)) {
+ indent=pr_title_nxn(fp,indent,title,n,DIM);
+ for (i=0; i<n; i++) {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%5d]={",title,i);
+ for (j=0; j<DIM; j++) {
+ if (j != 0)
+ (void) fprintf(fp,", ");
+ (void) fprintf(fp,"%12.5e",vec[i][j]);
+ }
+ (void) fprintf(fp,"} len=%12.5e\n",norm(vec[i]));
+ }
+ }
+}
+
+void pr_rvecs(FILE *fp,int indent,const char *title,rvec vec[],int n)
+{
+ const char *fshort = "%12.5e";
+ const char *flong = "%15.8e";
+ const char *format;
+ int i,j;
+
+ if (getenv("LONGFORMAT") != NULL)
+ format = flong;
+ else
+ format = fshort;
+
+ if (available(fp,vec,indent,title)) {
+ indent=pr_title_nxn(fp,indent,title,n,DIM);
+ for (i=0; i<n; i++) {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%5d]={",title,i);
+ for (j=0; j<DIM; j++) {
+ if (j != 0)
+ (void) fprintf(fp,", ");
+ (void) fprintf(fp,format,vec[i][j]);
+ }
+ (void) fprintf(fp,"}\n");
+ }
+ }
+}
+
+
+void pr_reals(FILE *fp,int indent,const char *title,real *vec,int n)
+{
+ int i;
+
+ if (available(fp,vec,indent,title)) {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s:\t",title);
+ for(i=0; i<n; i++)
+ fprintf(fp," %10g",vec[i]);
+ (void) fprintf(fp,"\n");
+ }
+}
+
+void pr_doubles(FILE *fp,int indent,const char *title,double *vec,int n)
+{
+ int i;
+
+ if (available(fp,vec,indent,title)) {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s:\t",title);
+ for(i=0; i<n; i++)
+ fprintf(fp," %10g",vec[i]);
+ (void) fprintf(fp,"\n");
+ }
+}
+
+static void pr_int(FILE *fp,int indent,const char *title,int i) /* "title = value" line for an int, title left-padded to 20 chars */
+{
+ pr_indent(fp,indent);
+ fprintf(fp,"%-20s = %d\n",title,i);
+}
+
+static void pr_gmx_large_int(FILE *fp,int indent,const char *title,gmx_large_int_t i) /* "title = value" for a large int, rendered via gmx_step_str */
+{
+ char buf[STEPSTRSIZE];
+
+ pr_indent(fp,indent);
+ fprintf(fp,"%-20s = %s\n",title,gmx_step_str(i,buf));
+}
+
+static void pr_real(FILE *fp,int indent,const char *title,real r) /* "title = value" line for a real (%g) */
+{
+ pr_indent(fp,indent);
+ fprintf(fp,"%-20s = %g\n",title,r);
+}
+
+static void pr_double(FILE *fp,int indent,const char *title,double d) /* "title = value" line for a double (%g) */
+{
+ pr_indent(fp,indent);
+ fprintf(fp,"%-20s = %g\n",title,d);
+}
+
+static void pr_str(FILE *fp,int indent,const char *title,const char *s) /* "title = value" line for a string */
+{
+ pr_indent(fp,indent);
+ fprintf(fp,"%-20s = %s\n",title,s);
+}
+
+/* Print the QM/MM group options; only emits per-group vectors when ngQM > 0. */
+void pr_qm_opts(FILE *fp,int indent,const char *title,t_grpopts *opts)
+{
+ int i,m,j;
+
+ fprintf(fp,"%s:\n",title);
+
+ pr_int(fp,indent,"ngQM",opts->ngQM);
+ if (opts->ngQM > 0) {
+ pr_ivec(fp,indent,"QMmethod",opts->QMmethod,opts->ngQM,FALSE);
+ pr_ivec(fp,indent,"QMbasis",opts->QMbasis,opts->ngQM,FALSE);
+ pr_ivec(fp,indent,"QMcharge",opts->QMcharge,opts->ngQM,FALSE);
+ pr_ivec(fp,indent,"QMmult",opts->QMmult,opts->ngQM,FALSE);
+ pr_bvec(fp,indent,"bSH",opts->bSH,opts->ngQM,FALSE);
+ pr_ivec(fp,indent,"CASorbitals",opts->CASorbitals,opts->ngQM,FALSE);
+ pr_ivec(fp,indent,"CASelectrons",opts->CASelectrons,opts->ngQM,FALSE);
+ pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
+ /* was a duplicated SAon line; the second entry should dump SAoff */
+ pr_rvec(fp,indent,"SAoff",opts->SAoff,opts->ngQM,FALSE);
+ pr_ivec(fp,indent,"SAsteps",opts->SAsteps,opts->ngQM,FALSE);
+ pr_bvec(fp,indent,"bOPT",opts->bOPT,opts->ngQM,FALSE);
+ pr_bvec(fp,indent,"bTS",opts->bTS,opts->ngQM,FALSE);
+ }
+}
+
+static void pr_grp_opts(FILE *out,int indent,const char *title,t_grpopts *opts,
+ gmx_bool bMDPformat) /* dump per-group options; bMDPformat switches "key = v" vs "key: v" labels */
+{
+ int i,m,j;
+
+ if (!bMDPformat)
+ fprintf(out,"%s:\n",title);
+
+ pr_indent(out,indent);
+ fprintf(out,"nrdf%s",bMDPformat ? " = " : ":");
+ for(i=0; (i<opts->ngtc); i++)
+ fprintf(out," %10g",opts->nrdf[i]);
+ fprintf(out,"\n");
+
+ pr_indent(out,indent);
+ fprintf(out,"ref-t%s",bMDPformat ? " = " : ":");
+ for(i=0; (i<opts->ngtc); i++)
+ fprintf(out," %10g",opts->ref_t[i]);
+ fprintf(out,"\n");
+
+ pr_indent(out,indent);
+ fprintf(out,"tau-t%s",bMDPformat ? " = " : ":");
+ for(i=0; (i<opts->ngtc); i++)
+ fprintf(out," %10g",opts->tau_t[i]);
+ fprintf(out,"\n");
+
+ /* Pretty-print the simulated annealing info */
+ fprintf(out,"anneal%s",bMDPformat ? " = " : ":"); /* NOTE(review): no pr_indent before the anneal lines, unlike nrdf/ref-t above — confirm intended */
+ for(i=0; (i<opts->ngtc); i++)
+ fprintf(out," %10s",EANNEAL(opts->annealing[i]));
+ fprintf(out,"\n");
+
+ fprintf(out,"ann-npoints%s",bMDPformat ? " = " : ":");
+ for(i=0; (i<opts->ngtc); i++)
+ fprintf(out," %10d",opts->anneal_npoints[i]);
+ fprintf(out,"\n");
+
+ for(i=0; (i<opts->ngtc); i++) {
+ if(opts->anneal_npoints[i]>0) { /* only groups with annealing points get time/temp tables */
+ fprintf(out,"ann. times [%d]:\t",i);
+ for(j=0; (j<opts->anneal_npoints[i]); j++)
+ fprintf(out," %10.1f",opts->anneal_time[i][j]);
+ fprintf(out,"\n");
+ fprintf(out,"ann. temps [%d]:\t",i);
+ for(j=0; (j<opts->anneal_npoints[i]); j++)
+ fprintf(out," %10.1f",opts->anneal_temp[i][j]);
+ fprintf(out,"\n");
+ }
+ }
+
+ pr_indent(out,indent);
+ fprintf(out,"acc:\t"); /* acceleration per group, DIM values each */
+ for(i=0; (i<opts->ngacc); i++)
+ for(m=0; (m<DIM); m++)
+ fprintf(out," %10g",opts->acc[i][m]);
+ fprintf(out,"\n");
+
+ pr_indent(out,indent);
+ fprintf(out,"nfreeze:"); /* Y/N per dimension per freeze group */
+ for(i=0; (i<opts->ngfrz); i++)
+ for(m=0; (m<DIM); m++)
+ fprintf(out," %10s",opts->nFreeze[i][m] ? "Y" : "N");
+ fprintf(out,"\n");
+
+
+ for(i=0; (i<opts->ngener); i++) { /* ngener x ngener energy-group flag matrix, row per line */
+ pr_indent(out,indent);
+ fprintf(out,"energygrp-flags[%3d]:",i);
+ for(m=0; (m<opts->ngener); m++)
+ fprintf(out," %d",opts->egp_flags[opts->ngener*i+m]);
+ fprintf(out,"\n");
+ }
+
+ fflush(out);
+}
+
+static void pr_matrix(FILE *fp,int indent,const char *title,rvec *m,
+ gmx_bool bMDPformat) /* mdp format: 6 unique elements on one line; else full 3x3 via pr_rvecs */
+{
+ if (bMDPformat)
+ fprintf(fp,"%-10s = %g %g %g %g %g %g\n",title,
+ m[XX][XX],m[YY][YY],m[ZZ][ZZ],m[XX][YY],m[XX][ZZ],m[YY][ZZ]);
+ else
+ pr_rvecs(fp,indent,title,m,DIM);
+}
+
+static void pr_cosine(FILE *fp,int indent,const char *title,t_cosines *cos,
+ gmx_bool bMDPformat) /* print an electric-field cosine series: count n, then amplitudes a[] and phases phi[] */
+{
+ int j;
+
+ if (bMDPformat) {
+ fprintf(fp,"%s = %d\n",title,cos->n);
+ }
+ else {
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"n = %d\n",cos->n);
+ if (cos->n > 0) {
+ (void) pr_indent(fp,indent+2);
+ fprintf(fp,"a =");
+ for(j=0; (j<cos->n); j++)
+ fprintf(fp," %e",cos->a[j]);
+ fprintf(fp,"\n");
+ (void) pr_indent(fp,indent+2);
+ fprintf(fp,"phi =");
+ for(j=0; (j<cos->n); j++)
+ fprintf(fp," %e",cos->phi[j]);
+ fprintf(fp,"\n");
+ }
+ }
+}
+
+#define PS(t,s) pr_str(fp,indent,t,s) /* shorthand printers: rely on fp and indent being in scope at the call site */
+#define PI(t,s) pr_int(fp,indent,t,s)
+#define PSTEP(t,s) pr_gmx_large_int(fp,indent,t,s)
+#define PR(t,s) pr_real(fp,indent,t,s)
+#define PD(t,s) pr_double(fp,indent,t,s)
+
+static void pr_pullgrp(FILE *fp,int indent,int g,t_pullgrp *pg) /* dump one pull group: atoms, weights, geometry and force constants */
+{
+ pr_indent(fp,indent);
+ fprintf(fp,"pull-group %d:\n",g);
+ indent += 2;
+ pr_ivec_block(fp,indent,"atom",pg->ind,pg->nat,TRUE);
+ pr_rvec(fp,indent,"weight",pg->weight,pg->nweight,TRUE);
+ PI("pbcatom",pg->pbcatom);
+ pr_rvec(fp,indent,"vec",pg->vec,DIM,TRUE);
+ pr_rvec(fp,indent,"init",pg->init,DIM,TRUE);
+ PR("rate",pg->rate);
+ PR("k",pg->k);
+ PR("kB",pg->kB);
+}
+
+static void pr_simtempvals(FILE *fp,int indent,t_simtemp *simtemp, int n_lambda, gmx_bool bMDPformat) /* dump simulated-tempering settings; bMDPformat is currently unused here */
+{
+ PR("simtemp_low",simtemp->simtemp_low);
+ PR("simtemp_high",simtemp->simtemp_high);
+ PS("simulated-tempering-scaling",ESIMTEMP(simtemp->eSimTempScale));
+ pr_rvec(fp,indent,"simulated tempering temperatures",simtemp->temperatures,n_lambda,TRUE);
+}
+
+static void pr_expandedvals(FILE *fp,int indent,t_expanded *expand, int n_lambda, gmx_bool bMDPformat) /* dump expanded-ensemble settings; bMDPformat is currently unused here */
+{
+
+ PI("nstexpanded", expand->nstexpanded);
+ PS("lambda-stats", elamstats_names[expand->elamstats]);
+ PS("lambda-mc-move", elmcmove_names[expand->elmcmove]);
+ PI("lmc-repeats",expand->lmc_repeats);
+ PI("lmc-gibbsdelta",expand->gibbsdeltalam);
+ PI("lmc-nstart",expand->lmc_forced_nstart);
+ PS("symmetrized-transition-matrix", EBOOL(expand->bSymmetrizedTMatrix));
+ PI("nst-transition-matrix",expand->nstTij);
+ PI("mininum-var-min",expand->minvarmin); /*default is reasonable */ /* NOTE(review): label "mininum" looks like a typo for "minimum" — runtime output string, confirm before changing */
+ PI("weight-c-range",expand->c_range); /* default is just C=0 */
+ PR("wl-scale",expand->wl_scale);
+ PR("init-wl-delta",expand->init_wl_delta);
+ PR("wl-ratio",expand->wl_ratio);
+ PS("bWLoneovert",EBOOL(expand->bWLoneovert));
+ PI("lmc-seed",expand->lmc_seed);
+ PR("mc-temperature",expand->mc_temp);
+ PS("lmc-weights-equil",elmceq_names[expand->elmceq]);
+ if (expand->elmceq == elmceqNUMATLAM)
+ {
+ PI("weight-equil-number-all-lambda",expand->equil_n_at_lam);
+ }
+ if (expand->elmceq == elmceqSAMPLES)
+ {
+ PI("weight-equil-number-samples",expand->equil_samples);
+ }
+ if (expand->elmceq == elmceqSTEPS)
+ {
+ PI("weight-equil-number-steps",expand->equil_steps);
+ }
+ if (expand->elmceq == elmceqWLDELTA)
+ {
+ PR("weight-equil-wl-delta",expand->equil_wl_delta);
+ }
+ if (expand->elmceq == elmceqRATIO)
+ {
+ PR("weight-equil-count-ratio",expand->equil_ratio);
+ }
+
+ pr_indent(fp,indent); /* NOTE(review): pr_rvec indents itself, so this extra indent looks redundant — confirm */
+ pr_rvec(fp,indent,"init-lambda-weights",expand->init_lambda_weights,n_lambda,TRUE);
+ PS("init-weights",EBOOL(expand->bInit_weights));
+}
+
+/* Dump the free-energy-perturbation settings; the all-lambda table prints one
+ * row per lambda component (efptNR rows) when n_lambda > 0. */
+static void pr_fepvals(FILE *fp,int indent,t_lambda *fep, gmx_bool bMDPformat)
+{
+ int i,j;
+
+ PI("nstdhdl",fep->nstdhdl);
+ PI("init-lambda-state",fep->init_fep_state);
+ PR("init-lambda",fep->init_lambda);
+ PR("delta-lambda",fep->delta_lambda);
+ if (!bMDPformat)
+ {
+ PI("n-lambdas",fep->n_lambda);
+ }
+ if (fep->n_lambda > 0)
+ {
+ pr_indent(fp,indent);
+ fprintf(fp,"all-lambdas%s\n",bMDPformat ? " = " : ":");
+ for(i=0; i<efptNR; i++) {
+ fprintf(fp,"%18s = ",efpt_names[i]);
+ for(j=0; j<fep->n_lambda; j++)
+ {
+ fprintf(fp," %10g",fep->all_lambda[i][j]);
+ }
+ fprintf(fp,"\n");
+ }
+ }
+
+ PR("sc-alpha",fep->sc_alpha);
+ PS("bScCoul",EBOOL(fep->bScCoul));
+ PS("bScPrintEnergy",EBOOL(fep->bPrintEnergy));
+ PI("sc-power",fep->sc_power);
+ PR("sc-r-power",fep->sc_r_power);
+ PR("sc-sigma",fep->sc_sigma);
+ PR("sc-sigma-min",fep->sc_sigma_min);
+ PS("separate-dhdl-file", SEPDHDLFILETYPE(fep->separate_dhdl_file));
+ PS("dhdl-derivatives", DHDLDERIVATIVESTYPE(fep->dhdl_derivatives));
+ PI("dh-hist-size", fep->dh_hist_size);
+ PD("dh-hist-spacing", fep->dh_hist_spacing);
+} /* stray ';' after the brace removed: an empty file-scope declaration is invalid C89 */
+
+static void pr_pull(FILE *fp,int indent,t_pull *pull) /* dump COM-pulling settings, then every pull group */
+{
+ int g;
+
+ PS("pull-geometry",EPULLGEOM(pull->eGeom));
+ pr_ivec(fp,indent,"pull-dim",pull->dim,DIM,TRUE);
+ PR("pull-r1",pull->cyl_r1);
+ PR("pull-r0",pull->cyl_r0);
+ PR("pull-constr-tol",pull->constr_tol);
+ PI("pull-nstxout",pull->nstxout);
+ PI("pull-nstfout",pull->nstfout);
+ PI("pull-ngrp",pull->ngrp);
+ for(g=0; g<pull->ngrp+1; g++) /* ngrp+1: presumably group 0 is the reference group — confirm against pull code */
+ pr_pullgrp(fp,indent,g,&pull->grp[g]);
+}
+
+static void pr_rotgrp(FILE *fp,int indent,int g,t_rotgrp *rotg) /* dump one enforced-rotation group */
+{
+ pr_indent(fp,indent);
+ fprintf(fp,"rotation_group %d:\n",g);
+ indent += 2;
+ PS("type",EROTGEOM(rotg->eType));
+ PS("massw",EBOOL(rotg->bMassW));
+ pr_ivec_block(fp,indent,"atom",rotg->ind,rotg->nat,TRUE);
+ pr_rvecs(fp,indent,"x_ref",rotg->x_ref,rotg->nat);
+ pr_rvec(fp,indent,"vec",rotg->vec,DIM,TRUE);
+ pr_rvec(fp,indent,"pivot",rotg->pivot,DIM,TRUE);
+ PR("rate",rotg->rate);
+ PR("k",rotg->k);
+ PR("slab_dist",rotg->slab_dist);
+ PR("min_gaussian",rotg->min_gaussian);
+ PR("epsilon",rotg->eps);
+ PS("fit_method",EROTFIT(rotg->eFittype));
+ PI("potfitangle_nstep",rotg->PotAngle_nstep);
+ PR("potfitangle_step",rotg->PotAngle_step);
+}
+
+static void pr_rot(FILE *fp,int indent,t_rot *rot) /* dump enforced-rotation settings, then every rotation group */
+{
+ int g;
+
+ PI("rot_nstrout",rot->nstrout);
+ PI("rot_nstsout",rot->nstsout);
+ PI("rot_ngrp",rot->ngrp);
+ for(g=0; g<rot->ngrp; g++)
+ pr_rotgrp(fp,indent,g,&rot->grp[g]);
+}
+
+/* Dump every field of a t_inputrec. With bMDPformat the output mimics mdp
+ * "key = value" syntax; otherwise a titled, indented listing is produced.
+ * Fixes in this revision: sa-algorithm now prints ir->sa_algorithm (was a
+ * copy-paste of gb_algorithm), and the integer fields ewald-geometry,
+ * nstdisreout and nstorireout are printed with PI instead of the real
+ * formatter PR. */
+void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
+ gmx_bool bMDPformat)
+{
+ const char *infbuf="inf";
+ int i;
+
+ if (available(fp,ir,indent,title)) {
+ if (!bMDPformat)
+ indent=pr_title(fp,indent,title);
+ PS("integrator",EI(ir->eI));
+ PSTEP("nsteps",ir->nsteps);
+ PSTEP("init-step",ir->init_step);
++ PS("cutoff-scheme",ECUTSCHEME(ir->cutoff_scheme)); /* NOTE(review): doubled '+' diff markers on this and a few later lines look like patch corruption — confirm against upstream */
++ PS("ns_type",ENS(ir->ns_type));
+ PI("nstlist",ir->nstlist);
+ PI("ndelta",ir->ndelta);
+ PI("nstcomm",ir->nstcomm);
+ PS("comm-mode",ECOM(ir->comm_mode));
+ PI("nstlog",ir->nstlog);
+ PI("nstxout",ir->nstxout);
+ PI("nstvout",ir->nstvout);
+ PI("nstfout",ir->nstfout);
+ PI("nstcalcenergy",ir->nstcalcenergy);
+ PI("nstenergy",ir->nstenergy);
+ PI("nstxtcout",ir->nstxtcout);
+ PR("init-t",ir->init_t);
+ PR("delta-t",ir->delta_t);
+
+ PR("xtcprec",ir->xtcprec);
++ PR("fourierspacing",ir->fourier_spacing);
+ PI("nkx",ir->nkx);
+ PI("nky",ir->nky);
+ PI("nkz",ir->nkz);
+ PI("pme-order",ir->pme_order);
+ PR("ewald-rtol",ir->ewald_rtol);
+ PI("ewald-geometry",ir->ewald_geometry); /* enum/int field; was printed with the real formatter PR */
+ PR("epsilon-surface",ir->epsilon_surface);
+ PS("optimize-fft",EBOOL(ir->bOptFFT));
+ PS("ePBC",EPBC(ir->ePBC));
+ PS("bPeriodicMols",EBOOL(ir->bPeriodicMols));
+ PS("bContinuation",EBOOL(ir->bContinuation));
+ PS("bShakeSOR",EBOOL(ir->bShakeSOR));
+ PS("etc",ETCOUPLTYPE(ir->etc));
+ PS("bPrintNHChains",EBOOL(ir->bPrintNHChains));
+ PI("nsttcouple",ir->nsttcouple);
+ PS("epc",EPCOUPLTYPE(ir->epc));
+ PS("epctype",EPCOUPLTYPETYPE(ir->epct));
+ PI("nstpcouple",ir->nstpcouple);
+ PR("tau-p",ir->tau_p);
+ pr_matrix(fp,indent,"ref-p",ir->ref_p,bMDPformat);
+ pr_matrix(fp,indent,"compress",ir->compress,bMDPformat);
+ PS("refcoord-scaling",EREFSCALINGTYPE(ir->refcoord_scaling));
+ if (bMDPformat)
+ fprintf(fp,"posres-com = %g %g %g\n",ir->posres_com[XX],
+ ir->posres_com[YY],ir->posres_com[ZZ]);
+ else
+ pr_rvec(fp,indent,"posres-com",ir->posres_com,DIM,TRUE);
+ if (bMDPformat)
+ fprintf(fp,"posres-comB = %g %g %g\n",ir->posres_comB[XX],
+ ir->posres_comB[YY],ir->posres_comB[ZZ]);
+ else
+ pr_rvec(fp,indent,"posres-comB",ir->posres_comB,DIM,TRUE);
++ PR("verlet-buffer-drift",ir->verletbuf_drift);
+ PR("rlist",ir->rlist);
+ PR("rlistlong",ir->rlistlong);
+ PR("rtpi",ir->rtpi);
+ PS("coulombtype",EELTYPE(ir->coulombtype));
++ PS("coulomb-modifier",INTMODIFIER(ir->coulomb_modifier));
+ PR("rcoulomb-switch",ir->rcoulomb_switch);
+ PR("rcoulomb",ir->rcoulomb);
+ PS("vdwtype",EVDWTYPE(ir->vdwtype));
++ PS("vdw-modifier",INTMODIFIER(ir->vdw_modifier));
+ PR("rvdw-switch",ir->rvdw_switch);
+ PR("rvdw",ir->rvdw);
+ if (ir->epsilon_r != 0)
+ PR("epsilon-r",ir->epsilon_r);
+ else
+ PS("epsilon-r",infbuf); /* 0 encodes an infinite dielectric constant */
+ if (ir->epsilon_rf != 0)
+ PR("epsilon-rf",ir->epsilon_rf);
+ else
+ PS("epsilon-rf",infbuf);
+ PR("tabext",ir->tabext);
+ PS("implicit-solvent",EIMPLICITSOL(ir->implicit_solvent));
+ PS("gb-algorithm",EGBALGORITHM(ir->gb_algorithm));
+ PR("gb-epsilon-solvent",ir->gb_epsilon_solvent);
+ PI("nstgbradii",ir->nstgbradii);
+ PR("rgbradii",ir->rgbradii);
+ PR("gb-saltconc",ir->gb_saltconc);
+ PR("gb-obc-alpha",ir->gb_obc_alpha);
+ PR("gb-obc-beta",ir->gb_obc_beta);
+ PR("gb-obc-gamma",ir->gb_obc_gamma);
+ PR("gb-dielectric-offset",ir->gb_dielectric_offset);
+ PS("sa-algorithm",ESAALGORITHM(ir->sa_algorithm)); /* was ir->gb_algorithm: copy-paste from the gb-algorithm line above */
+ PR("sa-surface-tension",ir->sa_surface_tension);
+ PS("DispCorr",EDISPCORR(ir->eDispCorr));
+ PS("bSimTemp",EBOOL(ir->bSimTemp));
+ if (ir->bSimTemp) {
+ pr_simtempvals(fp,indent,ir->simtempvals,ir->fepvals->n_lambda,bMDPformat);
+ }
+ PS("free-energy",EFEPTYPE(ir->efep));
+ if (ir->efep != efepNO || ir->bSimTemp) {
+ pr_fepvals(fp,indent,ir->fepvals,bMDPformat);
+ }
+ if (ir->bExpanded) {
+ pr_expandedvals(fp,indent,ir->expandedvals,ir->fepvals->n_lambda,bMDPformat);
+ }
+
+ PI("nwall",ir->nwall);
+ PS("wall-type",EWALLTYPE(ir->wall_type));
+ PI("wall-atomtype[0]",ir->wall_atomtype[0]);
+ PI("wall-atomtype[1]",ir->wall_atomtype[1]);
+ PR("wall-density[0]",ir->wall_density[0]);
+ PR("wall-density[1]",ir->wall_density[1]);
+ PR("wall-ewald-zfac",ir->wall_ewald_zfac);
+
+ PS("pull",EPULLTYPE(ir->ePull));
+ if (ir->ePull != epullNO)
+ pr_pull(fp,indent,ir->pull);
+
+ PS("rotation",EBOOL(ir->bRot));
+ if (ir->bRot)
+ pr_rot(fp,indent,ir->rot);
+
+ PS("disre",EDISRETYPE(ir->eDisre));
+ PS("disre-weighting",EDISREWEIGHTING(ir->eDisreWeighting));
+ PS("disre-mixed",EBOOL(ir->bDisreMixed));
+ PR("dr-fc",ir->dr_fc);
+ PR("dr-tau",ir->dr_tau);
+ PI("nstdisreout",ir->nstdisreout); /* step count is an int; was printed with PR */
+ PR("orires-fc",ir->orires_fc);
+ PR("orires-tau",ir->orires_tau);
+ PI("nstorireout",ir->nstorireout); /* step count is an int; was printed with PR */
+
+ PR("dihre-fc",ir->dihre_fc);
+
+ PR("em-stepsize",ir->em_stepsize);
+ PR("em-tol",ir->em_tol);
+ PI("niter",ir->niter);
+ PR("fc-stepsize",ir->fc_stepsize);
+ PI("nstcgsteep",ir->nstcgsteep);
+ PI("nbfgscorr",ir->nbfgscorr);
+
+ PS("ConstAlg",ECONSTRTYPE(ir->eConstrAlg));
+ PR("shake-tol",ir->shake_tol);
+ PI("lincs-order",ir->nProjOrder);
+ PR("lincs-warnangle",ir->LincsWarnAngle);
+ PI("lincs-iter",ir->nLincsIter);
+ PR("bd-fric",ir->bd_fric);
+ PI("ld-seed",ir->ld_seed);
+ PR("cos-accel",ir->cos_accel);
+ pr_matrix(fp,indent,"deform",ir->deform,bMDPformat);
+
+ PS("adress",EBOOL(ir->bAdress));
+ if (ir->bAdress){
+ PS("adress_type",EADRESSTYPE(ir->adress->type));
+ PR("adress_const_wf",ir->adress->const_wf);
+ PR("adress_ex_width",ir->adress->ex_width);
+ PR("adress_hy_width",ir->adress->hy_width);
+ PS("adress_interface_correction",EADRESSICTYPE(ir->adress->icor));
+ PS("adress_site",EADRESSSITETYPE(ir->adress->site));
+ PR("adress_ex_force_cap",ir->adress->ex_forcecap);
+ PS("adress_do_hybridpairs", EBOOL(ir->adress->do_hybridpairs));
+
+ pr_rvec(fp,indent,"adress_reference_coords",ir->adress->refs,DIM,TRUE);
+ }
+ PI("userint1",ir->userint1);
+ PI("userint2",ir->userint2);
+ PI("userint3",ir->userint3);
+ PI("userint4",ir->userint4);
+ PR("userreal1",ir->userreal1);
+ PR("userreal2",ir->userreal2);
+ PR("userreal3",ir->userreal3);
+ PR("userreal4",ir->userreal4);
+ pr_grp_opts(fp,indent,"grpopts",&(ir->opts),bMDPformat);
+ pr_cosine(fp,indent,"efield-x",&(ir->ex[XX]),bMDPformat);
+ pr_cosine(fp,indent,"efield-xt",&(ir->et[XX]),bMDPformat);
+ pr_cosine(fp,indent,"efield-y",&(ir->ex[YY]),bMDPformat);
+ pr_cosine(fp,indent,"efield-yt",&(ir->et[YY]),bMDPformat);
+ pr_cosine(fp,indent,"efield-z",&(ir->ex[ZZ]),bMDPformat);
+ pr_cosine(fp,indent,"efield-zt",&(ir->et[ZZ]),bMDPformat);
+ PS("bQMMM",EBOOL(ir->bQMMM));
+ PI("QMconstraints",ir->QMconstraints);
+ PI("QMMMscheme",ir->QMMMscheme);
+ PR("scalefactor",ir->scalefactor);
+ pr_qm_opts(fp,indent,"qm-opts",&(ir->opts));
+ }
+}
+#undef PS
+#undef PR
+#undef PI
+
+static void pr_harm(FILE *fp,t_iparams *iparams,const char *r,const char *kr) /* print A- and B-state harmonic parameters using the caller-supplied labels r/kr */
+{
+ fprintf(fp,"%sA=%12.5e, %sA=%12.5e, %sB=%12.5e, %sB=%12.5e\n",
+ r,iparams->harmonic.rA,kr,iparams->harmonic.krA,
+ r,iparams->harmonic.rB,kr,iparams->harmonic.krB);
+}
+
+/* Print the interaction parameters for one function type, one line per call.
+ * Unknown types are a fatal error so new ftypes cannot be silently skipped.
+ * Fix in this revision: the F_CROSS_BOND_ANGLES format string labeled the
+ * second value "r1e" although cross_ba.r2e is printed there; it now says r2e. */
+void pr_iparams(FILE *fp,t_functype ftype,t_iparams *iparams)
+{
+ int i;
+ real VA[4],VB[4],*rbcA,*rbcB;
+
+ switch (ftype) {
+ case F_ANGLES:
+ case F_G96ANGLES:
+ pr_harm(fp,iparams,"th","ct");
+ break;
+ case F_CROSS_BOND_BONDS:
+ fprintf(fp,"r1e=%15.8e, r2e=%15.8e, krr=%15.8e\n",
+ iparams->cross_bb.r1e,iparams->cross_bb.r2e,
+ iparams->cross_bb.krr);
+ break;
+ case F_CROSS_BOND_ANGLES:
+ /* second label was "r1e" although r2e is printed */
+ fprintf(fp,"r1e=%15.8e, r2e=%15.8e, r3e=%15.8e, krt=%15.8e\n",
+ iparams->cross_ba.r1e,iparams->cross_ba.r2e,
+ iparams->cross_ba.r3e,iparams->cross_ba.krt);
+ break;
+ case F_LINEAR_ANGLES:
+ fprintf(fp,"klinA=%15.8e, aA=%15.8e, klinB=%15.8e, aB=%15.8e\n",
+ iparams->linangle.klinA,iparams->linangle.aA,
+ iparams->linangle.klinB,iparams->linangle.aB);
+ break;
+ case F_UREY_BRADLEY:
+ fprintf(fp,"thetaA=%15.8e, kthetaA=%15.8e, r13A=%15.8e, kUBA=%15.8e, thetaB=%15.8e, kthetaB=%15.8e, r13B=%15.8e, kUBB=%15.8e\n",iparams->u_b.thetaA,iparams->u_b.kthetaA,iparams->u_b.r13A,iparams->u_b.kUBA,iparams->u_b.thetaB,iparams->u_b.kthetaB,iparams->u_b.r13B,iparams->u_b.kUBB);
+ break;
+ case F_QUARTIC_ANGLES:
+ fprintf(fp,"theta=%15.8e",iparams->qangle.theta);
+ for(i=0; i<5; i++)
+ fprintf(fp,", c%c=%15.8e",'0'+i,iparams->qangle.c[i]);
+ fprintf(fp,"\n");
+ break;
+ case F_BHAM:
+ fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
+ iparams->bham.a,iparams->bham.b,iparams->bham.c);
+ break;
+ case F_BONDS:
+ case F_G96BONDS:
+ case F_HARMONIC:
+ pr_harm(fp,iparams,"b0","cb");
+ break;
+ case F_IDIHS:
+ pr_harm(fp,iparams,"xi","cx");
+ break;
+ case F_MORSE:
+ fprintf(fp,"b0A=%15.8e, cbA=%15.8e, betaA=%15.8e, b0B=%15.8e, cbB=%15.8e, betaB=%15.8e\n",
+ iparams->morse.b0A,iparams->morse.cbA,iparams->morse.betaA,
+ iparams->morse.b0B,iparams->morse.cbB,iparams->morse.betaB);
+ break;
+ case F_CUBICBONDS:
+ fprintf(fp,"b0=%15.8e, kb=%15.8e, kcub=%15.8e\n",
+ iparams->cubic.b0,iparams->cubic.kb,iparams->cubic.kcub);
+ break;
+ case F_CONNBONDS:
+ fprintf(fp,"\n"); /* connection bonds carry no parameters */
+ break;
+ case F_FENEBONDS:
+ fprintf(fp,"bm=%15.8e, kb=%15.8e\n",iparams->fene.bm,iparams->fene.kb);
+ break;
+ case F_RESTRBONDS:
+ fprintf(fp,"lowA=%15.8e, up1A=%15.8e, up2A=%15.8e, kA=%15.8e, lowB=%15.8e, up1B=%15.8e, up2B=%15.8e, kB=%15.8e,\n",
+ iparams->restraint.lowA,iparams->restraint.up1A,
+ iparams->restraint.up2A,iparams->restraint.kA,
+ iparams->restraint.lowB,iparams->restraint.up1B,
+ iparams->restraint.up2B,iparams->restraint.kB);
+ break;
+ case F_TABBONDS:
+ case F_TABBONDSNC:
+ case F_TABANGLES:
+ case F_TABDIHS:
+ fprintf(fp,"tab=%d, kA=%15.8e, kB=%15.8e\n",
+ iparams->tab.table,iparams->tab.kA,iparams->tab.kB);
+ break;
+ case F_POLARIZATION:
+ fprintf(fp,"alpha=%15.8e\n",iparams->polarize.alpha);
+ break;
+ case F_ANHARM_POL:
+ fprintf(fp,"alpha=%15.8e drcut=%15.8e khyp=%15.8e\n",
+ iparams->anharm_polarize.alpha,
+ iparams->anharm_polarize.drcut,
+ iparams->anharm_polarize.khyp);
+ break;
+ case F_THOLE_POL:
+ fprintf(fp,"a=%15.8e, alpha1=%15.8e, alpha2=%15.8e, rfac=%15.8e\n",
+ iparams->thole.a,iparams->thole.alpha1,iparams->thole.alpha2,
+ iparams->thole.rfac);
+ break;
+ case F_WATER_POL:
+ fprintf(fp,"al_x=%15.8e, al_y=%15.8e, al_z=%15.8e, rOH=%9.6f, rHH=%9.6f, rOD=%9.6f\n",
+ iparams->wpol.al_x,iparams->wpol.al_y,iparams->wpol.al_z,
+ iparams->wpol.rOH,iparams->wpol.rHH,iparams->wpol.rOD);
+ break;
+ case F_LJ:
+ fprintf(fp,"c6=%15.8e, c12=%15.8e\n",iparams->lj.c6,iparams->lj.c12);
+ break;
+ case F_LJ14:
+ fprintf(fp,"c6A=%15.8e, c12A=%15.8e, c6B=%15.8e, c12B=%15.8e\n",
+ iparams->lj14.c6A,iparams->lj14.c12A,
+ iparams->lj14.c6B,iparams->lj14.c12B);
+ break;
+ case F_LJC14_Q:
+ fprintf(fp,"fqq=%15.8e, qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
+ iparams->ljc14.fqq,
+ iparams->ljc14.qi,iparams->ljc14.qj,
+ iparams->ljc14.c6,iparams->ljc14.c12);
+ break;
+ case F_LJC_PAIRS_NB:
+ fprintf(fp,"qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
+ iparams->ljcnb.qi,iparams->ljcnb.qj,
+ iparams->ljcnb.c6,iparams->ljcnb.c12);
+ break;
+ case F_PDIHS:
+ case F_PIDIHS:
+ case F_ANGRES:
+ case F_ANGRESZ:
+ fprintf(fp,"phiA=%15.8e, cpA=%15.8e, phiB=%15.8e, cpB=%15.8e, mult=%d\n",
+ iparams->pdihs.phiA,iparams->pdihs.cpA,
+ iparams->pdihs.phiB,iparams->pdihs.cpB,
+ iparams->pdihs.mult);
+ break;
+ case F_DISRES:
+ fprintf(fp,"label=%4d, type=%1d, low=%15.8e, up1=%15.8e, up2=%15.8e, fac=%15.8e)\n",
+ iparams->disres.label,iparams->disres.type,
+ iparams->disres.low,iparams->disres.up1,
+ iparams->disres.up2,iparams->disres.kfac);
+ break;
+ case F_ORIRES:
+ fprintf(fp,"ex=%4d, label=%d, power=%4d, c=%15.8e, obs=%15.8e, kfac=%15.8e)\n",
+ iparams->orires.ex,iparams->orires.label,iparams->orires.power,
+ iparams->orires.c,iparams->orires.obs,iparams->orires.kfac);
+ break;
+ case F_DIHRES:
+ fprintf(fp,"phiA=%15.8e, dphiA=%15.8e, kfacA=%15.8e, phiB=%15.8e, dphiB=%15.8e, kfacB=%15.8e\n",
+ iparams->dihres.phiA,iparams->dihres.dphiA,iparams->dihres.kfacA,
+ iparams->dihres.phiB,iparams->dihres.dphiB,iparams->dihres.kfacB);
+ break;
+ case F_POSRES:
+ fprintf(fp,"pos0A=(%15.8e,%15.8e,%15.8e), fcA=(%15.8e,%15.8e,%15.8e), pos0B=(%15.8e,%15.8e,%15.8e), fcB=(%15.8e,%15.8e,%15.8e)\n",
+ iparams->posres.pos0A[XX],iparams->posres.pos0A[YY],
+ iparams->posres.pos0A[ZZ],iparams->posres.fcA[XX],
+ iparams->posres.fcA[YY],iparams->posres.fcA[ZZ],
+ iparams->posres.pos0B[XX],iparams->posres.pos0B[YY],
+ iparams->posres.pos0B[ZZ],iparams->posres.fcB[XX],
+ iparams->posres.fcB[YY],iparams->posres.fcB[ZZ]);
+ break;
+ case F_FBPOSRES:
+ fprintf(fp,"pos0=(%15.8e,%15.8e,%15.8e), geometry=%d, r=%15.8e, k=%15.8e\n",
+ iparams->fbposres.pos0[XX], iparams->fbposres.pos0[YY],
+ iparams->fbposres.pos0[ZZ], iparams->fbposres.geom,
+ iparams->fbposres.r, iparams->fbposres.k);
+ break;
+ case F_RBDIHS:
+ for (i=0; i<NR_RBDIHS; i++)
+ fprintf(fp,"%srbcA[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcA[i]);
+ fprintf(fp,"\n");
+ for (i=0; i<NR_RBDIHS; i++)
+ fprintf(fp,"%srbcB[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcB[i]);
+ fprintf(fp,"\n");
+ break;
+ case F_FOURDIHS:
+ /* Use the OPLS -> Ryckaert-Bellemans formula backwards to get the
+ * OPLS potential constants back.
+ */
+ rbcA = iparams->rbdihs.rbcA;
+ rbcB = iparams->rbdihs.rbcB;
+
+ VA[3] = -0.25*rbcA[4];
+ VA[2] = -0.5*rbcA[3];
+ VA[1] = 4.0*VA[3]-rbcA[2];
+ VA[0] = 3.0*VA[2]-2.0*rbcA[1];
+
+ VB[3] = -0.25*rbcB[4];
+ VB[2] = -0.5*rbcB[3];
+ VB[1] = 4.0*VB[3]-rbcB[2];
+ VB[0] = 3.0*VB[2]-2.0*rbcB[1];
+
+ for (i=0; i<NR_FOURDIHS; i++)
+ fprintf(fp,"%sFourA[%d]=%15.8e",i==0?"":", ",i,VA[i]);
+ fprintf(fp,"\n");
+ for (i=0; i<NR_FOURDIHS; i++)
+ fprintf(fp,"%sFourB[%d]=%15.8e",i==0?"":", ",i,VB[i]);
+ fprintf(fp,"\n");
+ break;
+
+ case F_CONSTR:
+ case F_CONSTRNC:
+ fprintf(fp,"dA=%15.8e, dB=%15.8e\n",iparams->constr.dA,iparams->constr.dB);
+ break;
+ case F_SETTLE:
+ fprintf(fp,"doh=%15.8e, dhh=%15.8e\n",iparams->settle.doh,
+ iparams->settle.dhh);
+ break;
+ case F_VSITE2:
+ fprintf(fp,"a=%15.8e\n",iparams->vsite.a);
+ break;
+ case F_VSITE3:
+ case F_VSITE3FD:
+ case F_VSITE3FAD:
+ fprintf(fp,"a=%15.8e, b=%15.8e\n",iparams->vsite.a,iparams->vsite.b);
+ break;
+ case F_VSITE3OUT:
+ case F_VSITE4FD:
+ case F_VSITE4FDN:
+ fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
+ iparams->vsite.a,iparams->vsite.b,iparams->vsite.c);
+ break;
+ case F_VSITEN:
+ fprintf(fp,"n=%2d, a=%15.8e\n",iparams->vsiten.n,iparams->vsiten.a);
+ break;
+ case F_GB12:
+ case F_GB13:
+ case F_GB14:
+ fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n",iparams->gb.sar,iparams->gb.st,iparams->gb.pi,iparams->gb.gbr,iparams->gb.bmlt);
+ break;
+ case F_CMAP:
+ fprintf(fp, "cmapA=%1d, cmapB=%1d\n",iparams->cmap.cmapA, iparams->cmap.cmapB);
+ break;
+ default:
+ gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
+ ftype,interaction_function[ftype].name,__FILE__,__LINE__);
+ }
+}
+
+void pr_ilist(FILE *fp,int indent,const char *title,
+ t_functype *functype,t_ilist *ilist, gmx_bool bShowNumbers) /* dump one interaction list: each entry is a functype index followed by its atoms */
+{
+ int i,j,k,type,ftype;
+ t_iatom *iatoms;
+
+ if (available(fp,ilist,indent,title) && ilist->nr > 0)
+ {
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"nr: %d\n",ilist->nr);
+ if (ilist->nr > 0) {
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"iatoms:\n");
+ iatoms=ilist->iatoms;
+ for (i=j=0; i<ilist->nr;) { /* i advances by 1+nratoms per interaction; j counts interactions */
+#ifndef DEBUG
+ (void) pr_indent(fp,indent+INDENT);
+ type=*(iatoms++);
+ ftype=functype[type];
+ (void) fprintf(fp,"%d type=%d (%s)",
+ bShowNumbers?j:-1,bShowNumbers?type:-1,
+ interaction_function[ftype].name);
+ j++;
+ for (k=0; k<interaction_function[ftype].nratoms; k++)
+ (void) fprintf(fp," %u",*(iatoms++));
+ (void) fprintf(fp,"\n");
+ i+=1+interaction_function[ftype].nratoms;
+#else
+ fprintf(fp,"%5d%5d\n",i,iatoms[i]);
+ i++;
+#endif
+ }
+ }
+ }
+}
+
+static void pr_cmap(FILE *fp, int indent, const char *title,
+ gmx_cmap_t *cmap_grid, gmx_bool bShowNumbers) /* dump the CMAP grids: 4 values (V,dVdx,dVdy,d2dV) per grid point, with an angle header every grid_spacing points */
+{
+ int i,j,nelem;
+ real dx,idx;
+
+ dx = 360.0 / cmap_grid->grid_spacing; /* angle step over the -180..180 range */
+ nelem = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
+
+ if(available(fp,cmap_grid,indent,title))
+ {
+ fprintf(fp,"%s\n",title);
+
+ for(i=0;i<cmap_grid->ngrid;i++)
+ {
+ idx = -180.0;
+ fprintf(fp,"%8s %8s %8s %8s\n","V","dVdx","dVdy","d2dV");
+
+ fprintf(fp,"grid[%3d]={\n",bShowNumbers?i:-1);
+
+ for(j=0;j<nelem;j++)
+ {
+ if( (j%cmap_grid->grid_spacing)==0)
+ {
+ fprintf(fp,"%8.1f\n",idx);
+ idx+=dx;
+ }
+
+ fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4]);
+ fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+1]);
+ fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+2]);
+ fprintf(fp,"%8.3f\n",cmap_grid->cmapdata[i].cmap[j*4+3]);
+ }
+ fprintf(fp,"\n");
+ }
+ }
+
+}
+
+void pr_ffparams(FILE *fp,int indent,const char *title,
+ gmx_ffparams_t *ffparams,
+ gmx_bool bShowNumbers) /* dump global force-field parameters: per-type iparams, reppow, fudgeQQ and the CMAP grids */
+{
+ int i,j;
+
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"atnr=%d\n",ffparams->atnr);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"ntypes=%d\n",ffparams->ntypes);
+ for (i=0; i<ffparams->ntypes; i++) {
+ (void) pr_indent(fp,indent+INDENT);
+ (void) fprintf(fp,"functype[%d]=%s, ",
+ bShowNumbers?i:-1,
+ interaction_function[ffparams->functype[i]].name);
+ pr_iparams(fp,ffparams->functype[i],&ffparams->iparams[i]);
+ }
+ (void) pr_double(fp,indent,"reppow",ffparams->reppow);
+ (void) pr_real(fp,indent,"fudgeQQ",ffparams->fudgeQQ);
+ pr_cmap(fp,indent,"cmap",&ffparams->cmap_grid,bShowNumbers);
+}
+
+void pr_idef(FILE *fp,int indent,const char *title,t_idef *idef, gmx_bool bShowNumbers) /* dump an interaction definition: per-type iparams, fudgeQQ, then every ilist */
+{
+ int i,j;
+
+ if (available(fp,idef,indent,title)) {
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"atnr=%d\n",idef->atnr);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"ntypes=%d\n",idef->ntypes);
+ for (i=0; i<idef->ntypes; i++) {
+ (void) pr_indent(fp,indent+INDENT);
+ (void) fprintf(fp,"functype[%d]=%s, ",
+ bShowNumbers?i:-1,
+ interaction_function[idef->functype[i]].name);
+ pr_iparams(fp,idef->functype[i],&idef->iparams[i]);
+ }
+ (void) pr_real(fp,indent,"fudgeQQ",idef->fudgeQQ);
+
+ for(j=0; (j<F_NRE); j++)
+ pr_ilist(fp,indent,interaction_function[j].longname,
+ idef->functype,&idef->il[j],bShowNumbers);
+ }
+}
+
+static int pr_block_title(FILE *fp,int indent,const char *title,t_block *block) /* print the title and nr of a t_block; returns the increased indent */
+{
+ int i;
+
+ if (available(fp,block,indent,title))
+ {
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"nr=%d\n",block->nr);
+ }
+ return indent;
+}
+
+static int pr_blocka_title(FILE *fp,int indent,const char *title,t_blocka *block) /* print title, nr and nra of a t_blocka; returns the increased indent */
+{
+ int i;
+
+ if (available(fp,block,indent,title))
+ {
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"nr=%d\n",block->nr);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"nra=%d\n",block->nra);
+ }
+ return indent;
+}
+
+static void low_pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block, gmx_bool bShowNumbers) /* raw dump of a t_blocka: all nr+1 index entries, then all nra atoms; used when the tables are inconsistent */
+{
+ int i;
+
+ if (available(fp,block,indent,title))
+ {
+ indent=pr_blocka_title(fp,indent,title,block);
+ for (i=0; i<=block->nr; i++) /* index[] has nr+1 entries */
+ {
+ (void) pr_indent(fp,indent+INDENT);
+ (void) fprintf(fp,"%s->index[%d]=%u\n",
+ title,bShowNumbers?i:-1,block->index[i]);
+ }
+ for (i=0; i<block->nra; i++)
+ {
+ (void) pr_indent(fp,indent+INDENT);
+ (void) fprintf(fp,"%s->a[%d]=%u\n",
+ title,bShowNumbers?i:-1,block->a[i]);
+ }
+ }
+}
+
+void pr_block(FILE *fp,int indent,const char *title,t_block *block,gmx_bool bShowNumbers) /* dump a t_block as one "{start..end}" range per entry; requires index[0]==0 */
+{
+ int i,j,ok,size,start,end;
+
+ if (available(fp,block,indent,title))
+ {
+ indent=pr_block_title(fp,indent,title,block);
+ start=0;
+ end=start;
+ if ((ok=(block->index[start]==0))==0) /* sanity check: ranges must begin at 0 */
+ (void) fprintf(fp,"block->index[%d] should be 0\n",start);
+ else
+ for (i=0; i<block->nr; i++)
+ {
+ end=block->index[i+1];
+ size=pr_indent(fp,indent);
+ if (end<=start)
+ size+=fprintf(fp,"%s[%d]={}\n",title,i);
+ else
+ size+=fprintf(fp,"%s[%d]={%d..%d}\n",
+ title,bShowNumbers?i:-1,
+ bShowNumbers?start:-1,bShowNumbers?end-1:-1);
+ start=end;
+ }
+ }
+}
+
+void pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block,gmx_bool bShowNumbers) /* dump a t_blocka: per entry the atoms a[start..end-1], line-wrapped at USE_WIDTH; falls back to a raw dump when inconsistent */
+{
+ int i,j,ok,size,start,end;
+
+ if (available(fp,block,indent,title))
+ {
+ indent=pr_blocka_title(fp,indent,title,block);
+ start=0;
+ end=start;
+ if ((ok=(block->index[start]==0))==0)
+ (void) fprintf(fp,"block->index[%d] should be 0\n",start);
+ else
+ for (i=0; i<block->nr; i++)
+ {
+ end=block->index[i+1];
+ size=pr_indent(fp,indent);
+ if (end<=start)
+ size+=fprintf(fp,"%s[%d]={",title,i);
+ else
+ size+=fprintf(fp,"%s[%d][%d..%d]={",
+ title,bShowNumbers?i:-1,
+ bShowNumbers?start:-1,bShowNumbers?end-1:-1);
+ for (j=start; j<end; j++)
+ {
+ if (j>start) size+=fprintf(fp,", ");
+ if ((size)>(USE_WIDTH)) /* wrap long atom lists */
+ {
+ (void) fprintf(fp,"\n");
+ size=pr_indent(fp,indent+INDENT);
+ }
+ size+=fprintf(fp,"%u",block->a[j]);
+ }
+ (void) fprintf(fp,"}\n");
+ start=end;
+ }
+ if ((end!=block->nra)||(!ok)) /* totals disagree: dump the raw tables for debugging */
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"tables inconsistent, dumping complete tables:\n");
+ low_pr_blocka(fp,indent,title,block,bShowNumbers);
+ }
+ }
+}
+
+static void pr_strings(FILE *fp,int indent,const char *title,char ***nm,int n, gmx_bool bShowNumbers)
+{
+ int i;
+
+ if (available(fp,nm,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++)
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d]={name=\"%s\"}\n",
+ title,bShowNumbers?i:-1,*(nm[i]));
+ }
+ }
+}
+
+static void pr_strings2(FILE *fp,int indent,const char *title,
+ char ***nm,char ***nmB,int n, gmx_bool bShowNumbers)
+{
+ int i;
+
+ if (available(fp,nm,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++)
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d]={name=\"%s\",nameB=\"%s\"}\n",
+ title,bShowNumbers?i:-1,*(nm[i]),*(nmB[i]));
+ }
+ }
+}
+
+static void pr_resinfo(FILE *fp,int indent,const char *title,t_resinfo *resinfo,int n, gmx_bool bShowNumbers)
+{
+ int i;
+
+ if (available(fp,resinfo,indent,title))
+ {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++)
+ {
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%s[%d]={name=\"%s\", nr=%d, ic='%c'}\n",
+ title,bShowNumbers?i:-1,
+ *(resinfo[i].name),resinfo[i].nr,
+ (resinfo[i].ic == '\0') ? ' ' : resinfo[i].ic);
+ }
+ }
+}
+
+static void pr_atom(FILE *fp,int indent,const char *title,t_atom *atom,int n)
+{
+ int i,j;
+
+ if (available(fp,atom,indent,title)) {
+ indent=pr_title_n(fp,indent,title,n);
+ for (i=0; i<n; i++) {
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"%s[%6d]={type=%3d, typeB=%3d, ptype=%8s, m=%12.5e, "
+ "q=%12.5e, mB=%12.5e, qB=%12.5e, resind=%5d, atomnumber=%3d}\n",
+ title,i,atom[i].type,atom[i].typeB,ptype_str[atom[i].ptype],
+ atom[i].m,atom[i].q,atom[i].mB,atom[i].qB,
+ atom[i].resind,atom[i].atomnumber);
+ }
+ }
+}
+
+static void pr_grps(FILE *fp,int indent,const char *title,t_grps grps[],
+ char **grpname[], gmx_bool bShowNumbers)
+{
+ int i,j;
+
+ for(i=0; (i<egcNR); i++)
+ {
+ fprintf(fp,"%s[%-12s] nr=%d, name=[",title,gtypes[i],grps[i].nr);
+ for(j=0; (j<grps[i].nr); j++)
+ {
+ fprintf(fp," %s",*(grpname[grps[i].nm_ind[j]]));
+ }
+ fprintf(fp,"]\n");
+ }
+}
+
+static void pr_groups(FILE *fp,int indent,const char *title,
+ gmx_groups_t *groups,
+ gmx_bool bShowNumbers)
+{
+ int grpnr[egcNR];
+ int nat_max,i,g;
+
+ pr_grps(fp,indent,"grp",groups->grps,groups->grpname,bShowNumbers);
+ pr_strings(fp,indent,"grpname",groups->grpname,groups->ngrpname,bShowNumbers);
+
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"groups ");
+ for(g=0; g<egcNR; g++)
+ {
+ printf(" %5.5s",gtypes[g]);
+ }
+ printf("\n");
+
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"allocated ");
+ nat_max = 0;
+ for(g=0; g<egcNR; g++)
+ {
+ printf(" %5d",groups->ngrpnr[g]);
+ nat_max = max(nat_max,groups->ngrpnr[g]);
+ }
+ printf("\n");
+
+ if (nat_max == 0)
+ {
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"groupnr[%5s] =","*");
+ for(g=0; g<egcNR; g++)
+ {
+ fprintf(fp," %3d ",0);
+ }
+ fprintf(fp,"\n");
+ }
+ else
+ {
+ for(i=0; i<nat_max; i++)
+ {
+ (void) pr_indent(fp,indent);
+ fprintf(fp,"groupnr[%5d] =",i);
+ for(g=0; g<egcNR; g++)
+ {
+ fprintf(fp," %3d ",
+ groups->grpnr[g] ? groups->grpnr[g][i] : 0);
+ }
+ fprintf(fp,"\n");
+ }
+ }
+}
+
+void pr_atoms(FILE *fp,int indent,const char *title,t_atoms *atoms,
+ gmx_bool bShownumbers)
+{
+ if (available(fp,atoms,indent,title))
+ {
+ indent=pr_title(fp,indent,title);
+ pr_atom(fp,indent,"atom",atoms->atom,atoms->nr);
+ pr_strings(fp,indent,"atom",atoms->atomname,atoms->nr,bShownumbers);
+ pr_strings2(fp,indent,"type",atoms->atomtype,atoms->atomtypeB,atoms->nr,bShownumbers);
+ pr_resinfo(fp,indent,"residue",atoms->resinfo,atoms->nres,bShownumbers);
+ }
+}
+
+
+void pr_atomtypes(FILE *fp,int indent,const char *title,t_atomtypes *atomtypes,
+ gmx_bool bShowNumbers)
+{
+ int i;
+ if (available(fp,atomtypes,indent,title))
+ {
+ indent=pr_title(fp,indent,title);
+ for(i=0;i<atomtypes->nr;i++) {
+ pr_indent(fp,indent);
+ fprintf(fp,
+ "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e)}\n",
+ bShowNumbers?i:-1,atomtypes->radius[i],atomtypes->vol[i],
+ atomtypes->gb_radius[i],
+ atomtypes->surftens[i],atomtypes->atomnumber[i],atomtypes->S_hct[i]);
+ }
+ }
+}
+
+static void pr_moltype(FILE *fp,int indent,const char *title,
+ gmx_moltype_t *molt,int n,
+ gmx_ffparams_t *ffparams,
+ gmx_bool bShowNumbers)
+{
+ int j;
+
+ indent = pr_title_n(fp,indent,title,n);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"name=\"%s\"\n",*(molt->name));
+ pr_atoms(fp,indent,"atoms",&(molt->atoms),bShowNumbers);
+ pr_block(fp,indent,"cgs",&molt->cgs, bShowNumbers);
+ pr_blocka(fp,indent,"excls",&molt->excls, bShowNumbers);
+ for(j=0; (j<F_NRE); j++) {
+ pr_ilist(fp,indent,interaction_function[j].longname,
+ ffparams->functype,&molt->ilist[j],bShowNumbers);
+ }
+}
+
+static void pr_molblock(FILE *fp,int indent,const char *title,
+ gmx_molblock_t *molb,int n,
+ gmx_moltype_t *molt,
+ gmx_bool bShowNumbers)
+{
+ indent = pr_title_n(fp,indent,title,n);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"%-20s = %d \"%s\"\n",
+ "moltype",molb->type,*(molt[molb->type].name));
+ pr_int(fp,indent,"#molecules",molb->nmol);
+ pr_int(fp,indent,"#atoms_mol",molb->natoms_mol);
+ pr_int(fp,indent,"#posres_xA",molb->nposres_xA);
+ if (molb->nposres_xA > 0) {
+ pr_rvecs(fp,indent,"posres_xA",molb->posres_xA,molb->nposres_xA);
+ }
+ pr_int(fp,indent,"#posres_xB",molb->nposres_xB);
+ if (molb->nposres_xB > 0) {
+ pr_rvecs(fp,indent,"posres_xB",molb->posres_xB,molb->nposres_xB);
+ }
+}
+
+void pr_mtop(FILE *fp,int indent,const char *title,gmx_mtop_t *mtop,
+ gmx_bool bShowNumbers)
+{
+ int mt,mb;
+
+ if (available(fp,mtop,indent,title)) {
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"name=\"%s\"\n",*(mtop->name));
+ pr_int(fp,indent,"#atoms",mtop->natoms);
++ pr_int(fp,indent,"#molblock",mtop->nmolblock);
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ pr_molblock(fp,indent,"molblock",&mtop->molblock[mb],mb,
+ mtop->moltype,bShowNumbers);
+ }
+ pr_ffparams(fp,indent,"ffparams",&(mtop->ffparams),bShowNumbers);
+ pr_atomtypes(fp,indent,"atomtypes",&(mtop->atomtypes),bShowNumbers);
+ for(mt=0; mt<mtop->nmoltype; mt++) {
+ pr_moltype(fp,indent,"moltype",&mtop->moltype[mt],mt,
+ &mtop->ffparams,bShowNumbers);
+ }
+ pr_groups(fp,indent,"groups",&mtop->groups,bShowNumbers);
+ }
+}
+
+void pr_top(FILE *fp,int indent,const char *title,t_topology *top, gmx_bool bShowNumbers)
+{
+ if (available(fp,top,indent,title)) {
+ indent=pr_title(fp,indent,title);
+ (void) pr_indent(fp,indent);
+ (void) fprintf(fp,"name=\"%s\"\n",*(top->name));
+ pr_atoms(fp,indent,"atoms",&(top->atoms),bShowNumbers);
+ pr_atomtypes(fp,indent,"atomtypes",&(top->atomtypes),bShowNumbers);
+ pr_block(fp,indent,"cgs",&top->cgs, bShowNumbers);
+ pr_block(fp,indent,"mols",&top->mols, bShowNumbers);
+ pr_blocka(fp,indent,"excls",&top->excls, bShowNumbers);
+ pr_idef(fp,indent,"idef",&top->idef,bShowNumbers);
+ }
+}
+
+void pr_header(FILE *fp,int indent,const char *title,t_tpxheader *sh)
+{
+ char buf[22];
+
+ if (available(fp,sh,indent,title))
+ {
+ indent=pr_title(fp,indent,title);
+ pr_indent(fp,indent);
+ fprintf(fp,"bIr = %spresent\n",sh->bIr?"":"not ");
+ pr_indent(fp,indent);
+ fprintf(fp,"bBox = %spresent\n",sh->bBox?"":"not ");
+ pr_indent(fp,indent);
+ fprintf(fp,"bTop = %spresent\n",sh->bTop?"":"not ");
+ pr_indent(fp,indent);
+ fprintf(fp,"bX = %spresent\n",sh->bX?"":"not ");
+ pr_indent(fp,indent);
+ fprintf(fp,"bV = %spresent\n",sh->bV?"":"not ");
+ pr_indent(fp,indent);
+ fprintf(fp,"bF = %spresent\n",sh->bF?"":"not ");
+
+ pr_indent(fp,indent);
+ fprintf(fp,"natoms = %d\n",sh->natoms);
+ pr_indent(fp,indent);
+ fprintf(fp,"lambda = %e\n",sh->lambda);
+ }
+}
+
+void pr_commrec(FILE *fp,int indent,t_commrec *cr)
+{
+ pr_indent(fp,indent);
+ fprintf(fp,"commrec:\n");
+ indent+=2;
+ pr_indent(fp,indent);
+ fprintf(fp,"nodeid = %d\n",cr->nodeid);
+ pr_indent(fp,indent);
+ fprintf(fp,"nnodes = %d\n",cr->nnodes);
+ pr_indent(fp,indent);
+ fprintf(fp,"npmenodes = %d\n",cr->npmenodes);
+ /*
+ pr_indent(fp,indent);
+ fprintf(fp,"threadid = %d\n",cr->threadid);
+ pr_indent(fp,indent);
+ fprintf(fp,"nthreads = %d\n",cr->nthreads);
+ */
+}
--- /dev/null
--- /dev/null
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
++ *
++ * This source code is part of
++ *
++ * G R O M A C S
++ *
++ * GROningen MAchine for Chemical Simulations
++ *
++ * VERSION 3.2.03
++ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
++ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
++ * Copyright (c) 2001-2004, The GROMACS development team,
++ * check out http://www.gromacs.org for more information.
++
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * If you want to redistribute modifications, please consider that
++ * scientific software is very special. Version control is crucial -
++ * bugs must be traceable. We will be happy to consider code for
++ * inclusion in the official distribution, but derived work must not
++ * be called official GROMACS. Details are found in the README & COPYING
++ * files - if they are missing, get the official version at www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the papers on the package - you can find them in the top README file.
++ *
++ * For more info, check our website at http://www.gromacs.org
++ *
++ * And Hey:
++ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
++ */
++#ifdef HAVE_CONFIG_H
++#include <config.h>
++#endif
++
++#include <assert.h>
++
++#include <sys/types.h>
++#include <math.h>
++#include "typedefs.h"
++#include "physics.h"
++#include "smalloc.h"
++#include "gmx_fatal.h"
++#include "macros.h"
++#include "vec.h"
++#include "coulomb.h"
++#include "calc_verletbuf.h"
++#include "../mdlib/nbnxn_consts.h"
++
++/* Struct for unique atom type for calculating the energy drift.
++ * The atom displacement depends on mass and constraints.
++ * The energy jump for given distance depend on LJ type and q.
++ */
++typedef struct
++{
++ real mass; /* mass */
++ int type; /* type (used for LJ parameters) */
++ real q; /* charge */
++ int con; /* constrained: 0, else 1, if 1, use #DOF=2 iso 3 */
++ int n; /* total #atoms of this type in the system */
++} verletbuf_atomtype_t;
++
++
++void verletbuf_get_list_setup(gmx_bool bGPU,
++ verletbuf_list_setup_t *list_setup)
++{
++ list_setup->cluster_size_i = NBNXN_CPU_CLUSTER_I_SIZE;
++
++ if (bGPU)
++ {
++ list_setup->cluster_size_j = NBNXN_GPU_CLUSTER_SIZE;
++ }
++ else
++ {
++#ifndef GMX_X86_SSE2
++ list_setup->cluster_size_j = NBNXN_CPU_CLUSTER_I_SIZE;
++#else
++ int simd_width;
++
++#ifdef GMX_X86_AVX_256
++ simd_width = 256;
++#else
++ simd_width = 128;
++#endif
++ list_setup->cluster_size_j = simd_width/(sizeof(real)*8);
++#endif
++ }
++}
++
++static void add_at(verletbuf_atomtype_t **att_p,int *natt_p,
++ real mass,int type,real q,int con,int nmol)
++{
++ verletbuf_atomtype_t *att;
++ int natt,i;
++
++ if (mass == 0)
++ {
++ /* Ignore massless particles */
++ return;
++ }
++
++ att = *att_p;
++ natt = *natt_p;
++
++ i = 0;
++ while (i < natt &&
++ !(mass == att[i].mass &&
++ type == att[i].type &&
++ q == att[i].q &&
++ con == att[i].con))
++ {
++ i++;
++ }
++
++ if (i < natt)
++ {
++ att[i].n += nmol;
++ }
++ else
++ {
++ (*natt_p)++;
++ srenew(*att_p,*natt_p);
++ (*att_p)[i].mass = mass;
++ (*att_p)[i].type = type;
++ (*att_p)[i].q = q;
++ (*att_p)[i].con = con;
++ (*att_p)[i].n = nmol;
++ }
++}
++
++static void get_verlet_buffer_atomtypes(const gmx_mtop_t *mtop,
++ verletbuf_atomtype_t **att_p,
++ int *natt_p,
++ int *n_nonlin_vsite)
++{
++ verletbuf_atomtype_t *att;
++ int natt;
++ int mb,nmol,ft,i,j,a1,a2,a3,a;
++ const t_atoms *atoms;
++ const t_ilist *il;
++ const t_atom *at;
++ const t_iparams *ip;
++ real *con_m,*vsite_m,cam[5];
++
++ att = NULL;
++ natt = 0;
++
++ if (n_nonlin_vsite != NULL)
++ {
++ *n_nonlin_vsite = 0;
++ }
++
++ for(mb=0; mb<mtop->nmolblock; mb++)
++ {
++ nmol = mtop->molblock[mb].nmol;
++
++ atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
++
++ /* Check for constraints, as they affect the kinetic energy */
++ snew(con_m,atoms->nr);
++ snew(vsite_m,atoms->nr);
++
++ for(ft=F_CONSTR; ft<=F_CONSTRNC; ft++)
++ {
++ il = &mtop->moltype[mtop->molblock[mb].type].ilist[ft];
++
++ for(i=0; i<il->nr; i+=1+NRAL(ft))
++ {
++ a1 = il->iatoms[i+1];
++ a2 = il->iatoms[i+2];
++ con_m[a1] += atoms->atom[a2].m;
++ con_m[a2] += atoms->atom[a1].m;
++ }
++ }
++
++ il = &mtop->moltype[mtop->molblock[mb].type].ilist[F_SETTLE];
++
++ for(i=0; i<il->nr; i+=1+NRAL(F_SETTLE))
++ {
++ a1 = il->iatoms[i+1];
++ a2 = il->iatoms[i+2];
++ a3 = il->iatoms[i+3];
++ con_m[a1] += atoms->atom[a2].m + atoms->atom[a3].m;
++ con_m[a2] += atoms->atom[a1].m + atoms->atom[a3].m;
++ con_m[a3] += atoms->atom[a1].m + atoms->atom[a2].m;
++ }
++
++ /* Check for virtual sites, determine mass from constructing atoms */
++ for(ft=0; ft<F_NRE; ft++)
++ {
++ if (IS_VSITE(ft))
++ {
++ il = &mtop->moltype[mtop->molblock[mb].type].ilist[ft];
++
++ for(i=0; i<il->nr; i+=1+NRAL(ft))
++ {
++ ip = &mtop->ffparams.iparams[il->iatoms[i]];
++
++ a1 = il->iatoms[i+1];
++
++ for(j=1; j<NRAL(ft); j++)
++ {
++ cam[j] = atoms->atom[il->iatoms[i+1+j]].m;
++ if (cam[j] == 0)
++ {
++ cam[j] = vsite_m[il->iatoms[i+1+j]];
++ }
++ if (cam[j] == 0)
++ {
++ gmx_fatal(FARGS,"In molecule type '%s' %s construction involves atom %d, which is a virtual site of equal or high complexity. This is not supported.",
++ *mtop->moltype[mtop->molblock[mb].type].name,
++ interaction_function[ft].longname,
++ il->iatoms[i+1+j]+1);
++ }
++ }
++
++ switch(ft)
++ {
++ case F_VSITE2:
++ /* Exact except for ignoring constraints */
++ vsite_m[a1] = (cam[2]*sqr(1-ip->vsite.a) + cam[1]*sqr(ip->vsite.a))/(cam[1]*cam[2]);
++ break;
++ case F_VSITE3:
++ /* Exact except for ignoring constraints */
++ vsite_m[a1] = (cam[2]*cam[3]*sqr(1-ip->vsite.a-ip->vsite.b) + cam[1]*cam[3]*sqr(ip->vsite.a) + cam[1]*cam[2]*sqr(ip->vsite.b))/(cam[1]*cam[2]*cam[3]);
++ break;
++ default:
++ /* Use the mass of the lightest constructing atom.
++ * This is an approximation.
++ * If the distance of the virtual site to the
++ * constructing atom is less than all distances
++ * between constructing atoms, this is a safe
++ * over-estimate of the displacement of the vsite.
++                             * This condition holds for all H mass replacement
++                             * vsite constructions, except for SP2/3
++ * groups. In SP3 groups one H will have a F_VSITE3
++ * construction, so even there the total drift
++ * estimation shouldn't be far off.
++ */
++ assert(j>=1);
++ vsite_m[a1] = cam[1];
++ for(j=2; j<NRAL(ft); j++)
++ {
++ vsite_m[a1] = min(vsite_m[a1],cam[j]);
++ }
++ if (n_nonlin_vsite != NULL)
++ {
++ *n_nonlin_vsite += nmol;
++ }
++ break;
++ }
++ }
++ }
++ }
++
++ for(a=0; a<atoms->nr; a++)
++ {
++ at = &atoms->atom[a];
++ /* We consider an atom constrained, #DOF=2, when it is
++ * connected with constraints to one or more atoms with
++         * total mass larger than 1.5 times that of the atom itself.
++ */
++ add_at(&att,&natt,
++ at->m,at->type,at->q,con_m[a] > 1.5*at->m,nmol);
++ }
++
++ sfree(vsite_m);
++ sfree(con_m);
++ }
++
++ if (gmx_debug_at)
++ {
++ for(a=0; a<natt; a++)
++ {
++ fprintf(debug,"type %d: m %5.2f t %d q %6.3f con %d n %d\n",
++ a,att[a].mass,att[a].type,att[a].q,att[a].con,att[a].n);
++ }
++ }
++
++ *att_p = att;
++ *natt_p = natt;
++}
++
++static void approx_2dof(real s2,real x,
++ real *shift,real *scale)
++{
++ /* A particle with 1 DOF constrained has 2 DOFs instead of 3.
++ * This code is also used for particles with multiple constraints,
++ * in which case we overestimate the displacement.
++ * The 2DOF distribution is sqrt(pi/2)*erfc(r/(sqrt(2)*s))/(2*s).
++ * We approximate this with scale*Gaussian(s,r+shift),
++ * by matching the distribution value and derivative at x.
++ * This is a tight overestimate for all r>=0 at any s and x.
++ */
++ real ex,er;
++
++ ex = exp(-x*x/(2*s2));
++ er = gmx_erfc(x/sqrt(2*s2));
++
++ *shift = -x + sqrt(2*s2/M_PI)*ex/er;
++ *scale = 0.5*M_PI*exp(ex*ex/(M_PI*er*er))*er;
++}
++
++static real ener_drift(const verletbuf_atomtype_t *att,int natt,
++ const gmx_ffparams_t *ffp,
++ real kT_fac,
++ real md_ljd,real md_ljr,real md_el,real dd_el,
++ real r_buffer,
++ real rlist,real boxvol)
++{
++ double drift_tot,pot1,pot2,pot;
++ int i,j;
++ real s2i,s2j,s2,s;
++ int ti,tj;
++ real md,dd;
++ real sc_fac,rsh;
++ double c_exp,c_erfc;
++
++ drift_tot = 0;
++
++ /* Loop over the different atom type pairs */
++ for(i=0; i<natt; i++)
++ {
++ s2i = kT_fac/att[i].mass;
++ ti = att[i].type;
++
++ for(j=i; j<natt; j++)
++ {
++ s2j = kT_fac/att[j].mass;
++ tj = att[j].type;
++
++ /* Note that attractive and repulsive potentials for individual
++ * pairs will partially cancel.
++ */
++ /* -dV/dr at the cut-off for LJ + Coulomb */
++ md =
++ md_ljd*ffp->iparams[ti*ffp->atnr+tj].lj.c6 +
++ md_ljr*ffp->iparams[ti*ffp->atnr+tj].lj.c12 +
++ md_el*att[i].q*att[j].q;
++
++ /* d2V/dr2 at the cut-off for Coulomb, we neglect LJ */
++ dd = dd_el*att[i].q*att[j].q;
++
++ s2 = s2i + s2j;
++
++ rsh = r_buffer;
++ sc_fac = 1.0;
++ /* For constraints: adapt r and scaling for the Gaussian */
++ if (att[i].con)
++ {
++ real sh,sc;
++ approx_2dof(s2i,r_buffer*s2i/s2,&sh,&sc);
++ rsh += sh;
++ sc_fac *= sc;
++ }
++ if (att[j].con)
++ {
++ real sh,sc;
++ approx_2dof(s2j,r_buffer*s2j/s2,&sh,&sc);
++ rsh += sh;
++ sc_fac *= sc;
++ }
++
++ /* Exact contribution of an atom pair with Gaussian displacement
++ * with sigma s to the energy drift for a potential with
++ * derivative -md and second derivative dd at the cut-off.
++ * The only catch is that for potentials that change sign
++ * near the cut-off there could be an unlucky compensation
++ * of positive and negative energy drift.
++ * Such potentials are extremely rare though.
++ *
++ * Note that pot has unit energy*length, as the linear
++ * atom density still needs to be put in.
++ */
++ c_exp = exp(-rsh*rsh/(2*s2))/sqrt(2*M_PI);
++ c_erfc = 0.5*gmx_erfc(rsh/(sqrt(2*s2)));
++ s = sqrt(s2);
++
++ pot1 = sc_fac*
++ md/2*((rsh*rsh + s2)*c_erfc - rsh*s*c_exp);
++ pot2 = sc_fac*
++ dd/6*(s*(rsh*rsh + 2*s2)*c_exp - rsh*(rsh*rsh + 3*s2)*c_erfc);
++ pot = pot1 + pot2;
++
++ if (gmx_debug_at)
++ {
++ fprintf(debug,"n %d %d d s %.3f %.3f con %d md %8.1e dd %8.1e pot1 %8.1e pot2 %8.1e pot %8.1e\n",
++ att[i].n,att[j].n,sqrt(s2i),sqrt(s2j),
++ att[i].con+att[j].con,
++ md,dd,pot1,pot2,pot);
++ }
++
++ /* Multiply by the number of atom pairs */
++ if (j == i)
++ {
++ pot *= (double)att[i].n*(att[i].n - 1)/2;
++ }
++ else
++ {
++ pot *= (double)att[i].n*att[j].n;
++ }
++ /* We need the line density to get the energy drift of the system.
++ * The effective average r^2 is close to (rlist+sigma)^2.
++ */
++ pot *= 4*M_PI*sqr(rlist + s)/boxvol;
++
++ /* Add the unsigned drift to avoid cancellation of errors */
++ drift_tot += fabs(pot);
++ }
++ }
++
++ return drift_tot;
++}
++
++static real surface_frac(int cluster_size,real particle_distance,real rlist)
++{
++ real d,area_rel;
++
++ if (rlist < 0.5*particle_distance)
++ {
++ /* We have non overlapping spheres */
++ return 1.0;
++ }
++
++ /* Half the inter-particle distance relative to rlist */
++ d = 0.5*particle_distance/rlist;
++
++ /* Determine the area of the surface at distance rlist to the closest
++ * particle, relative to surface of a sphere of radius rlist.
++ * The formulas below assume close to cubic cells for the pair search grid,
++ * which the pair search code tries to achieve.
++ * Note that in practice particle distances will not be delta distributed,
++ * but have some spread, often involving shorter distances,
++ * as e.g. O-H bonds in a water molecule. Thus the estimates below will
++ * usually be slightly too high and thus conservative.
++ */
++ switch (cluster_size)
++ {
++ case 1:
++ /* One particle: trivial */
++ area_rel = 1.0;
++ break;
++ case 2:
++ /* Two particles: two spheres at fractional distance 2*a */
++ area_rel = 1.0 + d;
++ break;
++ case 4:
++ /* We assume a perfect, symmetric tetrahedron geometry.
++ * The surface around a tetrahedron is too complex for a full
++ * analytical solution, so we use a Taylor expansion.
++ */
++ area_rel = (1.0 + 1/M_PI*(6*acos(1/sqrt(3))*d +
++ sqrt(3)*d*d*(1.0 +
++ 5.0/18.0*d*d +
++ 7.0/45.0*d*d*d*d +
++ 83.0/756.0*d*d*d*d*d*d)));
++ break;
++ default:
++ gmx_incons("surface_frac called with unsupported cluster_size");
++ area_rel = 1.0;
++ }
++
++ return area_rel/cluster_size;
++}
++
++void calc_verlet_buffer_size(const gmx_mtop_t *mtop,real boxvol,
++ const t_inputrec *ir,real drift_target,
++ const verletbuf_list_setup_t *list_setup,
++ int *n_nonlin_vsite,
++ real *rlist)
++{
++ double resolution;
++ char *env;
++
++ real particle_distance;
++ real nb_clust_frac_pairs_not_in_list_at_cutoff;
++
++ verletbuf_atomtype_t *att=NULL;
++ int natt=-1,i;
++ double reppow;
++ real md_ljd,md_ljr,md_el,dd_el;
++ real elfac;
++ real kT_fac,mass_min;
++ int ib0,ib1,ib;
++ real rb,rl;
++ real drift;
++
++ /* Resolution of the buffer size */
++ resolution = 0.001;
++
++ env = getenv("GMX_VERLET_BUFFER_RES");
++ if (env != NULL)
++ {
++ sscanf(env,"%lf",&resolution);
++ }
++
++ /* In an atom wise pair-list there would be no pairs in the list
++ * beyond the pair-list cut-off.
++ * However, we use a pair-list of groups vs groups of atoms.
++     * For groups of 4 atoms, which is the parallelism of SSE instructions,
++     * only 10% of the atom pairs are not in the list just beyond the cut-off.
++ * As this percentage increases slowly compared to the decrease of the
++ * Gaussian displacement distribution over this range, we can simply
++ * reduce the drift by this fraction.
++ * For larger groups, e.g. of 8 atoms, this fraction will be lower,
++ * so then buffer size will be on the conservative (large) side.
++ *
++ * Note that the formulas used here do not take into account
++ * cancellation of errors which could occur by missing both
++ * attractive and repulsive interactions.
++ *
++ * The only major assumption is homogeneous particle distribution.
++ * For an inhomogeneous system, such as a liquid-vapor system,
++ * the buffer will be underestimated. The actual energy drift
++ * will be higher by the factor: local/homogeneous particle density.
++ *
++     * The results of this estimate have been checked against simulations.
++ * In most cases the real drift differs by less than a factor 2.
++ */
++
++ /* Worst case assumption: HCP packing of particles gives largest distance */
++ particle_distance = pow(boxvol*sqrt(2)/mtop->natoms,1.0/3.0);
++
++ get_verlet_buffer_atomtypes(mtop,&att,&natt,n_nonlin_vsite);
++ assert(att != NULL && natt >= 0);
++
++ if (debug)
++ {
++ fprintf(debug,"particle distance assuming HCP packing: %f nm\n",
++ particle_distance);
++ fprintf(debug,"energy drift atom types: %d\n",natt);
++ }
++
++ reppow = mtop->ffparams.reppow;
++ md_ljd = 0;
++ md_ljr = 0;
++ if (ir->vdwtype == evdwCUT)
++ {
++        /* -dV/dr of -r^-6 and r^-reppow */
++ md_ljd = -6*pow(ir->rvdw,-7.0);
++ md_ljr = reppow*pow(ir->rvdw,-(reppow+1));
++ /* The contribution of the second derivative is negligible */
++ }
++ else
++ {
++ gmx_fatal(FARGS,"Energy drift calculation is only implemented for plain cut-off Lennard-Jones interactions");
++ }
++
++ elfac = ONE_4PI_EPS0/ir->epsilon_r;
++
++ /* Determine md=-dV/dr and dd=d^2V/dr^2 */
++ md_el = 0;
++ dd_el = 0;
++ if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype))
++ {
++ real eps_rf,k_rf;
++
++ if (ir->coulombtype == eelCUT)
++ {
++ eps_rf = 1;
++ k_rf = 0;
++ }
++ else
++ {
++ eps_rf = ir->epsilon_rf/ir->epsilon_r;
++ if (eps_rf != 0)
++ {
++ k_rf = pow(ir->rcoulomb,-3.0)*(eps_rf - ir->epsilon_r)/(2*eps_rf + ir->epsilon_r);
++ }
++ else
++ {
++ /* epsilon_rf = infinity */
++ k_rf = 0.5*pow(ir->rcoulomb,-3.0);
++ }
++ }
++
++ if (eps_rf > 0)
++ {
++ md_el = elfac*(pow(ir->rcoulomb,-2.0) - 2*k_rf*ir->rcoulomb);
++ }
++ dd_el = elfac*(2*pow(ir->rcoulomb,-3.0) + 2*k_rf);
++ }
++ else if (EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD)
++ {
++ real b,rc,br;
++
++ b = calc_ewaldcoeff(ir->rcoulomb,ir->ewald_rtol);
++ rc = ir->rcoulomb;
++ br = b*rc;
++ md_el = elfac*(2*b*exp(-br*br)/(sqrt(M_PI)*rc) + gmx_erfc(br)/(rc*rc));
++ dd_el = elfac/(rc*rc)*(4*b*(1 + br*br)*exp(-br*br)/sqrt(M_PI) + 2*gmx_erfc(br)/rc);
++ }
++ else
++ {
++ gmx_fatal(FARGS,"Energy drift calculation is only implemented for Reaction-Field and Ewald electrostatics");
++ }
++
++ /* Determine the variance of the atomic displacement
++ * over nstlist-1 steps: kT_fac
++ * For inertial dynamics (not Brownian dynamics) the mass factor
++ * is not included in kT_fac, it is added later.
++ */
++ if (ir->eI == eiBD)
++ {
++ /* Get the displacement distribution from the random component only.
++ * With accurate integration the systematic (force) displacement
++ * should be negligible (unless nstlist is extremely large, which
++ * you wouldn't do anyhow).
++ */
++ kT_fac = 2*BOLTZ*ir->opts.ref_t[0]*(ir->nstlist-1)*ir->delta_t;
++ if (ir->bd_fric > 0)
++ {
++ /* This is directly sigma^2 of the displacement */
++ kT_fac /= ir->bd_fric;
++
++ /* Set the masses to 1 as kT_fac is the full sigma^2,
++ * but we divide by m in ener_drift().
++ */
++ for(i=0; i<natt; i++)
++ {
++ att[i].mass = 1;
++ }
++ }
++ else
++ {
++ real tau_t;
++
++ /* Per group tau_t is not implemented yet, use the maximum */
++ tau_t = ir->opts.tau_t[0];
++ for(i=1; i<ir->opts.ngtc; i++)
++ {
++ tau_t = max(tau_t,ir->opts.tau_t[i]);
++ }
++
++ kT_fac *= tau_t;
++ /* This kT_fac needs to be divided by the mass to get sigma^2 */
++ }
++ }
++ else
++ {
++ kT_fac = BOLTZ*ir->opts.ref_t[0]*sqr((ir->nstlist-1)*ir->delta_t);
++ }
++
++ mass_min = att[0].mass;
++ for(i=1; i<natt; i++)
++ {
++ mass_min = min(mass_min,att[i].mass);
++ }
++
++ if (debug)
++ {
++ fprintf(debug,"md_ljd %e md_ljr %e\n",md_ljd,md_ljr);
++ fprintf(debug,"md_el %e dd_el %e\n",md_el,dd_el);
++ fprintf(debug,"sqrt(kT_fac) %f\n",sqrt(kT_fac));
++ fprintf(debug,"mass_min %f\n",mass_min);
++ }
++
++ /* Search using bisection */
++ ib0 = -1;
++    /* The drift will be negligible at 5 times the max sigma */
++ ib1 = (int)(5*2*sqrt(kT_fac/mass_min)/resolution) + 1;
++ while (ib1 - ib0 > 1)
++ {
++ ib = (ib0 + ib1)/2;
++ rb = ib*resolution;
++ rl = max(ir->rvdw,ir->rcoulomb) + rb;
++
++ /* Calculate the average energy drift at the last step
++ * of the nstlist steps at which the pair-list is used.
++ */
++ drift = ener_drift(att,natt,&mtop->ffparams,
++ kT_fac,
++ md_ljd,md_ljr,md_el,dd_el,rb,
++ rl,boxvol);
++
++ /* Correct for the fact that we are using a Ni x Nj particle pair list
++ * and not a 1 x 1 particle pair list. This reduces the drift.
++ */
++ /* We don't have a formula for 8 (yet), use 4 which is conservative */
++ nb_clust_frac_pairs_not_in_list_at_cutoff =
++ surface_frac(min(list_setup->cluster_size_i,4),
++ particle_distance,rl)*
++ surface_frac(min(list_setup->cluster_size_j,4),
++ particle_distance,rl);
++ drift *= nb_clust_frac_pairs_not_in_list_at_cutoff;
++
++ /* Convert the drift to drift per unit time per atom */
++ drift /= ir->nstlist*ir->delta_t*mtop->natoms;
++
++ if (debug)
++ {
++ fprintf(debug,"ib %3d %3d %3d rb %.3f %dx%d fac %.3f drift %f\n",
++ ib0,ib,ib1,rb,
++ list_setup->cluster_size_i,list_setup->cluster_size_j,
++ nb_clust_frac_pairs_not_in_list_at_cutoff,
++ drift);
++ }
++
++ if (fabs(drift) > drift_target)
++ {
++ ib0 = ib;
++ }
++ else
++ {
++ ib1 = ib;
++ }
++ }
++
++ sfree(att);
++
++ *rlist = max(ir->rvdw,ir->rcoulomb) + ib1*resolution;
++}
--- /dev/null
- /* BASIC CUT-OFF STUFF */
- if (ir->rcoulomb < 0)
- {
- warning_error(wi,"rcoulomb should be >= 0");
- }
- if (ir->rvdw < 0)
- {
- warning_error(wi,"rvdw should be >= 0");
- }
- if (ir->rlist < 0)
- {
- warning_error(wi,"rlist should be >= 0");
- }
- if (ir->rlist == 0 ||
- !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
- (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > ir->rlist))) {
- /* No switched potential and/or no twin-range:
- * we can set the long-range cut-off to the maximum of the other cut-offs.
- */
- ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
- } else if (ir->rlistlong < 0) {
- ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
- sprintf(warn_buf,"rlistlong was not set, setting it to %g (no buffer)",
- ir->rlistlong);
- warning(wi,warn_buf);
- }
- if (ir->rlistlong == 0 && ir->ePBC != epbcNONE) {
- warning_error(wi,"Can not have an infinite cut-off with PBC");
- }
- if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist)) {
- warning_error(wi,"rlistlong can not be shorter than rlist");
- }
- if (IR_TWINRANGE(*ir) && ir->nstlist <= 0) {
- warning_error(wi,"Can not have nstlist<=0 with twin-range interactions");
- }
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "sysstuff.h"
+#include "smalloc.h"
+#include "typedefs.h"
+#include "physics.h"
+#include "names.h"
+#include "gmx_fatal.h"
+#include "macros.h"
+#include "index.h"
+#include "symtab.h"
+#include "string2.h"
+#include "readinp.h"
+#include "warninp.h"
+#include "readir.h"
+#include "toputil.h"
+#include "index.h"
+#include "network.h"
+#include "vec.h"
+#include "pbc.h"
+#include "mtop_util.h"
+#include "chargegroup.h"
+#include "inputrec.h"
+
+#define MAXPTR 254
+#define NOGID 255
+#define MAXLAMBDAS 1024
+
+/* Resource parameters
+ * Do not change any of these until you read the instruction
+ * in readinp.h. Some cpp's do not take spaces after the backslash
+ * (like the c-shell), which will give you a very weird compiler
+ * message.
+ */
+
+static char tcgrps[STRLEN],tau_t[STRLEN],ref_t[STRLEN],
+ acc[STRLEN],accgrps[STRLEN],freeze[STRLEN],frdim[STRLEN],
+ energy[STRLEN],user1[STRLEN],user2[STRLEN],vcm[STRLEN],xtc_grps[STRLEN],
+ couple_moltype[STRLEN],orirefitgrp[STRLEN],egptable[STRLEN],egpexcl[STRLEN],
+ wall_atomtype[STRLEN],wall_density[STRLEN],deform[STRLEN],QMMM[STRLEN];
+static char fep_lambda[efptNR][STRLEN];
+static char lambda_weights[STRLEN];
+static char **pull_grp;
+static char **rot_grp;
+static char anneal[STRLEN],anneal_npoints[STRLEN],
+ anneal_time[STRLEN],anneal_temp[STRLEN];
+static char QMmethod[STRLEN],QMbasis[STRLEN],QMcharge[STRLEN],QMmult[STRLEN],
+ bSH[STRLEN],CASorbitals[STRLEN], CASelectrons[STRLEN],SAon[STRLEN],
+ SAoff[STRLEN],SAsteps[STRLEN],bTS[STRLEN],bOPT[STRLEN];
+static char efield_x[STRLEN],efield_xt[STRLEN],efield_y[STRLEN],
+ efield_yt[STRLEN],efield_z[STRLEN],efield_zt[STRLEN];
+
+enum {
+ egrptpALL, /* All particles have to be a member of a group. */
+ egrptpALL_GENREST, /* A rest group with name is generated for particles *
+ * that are not part of any group. */
+ egrptpPART, /* As egrptpALL_GENREST, but no name is generated *
+ * for the rest group. */
+ egrptpONE /* Merge all selected groups into one group, *
+ * make a rest group for the remaining particles. */
+};
+
+
+void init_ir(t_inputrec *ir, t_gromppopts *opts)
+{
+ snew(opts->include,STRLEN);
+ snew(opts->define,STRLEN);
+ snew(ir->fepvals,1);
+ snew(ir->expandedvals,1);
+ snew(ir->simtempvals,1);
+}
+
+static void GetSimTemps(int ntemps, t_simtemp *simtemp, double *temperature_lambdas)
+{
+
+ int i;
+
+ for (i=0;i<ntemps;i++)
+ {
+ /* simple linear scaling -- allows more control */
+ if (simtemp->eSimTempScale == esimtempLINEAR)
+ {
+ simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*temperature_lambdas[i];
+ }
+ else if (simtemp->eSimTempScale == esimtempGEOMETRIC) /* should give roughly equal acceptance for constant heat capacity . . . */
+ {
+ simtemp->temperatures[i] = simtemp->simtemp_low * pow(simtemp->simtemp_high/simtemp->simtemp_low,(1.0*i)/(ntemps-1));
+ }
+ else if (simtemp->eSimTempScale == esimtempEXPONENTIAL)
+ {
+ simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1));
+ }
+ else
+ {
+ char errorstr[128];
+ sprintf(errorstr,"eSimTempScale=%d not defined",simtemp->eSimTempScale);
+ gmx_fatal(FARGS,errorstr);
+ }
+ }
+}
+
+
+
+static void _low_check(gmx_bool b,char *s,warninp_t wi)
+{
+ if (b)
+ {
+ warning_error(wi,s);
+ }
+}
+
+static void check_nst(const char *desc_nst,int nst,
+ const char *desc_p,int *p,
+ warninp_t wi)
+{
+ char buf[STRLEN];
+
+ if (*p > 0 && *p % nst != 0)
+ {
+ /* Round up to the next multiple of nst */
+ *p = ((*p)/nst + 1)*nst;
+ sprintf(buf,"%s should be a multiple of %s, changing %s to %d\n",
+ desc_p,desc_nst,desc_p,*p);
+ warning(wi,buf);
+ }
+}
+
+static gmx_bool ir_NVE(const t_inputrec *ir)
+{
+ return ((ir->eI == eiMD || EI_VV(ir->eI)) && ir->etc == etcNO);
+}
+
+static int lcd(int n1,int n2)
+{
+ int d,i;
+
+ d = 1;
+ for(i=2; (i<=n1 && i<=n2); i++)
+ {
+ if (n1 % i == 0 && n2 % i == 0)
+ {
+ d = i;
+ }
+ }
+
+ return d;
+}
+
++static void process_interaction_modifier(const t_inputrec *ir,int *eintmod)
++{
++ if (*eintmod == eintmodPOTSHIFT_VERLET)
++ {
++ if (ir->cutoff_scheme == ecutsVERLET)
++ {
++ *eintmod = eintmodPOTSHIFT;
++ }
++ else
++ {
++ *eintmod = eintmodNONE;
++ }
++ }
++}
++
+void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
+ warninp_t wi)
+/* Check internal consistency */
+{
+ /* Strange macro: first one fills the err_buf, and then one can check
+ * the condition, which will print the message and increase the error
+ * counter.
+ */
+#define CHECK(b) _low_check(b,err_buf,wi)
+ char err_buf[256],warn_buf[STRLEN];
+ int i,j;
+ int ns_type=0;
+ real dt_coupl=0;
+ real dt_pcoupl;
+ int nstcmin;
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+
+ set_warning_line(wi,mdparin,-1);
+
- sprintf(err_buf,"With coulombtype = %s, rcoulomb must be >= rlist",
- eel_names[ir->coulombtype]);
- CHECK(ir->rlist > ir->rcoulomb);
++ /* BASIC CUT-OFF STUFF */
++ if (ir->rcoulomb < 0)
++ {
++ warning_error(wi,"rcoulomb should be >= 0");
++ }
++ if (ir->rvdw < 0)
++ {
++ warning_error(wi,"rvdw should be >= 0");
++ }
++ if (ir->rlist < 0 &&
++ !(ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_drift > 0))
++ {
++ warning_error(wi,"rlist should be >= 0");
++ }
++
++ process_interaction_modifier(ir,&ir->coulomb_modifier);
++ process_interaction_modifier(ir,&ir->vdw_modifier);
++
++ if (ir->cutoff_scheme == ecutsGROUP)
++ {
++ if (ir->coulomb_modifier != eintmodNONE ||
++ ir->vdw_modifier != eintmodNONE)
++ {
++ warning_error(wi,"potential modifiers are not supported (yet) with the group cut-off scheme");
++ }
++
++ /* BASIC CUT-OFF STUFF */
++ if (ir->rlist == 0 ||
++ !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
++ (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > ir->rlist))) {
++ /* No switched potential and/or no twin-range:
++ * we can set the long-range cut-off to the maximum of the other cut-offs.
++ */
++ ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
++ }
++ else if (ir->rlistlong < 0)
++ {
++ ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
++ sprintf(warn_buf,"rlistlong was not set, setting it to %g (no buffer)",
++ ir->rlistlong);
++ warning(wi,warn_buf);
++ }
++ if (ir->rlistlong == 0 && ir->ePBC != epbcNONE)
++ {
++ warning_error(wi,"Can not have an infinite cut-off with PBC");
++ }
++ if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist))
++ {
++ warning_error(wi,"rlistlong can not be shorter than rlist");
++ }
++ if (IR_TWINRANGE(*ir) && ir->nstlist <= 0)
++ {
++ warning_error(wi,"Can not have nstlist<=0 with twin-range interactions");
++ }
++ }
++
++ if (ir->cutoff_scheme == ecutsVERLET)
++ {
++ real rc_max;
++
++ /* Normal Verlet type neighbor-list, currently only limited feature support */
++ if (inputrec2nboundeddim(ir) < 3)
++ {
++ warning_error(wi,"With Verlet lists only full pbc or pbc=xy with walls is supported");
++ }
++ if (ir->rcoulomb != ir->rvdw)
++ {
++ warning_error(wi,"With Verlet lists rcoulomb!=rvdw is not supported");
++ }
++ if (ir->vdwtype != evdwCUT)
++ {
++ warning_error(wi,"With Verlet lists only cut-off LJ interactions are supported");
++ }
++ if (!(ir->coulombtype == eelCUT ||
++ (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC) ||
++ EEL_PME(ir->coulombtype) || ir->coulombtype == eelEWALD))
++ {
++ warning_error(wi,"With Verlet lists only cut-off, reaction-field, PME and Ewald electrostatics are supported");
++ }
++
++ if (ir->nstlist <= 0)
++ {
++ warning_error(wi,"With Verlet lists nstlist should be larger than 0");
++ }
++
++ if (ir->nstlist < 10)
++ {
++ warning_note(wi,"With Verlet lists the optimal nstlist is >= 10, with GPUs >= 20. Note that with the Verlet scheme, nstlist has no effect on the accuracy of your simulation.");
++ }
++
++ rc_max = max(ir->rvdw,ir->rcoulomb);
++
++ if (ir->verletbuf_drift <= 0)
++ {
++ if (ir->verletbuf_drift == 0)
++ {
++ warning_error(wi,"Can not have an energy drift of exactly 0");
++ }
++
++ if (ir->rlist < rc_max)
++ {
++ warning_error(wi,"With verlet lists rlist can not be smaller than rvdw or rcoulomb");
++ }
++
++ if (ir->rlist == rc_max && ir->nstlist > 1)
++ {
++ warning_note(wi,"rlist is equal to rvdw and/or rcoulomb: there is no explicit Verlet buffer. The cluster pair list does have a buffering effect, but choosing a larger rlist might be necessary for good energy conservation.");
++ }
++ }
++ else
++ {
++ if (ir->rlist > rc_max)
++ {
++ warning_note(wi,"You have set rlist larger than the interaction cut-off, but you also have verlet-buffer-drift > 0. Will set rlist using verlet-buffer-drift.");
++ }
++
++ if (ir->nstlist == 1)
++ {
++ /* No buffer required */
++ ir->rlist = rc_max;
++ }
++ else
++ {
++ if (EI_DYNAMICS(ir->eI))
++ {
++ if (EI_MD(ir->eI) && ir->etc == etcNO)
++ {
++ warning_error(wi,"Temperature coupling is required for calculating rlist using the energy drift with verlet-buffer-drift > 0. Either use temperature coupling or set rlist yourself together with verlet-buffer-drift = -1.");
++ }
++
++ if (inputrec2nboundeddim(ir) < 3)
++ {
++ warning_error(wi,"The box volume is required for calculating rlist from the energy drift with verlet-buffer-drift > 0. You are using at least one unbounded dimension, so no volume can be computed. Either use a finite box, or set rlist yourself together with verlet-buffer-drift = -1.");
++ }
++ /* Set rlist temporarily so we can continue processing */
++ ir->rlist = rc_max;
++ }
++ else
++ {
++ /* Set the buffer to 5% of the cut-off */
++ ir->rlist = 1.05*rc_max;
++ }
++ }
++ }
++
++ /* No twin-range calculations with Verlet lists */
++ ir->rlistlong = ir->rlist;
++ }
+
+ /* GENERAL INTEGRATOR STUFF */
+ if (!(ir->eI == eiMD || EI_VV(ir->eI)))
+ {
+ ir->etc = etcNO;
+ }
+ if (ir->eI == eiVVAK) {
+ sprintf(warn_buf,"Integrator method %s is implemented primarily for validation purposes; for molecular dynamics, you should probably be using %s or %s",ei_names[eiVVAK],ei_names[eiMD],ei_names[eiVV]);
+ warning_note(wi,warn_buf);
+ }
+ if (!EI_DYNAMICS(ir->eI))
+ {
+ ir->epc = epcNO;
+ }
+ if (EI_DYNAMICS(ir->eI))
+ {
+ if (ir->nstcalcenergy < 0)
+ {
+ ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir);
+ if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy)
+ {
+ /* nstcalcenergy larger than nstener does not make sense.
+ * We ideally want nstcalcenergy=nstener.
+ */
+ if (ir->nstlist > 0)
+ {
+ ir->nstcalcenergy = lcd(ir->nstenergy,ir->nstlist);
+ }
+ else
+ {
+ ir->nstcalcenergy = ir->nstenergy;
+ }
+ }
+ }
++ else if (ir->nstenergy > 0 && ir->nstcalcenergy > ir->nstenergy)
++ {
++ /* If the user sets nstenergy small, we should respect that */
++ sprintf(warn_buf,"Setting nstcalcenergy (%d) equal to nstenergy (%d)",ir->nstcalcenergy,ir->nstenergy);
++ ir->nstcalcenergy = ir->nstenergy;
++ }
++
+ if (ir->epc != epcNO)
+ {
+ if (ir->nstpcouple < 0)
+ {
+ ir->nstpcouple = ir_optimal_nstpcouple(ir);
+ }
+ }
+ if (IR_TWINRANGE(*ir))
+ {
+ check_nst("nstlist",ir->nstlist,
+ "nstcalcenergy",&ir->nstcalcenergy,wi);
+ if (ir->epc != epcNO)
+ {
+ check_nst("nstlist",ir->nstlist,
+ "nstpcouple",&ir->nstpcouple,wi);
+ }
+ }
+
+ if (ir->nstcalcenergy > 1)
+ {
+ /* for storing exact averages nstenergy should be
+ * a multiple of nstcalcenergy
+ */
+ check_nst("nstcalcenergy",ir->nstcalcenergy,
+ "nstenergy",&ir->nstenergy,wi);
+ if (ir->efep != efepNO)
+ {
+ /* nstdhdl should be a multiple of nstcalcenergy */
+ check_nst("nstcalcenergy",ir->nstcalcenergy,
+ "nstdhdl",&ir->fepvals->nstdhdl,wi);
+ }
+ }
+ }
+
+ /* LD STUFF */
+ if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
+ ir->bContinuation && ir->ld_seed != -1) {
+ warning_note(wi,"You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
+ }
+
+ /* TPI STUFF */
+ if (EI_TPI(ir->eI)) {
+ sprintf(err_buf,"TPI only works with pbc = %s",epbc_names[epbcXYZ]);
+ CHECK(ir->ePBC != epbcXYZ);
+ sprintf(err_buf,"TPI only works with ns = %s",ens_names[ensGRID]);
+ CHECK(ir->ns_type != ensGRID);
+ sprintf(err_buf,"with TPI nstlist should be larger than zero");
+ CHECK(ir->nstlist <= 0);
+ sprintf(err_buf,"TPI does not work with full electrostatics other than PME");
+ CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype));
+ }
+
+ /* SHAKE / LINCS */
+ if ( (opts->nshake > 0) && (opts->bMorse) ) {
+ sprintf(warn_buf,
+ "Using morse bond-potentials while constraining bonds is useless");
+ warning(wi,warn_buf);
+ }
+
+ if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
+ ir->bContinuation && ir->ld_seed != -1) {
+ warning_note(wi,"You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
+ }
+ /* verify simulated tempering options */
+
+ if (ir->bSimTemp) {
+ gmx_bool bAllTempZero = TRUE;
+ for (i=0;i<fep->n_lambda;i++)
+ {
+ sprintf(err_buf,"Entry %d for %s must be between 0 and 1, instead is %g",i,efpt_names[efptTEMPERATURE],fep->all_lambda[efptTEMPERATURE][i]);
+ CHECK((fep->all_lambda[efptTEMPERATURE][i] < 0) || (fep->all_lambda[efptTEMPERATURE][i] > 1));
+ if (fep->all_lambda[efptTEMPERATURE][i] > 0)
+ {
+ bAllTempZero = FALSE;
+ }
+ }
+ sprintf(err_buf,"if simulated tempering is on, temperature-lambdas may not be all zero");
+ CHECK(bAllTempZero==TRUE);
+
+ sprintf(err_buf,"Simulated tempering is currently only compatible with md-vv");
+ CHECK(ir->eI != eiVV);
+
+        /* check compatibility of the temperature coupling with simulated tempering */
+
+ if (ir->etc == etcNOSEHOOVER) {
+ sprintf(warn_buf,"Nose-Hoover based temperature control such as [%s] my not be entirelyconsistent with simulated tempering",etcoupl_names[ir->etc]);
+ warning_note(wi,warn_buf);
+ }
+
+ /* check that the temperatures make sense */
+
+ sprintf(err_buf,"Higher simulated tempering temperature (%g) must be >= than the simulated tempering lower temperature (%g)",ir->simtempvals->simtemp_high,ir->simtempvals->simtemp_low);
+ CHECK(ir->simtempvals->simtemp_high <= ir->simtempvals->simtemp_low);
+
+ sprintf(err_buf,"Higher simulated tempering temperature (%g) must be >= zero",ir->simtempvals->simtemp_high);
+ CHECK(ir->simtempvals->simtemp_high <= 0);
+
+ sprintf(err_buf,"Lower simulated tempering temperature (%g) must be >= zero",ir->simtempvals->simtemp_low);
+ CHECK(ir->simtempvals->simtemp_low <= 0);
+ }
+
+ /* verify free energy options */
+
+ if (ir->efep != efepNO) {
+ fep = ir->fepvals;
+ sprintf(err_buf,"The soft-core power is %d and can only be 1 or 2",
+ fep->sc_power);
+ CHECK(fep->sc_alpha!=0 && fep->sc_power!=1 && fep->sc_power!=2);
+
+ sprintf(err_buf,"The soft-core sc-r-power is %d and can only be 6 or 48",
+ (int)fep->sc_r_power);
+ CHECK(fep->sc_alpha!=0 && fep->sc_r_power!=6.0 && fep->sc_r_power!=48.0);
+
+ /* check validity of options */
+ if (fep->n_lambda > 0 && ir->rlist < max(ir->rvdw,ir->rcoulomb))
+ {
+ sprintf(warn_buf,
+ "For foreign lambda free energy differences it is assumed that the soft-core interactions have no effect beyond the neighborlist cut-off");
+ warning(wi,warn_buf);
+ }
+
+ sprintf(err_buf,"Can't use postive delta-lambda (%g) if initial state/lambda does not start at zero",fep->delta_lambda);
+ CHECK(fep->delta_lambda > 0 && ((fep->init_fep_state !=0) || (fep->init_lambda !=0)));
+
+ sprintf(err_buf,"Can't use postive delta-lambda (%g) with expanded ensemble simulations",fep->delta_lambda);
+ CHECK(fep->delta_lambda > 0 && (ir->efep == efepEXPANDED));
+
+ sprintf(err_buf,"Free-energy not implemented for Ewald");
+ CHECK(ir->coulombtype==eelEWALD);
+
+        /* check validity of lambda inputs */
+ sprintf(err_buf,"initial thermodynamic state %d does not exist, only goes to %d",fep->init_fep_state,fep->n_lambda);
+ CHECK((fep->init_fep_state > fep->n_lambda));
+
+ for (j=0;j<efptNR;j++)
+ {
+ for (i=0;i<fep->n_lambda;i++)
+ {
+ sprintf(err_buf,"Entry %d for %s must be between 0 and 1, instead is %g",i,efpt_names[j],fep->all_lambda[j][i]);
+ CHECK((fep->all_lambda[j][i] < 0) || (fep->all_lambda[j][i] > 1));
+ }
+ }
+
+ if ((fep->sc_alpha>0) && (!fep->bScCoul))
+ {
+ for (i=0;i<fep->n_lambda;i++)
+ {
+ sprintf(err_buf,"For state %d, vdw-lambdas (%f) is changing with vdw softcore, while coul-lambdas (%f) is nonzero without coulomb softcore: this will lead to crashes, and is not supported.",i,fep->all_lambda[efptVDW][i],
+ fep->all_lambda[efptCOUL][i]);
+ CHECK((fep->sc_alpha>0) &&
+ (((fep->all_lambda[efptCOUL][i] > 0.0) &&
+ (fep->all_lambda[efptCOUL][i] < 1.0)) &&
+ ((fep->all_lambda[efptVDW][i] > 0.0) &&
+ (fep->all_lambda[efptVDW][i] < 1.0))));
+ }
+ }
+
+ if ((fep->bScCoul) && (EEL_PME(ir->coulombtype)))
+ {
+ sprintf(warn_buf,"With coulomb soft core, the reciprocal space calculation will not necessarily cancel. It may be necessary to decrease the reciprocal space energy, and increase the cutoff radius to get sufficiently close matches to energies with free energy turned off.");
+ warning(wi, warn_buf);
+ }
+
+ /* Free Energy Checks -- In an ideal world, slow growth and FEP would
+ be treated differently, but that's the next step */
+
+ for (i=0;i<efptNR;i++) {
+ for (j=0;j<fep->n_lambda;j++) {
+ sprintf(err_buf,"%s[%d] must be between 0 and 1",efpt_names[i],j);
+ CHECK((fep->all_lambda[i][j] < 0) || (fep->all_lambda[i][j] > 1));
+ }
+ }
+ }
+
+ if ((ir->bSimTemp) || (ir->efep == efepEXPANDED)) {
+ fep = ir->fepvals;
+ expand = ir->expandedvals;
+
+ /* checking equilibration of weights inputs for validity */
+
+ sprintf(err_buf,"weight-equil-number-all-lambda (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_n_at_lam,elmceq_names[elmceqNUMATLAM]);
+ CHECK((expand->equil_n_at_lam>0) && (expand->elmceq!=elmceqNUMATLAM));
+
+ sprintf(err_buf,"weight-equil-number-samples (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_samples,elmceq_names[elmceqSAMPLES]);
+ CHECK((expand->equil_samples>0) && (expand->elmceq!=elmceqSAMPLES));
+
+ sprintf(err_buf,"weight-equil-number-steps (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_steps,elmceq_names[elmceqSTEPS]);
+ CHECK((expand->equil_steps>0) && (expand->elmceq!=elmceqSTEPS));
+
+ sprintf(err_buf,"weight-equil-wl-delta (%d) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_samples,elmceq_names[elmceqWLDELTA]);
+ CHECK((expand->equil_wl_delta>0) && (expand->elmceq!=elmceqWLDELTA));
+
+ sprintf(err_buf,"weight-equil-count-ratio (%f) is ignored if lmc-weights-equil is not equal to %s",
+ expand->equil_ratio,elmceq_names[elmceqRATIO]);
+ CHECK((expand->equil_ratio>0) && (expand->elmceq!=elmceqRATIO));
+
+ sprintf(err_buf,"weight-equil-number-all-lambda (%d) must be a positive integer if lmc-weights-equil=%s",
+ expand->equil_n_at_lam,elmceq_names[elmceqNUMATLAM]);
+ CHECK((expand->equil_n_at_lam<=0) && (expand->elmceq==elmceqNUMATLAM));
+
+ sprintf(err_buf,"weight-equil-number-samples (%d) must be a positive integer if lmc-weights-equil=%s",
+ expand->equil_samples,elmceq_names[elmceqSAMPLES]);
+ CHECK((expand->equil_samples<=0) && (expand->elmceq==elmceqSAMPLES));
+
+ sprintf(err_buf,"weight-equil-number-steps (%d) must be a positive integer if lmc-weights-equil=%s",
+ expand->equil_steps,elmceq_names[elmceqSTEPS]);
+ CHECK((expand->equil_steps<=0) && (expand->elmceq==elmceqSTEPS));
+
+ sprintf(err_buf,"weight-equil-wl-delta (%f) must be > 0 if lmc-weights-equil=%s",
+ expand->equil_wl_delta,elmceq_names[elmceqWLDELTA]);
+ CHECK((expand->equil_wl_delta<=0) && (expand->elmceq==elmceqWLDELTA));
+
+ sprintf(err_buf,"weight-equil-count-ratio (%f) must be > 0 if lmc-weights-equil=%s",
+ expand->equil_ratio,elmceq_names[elmceqRATIO]);
+ CHECK((expand->equil_ratio<=0) && (expand->elmceq==elmceqRATIO));
+
+ sprintf(err_buf,"lmc-weights-equil=%s only possible when lmc-stats = %s or lmc-stats %s",
+ elmceq_names[elmceqWLDELTA],elamstats_names[elamstatsWL],elamstats_names[elamstatsWWL]);
+ CHECK((expand->elmceq==elmceqWLDELTA) && (!EWL(expand->elamstats)));
+
+ sprintf(err_buf,"lmc-repeats (%d) must be greater than 0",expand->lmc_repeats);
+ CHECK((expand->lmc_repeats <= 0));
+ sprintf(err_buf,"minimum-var-min (%d) must be greater than 0",expand->minvarmin);
+ CHECK((expand->minvarmin <= 0));
+ sprintf(err_buf,"weight-c-range (%d) must be greater or equal to 0",expand->c_range);
+ CHECK((expand->c_range < 0));
+ sprintf(err_buf,"init-lambda-state (%d) must be zero if lmc-forced-nstart (%d)> 0 and lmc-move != 'no'",
+ fep->init_fep_state, expand->lmc_forced_nstart);
+ CHECK((fep->init_fep_state!=0) && (expand->lmc_forced_nstart>0) && (expand->elmcmove!=elmcmoveNO));
+ sprintf(err_buf,"lmc-forced-nstart (%d) must not be negative",expand->lmc_forced_nstart);
+ CHECK((expand->lmc_forced_nstart < 0));
+ sprintf(err_buf,"init-lambda-state (%d) must be in the interval [0,number of lambdas)",fep->init_fep_state);
+ CHECK((fep->init_fep_state < 0) || (fep->init_fep_state >= fep->n_lambda));
+
+ sprintf(err_buf,"init-wl-delta (%f) must be greater than or equal to 0",expand->init_wl_delta);
+ CHECK((expand->init_wl_delta < 0));
+ sprintf(err_buf,"wl-ratio (%f) must be between 0 and 1",expand->wl_ratio);
+ CHECK((expand->wl_ratio <= 0) || (expand->wl_ratio >= 1));
+ sprintf(err_buf,"wl-scale (%f) must be between 0 and 1",expand->wl_scale);
+ CHECK((expand->wl_scale <= 0) || (expand->wl_scale >= 1));
+
+ /* if there is no temperature control, we need to specify an MC temperature */
+ sprintf(err_buf,"If there is no temperature control, and lmc-mcmove!= 'no',mc_temperature must be set to a positive number");
+ if (expand->nstTij > 0)
+ {
+ sprintf(err_buf,"nst-transition-matrix (%d) must be an integer multiple of nstlog (%d)",
+ expand->nstTij,ir->nstlog);
+ CHECK((mod(expand->nstTij,ir->nstlog)!=0));
+ }
+ }
+
+ /* PBC/WALLS */
+ sprintf(err_buf,"walls only work with pbc=%s",epbc_names[epbcXY]);
+ CHECK(ir->nwall && ir->ePBC!=epbcXY);
+
+ /* VACUUM STUFF */
+ if (ir->ePBC != epbcXYZ && ir->nwall != 2) {
+ if (ir->ePBC == epbcNONE) {
+ if (ir->epc != epcNO) {
+ warning(wi,"Turning off pressure coupling for vacuum system");
+ ir->epc = epcNO;
+ }
+ } else {
+ sprintf(err_buf,"Can not have pressure coupling with pbc=%s",
+ epbc_names[ir->ePBC]);
+ CHECK(ir->epc != epcNO);
+ }
+ sprintf(err_buf,"Can not have Ewald with pbc=%s",epbc_names[ir->ePBC]);
+ CHECK(EEL_FULL(ir->coulombtype));
+
+ sprintf(err_buf,"Can not have dispersion correction with pbc=%s",
+ epbc_names[ir->ePBC]);
+ CHECK(ir->eDispCorr != edispcNO);
+ }
+
+ if (ir->rlist == 0.0) {
+ sprintf(err_buf,"can only have neighborlist cut-off zero (=infinite)\n"
+ "with coulombtype = %s or coulombtype = %s\n"
+ "without periodic boundary conditions (pbc = %s) and\n"
+ "rcoulomb and rvdw set to zero",
+ eel_names[eelCUT],eel_names[eelUSER],epbc_names[epbcNONE]);
+ CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) ||
+ (ir->ePBC != epbcNONE) ||
+ (ir->rcoulomb != 0.0) || (ir->rvdw != 0.0));
+
+ if (ir->nstlist < 0) {
+ warning_error(wi,"Can not have heuristic neighborlist updates without cut-off");
+ }
+ if (ir->nstlist > 0) {
+ warning_note(wi,"Simulating without cut-offs is usually (slightly) faster with nstlist=0, nstype=simple and particle decomposition");
+ }
+ }
+
+ /* COMM STUFF */
+ if (ir->nstcomm == 0) {
+ ir->comm_mode = ecmNO;
+ }
+ if (ir->comm_mode != ecmNO) {
+ if (ir->nstcomm < 0) {
+ warning(wi,"If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. nstcomm is modified to its absolute value");
+ ir->nstcomm = abs(ir->nstcomm);
+ }
+
+ if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy) {
+ warning_note(wi,"nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy");
+ ir->nstcomm = ir->nstcalcenergy;
+ }
+
+ if (ir->comm_mode == ecmANGULAR) {
+ sprintf(err_buf,"Can not remove the rotation around the center of mass with periodic molecules");
+ CHECK(ir->bPeriodicMols);
+ if (ir->ePBC != epbcNONE)
+ warning(wi,"Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule).");
+ }
+ }
+
+ if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR) {
+ warning_note(wi,"Tumbling and or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR.");
+ }
+
+ sprintf(err_buf,"Twin-range neighbour searching (NS) with simple NS"
+ " algorithm not implemented");
+ CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist))
+ && (ir->ns_type == ensSIMPLE));
+
+ /* TEMPERATURE COUPLING */
+ if (ir->etc == etcYES)
+ {
+ ir->etc = etcBERENDSEN;
+ warning_note(wi,"Old option for temperature coupling given: "
+ "changing \"yes\" to \"Berendsen\"\n");
+ }
+
+ if ((ir->etc == etcNOSEHOOVER) || (ir->epc == epcMTTK))
+ {
+ if (ir->opts.nhchainlength < 1)
+ {
+ sprintf(warn_buf,"number of Nose-Hoover chains (currently %d) cannot be less than 1,reset to 1\n",ir->opts.nhchainlength);
+ ir->opts.nhchainlength =1;
+ warning(wi,warn_buf);
+ }
+
+ if (ir->etc==etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1)
+ {
+ warning_note(wi,"leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1");
+ ir->opts.nhchainlength = 1;
+ }
+ }
+ else
+ {
+ ir->opts.nhchainlength = 0;
+ }
+
+ if (ir->eI == eiVVAK) {
+ sprintf(err_buf,"%s implemented primarily for validation, and requires nsttcouple = 1 and nstpcouple = 1.",
+ ei_names[eiVVAK]);
+ CHECK((ir->nsttcouple != 1) || (ir->nstpcouple != 1));
+ }
+
+ if (ETC_ANDERSEN(ir->etc))
+ {
+ sprintf(err_buf,"%s temperature control not supported for integrator %s.",etcoupl_names[ir->etc],ei_names[ir->eI]);
+ CHECK(!(EI_VV(ir->eI)));
+
+ for (i=0;i<ir->opts.ngtc;i++)
+ {
+ sprintf(err_buf,"all tau_t must currently be equal using Andersen temperature control, violated for group %d",i);
+ CHECK(ir->opts.tau_t[0] != ir->opts.tau_t[i]);
+ sprintf(err_buf,"all tau_t must be postive using Andersen temperature control, tau_t[%d]=%10.6f",
+ i,ir->opts.tau_t[i]);
+ CHECK(ir->opts.tau_t[i]<0);
+ }
+ if (ir->nstcomm > 0 && (ir->etc == etcANDERSEN)) {
+ sprintf(warn_buf,"Center of mass removal not necessary for %s. All velocities of coupled groups are rerandomized periodically, so flying ice cube errors will not occur.",etcoupl_names[ir->etc]);
+ warning_note(wi,warn_buf);
+ }
+
+ sprintf(err_buf,"nstcomm must be 1, not %d for %s, as velocities of atoms in coupled groups are randomized every time step",ir->nstcomm,etcoupl_names[ir->etc]);
+ CHECK(ir->nstcomm > 1 && (ir->etc == etcANDERSEN));
+
+ for (i=0;i<ir->opts.ngtc;i++)
+ {
+ int nsteps = (int)(ir->opts.tau_t[i]/ir->delta_t);
+ sprintf(err_buf,"tau_t/delta_t for group %d for temperature control method %s must be a multiple of nstcomm (%d), as velocities of atoms in coupled groups are randomized every time step. The input tau_t (%8.3f) leads to %d steps per randomization",i,etcoupl_names[ir->etc],ir->nstcomm,ir->opts.tau_t[i],nsteps);
+ CHECK((nsteps % ir->nstcomm) && (ir->etc == etcANDERSENMASSIVE));
+ }
+ }
+ if (ir->etc == etcBERENDSEN)
+ {
+ sprintf(warn_buf,"The %s thermostat does not generate the correct kinetic energy distribution. You might want to consider using the %s thermostat.",
+ ETCOUPLTYPE(ir->etc),ETCOUPLTYPE(etcVRESCALE));
+ warning_note(wi,warn_buf);
+ }
+
+ if ((ir->etc==etcNOSEHOOVER || ETC_ANDERSEN(ir->etc))
+ && ir->epc==epcBERENDSEN)
+ {
+ sprintf(warn_buf,"Using Berendsen pressure coupling invalidates the "
+ "true ensemble for the thermostat");
+ warning(wi,warn_buf);
+ }
+
+ /* PRESSURE COUPLING */
+ if (ir->epc == epcISOTROPIC)
+ {
+ ir->epc = epcBERENDSEN;
+ warning_note(wi,"Old option for pressure coupling given: "
+ "changing \"Isotropic\" to \"Berendsen\"\n");
+ }
+
+ if (ir->epc != epcNO)
+ {
+ dt_pcoupl = ir->nstpcouple*ir->delta_t;
+
+ sprintf(err_buf,"tau-p must be > 0 instead of %g\n",ir->tau_p);
+ CHECK(ir->tau_p <= 0);
+
+ if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
+ {
+ sprintf(warn_buf,"For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
+ EPCOUPLTYPE(ir->epc),ir->tau_p,pcouple_min_integration_steps(ir->epc),dt_pcoupl);
+ warning(wi,warn_buf);
+ }
+
+ sprintf(err_buf,"compressibility must be > 0 when using pressure"
+ " coupling %s\n",EPCOUPLTYPE(ir->epc));
+ CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 ||
+ ir->compress[ZZ][ZZ] < 0 ||
+ (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 &&
+ ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0));
+
+ if (epcPARRINELLORAHMAN == ir->epc && opts->bGenVel)
+ {
+ sprintf(warn_buf,
+ "You are generating velocities so I am assuming you "
+ "are equilibrating a system. You are using "
+ "%s pressure coupling, but this can be "
+ "unstable for equilibration. If your system crashes, try "
+ "equilibrating first with Berendsen pressure coupling. If "
+ "you are not equilibrating the system, you can probably "
+ "ignore this warning.",
+ epcoupl_names[ir->epc]);
+ warning(wi,warn_buf);
+ }
+ }
+
+ if (EI_VV(ir->eI))
+ {
+ if (ir->epc > epcNO)
+ {
+ if ((ir->epc!=epcBERENDSEN) && (ir->epc!=epcMTTK))
+ {
+ warning_error(wi,"for md-vv and md-vv-avek, can only use Berendsen and Martyna-Tuckerman-Tobias-Klein (MTTK) equations for pressure control; MTTK is equivalent to Parrinello-Rahman.");
+ }
+ }
+ }
+
+ /* ELECTROSTATICS */
+ /* More checks are in triple check (grompp.c) */
+
+ if (ir->coulombtype == eelSWITCH) {
+ sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious artifacts, advice: use coulombtype = %s",
+ eel_names[ir->coulombtype],
+ eel_names[eelRF_ZERO]);
+ warning(wi,warn_buf);
+ }
+
+ if (ir->epsilon_r!=1 && ir->implicit_solvent==eisGBSA) {
+ sprintf(warn_buf,"epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric",ir->epsilon_r);
+ warning_note(wi,warn_buf);
+ }
+
+ if (EEL_RF(ir->coulombtype) && ir->epsilon_rf==1 && ir->epsilon_r!=1) {
+ sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
+ warning(wi,warn_buf);
+ ir->epsilon_rf = ir->epsilon_r;
+ ir->epsilon_r = 1.0;
+ }
+
+ if (getenv("GALACTIC_DYNAMICS") == NULL) {
+ sprintf(err_buf,"epsilon-r must be >= 0 instead of %g\n",ir->epsilon_r);
+ CHECK(ir->epsilon_r < 0);
+ }
+
+ if (EEL_RF(ir->coulombtype)) {
+ /* reaction field (at the cut-off) */
+
+ if (ir->coulombtype == eelRF_ZERO) {
+ sprintf(err_buf,"With coulombtype = %s, epsilon-rf must be 0",
+ eel_names[ir->coulombtype]);
+ CHECK(ir->epsilon_rf != 0);
+ }
+
+ sprintf(err_buf,"epsilon-rf must be >= epsilon-r");
+ CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) ||
+ (ir->epsilon_r == 0));
+ if (ir->epsilon_rf == ir->epsilon_r) {
+ sprintf(warn_buf,"Using epsilon-rf = epsilon-r with %s does not make sense",
+ eel_names[ir->coulombtype]);
+ warning(wi,warn_buf);
+ }
+ }
+ /* Allow rlist>rcoulomb for tabulated long range stuff. This just
+ * means the interaction is zero outside rcoulomb, but it helps to
+ * provide accurate energy conservation.
+ */
+ if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype)) {
+ if (EEL_SWITCHED(ir->coulombtype)) {
+ sprintf(err_buf,
+ "With coulombtype = %s rcoulomb_switch must be < rcoulomb",
+ eel_names[ir->coulombtype]);
+ CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
+ }
+ } else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) {
- } else {
++ if (ir->cutoff_scheme == ecutsGROUP) {
++ sprintf(err_buf,"With coulombtype = %s, rcoulomb must be >= rlist",
++ eel_names[ir->coulombtype]);
++ CHECK(ir->rlist > ir->rcoulomb);
++ }
+ }
+
+ if (EEL_FULL(ir->coulombtype)) {
+ if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
+ ir->coulombtype==eelPMEUSERSWITCH) {
+ sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
+ eel_names[ir->coulombtype]);
+ CHECK(ir->rcoulomb > ir->rlist);
- sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist",evdw_names[ir->vdwtype]);
- CHECK(ir->rlist > ir->rvdw);
- }
- if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype)
- && (ir->rlistlong <= ir->rcoulomb)) {
- sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.",
- IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
- warning_note(wi,warn_buf);
- }
- if (EVDW_SWITCHED(ir->vdwtype) && (ir->rlistlong <= ir->rvdw)) {
- sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.",
- IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
- warning_note(wi,warn_buf);
++ } else if (ir->cutoff_scheme == ecutsGROUP) {
+ if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD) {
+ sprintf(err_buf,
+ "With coulombtype = %s, rcoulomb must be equal to rlist\n"
+ "If you want optimal energy conservation or exact integration use %s",
+ eel_names[ir->coulombtype],eel_names[eelPMESWITCH]);
+ } else {
+ sprintf(err_buf,
+ "With coulombtype = %s, rcoulomb must be equal to rlist",
+ eel_names[ir->coulombtype]);
+ }
+ CHECK(ir->rcoulomb != ir->rlist);
+ }
+ }
+
+ if (EEL_PME(ir->coulombtype)) {
+ if (ir->pme_order < 3) {
+ warning_error(wi,"pme-order can not be smaller than 3");
+ }
+ }
+
+ if (ir->nwall==2 && EEL_FULL(ir->coulombtype)) {
+ if (ir->ewald_geometry == eewg3D) {
+ sprintf(warn_buf,"With pbc=%s you should use ewald-geometry=%s",
+ epbc_names[ir->ePBC],eewg_names[eewg3DC]);
+ warning(wi,warn_buf);
+ }
+ /* This check avoids extra pbc coding for exclusion corrections */
+ sprintf(err_buf,"wall-ewald-zfac should be >= 2");
+ CHECK(ir->wall_ewald_zfac < 2);
+ }
+
+ if (EVDW_SWITCHED(ir->vdwtype)) {
+ sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw",
+ evdw_names[ir->vdwtype]);
+ CHECK(ir->rvdw_switch >= ir->rvdw);
+ } else if (ir->vdwtype == evdwCUT) {
- /* ENERGY CONSERVATION */
- if (ir_NVE(ir))
- {
- if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0)
- {
- sprintf(warn_buf,"You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
- evdw_names[evdwSHIFT]);
- warning_note(wi,warn_buf);
- }
- if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0)
- {
- sprintf(warn_buf,"You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
- eel_names[eelPMESWITCH],eel_names[eelRF_ZERO]);
- warning_note(wi,warn_buf);
- }
- }
++ if (ir->cutoff_scheme == ecutsGROUP) {
++ sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist",evdw_names[ir->vdwtype]);
++ CHECK(ir->rlist > ir->rvdw);
++ }
+ }
++ if (ir->cutoff_scheme == ecutsGROUP)
++ {
++ if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype)
++ && (ir->rlistlong <= ir->rcoulomb))
++ {
++ sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.",
++ IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
++ warning_note(wi,warn_buf);
++ }
++ if (EVDW_SWITCHED(ir->vdwtype) && (ir->rlistlong <= ir->rvdw))
++ {
++ sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.",
++ IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
++ warning_note(wi,warn_buf);
++ }
++ }
+
+ if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO) {
+ warning_note(wi,"You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
+ }
+
+ if (ir->nstlist == -1) {
+ sprintf(err_buf,
+ "nstlist=-1 only works with switched or shifted potentials,\n"
+ "suggestion: use vdw-type=%s and coulomb-type=%s",
+ evdw_names[evdwSHIFT],eel_names[eelPMESWITCH]);
+ CHECK(!(EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) &&
+ EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)));
+
+ sprintf(err_buf,"With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
+ CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
+ }
+ sprintf(err_buf,"nstlist can not be smaller than -1");
+ CHECK(ir->nstlist < -1);
+
+ if (ir->eI == eiLBFGS && (ir->coulombtype==eelCUT || ir->vdwtype==evdwCUT)
+ && ir->rvdw != 0) {
+ warning(wi,"For efficient BFGS minimization, use switch/shift/pme instead of cut-off.");
+ }
+
+ if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0) {
+ warning(wi,"Using L-BFGS with nbfgscorr<=0 just gets you steepest descent.");
+ }
+
- if (ir->bAdress && !EI_SD(ir->eI)){
- warning_error(wi,"AdresS simulation supports only stochastic dynamics");
- }
- if (ir->bAdress && ir->epc != epcNO){
- warning_error(wi,"AdresS simulation does not support pressure coupling");
- }
- if (ir->bAdress && (EEL_FULL(ir->coulombtype))){
- warning_error(wi,"AdresS simulation does not support long-range electrostatics");
- }
-
++ /* ENERGY CONSERVATION */
++ if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP)
++ {
++ if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0)
++ {
++ sprintf(warn_buf,"You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
++ evdw_names[evdwSHIFT]);
++ warning_note(wi,warn_buf);
++ }
++ if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0)
++ {
++ sprintf(warn_buf,"You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
++ eel_names[eelPMESWITCH],eel_names[eelRF_ZERO]);
++ warning_note(wi,warn_buf);
++ }
++ }
+
+ /* IMPLICIT SOLVENT */
+ if(ir->coulombtype==eelGB_NOTUSED)
+ {
+ ir->coulombtype=eelCUT;
+ ir->implicit_solvent=eisGBSA;
+ fprintf(stderr,"Note: Old option for generalized born electrostatics given:\n"
+ "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n"
+ "setting implicit-solvent value to \"GBSA\" in input section.\n");
+ }
+
+ if(ir->sa_algorithm==esaSTILL)
+ {
+ sprintf(err_buf,"Still SA algorithm not available yet, use %s or %s instead\n",esa_names[esaAPPROX],esa_names[esaNO]);
+ CHECK(ir->sa_algorithm == esaSTILL);
+ }
+
+ if(ir->implicit_solvent==eisGBSA)
+ {
+ sprintf(err_buf,"With GBSA implicit solvent, rgbradii must be equal to rlist.");
+ CHECK(ir->rgbradii != ir->rlist);
+
+ if(ir->coulombtype!=eelCUT)
+ {
+ sprintf(err_buf,"With GBSA, coulombtype must be equal to %s\n",eel_names[eelCUT]);
+ CHECK(ir->coulombtype!=eelCUT);
+ }
+ if(ir->vdwtype!=evdwCUT)
+ {
+ sprintf(err_buf,"With GBSA, vdw-type must be equal to %s\n",evdw_names[evdwCUT]);
+ CHECK(ir->vdwtype!=evdwCUT);
+ }
+ if(ir->nstgbradii<1)
+ {
+ sprintf(warn_buf,"Using GBSA with nstgbradii<1, setting nstgbradii=1");
+ warning_note(wi,warn_buf);
+ ir->nstgbradii=1;
+ }
+ if(ir->sa_algorithm==esaNO)
+ {
+ sprintf(warn_buf,"No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n");
+ warning_note(wi,warn_buf);
+ }
+ if(ir->sa_surface_tension<0 && ir->sa_algorithm!=esaNO)
+ {
+ sprintf(warn_buf,"Value of sa_surface_tension is < 0. Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
+ warning_note(wi,warn_buf);
+
+ if(ir->gb_algorithm==egbSTILL)
+ {
+ ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
+ }
+ else
+ {
+ ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
+ }
+ }
+ if(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO)
+ {
+ sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
+ CHECK(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO);
+ }
+
+ }
+
- ITYPE ("nstcomm", ir->nstcomm, 10);
++ if (ir->bAdress)
++ {
++ if (ir->cutoff_scheme != ecutsGROUP)
++ {
++ warning_error(wi,"AdresS simulation supports only cutoff-scheme=group");
++ }
++ if (!EI_SD(ir->eI))
++ {
++ warning_error(wi,"AdresS simulation supports only stochastic dynamics");
++ }
++ if (ir->epc != epcNO)
++ {
++ warning_error(wi,"AdresS simulation does not support pressure coupling");
++ }
++ if (EEL_FULL(ir->coulombtype))
++ {
++ warning_error(wi,"AdresS simulation does not support long-range electrostatics");
++ }
++ }
+}
+
+/* count the number of text elemets separated by whitespace in a string.
+ str = the input string
+ maxptr = the maximum number of allowed elements
+ ptr = the output array of pointers to the first character of each element
+ returns: the number of elements. */
+int str_nelem(const char *str,int maxptr,char *ptr[])
+{
+ int np=0;
+ char *copy0,*copy;
+
+ copy0=strdup(str);
+ copy=copy0;
+ ltrim(copy);
+ while (*copy != '\0') {
+ if (np >= maxptr)
+ gmx_fatal(FARGS,"Too many groups on line: '%s' (max is %d)",
+ str,maxptr);
+ if (ptr)
+ ptr[np]=copy;
+ np++;
+ while ((*copy != '\0') && !isspace(*copy))
+ copy++;
+ if (*copy != '\0') {
+ *copy='\0';
+ copy++;
+ }
+ ltrim(copy);
+ }
+ if (ptr == NULL)
+ sfree(copy0);
+
+ return np;
+}
+
+/* interpret a number of doubles from a string and put them in an array,
+ after allocating space for them.
+ str = the input string
+ n = the (pre-allocated) number of doubles read
+ r = the output array of doubles. */
+static void parse_n_real(char *str,int *n,real **r)
+{
+ char *ptr[MAXPTR];
+ int i;
+
+ *n = str_nelem(str,MAXPTR,ptr);
+
+ snew(*r,*n);
+ for(i=0; i<*n; i++) {
+ (*r)[i] = strtod(ptr[i],NULL);
+ }
+}
+
+static void do_fep_params(t_inputrec *ir, char fep_lambda[][STRLEN],char weights[STRLEN]) {
+
+ int i,j,max_n_lambda,nweights,nfep[efptNR];
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+ real **count_fep_lambdas;
+ gmx_bool bOneLambda = TRUE;
+
+ snew(count_fep_lambdas,efptNR);
+
+ /* FEP input processing */
+ /* first, identify the number of lambda values for each type.
+ All that are nonzero must have the same number */
+
+ for (i=0;i<efptNR;i++)
+ {
+ parse_n_real(fep_lambda[i],&(nfep[i]),&(count_fep_lambdas[i]));
+ }
+
+ /* now, determine the number of components. All must be either zero, or equal. */
+
+ max_n_lambda = 0;
+ for (i=0;i<efptNR;i++)
+ {
+ if (nfep[i] > max_n_lambda) {
+ max_n_lambda = nfep[i]; /* here's a nonzero one. All of them
+ must have the same number if its not zero.*/
+ break;
+ }
+ }
+
+ for (i=0;i<efptNR;i++)
+ {
+ if (nfep[i] == 0)
+ {
+ ir->fepvals->separate_dvdl[i] = FALSE;
+ }
+ else if (nfep[i] == max_n_lambda)
+ {
+ if (i!=efptTEMPERATURE) /* we treat this differently -- not really a reason to compute the derivative with
+ respect to the temperature currently */
+ {
+ ir->fepvals->separate_dvdl[i] = TRUE;
+ }
+ }
+ else
+ {
+ gmx_fatal(FARGS,"Number of lambdas (%d) for FEP type %s not equal to number of other types (%d)",
+ nfep[i],efpt_names[i],max_n_lambda);
+ }
+ }
+ /* we don't print out dhdl if the temperature is changing, since we can't correctly define dhdl in this case */
+ ir->fepvals->separate_dvdl[efptTEMPERATURE] = FALSE;
+
+ /* the number of lambdas is the number we've read in, which is either zero
+ or the same for all */
+ fep->n_lambda = max_n_lambda;
+
+ /* allocate space for the array of lambda values */
+ snew(fep->all_lambda,efptNR);
+ /* if init_lambda is defined, we need to set lambda */
+ if ((fep->init_lambda > 0) && (fep->n_lambda == 0))
+ {
+ ir->fepvals->separate_dvdl[efptFEP] = TRUE;
+ }
+ /* otherwise allocate the space for all of the lambdas, and transfer the data */
+ for (i=0;i<efptNR;i++)
+ {
+ snew(fep->all_lambda[i],fep->n_lambda);
+ if (nfep[i] > 0) /* if it's zero, then the count_fep_lambda arrays
+ are zero */
+ {
+ for (j=0;j<fep->n_lambda;j++)
+ {
+ fep->all_lambda[i][j] = (double)count_fep_lambdas[i][j];
+ }
+ sfree(count_fep_lambdas[i]);
+ }
+ }
+ sfree(count_fep_lambdas);
+
+ /* "fep-vals" is either zero or the full number. If zero, we'll need to define fep-lambdas for internal
+ bookkeeping -- for now, init_lambda */
+
+ if ((nfep[efptFEP] == 0) && (fep->init_lambda >= 0) && (fep->init_lambda <= 1))
+ {
+ for (i=0;i<fep->n_lambda;i++)
+ {
+ fep->all_lambda[efptFEP][i] = fep->init_lambda;
+ }
+ }
+
+ /* check to see if only a single component lambda is defined, and soft core is defined.
+ In this case, turn on coulomb soft core */
+
+ if (max_n_lambda == 0)
+ {
+ bOneLambda = TRUE;
+ }
+ else
+ {
+ for (i=0;i<efptNR;i++)
+ {
+ if ((nfep[i] != 0) && (i!=efptFEP))
+ {
+ bOneLambda = FALSE;
+ }
+ }
+ }
+ if ((bOneLambda) && (fep->sc_alpha > 0))
+ {
+ fep->bScCoul = TRUE;
+ }
+
+ /* Fill in the others with the efptFEP if they are not explicitly
+ specified (i.e. nfep[i] == 0). This means if fep is not defined,
+ they are all zero. */
+
+ for (i=0;i<efptNR;i++)
+ {
+ if ((nfep[i] == 0) && (i!=efptFEP))
+ {
+ for (j=0;j<fep->n_lambda;j++)
+ {
+ fep->all_lambda[i][j] = fep->all_lambda[efptFEP][j];
+ }
+ }
+ }
+
+
+ /* make it easier if sc_r_power = 48 by increasing it to the 4th power, to be in the right scale. */
+ if (fep->sc_r_power == 48)
+ {
+ if (fep->sc_alpha > 0.1)
+ {
+ gmx_fatal(FARGS,"sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004", fep->sc_alpha);
+ }
+ }
+
+ expand = ir->expandedvals;
+ /* now read in the weights */
+ parse_n_real(weights,&nweights,&(expand->init_lambda_weights));
+ if (nweights == 0)
+ {
+ expand->bInit_weights = FALSE;
+ snew(expand->init_lambda_weights,fep->n_lambda); /* initialize to zero */
+ }
+ else if (nweights != fep->n_lambda)
+ {
+ gmx_fatal(FARGS,"Number of weights (%d) is not equal to number of lambda values (%d)",
+ nweights,fep->n_lambda);
+ }
+ else
+ {
+ expand->bInit_weights = TRUE;
+ }
+ if ((expand->nstexpanded < 0) && (ir->efep != efepNO)) {
+ expand->nstexpanded = fep->nstdhdl;
+ /* if you don't specify nstexpanded when doing expanded ensemble free energy calcs, it is set to nstdhdl */
+ }
+ if ((expand->nstexpanded < 0) && ir->bSimTemp) {
+ expand->nstexpanded = ir->nstlist;
+ /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to nstlist*/
+ }
+}
+
+
+static void do_simtemp_params(t_inputrec *ir) {
+
+ snew(ir->simtempvals->temperatures,ir->fepvals->n_lambda);
+ GetSimTemps(ir->fepvals->n_lambda,ir->simtempvals,ir->fepvals->all_lambda[efptTEMPERATURE]);
+
+ return;
+}
+
+static void do_wall_params(t_inputrec *ir,
+ char *wall_atomtype, char *wall_density,
+ t_gromppopts *opts)
+{
+ int nstr,i;
+ char *names[MAXPTR];
+ double dbl;
+
+ opts->wall_atomtype[0] = NULL;
+ opts->wall_atomtype[1] = NULL;
+
+ ir->wall_atomtype[0] = -1;
+ ir->wall_atomtype[1] = -1;
+ ir->wall_density[0] = 0;
+ ir->wall_density[1] = 0;
+
+ if (ir->nwall > 0)
+ {
+ nstr = str_nelem(wall_atomtype,MAXPTR,names);
+ if (nstr != ir->nwall)
+ {
+ gmx_fatal(FARGS,"Expected %d elements for wall_atomtype, found %d",
+ ir->nwall,nstr);
+ }
+ for(i=0; i<ir->nwall; i++)
+ {
+ opts->wall_atomtype[i] = strdup(names[i]);
+ }
+
+ if (ir->wall_type == ewt93 || ir->wall_type == ewt104) {
+ nstr = str_nelem(wall_density,MAXPTR,names);
+ if (nstr != ir->nwall)
+ {
+ gmx_fatal(FARGS,"Expected %d elements for wall-density, found %d",ir->nwall,nstr);
+ }
+ for(i=0; i<ir->nwall; i++)
+ {
+ sscanf(names[i],"%lf",&dbl);
+ if (dbl <= 0)
+ {
+ gmx_fatal(FARGS,"wall-density[%d] = %f\n",i,dbl);
+ }
+ ir->wall_density[i] = dbl;
+ }
+ }
+ }
+}
+
+static void add_wall_energrps(gmx_groups_t *groups,int nwall,t_symtab *symtab)
+{
+ int i;
+ t_grps *grps;
+ char str[STRLEN];
+
+ if (nwall > 0) {
+ srenew(groups->grpname,groups->ngrpname+nwall);
+ grps = &(groups->grps[egcENER]);
+ srenew(grps->nm_ind,grps->nr+nwall);
+ for(i=0; i<nwall; i++) {
+ sprintf(str,"wall%d",i);
+ groups->grpname[groups->ngrpname] = put_symtab(symtab,str);
+ grps->nm_ind[grps->nr++] = groups->ngrpname++;
+ }
+ }
+}
+
+void read_expandedparams(int *ninp_p,t_inpfile **inp_p,
+ t_expanded *expand,warninp_t wi)
+{
+ int ninp,nerror=0;
+ t_inpfile *inp;
+
+ ninp = *ninp_p;
+ inp = *inp_p;
+
+ /* read expanded ensemble parameters */
+ CCTYPE ("expanded ensemble variables");
+ ITYPE ("nstexpanded",expand->nstexpanded,-1);
+ EETYPE("lmc-stats", expand->elamstats, elamstats_names);
+ EETYPE("lmc-move", expand->elmcmove, elmcmove_names);
+ EETYPE("lmc-weights-equil",expand->elmceq,elmceq_names);
+ ITYPE ("weight-equil-number-all-lambda",expand->equil_n_at_lam,-1);
+ ITYPE ("weight-equil-number-samples",expand->equil_samples,-1);
+ ITYPE ("weight-equil-number-steps",expand->equil_steps,-1);
+ RTYPE ("weight-equil-wl-delta",expand->equil_wl_delta,-1);
+ RTYPE ("weight-equil-count-ratio",expand->equil_ratio,-1);
+ CCTYPE("Seed for Monte Carlo in lambda space");
+ ITYPE ("lmc-seed",expand->lmc_seed,-1);
+ RTYPE ("mc-temperature",expand->mc_temp,-1);
+ ITYPE ("lmc-repeats",expand->lmc_repeats,1);
+ ITYPE ("lmc-gibbsdelta",expand->gibbsdeltalam,-1);
+ ITYPE ("lmc-forced-nstart",expand->lmc_forced_nstart,0);
+ EETYPE("symmetrized-transition-matrix", expand->bSymmetrizedTMatrix, yesno_names);
+ ITYPE("nst-transition-matrix", expand->nstTij, -1);
+ ITYPE ("mininum-var-min",expand->minvarmin, 100); /*default is reasonable */
+ ITYPE ("weight-c-range",expand->c_range, 0); /* default is just C=0 */
+ RTYPE ("wl-scale",expand->wl_scale,0.8);
+ RTYPE ("wl-ratio",expand->wl_ratio,0.8);
+ RTYPE ("init-wl-delta",expand->init_wl_delta,1.0);
+ EETYPE("wl-oneovert",expand->bWLoneovert,yesno_names);
+
+ *ninp_p = ninp;
+ *inp_p = inp;
+
+ return;
+}
+
+void get_ir(const char *mdparin,const char *mdparout,
+ t_inputrec *ir,t_gromppopts *opts,
+ warninp_t wi)
+{
+ char *dumstr[2];
+ double dumdub[2][6];
+ t_inpfile *inp;
+ const char *tmp;
+ int i,j,m,ninp;
+ char warn_buf[STRLEN];
+ t_lambda *fep = ir->fepvals;
+ t_expanded *expand = ir->expandedvals;
+
+ inp = read_inpfile(mdparin, &ninp, NULL, wi);
+
+ snew(dumstr[0],STRLEN);
+ snew(dumstr[1],STRLEN);
+
+ /* remove the following deprecated commands */
+ REM_TYPE("title");
+ REM_TYPE("cpp");
+ REM_TYPE("domain-decomposition");
+ REM_TYPE("andersen-seed");
+ REM_TYPE("dihre");
+ REM_TYPE("dihre-fc");
+ REM_TYPE("dihre-tau");
+ REM_TYPE("nstdihreout");
+ REM_TYPE("nstcheckpoint");
+
+ /* replace the following commands with the clearer new versions*/
+ REPL_TYPE("unconstrained-start","continuation");
+ REPL_TYPE("foreign-lambda","fep-lambdas");
+
+ CCTYPE ("VARIOUS PREPROCESSING OPTIONS");
+ CTYPE ("Preprocessor information: use cpp syntax.");
+ CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe");
+ STYPE ("include", opts->include, NULL);
+ CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)");
+ STYPE ("define", opts->define, NULL);
+
+ CCTYPE ("RUN CONTROL PARAMETERS");
+ EETYPE("integrator", ir->eI, ei_names);
+ CTYPE ("Start time and timestep in ps");
+ RTYPE ("tinit", ir->init_t, 0.0);
+ RTYPE ("dt", ir->delta_t, 0.001);
+ STEPTYPE ("nsteps", ir->nsteps, 0);
+ CTYPE ("For exact run continuation or redoing part of a run");
+ STEPTYPE ("init-step",ir->init_step, 0);
+ CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)");
+ ITYPE ("simulation-part", ir->simulation_part, 1);
+ CTYPE ("mode for center of mass motion removal");
+ EETYPE("comm-mode", ir->comm_mode, ecm_names);
+ CTYPE ("number of steps for center of mass motion removal");
- ITYPE ("nstcalcenergy",ir->nstcalcenergy, -1);
- ITYPE ("nstenergy", ir->nstenergy, 100);
++ ITYPE ("nstcomm", ir->nstcomm, 100);
+ CTYPE ("group(s) for center of mass motion removal");
+ STYPE ("comm-grps", vcm, NULL);
+
+ CCTYPE ("LANGEVIN DYNAMICS OPTIONS");
+ CTYPE ("Friction coefficient (amu/ps) and random seed");
+ RTYPE ("bd-fric", ir->bd_fric, 0.0);
+ ITYPE ("ld-seed", ir->ld_seed, 1993);
+
+ /* Em stuff */
+ CCTYPE ("ENERGY MINIMIZATION OPTIONS");
+ CTYPE ("Force tolerance and initial step-size");
+ RTYPE ("emtol", ir->em_tol, 10.0);
+ RTYPE ("emstep", ir->em_stepsize,0.01);
+ CTYPE ("Max number of iterations in relax-shells");
+ ITYPE ("niter", ir->niter, 20);
+ CTYPE ("Step size (ps^2) for minimization of flexible constraints");
+ RTYPE ("fcstep", ir->fc_stepsize, 0);
+ CTYPE ("Frequency of steepest descents steps when doing CG");
+ ITYPE ("nstcgsteep", ir->nstcgsteep, 1000);
+ ITYPE ("nbfgscorr", ir->nbfgscorr, 10);
+
+ CCTYPE ("TEST PARTICLE INSERTION OPTIONS");
+ RTYPE ("rtpi", ir->rtpi, 0.05);
+
+ /* Output options */
+ CCTYPE ("OUTPUT CONTROL OPTIONS");
+ CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)");
+ ITYPE ("nstxout", ir->nstxout, 0);
+ ITYPE ("nstvout", ir->nstvout, 0);
+ ITYPE ("nstfout", ir->nstfout, 0);
+ ir->nstcheckpoint = 1000;
+ CTYPE ("Output frequency for energies to log file and energy file");
+ ITYPE ("nstlog", ir->nstlog, 1000);
- RTYPE ("fourierspacing", opts->fourierspacing,0.12);
++ ITYPE ("nstcalcenergy",ir->nstcalcenergy, 100);
++ ITYPE ("nstenergy", ir->nstenergy, 1000);
+ CTYPE ("Output frequency and precision for .xtc file");
+ ITYPE ("nstxtcout", ir->nstxtcout, 0);
+ RTYPE ("xtc-precision",ir->xtcprec, 1000.0);
+ CTYPE ("This selects the subset of atoms for the .xtc file. You can");
+ CTYPE ("select multiple groups. By default all atoms will be written.");
+ STYPE ("xtc-grps", xtc_grps, NULL);
+ CTYPE ("Selection of energy groups");
+ STYPE ("energygrps", energy, NULL);
+
+ /* Neighbor searching */
+ CCTYPE ("NEIGHBORSEARCHING PARAMETERS");
++ CTYPE ("cut-off scheme (group: using charge groups, Verlet: particle based cut-offs)");
++ EETYPE("cutoff-scheme", ir->cutoff_scheme, ecutscheme_names);
+ CTYPE ("nblist update frequency");
+ ITYPE ("nstlist", ir->nstlist, 10);
+ CTYPE ("ns algorithm (simple or grid)");
+ EETYPE("ns-type", ir->ns_type, ens_names);
+ /* set ndelta to the optimal value of 2 */
+ ir->ndelta = 2;
+ CTYPE ("Periodic boundary conditions: xyz, no, xy");
+ EETYPE("pbc", ir->ePBC, epbc_names);
+ EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names);
++ CTYPE ("Allowed energy drift due to the Verlet buffer in kJ/mol/ps per atom,");
++ CTYPE ("a value of -1 means: use rlist");
++ RTYPE("verlet-buffer-drift", ir->verletbuf_drift, 0.005);
+ CTYPE ("nblist cut-off");
+ RTYPE ("rlist", ir->rlist, -1);
+ CTYPE ("long-range cut-off for switched potentials");
+ RTYPE ("rlistlong", ir->rlistlong, -1);
+
+ /* Electrostatics */
+ CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
+ CTYPE ("Method for doing electrostatics");
+ EETYPE("coulombtype", ir->coulombtype, eel_names);
++ EETYPE("coulomb-modifier", ir->coulomb_modifier, eintmod_names);
+ CTYPE ("cut-off lengths");
+ RTYPE ("rcoulomb-switch", ir->rcoulomb_switch, 0.0);
+ RTYPE ("rcoulomb", ir->rcoulomb, -1);
+ CTYPE ("Relative dielectric constant for the medium and the reaction field");
+ RTYPE ("epsilon-r", ir->epsilon_r, 1.0);
+ RTYPE ("epsilon-rf", ir->epsilon_rf, 0.0);
+ CTYPE ("Method for doing Van der Waals");
+ EETYPE("vdw-type", ir->vdwtype, evdw_names);
++ EETYPE("vdw-modifier", ir->vdw_modifier, eintmod_names);
+ CTYPE ("cut-off lengths");
+ RTYPE ("rvdw-switch", ir->rvdw_switch, 0.0);
+ RTYPE ("rvdw", ir->rvdw, -1);
+ CTYPE ("Apply long range dispersion corrections for Energy and Pressure");
+ EETYPE("DispCorr", ir->eDispCorr, edispc_names);
+ CTYPE ("Extension of the potential lookup tables beyond the cut-off");
+ RTYPE ("table-extension", ir->tabext, 1.0);
+ CTYPE ("Seperate tables between energy group pairs");
+ STYPE ("energygrp-table", egptable, NULL);
+ CTYPE ("Spacing for the PME/PPPM FFT grid");
- if ((ir->etc == etcVRESCALE && ir->opts.tau_t[i] >= 0) ||
- (ir->etc != etcVRESCALE && ir->opts.tau_t[i] > 0))
++ RTYPE ("fourierspacing", ir->fourier_spacing,0.12);
+ CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used");
+ ITYPE ("fourier-nx", ir->nkx, 0);
+ ITYPE ("fourier-ny", ir->nky, 0);
+ ITYPE ("fourier-nz", ir->nkz, 0);
+ CTYPE ("EWALD/PME/PPPM parameters");
+ ITYPE ("pme-order", ir->pme_order, 4);
+ RTYPE ("ewald-rtol", ir->ewald_rtol, 0.00001);
+ EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names);
+ RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0);
+ EETYPE("optimize-fft",ir->bOptFFT, yesno_names);
+
+ CCTYPE("IMPLICIT SOLVENT ALGORITHM");
+ EETYPE("implicit-solvent", ir->implicit_solvent, eis_names);
+
+ CCTYPE ("GENERALIZED BORN ELECTROSTATICS");
+ CTYPE ("Algorithm for calculating Born radii");
+ EETYPE("gb-algorithm", ir->gb_algorithm, egb_names);
+ CTYPE ("Frequency of calculating the Born radii inside rlist");
+ ITYPE ("nstgbradii", ir->nstgbradii, 1);
+ CTYPE ("Cutoff for Born radii calculation; the contribution from atoms");
+ CTYPE ("between rlist and rgbradii is updated every nstlist steps");
+ RTYPE ("rgbradii", ir->rgbradii, 1.0);
+ CTYPE ("Dielectric coefficient of the implicit solvent");
+ RTYPE ("gb-epsilon-solvent",ir->gb_epsilon_solvent, 80.0);
+ CTYPE ("Salt concentration in M for Generalized Born models");
+ RTYPE ("gb-saltconc", ir->gb_saltconc, 0.0);
+ CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)");
+ RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0);
+ RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8);
+ RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85);
+ RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009);
+ EETYPE("sa-algorithm", ir->sa_algorithm, esa_names);
+ CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA");
+ CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models.");
+ RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1);
+
+ /* Coupling stuff */
+ CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS");
+ CTYPE ("Temperature coupling");
+ EETYPE("tcoupl", ir->etc, etcoupl_names);
+ ITYPE ("nsttcouple", ir->nsttcouple, -1);
+ ITYPE("nh-chain-length", ir->opts.nhchainlength, NHCHAINLENGTH);
+ EETYPE("print-nose-hoover-chain-variables", ir->bPrintNHChains, yesno_names);
+ CTYPE ("Groups to couple separately");
+ STYPE ("tc-grps", tcgrps, NULL);
+ CTYPE ("Time constant (ps) and reference temperature (K)");
+ STYPE ("tau-t", tau_t, NULL);
+ STYPE ("ref-t", ref_t, NULL);
+ CTYPE ("pressure coupling");
+ EETYPE("pcoupl", ir->epc, epcoupl_names);
+ EETYPE("pcoupltype", ir->epct, epcoupltype_names);
+ ITYPE ("nstpcouple", ir->nstpcouple, -1);
+ CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)");
+ RTYPE ("tau-p", ir->tau_p, 1.0);
+ STYPE ("compressibility", dumstr[0], NULL);
+ STYPE ("ref-p", dumstr[1], NULL);
+ CTYPE ("Scaling of reference coordinates, No, All or COM");
+ EETYPE ("refcoord-scaling",ir->refcoord_scaling,erefscaling_names);
+
+ /* QMMM */
+ CCTYPE ("OPTIONS FOR QMMM calculations");
+ EETYPE("QMMM", ir->bQMMM, yesno_names);
+ CTYPE ("Groups treated Quantum Mechanically");
+ STYPE ("QMMM-grps", QMMM, NULL);
+ CTYPE ("QM method");
+ STYPE("QMmethod", QMmethod, NULL);
+ CTYPE ("QMMM scheme");
+ EETYPE("QMMMscheme", ir->QMMMscheme, eQMMMscheme_names);
+ CTYPE ("QM basisset");
+ STYPE("QMbasis", QMbasis, NULL);
+ CTYPE ("QM charge");
+ STYPE ("QMcharge", QMcharge,NULL);
+ CTYPE ("QM multiplicity");
+ STYPE ("QMmult", QMmult,NULL);
+ CTYPE ("Surface Hopping");
+ STYPE ("SH", bSH, NULL);
+ CTYPE ("CAS space options");
+ STYPE ("CASorbitals", CASorbitals, NULL);
+ STYPE ("CASelectrons", CASelectrons, NULL);
+ STYPE ("SAon", SAon, NULL);
+ STYPE ("SAoff",SAoff,NULL);
+ STYPE ("SAsteps", SAsteps, NULL);
+ CTYPE ("Scale factor for MM charges");
+ RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0);
+ CTYPE ("Optimization of QM subsystem");
+ STYPE ("bOPT", bOPT, NULL);
+ STYPE ("bTS", bTS, NULL);
+
+ /* Simulated annealing */
+ CCTYPE("SIMULATED ANNEALING");
+ CTYPE ("Type of annealing for each temperature group (no/single/periodic)");
+ STYPE ("annealing", anneal, NULL);
+ CTYPE ("Number of time points to use for specifying annealing in each group");
+ STYPE ("annealing-npoints", anneal_npoints, NULL);
+ CTYPE ("List of times at the annealing points for each group");
+ STYPE ("annealing-time", anneal_time, NULL);
+ CTYPE ("Temp. at each annealing point, for each group.");
+ STYPE ("annealing-temp", anneal_temp, NULL);
+
+ /* Startup run */
+ CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN");
+ EETYPE("gen-vel", opts->bGenVel, yesno_names);
+ RTYPE ("gen-temp", opts->tempi, 300.0);
+ ITYPE ("gen-seed", opts->seed, 173529);
+
+ /* Shake stuff */
+ CCTYPE ("OPTIONS FOR BONDS");
+ EETYPE("constraints", opts->nshake, constraints);
+ CTYPE ("Type of constraint algorithm");
+ EETYPE("constraint-algorithm", ir->eConstrAlg, econstr_names);
+ CTYPE ("Do not constrain the start configuration");
+ EETYPE("continuation", ir->bContinuation, yesno_names);
+ CTYPE ("Use successive overrelaxation to reduce the number of shake iterations");
+ EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names);
+ CTYPE ("Relative tolerance of shake");
+ RTYPE ("shake-tol", ir->shake_tol, 0.0001);
+ CTYPE ("Highest order in the expansion of the constraint coupling matrix");
+ ITYPE ("lincs-order", ir->nProjOrder, 4);
+ CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for");
+ CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs.");
+ CTYPE ("For energy minimization with constraints it should be 4 to 8.");
+ ITYPE ("lincs-iter", ir->nLincsIter, 1);
+ CTYPE ("Lincs will write a warning to the stderr if in one step a bond");
+ CTYPE ("rotates over more degrees than");
+ RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0);
+ CTYPE ("Convert harmonic bonds to morse potentials");
+ EETYPE("morse", opts->bMorse,yesno_names);
+
+ /* Energy group exclusions */
+ CCTYPE ("ENERGY GROUP EXCLUSIONS");
+ CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded");
+ STYPE ("energygrp-excl", egpexcl, NULL);
+
+ /* Walls */
+ CCTYPE ("WALLS");
+ CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald");
+ ITYPE ("nwall", ir->nwall, 0);
+ EETYPE("wall-type", ir->wall_type, ewt_names);
+ RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1);
+ STYPE ("wall-atomtype", wall_atomtype, NULL);
+ STYPE ("wall-density", wall_density, NULL);
+ RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3);
+
+ /* COM pulling */
+ CCTYPE("COM PULLING");
+ CTYPE("Pull type: no, umbrella, constraint or constant-force");
+ EETYPE("pull", ir->ePull, epull_names);
+ if (ir->ePull != epullNO) {
+ snew(ir->pull,1);
+ pull_grp = read_pullparams(&ninp,&inp,ir->pull,&opts->pull_start,wi);
+ }
+
+ /* Enforced rotation */
+ CCTYPE("ENFORCED ROTATION");
+ CTYPE("Enforced rotation: No or Yes");
+ EETYPE("rotation", ir->bRot, yesno_names);
+ if (ir->bRot) {
+ snew(ir->rot,1);
+ rot_grp = read_rotparams(&ninp,&inp,ir->rot,wi);
+ }
+
+ /* Refinement */
+ CCTYPE("NMR refinement stuff");
+ CTYPE ("Distance restraints type: No, Simple or Ensemble");
+ EETYPE("disre", ir->eDisre, edisre_names);
+ CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal");
+ EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names);
+ CTYPE ("Use sqrt of the time averaged times the instantaneous violation");
+ EETYPE("disre-mixed", ir->bDisreMixed, yesno_names);
+ RTYPE ("disre-fc", ir->dr_fc, 1000.0);
+ RTYPE ("disre-tau", ir->dr_tau, 0.0);
+ CTYPE ("Output frequency for pair distances to energy file");
+ ITYPE ("nstdisreout", ir->nstdisreout, 100);
+ CTYPE ("Orientation restraints: No or Yes");
+ EETYPE("orire", opts->bOrire, yesno_names);
+ CTYPE ("Orientation restraints force constant and tau for time averaging");
+ RTYPE ("orire-fc", ir->orires_fc, 0.0);
+ RTYPE ("orire-tau", ir->orires_tau, 0.0);
+ STYPE ("orire-fitgrp",orirefitgrp, NULL);
+ CTYPE ("Output frequency for trace(SD) and S to energy file");
+ ITYPE ("nstorireout", ir->nstorireout, 100);
+
+ /* free energy variables */
+ CCTYPE ("Free energy variables");
+ EETYPE("free-energy", ir->efep, efep_names);
+ STYPE ("couple-moltype", couple_moltype, NULL);
+ EETYPE("couple-lambda0", opts->couple_lam0, couple_lam);
+ EETYPE("couple-lambda1", opts->couple_lam1, couple_lam);
+ EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names);
+
+ RTYPE ("init-lambda", fep->init_lambda,-1); /* start with -1 so
+ we can recognize if
+ it was not entered */
+ ITYPE ("init-lambda-state", fep->init_fep_state,0);
+ RTYPE ("delta-lambda",fep->delta_lambda,0.0);
+ ITYPE ("nstdhdl",fep->nstdhdl, 10);
+ STYPE ("fep-lambdas", fep_lambda[efptFEP], NULL);
+ STYPE ("mass-lambdas", fep_lambda[efptMASS], NULL);
+ STYPE ("coul-lambdas", fep_lambda[efptCOUL], NULL);
+ STYPE ("vdw-lambdas", fep_lambda[efptVDW], NULL);
+ STYPE ("bonded-lambdas", fep_lambda[efptBONDED], NULL);
+ STYPE ("restraint-lambdas", fep_lambda[efptRESTRAINT], NULL);
+ STYPE ("temperature-lambdas", fep_lambda[efptTEMPERATURE], NULL);
+ STYPE ("init-lambda-weights",lambda_weights,NULL);
+ EETYPE("dhdl-print-energy", fep->bPrintEnergy, yesno_names);
+ RTYPE ("sc-alpha",fep->sc_alpha,0.0);
+ ITYPE ("sc-power",fep->sc_power,1);
+ RTYPE ("sc-r-power",fep->sc_r_power,6.0);
+ RTYPE ("sc-sigma",fep->sc_sigma,0.3);
+ EETYPE("sc-coul",fep->bScCoul,yesno_names);
+ ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
+ RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
+ EETYPE("separate-dhdl-file", fep->separate_dhdl_file,
+ separate_dhdl_file_names);
+ EETYPE("dhdl-derivatives", fep->dhdl_derivatives, dhdl_derivatives_names);
+ ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
+ RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
+
+ /* Non-equilibrium MD stuff */
+ CCTYPE("Non-equilibrium MD stuff");
+ STYPE ("acc-grps", accgrps, NULL);
+ STYPE ("accelerate", acc, NULL);
+ STYPE ("freezegrps", freeze, NULL);
+ STYPE ("freezedim", frdim, NULL);
+ RTYPE ("cos-acceleration", ir->cos_accel, 0);
+ STYPE ("deform", deform, NULL);
+
+ /* simulated tempering variables */
+ CCTYPE("simulated tempering variables");
+ EETYPE("simulated-tempering",ir->bSimTemp,yesno_names);
+ EETYPE("simulated-tempering-scaling",ir->simtempvals->eSimTempScale,esimtemp_names);
+ RTYPE("sim-temp-low",ir->simtempvals->simtemp_low,300.0);
+ RTYPE("sim-temp-high",ir->simtempvals->simtemp_high,300.0);
+
+ /* expanded ensemble variables */
+ if (ir->efep==efepEXPANDED || ir->bSimTemp)
+ {
+ read_expandedparams(&ninp,&inp,expand,wi);
+ }
+
+ /* Electric fields */
+ CCTYPE("Electric fields");
+ CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
+ CTYPE ("and a phase angle (real)");
+ STYPE ("E-x", efield_x, NULL);
+ STYPE ("E-xt", efield_xt, NULL);
+ STYPE ("E-y", efield_y, NULL);
+ STYPE ("E-yt", efield_yt, NULL);
+ STYPE ("E-z", efield_z, NULL);
+ STYPE ("E-zt", efield_zt, NULL);
+
+ /* AdResS defined thingies */
+ CCTYPE ("AdResS parameters");
+ EETYPE("adress", ir->bAdress, yesno_names);
+ if (ir->bAdress) {
+ snew(ir->adress,1);
+ read_adressparams(&ninp,&inp,ir->adress,wi);
+ }
+
+ /* User defined thingies */
+ CCTYPE ("User defined thingies");
+ STYPE ("user1-grps", user1, NULL);
+ STYPE ("user2-grps", user2, NULL);
+ ITYPE ("userint1", ir->userint1, 0);
+ ITYPE ("userint2", ir->userint2, 0);
+ ITYPE ("userint3", ir->userint3, 0);
+ ITYPE ("userint4", ir->userint4, 0);
+ RTYPE ("userreal1", ir->userreal1, 0);
+ RTYPE ("userreal2", ir->userreal2, 0);
+ RTYPE ("userreal3", ir->userreal3, 0);
+ RTYPE ("userreal4", ir->userreal4, 0);
+#undef CTYPE
+
+ write_inpfile(mdparout,ninp,inp,FALSE,wi);
+ for (i=0; (i<ninp); i++) {
+ sfree(inp[i].name);
+ sfree(inp[i].value);
+ }
+ sfree(inp);
+
+ /* Process options if necessary */
+ for(m=0; m<2; m++) {
+ for(i=0; i<2*DIM; i++)
+ dumdub[m][i]=0.0;
+ if(ir->epc) {
+ switch (ir->epct) {
+ case epctISOTROPIC:
+ if (sscanf(dumstr[m],"%lf",&(dumdub[m][XX]))!=1) {
+ warning_error(wi,"Pressure coupling not enough values (I need 1)");
+ }
+ dumdub[m][YY]=dumdub[m][ZZ]=dumdub[m][XX];
+ break;
+ case epctSEMIISOTROPIC:
+ case epctSURFACETENSION:
+ if (sscanf(dumstr[m],"%lf%lf",
+ &(dumdub[m][XX]),&(dumdub[m][ZZ]))!=2) {
+ warning_error(wi,"Pressure coupling not enough values (I need 2)");
+ }
+ dumdub[m][YY]=dumdub[m][XX];
+ break;
+ case epctANISOTROPIC:
+ if (sscanf(dumstr[m],"%lf%lf%lf%lf%lf%lf",
+ &(dumdub[m][XX]),&(dumdub[m][YY]),&(dumdub[m][ZZ]),
+ &(dumdub[m][3]),&(dumdub[m][4]),&(dumdub[m][5]))!=6) {
+ warning_error(wi,"Pressure coupling not enough values (I need 6)");
+ }
+ break;
+ default:
+ gmx_fatal(FARGS,"Pressure coupling type %s not implemented yet",
+ epcoupltype_names[ir->epct]);
+ }
+ }
+ }
+ clear_mat(ir->ref_p);
+ clear_mat(ir->compress);
+ for(i=0; i<DIM; i++) {
+ ir->ref_p[i][i] = dumdub[1][i];
+ ir->compress[i][i] = dumdub[0][i];
+ }
+ if (ir->epct == epctANISOTROPIC) {
+ ir->ref_p[XX][YY] = dumdub[1][3];
+ ir->ref_p[XX][ZZ] = dumdub[1][4];
+ ir->ref_p[YY][ZZ] = dumdub[1][5];
+ if (ir->ref_p[XX][YY]!=0 && ir->ref_p[XX][ZZ]!=0 && ir->ref_p[YY][ZZ]!=0) {
+ warning(wi,"All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n");
+ }
+ ir->compress[XX][YY] = dumdub[0][3];
+ ir->compress[XX][ZZ] = dumdub[0][4];
+ ir->compress[YY][ZZ] = dumdub[0][5];
+ for(i=0; i<DIM; i++) {
+ for(m=0; m<i; m++) {
+ ir->ref_p[i][m] = ir->ref_p[m][i];
+ ir->compress[i][m] = ir->compress[m][i];
+ }
+ }
+ }
+
+ if (ir->comm_mode == ecmNO)
+ ir->nstcomm = 0;
+
+ opts->couple_moltype = NULL;
+ if (strlen(couple_moltype) > 0)
+ {
+ if (ir->efep != efepNO)
+ {
+ opts->couple_moltype = strdup(couple_moltype);
+ if (opts->couple_lam0 == opts->couple_lam1)
+ {
+ warning(wi,"The lambda=0 and lambda=1 states for coupling are identical");
+ }
+ if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE ||
+ opts->couple_lam1 == ecouplamNONE))
+ {
+ warning(wi,"For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used");
+ }
+ }
+ else
+ {
+ warning(wi,"Can not couple a molecule with free_energy = no");
+ }
+ }
+ /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */
+ if (ir->efep != efepNO) {
+ if (fep->delta_lambda > 0) {
+ ir->efep = efepSLOWGROWTH;
+ }
+ }
+
+ if (ir->bSimTemp) {
+ fep->bPrintEnergy = TRUE;
+ /* always print out the energy to dhdl if we are doing expanded ensemble, since we need the total energy
+ if the temperature is changing. */
+ }
+
+ if ((ir->efep != efepNO) || ir->bSimTemp)
+ {
+ ir->bExpanded = FALSE;
+ if ((ir->efep == efepEXPANDED) || ir->bSimTemp)
+ {
+ ir->bExpanded = TRUE;
+ }
+ do_fep_params(ir,fep_lambda,lambda_weights);
+ if (ir->bSimTemp) { /* done after fep params */
+ do_simtemp_params(ir);
+ }
+ }
+ else
+ {
+ ir->fepvals->n_lambda = 0;
+ }
+
+ /* WALL PARAMETERS */
+
+ do_wall_params(ir,wall_atomtype,wall_density,opts);
+
+ /* ORIENTATION RESTRAINT PARAMETERS */
+
+ if (opts->bOrire && str_nelem(orirefitgrp,MAXPTR,NULL)!=1) {
+ warning_error(wi,"ERROR: Need one orientation restraint fit group\n");
+ }
+
+ /* DEFORMATION PARAMETERS */
+
+ clear_mat(ir->deform);
+ for(i=0; i<6; i++)
+ {
+ dumdub[0][i] = 0;
+ }
+ m = sscanf(deform,"%lf %lf %lf %lf %lf %lf",
+ &(dumdub[0][0]),&(dumdub[0][1]),&(dumdub[0][2]),
+ &(dumdub[0][3]),&(dumdub[0][4]),&(dumdub[0][5]));
+ for(i=0; i<3; i++)
+ {
+ ir->deform[i][i] = dumdub[0][i];
+ }
+ ir->deform[YY][XX] = dumdub[0][3];
+ ir->deform[ZZ][XX] = dumdub[0][4];
+ ir->deform[ZZ][YY] = dumdub[0][5];
+ if (ir->epc != epcNO) {
+ for(i=0; i<3; i++)
+ for(j=0; j<=i; j++)
+ if (ir->deform[i][j]!=0 && ir->compress[i][j]!=0) {
+ warning_error(wi,"A box element has deform set and compressibility > 0");
+ }
+ for(i=0; i<3; i++)
+ for(j=0; j<i; j++)
+ if (ir->deform[i][j]!=0) {
+ for(m=j; m<DIM; m++)
+ if (ir->compress[m][j]!=0) {
+ sprintf(warn_buf,"An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
+ warning(wi,warn_buf);
+ }
+ }
+ }
+
+ sfree(dumstr[0]);
+ sfree(dumstr[1]);
+}
+
+static int search_QMstring(char *s,int ng,const char *gn[])
+{
+ /* Case-insensitive lookup of s among the ng strings in gn[].
+  * Same as normal search_string, but this one searches QM strings
+  * (method / basisset names) and fails with a QM-specific message.
+  * Returns the index of the first match. On failure gmx_fatal()
+  * terminates the program, so the trailing "return -1" is only
+  * there to silence compiler warnings.
+  * NOTE(review): the "\n!" in the fatal message looks transposed
+  * ("!\n" was probably intended); the string is left untouched here. */
+ int i;
+
+ for(i=0; (i<ng); i++)
+ if (gmx_strcasecmp(s,gn[i]) == 0)
+ return i;
+
+ gmx_fatal(FARGS,"this QM method or basisset (%s) is not implemented\n!",s);
+
+ return -1;
+
+} /* search_QMstring */
+
+
+int search_string(char *s,int ng,char *gn[])
+{
+ /* Case-insensitive lookup of group name s among the ng names in gn[].
+  * Returns the index of the first match. If the name is not found,
+  * gmx_fatal() aborts with a user-level explanation (group names must
+  * match [moleculetype] names or index groups supplied via grompp -n);
+  * the final "return -1" is never reached and only silences warnings. */
+ int i;
+
+ for(i=0; (i<ng); i++)
+ {
+ if (gmx_strcasecmp(s,gn[i]) == 0)
+ {
+ return i;
+ }
+ }
+
+ gmx_fatal(FARGS,
+ "Group %s referenced in the .mdp file was not found in the index file.\n"
+ "Group names must match either [moleculetype] names or custom index group\n"
+ "names, in which case you must supply an index file to the '-n' option\n"
+ "of grompp.",
+ s);
+
+ return -1;
+}
+
+static gmx_bool do_numbering(int natoms,gmx_groups_t *groups,int ng,char *ptrs[],
+ t_blocka *block,char *gnames[],
+ int gtype,int restnm,
+ int grptp,gmx_bool bVerbose,
+ warninp_t wi)
+{
+ /* Assign each of the natoms atoms to one group of category gtype
+  * (T-coupling, energy, freeze, ...), based on the ng group names in
+  * ptrs[] which are looked up in the index structure block/gnames.
+  *
+  * grptp controls coverage policy:
+  *   egrptpONE         - all named groups collapse into a single group
+  *   egrptpALL         - every atom must be covered (fatal otherwise)
+  *   egrptpPART        - uncovered atoms allowed, but a note is issued
+  *   egrptpALL_GENREST - uncovered atoms are put in a "rest" group
+  *                       whose name index is restnm
+  *
+  * On success the per-atom group numbers are stored in
+  * groups->grpnr[gtype] (or left NULL when everything is in one group,
+  * as an optimization). Returns TRUE only when grptp == egrptpPART and
+  * a rest group was actually needed, so the caller can warn. */
+ unsigned short *cbuf;
+ t_grps *grps=&(groups->grps[gtype]);
+ int i,j,gid,aj,ognr,ntot=0;
+ const char *title;
+ gmx_bool bRest;
+ char warn_buf[STRLEN];
+
+ if (debug)
+ {
+ fprintf(debug,"Starting numbering %d groups of type %d\n",ng,gtype);
+ }
+
+ title = gtypes[gtype];
+
+ snew(cbuf,natoms);
+ /* Mark all id's as not set */
+ for(i=0; (i<natoms); i++)
+ {
+ cbuf[i] = NOGID;
+ }
+
+ snew(grps->nm_ind,ng+1); /* +1 for possible rest group */
+ for(i=0; (i<ng); i++)
+ {
+ /* Lookup the group name in the block structure */
+ gid = search_string(ptrs[i],block->nr,gnames);
+ if ((grptp != egrptpONE) || (i == 0))
+ {
+ grps->nm_ind[grps->nr++]=gid;
+ }
+ if (debug)
+ {
+ fprintf(debug,"Found gid %d for group %s\n",gid,ptrs[i]);
+ }
+
+ /* Now go over the atoms in the group */
+ for(j=block->index[gid]; (j<block->index[gid+1]); j++)
+ {
+
+ aj=block->a[j];
+
+ /* Range checking */
+ if ((aj < 0) || (aj >= natoms))
+ {
+ gmx_fatal(FARGS,"Invalid atom number %d in indexfile",aj);
+ }
+ /* Lookup up the old group number */
+ ognr = cbuf[aj];
+ if (ognr != NOGID)
+ {
+ /* An atom may belong to at most one group of a given category */
+ gmx_fatal(FARGS,"Atom %d in multiple %s groups (%d and %d)",
+ aj+1,title,ognr+1,i+1);
+ }
+ else
+ {
+ /* Store the group number in buffer */
+ if (grptp == egrptpONE)
+ {
+ cbuf[aj] = 0;
+ }
+ else
+ {
+ cbuf[aj] = i;
+ }
+ ntot++;
+ }
+ }
+ }
+
+ /* Now check whether we have done all atoms */
+ bRest = FALSE;
+ if (ntot != natoms)
+ {
+ if (grptp == egrptpALL)
+ {
+ gmx_fatal(FARGS,"%d atoms are not part of any of the %s groups",
+ natoms-ntot,title);
+ }
+ else if (grptp == egrptpPART)
+ {
+ sprintf(warn_buf,"%d atoms are not part of any of the %s groups",
+ natoms-ntot,title);
+ warning_note(wi,warn_buf);
+ }
+ /* Assign all atoms currently unassigned to a rest group */
+ for(j=0; (j<natoms); j++)
+ {
+ if (cbuf[j] == NOGID)
+ {
+ cbuf[j] = grps->nr;
+ bRest = TRUE;
+ }
+ }
+ if (grptp != egrptpPART)
+ {
+ if (bVerbose)
+ {
+ fprintf(stderr,
+ "Making dummy/rest group for %s containing %d elements\n",
+ title,natoms-ntot);
+ }
+ /* Add group name "rest" */
+ grps->nm_ind[grps->nr] = restnm;
+
+ /* Assign the rest name to all atoms not currently assigned to a group */
+ for(j=0; (j<natoms); j++)
+ {
+ if (cbuf[j] == NOGID)
+ {
+ cbuf[j] = grps->nr;
+ }
+ }
+ grps->nr++;
+ }
+ }
+
+ if (grps->nr == 1 && (ntot == 0 || ntot == natoms))
+ {
+ /* All atoms are part of one (or no) group, no index required */
+ groups->ngrpnr[gtype] = 0;
+ groups->grpnr[gtype] = NULL;
+ }
+ else
+ {
+ /* Keep an explicit per-atom group-number array */
+ groups->ngrpnr[gtype] = natoms;
+ snew(groups->grpnr[gtype],natoms);
+ for(j=0; (j<natoms); j++)
+ {
+ groups->grpnr[gtype][j] = cbuf[j];
+ }
+ }
+
+ sfree(cbuf);
+
+ /* TRUE only for the "partial coverage allowed" policy when a rest
+  * group was actually generated; callers use this to warn the user. */
+ return (bRest && grptp == egrptpPART);
+}
+
+static void calc_nrdf(gmx_mtop_t *mtop,t_inputrec *ir,char **gnames)
+{
+ /* Compute the number of degrees of freedom (nrdf) for each
+  * temperature-coupling group and store it in ir->opts.nrdf[].
+  *
+  * Outline of the accounting done below:
+  *   1. every real atom/nucleus contributes DIM dofs, minus one per
+  *      frozen dimension (freeze groups);
+  *   2. each constraint (F_CONSTR/F_CONSTRNC) removes half a dof from
+  *      each of its two atoms; each SETTLE removes one dof per atom;
+  *   3. COM pull constraints remove further dofs;
+  *   4. center-of-mass motion removal subtracts 3 (linear) or 6
+  *      (angular) dofs per VCM group, distributed over the TC groups
+  *      in proportion to group overlap.
+  * Sums are accumulated in doubles because single precision loses
+  * dofs above ~3 million atoms (see comment at the snew calls). */
+ t_grpopts *opts;
+ gmx_groups_t *groups;
+ t_pull *pull;
+ int natoms,ai,aj,i,j,d,g,imin,jmin,nc;
+ t_iatom *ia;
+ int *nrdf2,*na_vcm,na_tot;
+ double *nrdf_tc,*nrdf_vcm,nrdf_uc,n_sub=0;
+ gmx_mtop_atomloop_all_t aloop;
+ t_atom *atom;
+ int mb,mol,ftype,as;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+
+ /* Calculate nrdf.
+ * First calc 3xnr-atoms for each group
+ * then subtract half a degree of freedom for each constraint
+ *
+ * Only atoms and nuclei contribute to the degrees of freedom...
+ */
+
+ opts = &ir->opts;
+
+ groups = &mtop->groups;
+ natoms = mtop->natoms;
+
+ /* Allocate one more for a possible rest group */
+ /* We need to sum degrees of freedom into doubles,
+ * since floats give too low nrdf's above 3 million atoms.
+ */
+ snew(nrdf_tc,groups->grps[egcTC].nr+1);
+ snew(nrdf_vcm,groups->grps[egcVCM].nr+1);
+ snew(na_vcm,groups->grps[egcVCM].nr+1);
+
+ for(i=0; i<groups->grps[egcTC].nr; i++)
+ nrdf_tc[i] = 0;
+ for(i=0; i<groups->grps[egcVCM].nr+1; i++)
+ nrdf_vcm[i] = 0;
+
+ /* nrdf2[i] holds TWICE the dof count of atom i, so half-dof
+  * subtractions below stay in integer arithmetic. */
+ snew(nrdf2,natoms);
+ aloop = gmx_mtop_atomloop_all_init(mtop);
+ while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+ nrdf2[i] = 0;
+ if (atom->ptype == eptAtom || atom->ptype == eptNucleus) {
+ g = ggrpnr(groups,egcFREEZE,i);
+ /* Double count nrdf for particle i */
+ for(d=0; d<DIM; d++) {
+ if (opts->nFreeze[g][d] == 0) {
+ nrdf2[i] += 2;
+ }
+ }
+ nrdf_tc [ggrpnr(groups,egcTC ,i)] += 0.5*nrdf2[i];
+ nrdf_vcm[ggrpnr(groups,egcVCM,i)] += 0.5*nrdf2[i];
+ }
+ }
+
+ /* Walk all molecule blocks; as = global index of the first atom of
+  * the current molecule, used to map intra-molecule atom indices to
+  * global ones. */
+ as = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+ atom = molt->atoms.atom;
+ for(mol=0; mol<molb->nmol; mol++) {
+ for (ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
+ ia = molt->ilist[ftype].iatoms;
+ for(i=0; i<molt->ilist[ftype].nr; ) {
+ /* Subtract degrees of freedom for the constraints,
+ * if the particles still have degrees of freedom left.
+ * If one of the particles is a vsite or a shell, then all
+ * constraint motion will go there, but since they do not
+ * contribute to the constraints the degrees of freedom do not
+ * change.
+ */
+ ai = as + ia[1];
+ aj = as + ia[2];
+ if (((atom[ia[1]].ptype == eptNucleus) ||
+ (atom[ia[1]].ptype == eptAtom)) &&
+ ((atom[ia[2]].ptype == eptNucleus) ||
+ (atom[ia[2]].ptype == eptAtom))) {
+ if (nrdf2[ai] > 0)
+ jmin = 1;
+ else
+ jmin = 2;
+ if (nrdf2[aj] > 0)
+ imin = 1;
+ else
+ imin = 2;
+ /* Never subtract more dofs than an atom has left */
+ imin = min(imin,nrdf2[ai]);
+ jmin = min(jmin,nrdf2[aj]);
+ nrdf2[ai] -= imin;
+ nrdf2[aj] -= jmin;
+ nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
+ nrdf_tc [ggrpnr(groups,egcTC ,aj)] -= 0.5*jmin;
+ nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
+ nrdf_vcm[ggrpnr(groups,egcVCM,aj)] -= 0.5*jmin;
+ }
+ ia += interaction_function[ftype].nratoms+1;
+ i += interaction_function[ftype].nratoms+1;
+ }
+ }
+ ia = molt->ilist[F_SETTLE].iatoms;
+ for(i=0; i<molt->ilist[F_SETTLE].nr; ) {
+ /* Subtract 1 dof from every atom in the SETTLE */
+ for(j=0; j<3; j++) {
+ ai = as + ia[1+j];
+ imin = min(2,nrdf2[ai]);
+ nrdf2[ai] -= imin;
+ nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
+ nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
+ }
+ /* A SETTLE entry is 4 t_iatoms: type index + 3 atoms */
+ ia += 4;
+ i += 4;
+ }
+ as += molt->atoms.nr;
+ }
+ }
+
+ if (ir->ePull == epullCONSTRAINT) {
+ /* Correct nrdf for the COM constraints.
+ * We correct using the TC and VCM group of the first atom
+ * in the reference and pull group. If atoms in one pull group
+ * belong to different TC or VCM groups it is anyhow difficult
+ * to determine the optimal nrdf assignment.
+ */
+ pull = ir->pull;
+ if (pull->eGeom == epullgPOS) {
+ /* Position pulling constrains one dof per pulled dimension */
+ nc = 0;
+ for(i=0; i<DIM; i++) {
+ if (pull->dim[i])
+ nc++;
+ }
+ } else {
+ nc = 1;
+ }
+ for(i=0; i<pull->ngrp; i++) {
+ imin = 2*nc;
+ if (pull->grp[0].nat > 0) {
+ /* Subtract 1/2 dof from the reference group */
+ ai = pull->grp[0].ind[0];
+ if (nrdf_tc[ggrpnr(groups,egcTC,ai)] > 1) {
+ nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5;
+ nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5;
+ imin--;
+ }
+ }
+ /* Subtract 1/2 dof from the pulled group */
+ ai = pull->grp[1+i].ind[0];
+ nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
+ nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
+ if (nrdf_tc[ggrpnr(groups,egcTC,ai)] < 0)
+ gmx_fatal(FARGS,"Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative",gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups,egcTC,ai)]]);
+ }
+ }
+
+ if (ir->nstcomm != 0) {
+ /* Subtract 3 from the number of degrees of freedom in each vcm group
+ * when com translation is removed and 6 when rotation is removed
+ * as well.
+ */
+ switch (ir->comm_mode) {
+ case ecmLINEAR:
+ n_sub = ndof_com(ir);
+ break;
+ case ecmANGULAR:
+ n_sub = 6;
+ break;
+ default:
+ n_sub = 0;
+ gmx_incons("Checking comm_mode");
+ }
+
+ for(i=0; i<groups->grps[egcTC].nr; i++) {
+ /* Count the number of atoms of TC group i for every VCM group */
+ for(j=0; j<groups->grps[egcVCM].nr+1; j++)
+ na_vcm[j] = 0;
+ na_tot = 0;
+ for(ai=0; ai<natoms; ai++)
+ if (ggrpnr(groups,egcTC,ai) == i) {
+ na_vcm[ggrpnr(groups,egcVCM,ai)]++;
+ na_tot++;
+ }
+ /* Correct for VCM removal according to the fraction of each VCM
+ * group present in this TC group.
+ */
+ nrdf_uc = nrdf_tc[i];
+ if (debug) {
+ fprintf(debug,"T-group[%d] nrdf_uc = %g, n_sub = %g\n",
+ i,nrdf_uc,n_sub);
+ }
+ nrdf_tc[i] = 0;
+ for(j=0; j<groups->grps[egcVCM].nr+1; j++) {
+ if (nrdf_vcm[j] > n_sub) {
+ nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)*
+ (nrdf_vcm[j] - n_sub)/nrdf_vcm[j];
+ }
+ if (debug) {
+ fprintf(debug," nrdf_vcm[%d] = %g, nrdf = %g\n",
+ j,nrdf_vcm[j],nrdf_tc[i]);
+ }
+ }
+ }
+ }
+ /* Store the final (possibly fractional) nrdf per TC group,
+  * clamping at zero, and report to the user. */
+ for(i=0; (i<groups->grps[egcTC].nr); i++) {
+ opts->nrdf[i] = nrdf_tc[i];
+ if (opts->nrdf[i] < 0)
+ opts->nrdf[i] = 0;
+ fprintf(stderr,
+ "Number of degrees of freedom in T-Coupling group %s is %.2f\n",
+ gnames[groups->grps[egcTC].nm_ind[i]],opts->nrdf[i]);
+ }
+
+ sfree(nrdf2);
+ sfree(nrdf_tc);
+ sfree(nrdf_vcm);
+ sfree(na_vcm);
+}
+
+static void decode_cos(char *s,t_cosines *cosine,gmx_bool bTime)
+{
+ char *t;
+ char format[STRLEN],f1[STRLEN];
+ double a,phi;
+ int i;
+
+ t=strdup(s);
+ trim(t);
+
+ cosine->n=0;
+ cosine->a=NULL;
+ cosine->phi=NULL;
+ if (strlen(t)) {
+ sscanf(t,"%d",&(cosine->n));
+ if (cosine->n <= 0) {
+ cosine->n=0;
+ } else {
+ snew(cosine->a,cosine->n);
+ snew(cosine->phi,cosine->n);
+
+ sprintf(format,"%%*d");
+ for(i=0; (i<cosine->n); i++) {
+ strcpy(f1,format);
+ strcat(f1,"%lf%lf");
+ if (sscanf(t,f1,&a,&phi) < 2)
+ gmx_fatal(FARGS,"Invalid input for electric field shift: '%s'",t);
+ cosine->a[i]=a;
+ cosine->phi[i]=phi;
+ strcat(format,"%*lf%*lf");
+ }
+ }
+ }
+ sfree(t);
+}
+
+static gmx_bool do_egp_flag(t_inputrec *ir,gmx_groups_t *groups,
+ const char *option,const char *val,int flag)
+{
+ /* The maximum number of energy group pairs would be MAXPTR*(MAXPTR+1)/2.
+ * But since this is much larger than STRLEN, such a line can not be parsed.
+ * The real maximum is the number of names that fit in a string: STRLEN/2.
+ */
+#define EGP_MAX (STRLEN/2)
+ int nelem,i,j,k,nr;
+ char *names[EGP_MAX];
+ char ***gnames;
+ gmx_bool bSet;
+
+ gnames = groups->grpname;
+
+ nelem = str_nelem(val,EGP_MAX,names);
+ if (nelem % 2 != 0)
+ gmx_fatal(FARGS,"The number of groups for %s is odd",option);
+ nr = groups->grps[egcENER].nr;
+ bSet = FALSE;
+ for(i=0; i<nelem/2; i++) {
+ j = 0;
+ while ((j < nr) &&
+ gmx_strcasecmp(names[2*i],*(gnames[groups->grps[egcENER].nm_ind[j]])))
+ j++;
+ if (j == nr)
+ gmx_fatal(FARGS,"%s in %s is not an energy group\n",
+ names[2*i],option);
+ k = 0;
+ while ((k < nr) &&
+ gmx_strcasecmp(names[2*i+1],*(gnames[groups->grps[egcENER].nm_ind[k]])))
+ k++;
+ if (k==nr)
+ gmx_fatal(FARGS,"%s in %s is not an energy group\n",
+ names[2*i+1],option);
+ if ((j < nr) && (k < nr)) {
+ ir->opts.egp_flags[nr*j+k] |= flag;
+ ir->opts.egp_flags[nr*k+j] |= flag;
+ bSet = TRUE;
+ }
+ }
+
+ return bSet;
+}
+
+void do_index(const char* mdparin, const char *ndx,
+ gmx_mtop_t *mtop,
+ gmx_bool bVerbose,
+ t_inputrec *ir,rvec *v,
+ warninp_t wi)
+{
+ t_blocka *grps;
+ gmx_groups_t *groups;
+ int natoms;
+ t_symtab *symtab;
+ t_atoms atoms_all;
+ char warnbuf[STRLEN],**gnames;
+ int nr,ntcg,ntau_t,nref_t,nacc,nofg,nSA,nSA_points,nSA_time,nSA_temp;
+ real tau_min;
+ int nstcmin;
+ int nacg,nfreeze,nfrdim,nenergy,nvcm,nuser;
+ char *ptr1[MAXPTR],*ptr2[MAXPTR],*ptr3[MAXPTR];
+ int i,j,k,restnm;
+ real SAtime;
+ gmx_bool bExcl,bTable,bSetTCpar,bAnneal,bRest;
+ int nQMmethod,nQMbasis,nQMcharge,nQMmult,nbSH,nCASorb,nCASelec,
+ nSAon,nSAoff,nSAsteps,nQMg,nbOPT,nbTS;
+ char warn_buf[STRLEN];
+
+ if (bVerbose)
+ fprintf(stderr,"processing index file...\n");
+ debug_gmx();
+ if (ndx == NULL) {
+ snew(grps,1);
+ snew(grps->index,1);
+ snew(gnames,1);
+ atoms_all = gmx_mtop_global_atoms(mtop);
+ analyse(&atoms_all,grps,&gnames,FALSE,TRUE);
+ free_t_atoms(&atoms_all,FALSE);
+ } else {
+ grps = init_index(ndx,&gnames);
+ }
+
+ groups = &mtop->groups;
+ natoms = mtop->natoms;
+ symtab = &mtop->symtab;
+
+ snew(groups->grpname,grps->nr+1);
+
+ for(i=0; (i<grps->nr); i++) {
+ groups->grpname[i] = put_symtab(symtab,gnames[i]);
+ }
+ groups->grpname[i] = put_symtab(symtab,"rest");
+ restnm=i;
+ srenew(gnames,grps->nr+1);
+ gnames[restnm] = *(groups->grpname[i]);
+ groups->ngrpname = grps->nr+1;
+
+ set_warning_line(wi,mdparin,-1);
+
+ ntau_t = str_nelem(tau_t,MAXPTR,ptr1);
+ nref_t = str_nelem(ref_t,MAXPTR,ptr2);
+ ntcg = str_nelem(tcgrps,MAXPTR,ptr3);
+ if ((ntau_t != ntcg) || (nref_t != ntcg)) {
+ gmx_fatal(FARGS,"Invalid T coupling input: %d groups, %d ref-t values and "
+ "%d tau-t values",ntcg,nref_t,ntau_t);
+ }
+
+ bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI==eiBD || EI_TPI(ir->eI));
+ do_numbering(natoms,groups,ntcg,ptr3,grps,gnames,egcTC,
+ restnm,bSetTCpar ? egrptpALL : egrptpALL_GENREST,bVerbose,wi);
+ nr = groups->grps[egcTC].nr;
+ ir->opts.ngtc = nr;
+ snew(ir->opts.nrdf,nr);
+ snew(ir->opts.tau_t,nr);
+ snew(ir->opts.ref_t,nr);
+ if (ir->eI==eiBD && ir->bd_fric==0) {
+ fprintf(stderr,"bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n");
+ }
+
+ if (bSetTCpar)
+ {
+ if (nr != nref_t)
+ {
+ gmx_fatal(FARGS,"Not enough ref-t and tau-t values!");
+ }
+
+ tau_min = 1e20;
+ for(i=0; (i<nr); i++)
+ {
+ ir->opts.tau_t[i] = strtod(ptr1[i],NULL);
+ if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0)
+ {
+ sprintf(warn_buf,"With integrator %s tau-t should be larger than 0",ei_names[ir->eI]);
+ warning_error(wi,warn_buf);
+ }
++
++ if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0)
++ {
++ warning_note(wi,"tau-t = -1 is the new value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
++ }
++
++ if (ir->opts.tau_t[i] >= 0)
+ {
+ tau_min = min(tau_min,ir->opts.tau_t[i]);
+ }
+ }
+ if (ir->etc != etcNO && ir->nsttcouple == -1)
+ {
+ ir->nsttcouple = ir_optimal_nsttcouple(ir);
+ }
+
+ if (EI_VV(ir->eI))
+ {
+ if ((ir->etc==etcNOSEHOOVER) && (ir->epc==epcBERENDSEN)) {
+ gmx_fatal(FARGS,"Cannot do Nose-Hoover temperature with Berendsen pressure control with md-vv; use either vrescale temperature with berendsen pressure or Nose-Hoover temperature with MTTK pressure");
+ }
+ if ((ir->epc==epcMTTK) && (ir->etc>etcNO))
+ {
+ int mincouple;
+ mincouple = ir->nsttcouple;
+ if (ir->nstpcouple < mincouple)
+ {
+ mincouple = ir->nstpcouple;
+ }
+ ir->nstpcouple = mincouple;
+ ir->nsttcouple = mincouple;
+ sprintf(warn_buf,"for current Trotter decomposition methods with vv, nsttcouple and nstpcouple must be equal. Both have been reset to min(nsttcouple,nstpcouple) = %d",mincouple);
+ warning_note(wi,warn_buf);
+ }
+ }
+ /* velocity verlet with averaged kinetic energy KE = 0.5*(v(t+1/2) - v(t-1/2)) is implemented
+ primarily for testing purposes, and does not work with temperature coupling other than 1 */
+
+ if (ETC_ANDERSEN(ir->etc)) {
+ if (ir->nsttcouple != 1) {
+ ir->nsttcouple = 1;
+ sprintf(warn_buf,"Andersen temperature control methods assume nsttcouple = 1; there is no need for larger nsttcouple > 1, since no global parameters are computed. nsttcouple has been reset to 1");
+ warning_note(wi,warn_buf);
+ }
+ }
+ nstcmin = tcouple_min_integration_steps(ir->etc);
+ if (nstcmin > 1)
+ {
+ if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
+ {
+ sprintf(warn_buf,"For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
+ ETCOUPLTYPE(ir->etc),
+ tau_min,nstcmin,
+ ir->nsttcouple*ir->delta_t);
+ warning(wi,warn_buf);
+ }
+ }
+ for(i=0; (i<nr); i++)
+ {
+ ir->opts.ref_t[i] = strtod(ptr2[i],NULL);
+ if (ir->opts.ref_t[i] < 0)
+ {
+ gmx_fatal(FARGS,"ref-t for group %d negative",i);
+ }
+ }
+ /* set the lambda mc temperature to the md integrator temperature (which should be defined
+ if we are in this conditional) if mc_temp is negative */
+ if (ir->expandedvals->mc_temp < 0)
+ {
+ ir->expandedvals->mc_temp = ir->opts.ref_t[0]; /*for now, set to the first reft */
+ }
+ }
+
+ /* Simulated annealing for each group. There are nr groups */
+ nSA = str_nelem(anneal,MAXPTR,ptr1);
+ if (nSA == 1 && (ptr1[0][0]=='n' || ptr1[0][0]=='N'))
+ nSA = 0;
+ if(nSA>0 && nSA != nr)
+ gmx_fatal(FARGS,"Not enough annealing values: %d (for %d groups)\n",nSA,nr);
+ else {
+ snew(ir->opts.annealing,nr);
+ snew(ir->opts.anneal_npoints,nr);
+ snew(ir->opts.anneal_time,nr);
+ snew(ir->opts.anneal_temp,nr);
+ for(i=0;i<nr;i++) {
+ ir->opts.annealing[i]=eannNO;
+ ir->opts.anneal_npoints[i]=0;
+ ir->opts.anneal_time[i]=NULL;
+ ir->opts.anneal_temp[i]=NULL;
+ }
+ if (nSA > 0) {
+ bAnneal=FALSE;
+ for(i=0;i<nr;i++) {
+ if(ptr1[i][0]=='n' || ptr1[i][0]=='N') {
+ ir->opts.annealing[i]=eannNO;
+ } else if(ptr1[i][0]=='s'|| ptr1[i][0]=='S') {
+ ir->opts.annealing[i]=eannSINGLE;
+ bAnneal=TRUE;
+ } else if(ptr1[i][0]=='p'|| ptr1[i][0]=='P') {
+ ir->opts.annealing[i]=eannPERIODIC;
+ bAnneal=TRUE;
+ }
+ }
+ if(bAnneal) {
+ /* Read the other fields too */
+ nSA_points = str_nelem(anneal_npoints,MAXPTR,ptr1);
+ if(nSA_points!=nSA)
+ gmx_fatal(FARGS,"Found %d annealing-npoints values for %d groups\n",nSA_points,nSA);
+ for(k=0,i=0;i<nr;i++) {
+ ir->opts.anneal_npoints[i]=strtol(ptr1[i],NULL,10);
+ if(ir->opts.anneal_npoints[i]==1)
+ gmx_fatal(FARGS,"Please specify at least a start and an end point for annealing\n");
+ snew(ir->opts.anneal_time[i],ir->opts.anneal_npoints[i]);
+ snew(ir->opts.anneal_temp[i],ir->opts.anneal_npoints[i]);
+ k += ir->opts.anneal_npoints[i];
+ }
+
+ nSA_time = str_nelem(anneal_time,MAXPTR,ptr1);
+ if(nSA_time!=k)
+ gmx_fatal(FARGS,"Found %d annealing-time values, wanter %d\n",nSA_time,k);
+ nSA_temp = str_nelem(anneal_temp,MAXPTR,ptr2);
+ if(nSA_temp!=k)
+ gmx_fatal(FARGS,"Found %d annealing-temp values, wanted %d\n",nSA_temp,k);
+
+ for(i=0,k=0;i<nr;i++) {
+
+ for(j=0;j<ir->opts.anneal_npoints[i];j++) {
+ ir->opts.anneal_time[i][j]=strtod(ptr1[k],NULL);
+ ir->opts.anneal_temp[i][j]=strtod(ptr2[k],NULL);
+ if(j==0) {
+ if(ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
+ gmx_fatal(FARGS,"First time point for annealing > init_t.\n");
+ } else {
+ /* j>0 */
+ if(ir->opts.anneal_time[i][j]<ir->opts.anneal_time[i][j-1])
+ gmx_fatal(FARGS,"Annealing timepoints out of order: t=%f comes after t=%f\n",
+ ir->opts.anneal_time[i][j],ir->opts.anneal_time[i][j-1]);
+ }
+ if(ir->opts.anneal_temp[i][j]<0)
+ gmx_fatal(FARGS,"Found negative temperature in annealing: %f\n",ir->opts.anneal_temp[i][j]);
+ k++;
+ }
+ }
+ /* Print out some summary information, to make sure we got it right */
+ for(i=0,k=0;i<nr;i++) {
+ if(ir->opts.annealing[i]!=eannNO) {
+ j = groups->grps[egcTC].nm_ind[i];
+ fprintf(stderr,"Simulated annealing for group %s: %s, %d timepoints\n",
+ *(groups->grpname[j]),eann_names[ir->opts.annealing[i]],
+ ir->opts.anneal_npoints[i]);
+ fprintf(stderr,"Time (ps) Temperature (K)\n");
+ /* All terms except the last one */
+ for(j=0;j<(ir->opts.anneal_npoints[i]-1);j++)
+ fprintf(stderr,"%9.1f %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
+
+ /* Finally the last one */
+ j = ir->opts.anneal_npoints[i]-1;
+ if(ir->opts.annealing[i]==eannSINGLE)
+ fprintf(stderr,"%9.1f- %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
+ else {
+ fprintf(stderr,"%9.1f %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
+ if(fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0])>GMX_REAL_EPS)
+ warning_note(wi,"There is a temperature jump when your annealing loops back.\n");
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (ir->ePull != epullNO) {
+ make_pull_groups(ir->pull,pull_grp,grps,gnames);
+ }
+
+ if (ir->bRot) {
+ make_rotation_groups(ir->rot,rot_grp,grps,gnames);
+ }
+
+ nacc = str_nelem(acc,MAXPTR,ptr1);
+ nacg = str_nelem(accgrps,MAXPTR,ptr2);
+ if (nacg*DIM != nacc)
+ gmx_fatal(FARGS,"Invalid Acceleration input: %d groups and %d acc. values",
+ nacg,nacc);
+ do_numbering(natoms,groups,nacg,ptr2,grps,gnames,egcACC,
+ restnm,egrptpALL_GENREST,bVerbose,wi);
+ nr = groups->grps[egcACC].nr;
+ snew(ir->opts.acc,nr);
+ ir->opts.ngacc=nr;
+
+ for(i=k=0; (i<nacg); i++)
+ for(j=0; (j<DIM); j++,k++)
+ ir->opts.acc[i][j]=strtod(ptr1[k],NULL);
+ for( ;(i<nr); i++)
+ for(j=0; (j<DIM); j++)
+ ir->opts.acc[i][j]=0;
+
+ nfrdim = str_nelem(frdim,MAXPTR,ptr1);
+ nfreeze = str_nelem(freeze,MAXPTR,ptr2);
+ if (nfrdim != DIM*nfreeze)
+ gmx_fatal(FARGS,"Invalid Freezing input: %d groups and %d freeze values",
+ nfreeze,nfrdim);
+ do_numbering(natoms,groups,nfreeze,ptr2,grps,gnames,egcFREEZE,
+ restnm,egrptpALL_GENREST,bVerbose,wi);
+ nr = groups->grps[egcFREEZE].nr;
+ ir->opts.ngfrz=nr;
+ snew(ir->opts.nFreeze,nr);
+ for(i=k=0; (i<nfreeze); i++)
+ for(j=0; (j<DIM); j++,k++) {
+ ir->opts.nFreeze[i][j]=(gmx_strncasecmp(ptr1[k],"Y",1)==0);
+ if (!ir->opts.nFreeze[i][j]) {
+ if (gmx_strncasecmp(ptr1[k],"N",1) != 0) {
+ sprintf(warnbuf,"Please use Y(ES) or N(O) for freezedim only "
+ "(not %s)", ptr1[k]);
+ warning(wi,warn_buf);
+ }
+ }
+ }
+ for( ; (i<nr); i++)
+ for(j=0; (j<DIM); j++)
+ ir->opts.nFreeze[i][j]=0;
+
+ nenergy=str_nelem(energy,MAXPTR,ptr1);
+ do_numbering(natoms,groups,nenergy,ptr1,grps,gnames,egcENER,
+ restnm,egrptpALL_GENREST,bVerbose,wi);
+ add_wall_energrps(groups,ir->nwall,symtab);
+ ir->opts.ngener = groups->grps[egcENER].nr;
+ nvcm=str_nelem(vcm,MAXPTR,ptr1);
+ bRest =
+ do_numbering(natoms,groups,nvcm,ptr1,grps,gnames,egcVCM,
+ restnm,nvcm==0 ? egrptpALL_GENREST : egrptpPART,bVerbose,wi);
+ if (bRest) {
+ warning(wi,"Some atoms are not part of any center of mass motion removal group.\n"
+ "This may lead to artifacts.\n"
+ "In most cases one should use one group for the whole system.");
+ }
+
+ /* Now we have filled the freeze struct, so we can calculate NRDF */
+ calc_nrdf(mtop,ir,gnames);
+
+ if (v && NULL) {
+ real fac,ntot=0;
+
+ /* Must check per group! */
+ for(i=0; (i<ir->opts.ngtc); i++)
+ ntot += ir->opts.nrdf[i];
+ if (ntot != (DIM*natoms)) {
+ fac = sqrt(ntot/(DIM*natoms));
+ if (bVerbose)
+ fprintf(stderr,"Scaling velocities by a factor of %.3f to account for constraints\n"
+ "and removal of center of mass motion\n",fac);
+ for(i=0; (i<natoms); i++)
+ svmul(fac,v[i],v[i]);
+ }
+ }
+
+ nuser=str_nelem(user1,MAXPTR,ptr1);
+ do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser1,
+ restnm,egrptpALL_GENREST,bVerbose,wi);
+ nuser=str_nelem(user2,MAXPTR,ptr1);
+ do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser2,
+ restnm,egrptpALL_GENREST,bVerbose,wi);
+ nuser=str_nelem(xtc_grps,MAXPTR,ptr1);
+ do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcXTC,
+ restnm,egrptpONE,bVerbose,wi);
+ nofg = str_nelem(orirefitgrp,MAXPTR,ptr1);
+ do_numbering(natoms,groups,nofg,ptr1,grps,gnames,egcORFIT,
+ restnm,egrptpALL_GENREST,bVerbose,wi);
+
+ /* QMMM input processing */
+ nQMg = str_nelem(QMMM,MAXPTR,ptr1);
+ nQMmethod = str_nelem(QMmethod,MAXPTR,ptr2);
+ nQMbasis = str_nelem(QMbasis,MAXPTR,ptr3);
+ if((nQMmethod != nQMg)||(nQMbasis != nQMg)){
+ gmx_fatal(FARGS,"Invalid QMMM input: %d groups %d basissets"
+ " and %d methods\n",nQMg,nQMbasis,nQMmethod);
+ }
+ /* group rest, if any, is always MM! */
+ do_numbering(natoms,groups,nQMg,ptr1,grps,gnames,egcQMMM,
+ restnm,egrptpALL_GENREST,bVerbose,wi);
+ nr = nQMg; /*atoms->grps[egcQMMM].nr;*/
+ ir->opts.ngQM = nQMg;
+ snew(ir->opts.QMmethod,nr);
+ snew(ir->opts.QMbasis,nr);
+ for(i=0;i<nr;i++){
+ /* input consists of strings: RHF CASSCF PM3 .. These need to be
+ * converted to the corresponding enum in names.c
+ */
+ ir->opts.QMmethod[i] = search_QMstring(ptr2[i],eQMmethodNR,
+ eQMmethod_names);
+ ir->opts.QMbasis[i] = search_QMstring(ptr3[i],eQMbasisNR,
+ eQMbasis_names);
+
+ }
+ nQMmult = str_nelem(QMmult,MAXPTR,ptr1);
+ nQMcharge = str_nelem(QMcharge,MAXPTR,ptr2);
+ nbSH = str_nelem(bSH,MAXPTR,ptr3);
+ snew(ir->opts.QMmult,nr);
+ snew(ir->opts.QMcharge,nr);
+ snew(ir->opts.bSH,nr);
+
+ for(i=0;i<nr;i++){
+ ir->opts.QMmult[i] = strtol(ptr1[i],NULL,10);
+ ir->opts.QMcharge[i] = strtol(ptr2[i],NULL,10);
+ ir->opts.bSH[i] = (gmx_strncasecmp(ptr3[i],"Y",1)==0);
+ }
+
+ nCASelec = str_nelem(CASelectrons,MAXPTR,ptr1);
+ nCASorb = str_nelem(CASorbitals,MAXPTR,ptr2);
+ snew(ir->opts.CASelectrons,nr);
+ snew(ir->opts.CASorbitals,nr);
+ for(i=0;i<nr;i++){
+ ir->opts.CASelectrons[i]= strtol(ptr1[i],NULL,10);
+ ir->opts.CASorbitals[i] = strtol(ptr2[i],NULL,10);
+ }
+ /* special optimization options */
+
+ nbOPT = str_nelem(bOPT,MAXPTR,ptr1);
+ nbTS = str_nelem(bTS,MAXPTR,ptr2);
+ snew(ir->opts.bOPT,nr);
+ snew(ir->opts.bTS,nr);
+ for(i=0;i<nr;i++){
+ ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i],"Y",1)==0);
+ ir->opts.bTS[i] = (gmx_strncasecmp(ptr2[i],"Y",1)==0);
+ }
+ nSAon = str_nelem(SAon,MAXPTR,ptr1);
+ nSAoff = str_nelem(SAoff,MAXPTR,ptr2);
+ nSAsteps = str_nelem(SAsteps,MAXPTR,ptr3);
+ snew(ir->opts.SAon,nr);
+ snew(ir->opts.SAoff,nr);
+ snew(ir->opts.SAsteps,nr);
+
+ for(i=0;i<nr;i++){
+ ir->opts.SAon[i] = strtod(ptr1[i],NULL);
+ ir->opts.SAoff[i] = strtod(ptr2[i],NULL);
+ ir->opts.SAsteps[i] = strtol(ptr3[i],NULL,10);
+ }
+ /* end of QMMM input */
+
+ if (bVerbose)
+ for(i=0; (i<egcNR); i++) {
+ fprintf(stderr,"%-16s has %d element(s):",gtypes[i],groups->grps[i].nr);
+ for(j=0; (j<groups->grps[i].nr); j++)
+ fprintf(stderr," %s",*(groups->grpname[groups->grps[i].nm_ind[j]]));
+ fprintf(stderr,"\n");
+ }
+
+ nr = groups->grps[egcENER].nr;
+ snew(ir->opts.egp_flags,nr*nr);
+
+ bExcl = do_egp_flag(ir,groups,"energygrp-excl",egpexcl,EGP_EXCL);
++ if (bExcl && ir->cutoff_scheme == ecutsVERLET)
++ {
++ warning_error(wi,"Energy group exclusions are not (yet) implemented for the Verlet scheme");
++ }
+ if (bExcl && EEL_FULL(ir->coulombtype))
+ warning(wi,"Can not exclude the lattice Coulomb energy between energy groups");
+
+ bTable = do_egp_flag(ir,groups,"energygrp-table",egptable,EGP_TABLE);
+ if (bTable && !(ir->vdwtype == evdwUSER) &&
+ !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) &&
+ !(ir->coulombtype == eelPMEUSERSWITCH))
+ gmx_fatal(FARGS,"Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb");
+
+ decode_cos(efield_x,&(ir->ex[XX]),FALSE);
+ decode_cos(efield_xt,&(ir->et[XX]),TRUE);
+ decode_cos(efield_y,&(ir->ex[YY]),FALSE);
+ decode_cos(efield_yt,&(ir->et[YY]),TRUE);
+ decode_cos(efield_z,&(ir->ex[ZZ]),FALSE);
+ decode_cos(efield_zt,&(ir->et[ZZ]),TRUE);
+
+ if (ir->bAdress)
+ do_adress_index(ir->adress,groups,gnames,&(ir->opts),wi);
+
+ for(i=0; (i<grps->nr); i++)
+ sfree(gnames[i]);
+ sfree(gnames);
+ done_blocka(grps);
+ sfree(grps);
+
+}
+
+
+
+/* Verify that distance restraint labels do not occur more than once.
+ * The force-field parameter list is scanned in order; a label equal to
+ * its immediate predecessor counts as a duplicate.
+ * NOTE(review): only *adjacent* duplicates are detected - presumably all
+ * parameter entries sharing one label are stored contiguously; confirm
+ * against the topology preprocessor.
+ * Duplicates usually mean the parameters for multiple pairs in one
+ * restraint were not identical, which is fatal.
+ */
+static void check_disre(gmx_mtop_t *mtop)
+{
+  gmx_ffparams_t *ffparams;
+  t_functype *functype;
+  t_iparams  *ip;
+  int i,ndouble,ftype;
+  int label,old_label;
+
+  /* Only scan when the topology actually contains distance restraints */
+  if (gmx_mtop_ftype_count(mtop,F_DISRES) > 0) {
+    ffparams = &mtop->ffparams;
+    functype = ffparams->functype;
+    ip = ffparams->iparams;
+    ndouble = 0;
+    old_label = -1;
+    for(i=0; i<ffparams->ntypes; i++) {
+      ftype = functype[i];
+      if (ftype == F_DISRES) {
+        label = ip[i].disres.label;
+        if (label == old_label) {
+          fprintf(stderr,"Distance restraint index %d occurs twice\n",label);
+          ndouble++;
+        }
+        old_label = label;
+      }
+    }
+    /* All duplicates were printed above; abort with a summary count */
+    if (ndouble>0)
+      gmx_fatal(FARGS,"Found %d double distance restraint indices,\n"
+                "probably the parameters for multiple pairs in one restraint "
+                "are not identical\n",ndouble);
+  }
+}
+
+/* Determine whether the system has an absolute reference in each dimension.
+ * On return AbsRef[d] is 1 when dimension d is anchored by:
+ *  - center-of-mass motion removal (dimensions >= ndof_com(ir)),
+ *    skipped when posres_only is TRUE,
+ *  - a freeze group with a non-zero freeze flag in that dimension,
+ *  - ordinary position restraints with a non-zero A-state force constant, or
+ *  - flat-bottom position restraints with non-zero k (geometry decides
+ *    which dimensions are anchored).
+ * Returns TRUE only when all three dimensions have an absolute reference.
+ */
+static gmx_bool absolute_reference(t_inputrec *ir,gmx_mtop_t *sys,
+                                   gmx_bool posres_only,
+                                   ivec AbsRef)
+{
+    int d,g,i;
+    gmx_mtop_ilistloop_t iloop;
+    t_ilist *ilist;
+    int nmol;
+    t_iparams *pr;
+
+    clear_ivec(AbsRef);
+
+    if (!posres_only)
+    {
+        /* Check the COM */
+        for(d=0; d<DIM; d++)
+        {
+            AbsRef[d] = (d < ndof_com(ir) ? 0 : 1);
+        }
+        /* Check for freeze groups */
+        for(g=0; g<ir->opts.ngfrz; g++)
+        {
+            for(d=0; d<DIM; d++)
+            {
+                if (ir->opts.nFreeze[g][d] != 0)
+                {
+                    AbsRef[d] = 1;
+                }
+            }
+        }
+    }
+
+    /* Check for position restraints */
+    iloop = gmx_mtop_ilistloop_init(sys);
+    while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol))
+    {
+        /* Skip molecule blocks once every dimension is already anchored */
+        if (nmol > 0 &&
+            (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0))
+        {
+            /* F_POSRES iatoms are (parameter index, atom) pairs, hence i+=2 */
+            for(i=0; i<ilist[F_POSRES].nr; i+=2)
+            {
+                pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]];
+                for(d=0; d<DIM; d++)
+                {
+                    if (pr->posres.fcA[d] != 0)
+                    {
+                        AbsRef[d] = 1;
+                    }
+                }
+            }
+            for(i=0; i<ilist[F_FBPOSRES].nr; i+=2)
+            {
+                /* Check for flat-bottom posres */
+                pr = &sys->ffparams.iparams[ilist[F_FBPOSRES].iatoms[i]];
+                if (pr->fbposres.k != 0)
+                {
+                    switch(pr->fbposres.geom)
+                    {
+                    case efbposresSPHERE:
+                        AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
+                        break;
+                    case efbposresCYLINDER:
+                        AbsRef[XX] = AbsRef[YY] = 1;
+                        break;
+                    case efbposresX: /* d=XX */
+                    case efbposresY: /* d=YY */
+                    case efbposresZ: /* d=ZZ */
+                        d = pr->fbposres.geom - efbposresX;
+                        AbsRef[d] = 1;
+                        break;
+                    default:
+                        gmx_fatal(FARGS," Invalid geometry for flat-bottom position restraint.\n"
+                                  "Expected nr between 1 and %d. Found %d\n", efbposresNR-1,
+                                  pr->fbposres.geom);
+                    }
+                }
+            }
+        }
+    }
+
+    return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0);
+}
+
+/* Cross-checks between the mdp input record and the topology.
+ * Emits warnings/notes/errors through wi for: missing COM-motion removal,
+ * pressure coupling combined with absolute position restraints, full
+ * electrostatics on a charge-free system (or plain cut-off on a charged
+ * one), GRF temperature-coupling requirements, SD1 accuracy with
+ * constraints, net acceleration (corrected in place when nstcomm != 0),
+ * soft-core repulsion power, pulling without an absolute reference, and
+ * a dynamic box with pull geometry direction-periodic.
+ * Finishes with the duplicate distance-restraint check.
+ * NOTE(review): gdt_max and npct are declared but unused in this body.
+ */
+void triple_check(const char *mdparin,t_inputrec *ir,gmx_mtop_t *sys,
+                  warninp_t wi)
+{
+  char err_buf[256];
+  int i,m,g,nmol,npct;
+  gmx_bool bCharge,bAcc;
+  real gdt_max,*mgrp,mt;
+  rvec acc;
+  gmx_mtop_atomloop_block_t aloopb;
+  gmx_mtop_atomloop_all_t aloop;
+  t_atom *atom;
+  ivec AbsRef;
+  char warn_buf[STRLEN];
+
+  set_warning_line(wi,mdparin,-1);
+
+  /* Without COM removal, rounding errors pump kinetic energy into the COM;
+   * only harmless for stochastic integrators, an absolute reference frame,
+   * or very short runs. */
+  if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD &&
+      ir->comm_mode == ecmNO &&
+      !(absolute_reference(ir,sys,FALSE,AbsRef) || ir->nsteps <= 10)) {
+    warning(wi,"You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass");
+  }
+
+  /* Check for pressure coupling with absolute position restraints */
+  if (ir->epc != epcNO && ir->refcoord_scaling == erscNO)
+  {
+      absolute_reference(ir,sys,TRUE,AbsRef);
+      /* NOTE(review): the following bare brace block is redundant scoping */
+      {
+          for(m=0; m<DIM; m++)
+          {
+              if (AbsRef[m] && norm2(ir->compress[m]) > 0)
+              {
+                  warning(wi,"You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option.");
+                  break;
+              }
+          }
+      }
+  }
+
+  /* Scan all atoms (per molecule block) for any non-zero A/B-state charge */
+  bCharge = FALSE;
+  aloopb = gmx_mtop_atomloop_block_init(sys);
+  while (gmx_mtop_atomloop_block_next(aloopb,&atom,&nmol)) {
+    if (atom->q != 0 || atom->qB != 0) {
+      bCharge = TRUE;
+    }
+  }
+
+  if (!bCharge) {
+    if (EEL_FULL(ir->coulombtype)) {
+      sprintf(err_buf,
+              "You are using full electrostatics treatment %s for a system without charges.\n"
+              "This costs a lot of performance for just processing zeros, consider using %s instead.\n",
+              EELTYPE(ir->coulombtype),EELTYPE(eelCUT));
+      warning(wi,err_buf);
+    }
+  } else {
+    if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent) {
+      sprintf(err_buf,
+              "You are using a plain Coulomb cut-off, which might produce artifacts.\n"
+              "You might want to consider using %s electrostatics.\n",
+              EELTYPE(eelPME));
+      warning_note(wi,err_buf);
+    }
+  }
+
+  /* Generalized reaction field */
+  if (ir->opts.ngtc == 0) {
+    sprintf(err_buf,"No temperature coupling while using coulombtype %s",
+            eel_names[eelGRF]);
+    CHECK(ir->coulombtype == eelGRF);
+  }
+  else {
+    sprintf(err_buf,"When using coulombtype = %s"
+            " ref-t for temperature coupling should be > 0",
+            eel_names[eelGRF]);
+    CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0));
+  }
+
+  if (ir->eI == eiSD1 &&
+      (gmx_mtop_ftype_count(sys,F_CONSTR) > 0 ||
+       gmx_mtop_ftype_count(sys,F_SETTLE) > 0))
+  {
+      sprintf(warn_buf,"With constraints integrator %s is less accurate, consider using %s instead",ei_names[ir->eI],ei_names[eiSD2]);
+      warning_note(wi,warn_buf);
+  }
+
+  /* Detect any non-zero acceleration in the acceleration groups */
+  bAcc = FALSE;
+  for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
+    for(m=0; (m<DIM); m++) {
+      if (fabs(ir->opts.acc[i][m]) > 1e-6) {
+        bAcc = TRUE;
+      }
+    }
+  }
+  if (bAcc) {
+    /* Accumulate per-group masses, then the mass-weighted net acceleration;
+     * when COM removal is active the net component is subtracted out. */
+    clear_rvec(acc);
+    snew(mgrp,sys->groups.grps[egcACC].nr);
+    aloop = gmx_mtop_atomloop_all_init(sys);
+    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+      mgrp[ggrpnr(&sys->groups,egcACC,i)] += atom->m;
+    }
+    mt = 0.0;
+    for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
+      for(m=0; (m<DIM); m++)
+        acc[m] += ir->opts.acc[i][m]*mgrp[i];
+      mt += mgrp[i];
+    }
+    for(m=0; (m<DIM); m++) {
+      if (fabs(acc[m]) > 1e-6) {
+        const char *dim[DIM] = { "X", "Y", "Z" };
+        fprintf(stderr,
+                "Net Acceleration in %s direction, will %s be corrected\n",
+                dim[m],ir->nstcomm != 0 ? "" : "not");
+        if (ir->nstcomm != 0 && m < ndof_com(ir)) {
+          acc[m] /= mt;
+          for (i=0; (i<sys->groups.grps[egcACC].nr); i++)
+            ir->opts.acc[i][m] -= acc[m];
+        }
+      }
+    }
+    sfree(mgrp);
+  }
+
+  if (ir->efep != efepNO && ir->fepvals->sc_alpha != 0 &&
+      !gmx_within_tol(sys->ffparams.reppow,12.0,10*GMX_DOUBLE_EPS)) {
+    gmx_fatal(FARGS,"Soft-core interactions are only supported with VdW repulsion power 12");
+  }
+
+  if (ir->ePull != epullNO) {
+    /* An empty first pull group means an absolute reference is used */
+    if (ir->pull->grp[0].nat == 0) {
+      absolute_reference(ir,sys,FALSE,AbsRef);
+      for(m=0; m<DIM; m++) {
+        if (ir->pull->dim[m] && !AbsRef[m]) {
+          warning(wi,"You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
+          break;
+        }
+      }
+    }
+
+    if (ir->pull->eGeom == epullgDIRPBC) {
+      for(i=0; i<3; i++) {
+        for(m=0; m<=i; m++) {
+          if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
+              ir->deform[i][m] != 0) {
+            for(g=1; g<ir->pull->ngrp; g++) {
+              if (ir->pull->grp[g].vec[m] != 0) {
+                gmx_fatal(FARGS,"Can not have dynamic box while using pull geometry '%s' (dim %c)",EPULLGEOM(ir->pull->eGeom),'x'+m);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  check_disre(sys);
+}
+
+/* Checks that need the box matrix and constraint information:
+ * box validity, SHAKE tolerance and twin-range/SHAKE virial problems,
+ * LINCS accuracy/compatibility notes, lincs-warnangle clamping (note:
+ * this modifies ir->LincsWarnAngle in place), and cut-off versus box
+ * size limits for both grid and simple neighbour search.
+ */
+void double_check(t_inputrec *ir,matrix box,gmx_bool bConstr,warninp_t wi)
+{
+  real min_size;
+  gmx_bool bTWIN;
+  char warn_buf[STRLEN];
+  const char *ptr;
+
+  ptr = check_box(ir->ePBC,box);
+  if (ptr) {
+      warning_error(wi,ptr);
+  }
+
+  if (bConstr && ir->eConstrAlg == econtSHAKE) {
+    if (ir->shake_tol <= 0.0) {
+      sprintf(warn_buf,"ERROR: shake-tol must be > 0 instead of %g\n",
+              ir->shake_tol);
+      warning_error(wi,warn_buf);
+    }
+
+    /* With pressure coupling an incorrect virial is fatal, otherwise
+     * it is only a warning */
+    if (IR_TWINRANGE(*ir) && ir->nstlist > 1) {
+      sprintf(warn_buf,"With twin-range cut-off's and SHAKE the virial and the pressure are incorrect.");
+      if (ir->epc == epcNO) {
+        warning(wi,warn_buf);
+      } else {
+          warning_error(wi,warn_buf);
+      }
+    }
+  }
+
+  if( (ir->eConstrAlg == econtLINCS) && bConstr) {
+    /* If we have Lincs constraints: */
+    if(ir->eI==eiMD && ir->etc==etcNO &&
+       ir->eConstrAlg==econtLINCS && ir->nLincsIter==1) {
+      sprintf(warn_buf,"For energy conservation with LINCS, lincs_iter should be 2 or larger.\n");
+      warning_note(wi,warn_buf);
+    }
+
+    if ((ir->eI == eiCG || ir->eI == eiLBFGS) && (ir->nProjOrder<8)) {
+      sprintf(warn_buf,"For accurate %s with LINCS constraints, lincs-order should be 8 or more.",ei_names[ir->eI]);
+      warning_note(wi,warn_buf);
+    }
+    if (ir->epc==epcMTTK) {
+      warning_error(wi,"MTTK not compatible with lincs -- use shake instead.");
+    }
+  }
+
+  /* Clamp rather than reject: warn and cap the angle at 90 degrees */
+  if (ir->LincsWarnAngle > 90.0) {
+    sprintf(warn_buf,"lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n");
+    warning(wi,warn_buf);
+    ir->LincsWarnAngle = 90.0;
+  }
+
+  if (ir->ePBC != epbcNONE) {
+    if (ir->nstlist == 0) {
+      warning(wi,"With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash.");
+    }
+    bTWIN = (ir->rlistlong > ir->rlist);
+    if (ir->ns_type == ensGRID) {
+      if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC,box)) {
+        sprintf(warn_buf,"ERROR: The cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n",
+                bTWIN ? (ir->rcoulomb==ir->rlistlong ? "rcoulomb" : "rvdw"):"rlist");
+        warning_error(wi,warn_buf);
+      }
+    } else {
+      /* Simple search: the diagonal box lengths limit the cut-off */
+      min_size = min(box[XX][XX],min(box[YY][YY],box[ZZ][ZZ]));
+      if (2*ir->rlistlong >= min_size) {
+        sprintf(warn_buf,"ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist.");
+        warning_error(wi,warn_buf);
+        if (TRICLINIC(box))
+          fprintf(stderr,"Grid search might allow larger cut-off's than simple search with triclinic boxes.");
+      }
+    }
+  }
+}
+
+/* Compare the two largest charge-group radii (VdW and Coulomb) against
+ * the neighbour-list cut-offs.  Warns when the radii sum exceeds rlist
+ * itself, or when it exceeds the buffer rlist - rvdw (VdW) or
+ * rlistlong - rcoulomb (Coulomb) for interactions that are zero at the
+ * cut-off.  Buffer warnings are full warnings for NVE runs (where missed
+ * interactions harm energy conservation) and notes otherwise.
+ */
+void check_chargegroup_radii(const gmx_mtop_t *mtop,const t_inputrec *ir,
+                             rvec *x,
+                             warninp_t wi)
+{
+    real rvdw1,rvdw2,rcoul1,rcoul2;
+    char warn_buf[STRLEN];
+
+    /* rvdw1/rcoul1 are the largest radii, rvdw2/rcoul2 the second largest */
+    calc_chargegroup_radii(mtop,x,&rvdw1,&rvdw2,&rcoul1,&rcoul2);
+
+    if (rvdw1 > 0)
+    {
+        printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n",
+               rvdw1,rvdw2);
+    }
+    if (rcoul1 > 0)
+    {
+        printf("Largest charge group radii for Coulomb: %5.3f, %5.3f nm\n",
+               rcoul1,rcoul2);
+    }
+
+    if (ir->rlist > 0)
+    {
+        if (rvdw1 + rvdw2 > ir->rlist ||
+            rcoul1 + rcoul2 > ir->rlist)
+        {
+            sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than rlist (%f)\n",max(rvdw1+rvdw2,rcoul1+rcoul2),ir->rlist);
+            warning(wi,warn_buf);
+        }
+        else
+        {
+            /* Here we do not use the zero at cut-off macro,
+             * since user defined interactions might purposely
+             * not be zero at the cut-off.
+             */
+            if (EVDW_IS_ZERO_AT_CUTOFF(ir->vdwtype) &&
+                rvdw1 + rvdw2 > ir->rlist - ir->rvdw)
+            {
+                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than rlist (%f) - rvdw (%f)\n",
+                        rvdw1+rvdw2,
+                        ir->rlist,ir->rvdw);
+                if (ir_NVE(ir))
+                {
+                    warning(wi,warn_buf);
+                }
+                else
+                {
+                    warning_note(wi,warn_buf);
+                }
+            }
+            if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype) &&
+                rcoul1 + rcoul2 > ir->rlistlong - ir->rcoulomb)
+            {
+                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f)\n",
+                        rcoul1+rcoul2,
+                        ir->rlistlong > ir->rlist ? "rlistlong" : "rlist",
+                        ir->rlistlong,ir->rcoulomb);
+                if (ir_NVE(ir))
+                {
+                    warning(wi,warn_buf);
+                }
+                else
+                {
+                    warning_note(wi,warn_buf);
+                }
+            }
+        }
+    }
+}
--- /dev/null
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gromacs Runs On Most of All Computer Systems
+ */
+
+#ifndef _bondf_h
+#define _bondf_h
+
+
+#include <stdio.h>
+#include "typedefs.h"
+#include "nrnb.h"
+#include "pbc.h"
+#include "genborn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int glatnr(int *global_atom_index,int i);
+/* Returns the global topology atom number belonging to local atom index i.
+ * This function is intended for writing ascii output
+ * and returns atom numbers starting at 1.
+ * When global_atom_index=NULL returns i+1.
+ */
+
+void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
+ const t_idef *idef,
+ rvec x[],history_t *hist,
+ rvec f[],t_forcerec *fr,
+ const t_pbc *pbc,const t_graph *g,
+ gmx_enerdata_t *enerd,t_nrnb *nrnb,real *lambda,
+ const t_mdatoms *md,
+ t_fcdata *fcd,int *ddgatindex,
+ t_atomtypes *atype, gmx_genborn_t *born,
++ int force_flags,
+ gmx_bool bPrintSepPot,gmx_large_int_t step);
+/*
+ * The function calc_bonds() calculates all bonded force interactions.
+ * The "bonds" are specified as follows:
+ * int nbonds
+ * the total number of bonded interactions.
+ * t_iatom *forceatoms
+ * specifies which atoms are involved in a bond of a certain
+ * type, see also struct t_idef.
+ * t_functype *functype
+ * defines for every bonded force type what type of function to
+ * use, see also struct t_idef.
+ * t_iparams *forceparams
+ * defines the parameters for every bond type, see also struct
+ * t_idef.
+ * real epot[NR_F]
+ * total potential energy split up over the function types.
+ * int *ddgatindex
+ * global atom number indices, should be NULL when not using DD.
+ * gmx_bool bPrintSepPot
+ * if TRUE print local potential and dVdlambda for each bonded type.
+ * int step
+ * used with bPrintSepPot
+ * return value:
+ * the total potential energy (sum over epot).
+ */
+
+void calc_bonds_lambda(FILE *fplog,
+ const t_idef *idef,
+ rvec x[],
+ t_forcerec *fr,
+ const t_pbc *pbc,const t_graph *g,
+ gmx_enerdata_t *enerd,t_nrnb *nrnb,
+ real *lambda,
+ const t_mdatoms *md,
+ t_fcdata *fcd,int *global_atom_index);
+/* As calc_bonds, but only determines the potential energy
+ * for the perturbed interactions.
+ * The shift forces in fr are not affected.
+ */
+
+real posres(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec vir_diag,
+ t_pbc *pbc,
+ real lambda,real *dvdlambda,
+ int refcoord_scaling,int ePBC,rvec comA,rvec comB);
+/* Position restraints require a different pbc treatment from other bondeds */
+
+real fbposres(int nbonds,
+ const t_iatom forceatoms[],const t_iparams forceparams[],
+ const rvec x[],rvec f[],rvec vir_diag,
+ t_pbc *pbc, int refcoord_scaling,int ePBC,rvec com);
+/* Flat-bottom posres. Same PBC treatment as in normal position restraints */
+
+real bond_angle(const rvec xi,const rvec xj,const rvec xk,
+ const t_pbc *pbc,
+ rvec r_ij,rvec r_kj,real *costh,
+ int *t1,int *t2); /* out */
+/* Calculate bond-angle. No PBC is taken into account (use mol-shift) */
+
+real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
+ const t_pbc *pbc,
+ rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n, /* out */
+ real *sign,
+ int *t1,int *t2,int *t3);
+/* Calculate dihedral-angle. No PBC is taken into account (use mol-shift) */
+
+void do_dih_fup(int i,int j,int k,int l,real ddphi,
+ rvec r_ij,rvec r_kj,rvec r_kl,
+ rvec m,rvec n,rvec f[],rvec fshift[],
+ const t_pbc *pbc,const t_graph *g,
+ const rvec *x,int t1,int t2,int t3);
+/* Do an update of the forces for dihedral potentials */
+
+void make_dp_periodic(real *dp);
+/* make a dihedral fall in the range (-pi,pi) */
+
+/*************************************************************************
+ *
+ * Bonded force functions
+ *
+ *************************************************************************/
+ t_ifunc bonds,g96bonds,morse_bonds,cubic_bonds,FENE_bonds,restraint_bonds;
+ t_ifunc angles,g96angles,cross_bond_bond,cross_bond_angle,urey_bradley,quartic_angles,linear_angles;
+ t_ifunc pdihs,idihs,rbdihs;
+ t_ifunc tab_bonds,tab_angles,tab_dihs;
+ t_ifunc polarize,anharm_polarize,water_pol,thole_pol,angres,angresz,dihres,unimplemented;
+
+
++/* Initialize the setup for the bonded force buffer reduction
++ * over threads. This should be called each time the bonded setup
++ * changes; i.e. at start-up without domain decomposition and at DD.
++ */
++void init_bonded_thread_force_reduction(t_forcerec *fr,
++ const t_idef *idef);
++
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _bondf_h */
--- /dev/null
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gromacs Runs On Most of All Computer Systems
+ */
+
+#ifndef _futil_h
+#define _futil_h
+
+#include <stdio.h>
+#include "typedefs.h"
++#include "types/commrec.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+}
+#endif
+
+/* Native windows uses backslash path separators.
+ * Cygwin and everybody else in the world use slash.
+ * When reading the PATH environment variable, Unix separates entries
+ * with colon, while windows uses semicolon.
+ */
+#include "../utility/gmx_header_config.h"
+#ifdef GMX_NATIVE_WINDOWS
+#define DIR_SEPARATOR '\\'
+#define PATH_SEPARATOR ";"
+#else
+#define DIR_SEPARATOR '/'
+#define PATH_SEPARATOR ":"
+#endif
+
+
+/* Now get the maximum path size. */
+#ifdef PATH_MAX
+# define GMX_PATH_MAX PATH_MAX
+#elif defined MAX_PATH
+# define GMX_PATH_MAX MAX_PATH
+#else
+# define GMX_PATH_MAX 4096
+#endif
+
+
+#ifdef HAVE_FSEEKO
+ typedef off_t gmx_off_t;
+# define SIZEOF_GMX_OFF_T SIZEOF_OFF_T
+#elif defined HAVE__FSEEKI64
+ typedef __int64 gmx_off_t;
+# define SIZEOF_GMX_OFF_T 8
+#else
+ /* Almost certainly 64 bits, and guaranteed to be available */
+ typedef gmx_large_int_t gmx_off_t;
+# define SIZEOF_GMX_OFF_T SIZEOF_GMX_LARGE_INT
+#endif
+
+
+
+void no_buffers(void);
+/* Turn off buffering of files (which is default) for debugging purposes */
+
+gmx_bool gmx_fexist(const char *fname);
+/* Return TRUE when fname exists, FALSE otherwise */
+
+gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr);
+/* Return TRUE when fname exists, FALSE otherwise, bcast from master to others */
+
+gmx_bool gmx_eof(FILE *fp);
+/* Return TRUE on end-of-file, FALSE otherwise */
+
+gmx_bool is_pipe(FILE *fp);
+/* Check whether the file (opened by ffopen) is a pipe */
+
+/* Make a backup of file if necessary.
+ Return false if there was a problem.
+*/
+gmx_bool make_backup(const char * file);
+
+FILE *ffopen(const char *file, const char *mode);
+/* Return a valid file pointer when successful, exits otherwise
+ * If the file is in compressed format, open a pipe which uncompresses
+ * the file! Therefore, files must be closed with ffclose (see below)
+ */
+
+int ffclose(FILE *fp);
+/* Close files or pipes */
+
+
+void frewind(FILE *fp);
+/* Does not rewind pipes, but does so for normal files */
+
+#define rewind frewind
+
+
+int gmx_fseek(FILE *stream, gmx_off_t offset, int whence);
+/* OS-independent fseek. 64-bit when available */
+
+gmx_off_t gmx_ftell(FILE *stream);
+/* OS-independent ftell. 64-bit when available. */
+
+
+gmx_bool is_pipe(FILE *fp);
+
+char *gmxlibfn(const char *file);
+/* allocates and returns a string with the full file name for a library file */
+
+FILE *libopen(const char *file);
+/* Open a library file for reading. This looks in the current directory
+ * first, and then in the library directory. If the file is not found,
+ * it terminates with a fatal_error
+ */
+
+/* Opaque data type to list directories */
+typedef struct gmx_directory *
+gmx_directory_t;
+
+/* Open a directory for reading. The first argument should be a pointer
+ * to a declared gmx_directory_t variable. Returns 0 on success.
+ */
+int
+gmx_directory_open(gmx_directory_t *p_gmxdir,const char *dirname);
+
+
+/* Given an initialized gmx_directory_t, if there are more files in
+ * the directory this routine returns 0 and write the next name
+ * into the USER-PROVIDED buffer name. The last argument is the max
+ * number of characters that will be written. Just as strncpy, the
+ * string will NOT be terminated if it is longer than maxlength_name.
+ */
+int
+gmx_directory_nextfile(gmx_directory_t gmxdir,char *name,int maxlength_name);
+
+/* Release all data for a directory structure */
+int
+gmx_directory_close(gmx_directory_t gmxdir);
+
+
+
+gmx_bool get_libdir(char *libdir);
+
+char *low_gmxlibfn(const char *file,gmx_bool bAddCWD,gmx_bool bFatal);
+
+FILE *low_libopen(const char *file,gmx_bool bFatal);
+/* The same as the above, but does not terminate if (!bFatal) */
+
+/* Create unique name for temp file (wrapper around mkstemp).
+ * Buf should be at least 7 bytes long
+ */
+void gmx_tmpnam(char *buf);
+
+/* truncate the file to the specified length */
+int gmx_truncatefile(char *path, gmx_off_t length);
+
+/* rename/move the file (atomically, if the OS makes that available) oldname
+ to newname */
+int gmx_file_rename(const char *oldname, const char *newname);
+
+/* copy the file (data only) oldname to newname. if copy_if_empty==FALSE,
+ the file won't be copied if it's empty.*/
+int gmx_file_copy(const char *oldname, const char *newname, gmx_bool copy_if_empty);
+
+/* do an fsync() on an open file pointer.
+ Only use this during checkpointing! */
+int gmx_fsync(FILE *fp);
+
+void gmx_chdir(const char *directory);
+void gmx_getcwd(char *buffer, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _futil_h */
--- /dev/null
--- /dev/null
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
++ *
++ * This file is part of GROMACS.
++ * Copyright (c) 2012-
++ *
++ * Written by the Gromacs development team under coordination of
++ * David van der Spoel, Berk Hess, and Erik Lindahl.
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the research papers on the package. Check out http://www.gromacs.org
++ *
++ * And Hey:
++ * Gnomes, ROck Monsters And Chili Sauce
++ */
++#ifndef GMX_CPUID_H_
++#define GMX_CPUID_H_
++
++#include <stdio.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++#if 0
++} /* fixes auto-indentation problems */
++#endif
++
++
++/* Currently identifiable CPU Vendors */
++enum gmx_cpuid_vendor
++{
++ GMX_CPUID_VENDOR_CANNOTDETECT, /* Should only be used if something fails */
++ GMX_CPUID_VENDOR_UNKNOWN,
++ GMX_CPUID_VENDOR_INTEL,
++ GMX_CPUID_VENDOR_AMD,
++ GMX_CPUID_NVENDORS
++};
++
++
++/* CPU feature/property list, to be used as indices into the feature array of the
++ * gmxcpuid_t data structure.
++ *
++ * To facilitate looking things up, we keep this list alphabetical.
++ * The list is NOT exhaustive - we have basically added stuff that might be
++ * useful in an application like Gromacs.
++ *
++ * AMD and Intel tend to share most architectural elements, and even if the
++ * flags might have to be detected in different ways (different cpuid registers),
++ * once the flag is present the functions should be identical. Unfortunately the
++ * trend right now (2012) seems to be that they are diverging. This means that
++ * we need to use specific flags to the compiler to maximize performance, and
++ * then the binaries might not be portable between Intel and AMD as they were
++ * before when we only needed to check for SSE and/or SSE2 support in Gromacs.
++ */
++enum gmx_cpuid_feature
++{
++ GMX_CPUID_FEATURE_CANNOTDETECT, /* Flag set if we could not detect on this CPU */
++ GMX_CPUID_FEATURE_X86_AES, /* x86 advanced encryption standard accel. */
++ GMX_CPUID_FEATURE_X86_APIC, /* APIC support */
++ GMX_CPUID_FEATURE_X86_AVX, /* Advanced vector extensions */
++ GMX_CPUID_FEATURE_X86_AVX2, /* AVX2 including gather support (not used yet) */
++ GMX_CPUID_FEATURE_X86_CLFSH, /* Supports CLFLUSH instruction */
++ GMX_CPUID_FEATURE_X86_CMOV, /* Conditional move insn support */
++ GMX_CPUID_FEATURE_X86_CX8, /* Supports CMPXCHG8B (8-byte compare-exchange) */
++ GMX_CPUID_FEATURE_X86_CX16, /* Supports CMPXCHG16B (16-byte compare-exchg) */
++ GMX_CPUID_FEATURE_X86_F16C, /* Supports 16-bit FP conversion instructions */
++ GMX_CPUID_FEATURE_X86_FMA, /* Fused-multiply add support (mainly for AVX) */
++ GMX_CPUID_FEATURE_X86_FMA4, /* 4-operand FMA, only on AMD for now */
++ GMX_CPUID_FEATURE_X86_HTT, /* Hyper-Threading supported */
++ GMX_CPUID_FEATURE_X86_LAHF_LM, /* LAHF/SAHF support in 64 bits */
++ GMX_CPUID_FEATURE_X86_MISALIGNSSE, /* Support for misaligned SSE data instructions */
++ GMX_CPUID_FEATURE_X86_MMX, /* MMX registers and instructions */
++ GMX_CPUID_FEATURE_X86_MSR, /* Supports Intel model-specific-registers */
++ GMX_CPUID_FEATURE_X86_NONSTOP_TSC, /* Invariant TSC (constant rate in ACPI states) */
++ GMX_CPUID_FEATURE_X86_PCID, /* Process context identifier support */
++ GMX_CPUID_FEATURE_X86_PCLMULDQ, /* Carry-less 64-bit multiplication supported */
++ GMX_CPUID_FEATURE_X86_PDCM, /* Perfmon and Debug Capability */
++ GMX_CPUID_FEATURE_X86_PDPE1GB, /* Support for 1GB pages */
++ GMX_CPUID_FEATURE_X86_POPCNT, /* Supports the POPCNT (population count) insn */
++ GMX_CPUID_FEATURE_X86_PSE, /* Supports 4MB-pages (page size extension) */
++ GMX_CPUID_FEATURE_X86_RDRND, /* RDRAND high-quality hardware random numbers */
++ GMX_CPUID_FEATURE_X86_RDTSCP, /* Serializing rdtscp instruction available */
++ GMX_CPUID_FEATURE_X86_SSE2, /* SSE 2 */
++ GMX_CPUID_FEATURE_X86_SSE3, /* SSE 3 */
++ GMX_CPUID_FEATURE_X86_SSE4A, /* SSE 4A */
++ GMX_CPUID_FEATURE_X86_SSE4_1, /* SSE 4.1 */
++ GMX_CPUID_FEATURE_X86_SSE4_2, /* SSE 4.2 */
++ GMX_CPUID_FEATURE_X86_SSSE3, /* Supplemental SSE3 */
++ GMX_CPUID_FEATURE_X86_TDT, /* TSC deadline timer */
++ GMX_CPUID_FEATURE_X86_X2APIC, /* Extended xAPIC Support */
++ GMX_CPUID_FEATURE_X86_XOP, /* AMD extended instructions, only AMD for now */
++ GMX_CPUID_NFEATURES
++};
++
++
++/* Currently supported acceleration instruction sets, intrinsics or other similar combinations
++ * in Gromacs. There is not always a 1-to-1 correspondence with feature flags; on some AMD
++ * hardware we prefer to use 128bit AVX instructions (although 256-bit ones could be executed),
++ * and we still haven't written the AVX2 kernels.
++ */
++enum gmx_cpuid_acceleration
++{
++ GMX_CPUID_ACCELERATION_CANNOTDETECT, /* Should only be used if something fails */
++ GMX_CPUID_ACCELERATION_NONE,
++ GMX_CPUID_ACCELERATION_X86_SSE2,
++ GMX_CPUID_ACCELERATION_X86_SSE4_1,
++ GMX_CPUID_ACCELERATION_X86_AVX_128_FMA,
++ GMX_CPUID_ACCELERATION_X86_AVX_256,
++ GMX_CPUID_NACCELERATIONS
++};
++
++/* Text strings corresponding to CPU vendors */
++extern const char *
++gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS];
++
++/* Text strings for CPU feature indices */
++extern const char *
++gmx_cpuid_feature_string[GMX_CPUID_NFEATURES];
++
++/* Text strings for Gromacs acceleration/instruction sets */
++extern const char *
++gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS];
++
++
++/* Abstract data type with CPU detection information. Set by gmx_cpuid_init(). */
++typedef struct gmx_cpuid *
++gmx_cpuid_t;
++
++
++/* Fill the data structure by using CPU detection instructions.
++ * Return 0 on success, 1 if something bad happened.
++ */
++int
++gmx_cpuid_init (gmx_cpuid_t * cpuid);
++
++
++/* Return the vendor id as enumerated type. Use gmx_cpuid_vendor_string[]
++ * to get the corresponding text string.
++ */
++enum gmx_cpuid_vendor
++gmx_cpuid_vendor (gmx_cpuid_t cpuid);
++
++
++/* Return a constant pointer to the processor brand string. */
++const char *
++gmx_cpuid_brand (gmx_cpuid_t cpuid);
++
++
++/* Return processor family version. For a chip of version 1.2.3, this is 1 */
++int
++gmx_cpuid_family (gmx_cpuid_t cpuid);
++
++/* Return processor model version. For a chip of version 1.2.3, this is 2. */
++int
++gmx_cpuid_model (gmx_cpuid_t cpuid);
++
++/* Return processor stepping version. For a chip of version 1.2.3, this is 3. */
++int
++gmx_cpuid_stepping (gmx_cpuid_t cpuid);
++
++
++/* Check whether a particular CPUID feature is set.
++ * Returns 0 if flag "feature" is not set, 1 if the flag is set. We cannot use
++ * gmx_bool here since this file must be possible to compile without simple.h.
++ */
++int
++gmx_cpuid_feature (gmx_cpuid_t cpuid,
++ enum gmx_cpuid_feature feature);
++
++
++/* Enumerated values for x86 SMT enabled-status. Note that this does not refer
++ * to Hyper-Threading support (that is the flag GMX_CPUID_FEATURE_X86_HTT), but
++ * whether Hyper-Threading is _enabled_ and _used_ in bios right now.
++ */
++enum gmx_cpuid_x86_smt
++{
++ GMX_CPUID_X86_SMT_CANNOTDETECT,
++ GMX_CPUID_X86_SMT_DISABLED,
++ GMX_CPUID_X86_SMT_ENABLED
++};
++
++/* Returns the status of x86 SMT support. IMPORTANT: There are non-zero
++ * return values for this routine that still do not indicate supported and
++ * enabled smt/Hyper-Threading. You need to carefully check the return value
++ * against the enumerated type values to see what you are getting.
++ *
++ * Long-term, this functionality will move to a new hardware topology detection
++ * layer, but that will require a lot of new code and a working interface to the
++ * hwloc library. Surprisingly, there is no simple way to find out that
++ * Hyper-Threading is actually turned on without fully enumerating and checking
++ * all the cores, which we presently can only do on Linux. This means a couple
++ * of things:
++ *
++ * 1) If you want to know whether your CPU _supports_ Hyper-Threading in the
++ * first place, check the GMX_CPUID_FEATURE_X86_HTT flag instead!
++ * 2) There are several scenarios where this routine will say that it cannot
++ * detect whether SMT is enabled and used right now.
++ * 3) If you need support on non-Linux x86, you have to write it :-)
++ * 4) Don't invest too much efforts, since this will be replaced with
++ * full hardware topology detection in the future.
++ * 5) Don't worry if the detection does not work. It is not a catastrophe, but
++ *    we get slightly better performance on x86 if we use Hyper-Threading
++ * cores in direct space, but not reciprocal space.
++ *
++ * Since this routine presently only supports Hyper-Threading we say X86_SMT
++ * in order not to give the impression we can detect any SMT. We haven't
++ * even tested the performance on other SMT implementations, so it is not
++ * obvious we shouldn't use SMT there.
++ */
++enum gmx_cpuid_x86_smt
++gmx_cpuid_x86_smt(gmx_cpuid_t cpuid);
++
++
++
++/* Formats a text string (up to n characters) from the data structure.
++ * The output will have max 80 chars between newline characters.
++ */
++int
++gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
++ char * s,
++ int n);
++
++
++/* Suggests a suitable gromacs acceleration based on the support in the
++ * hardware.
++ */
++enum gmx_cpuid_acceleration
++gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid);
++
++
++/* Check if this binary was compiled with the same acceleration as we
++ * would suggest for the current hardware. Always print stats to the log file
++ * if it is non-NULL, and print a warning in stdout if we don't have a match.
++ */
++int
++gmx_cpuid_acceleration_check (gmx_cpuid_t cpuid,
++ FILE * log);
++
++
++/* Release resources used by data structure. Note that the pointer to the
++ * CPU brand string will no longer be valid once this routine has been called.
++ */
++void
++gmx_cpuid_done (gmx_cpuid_t cpuid);
++
++
++
++
++#ifdef __cplusplus
++}
++#endif
++
++
++#endif /* GMX_CPUID_H_ */
--- /dev/null
- #include "typedefs.h"
+
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gromacs Runs On Most of All Computer Systems
+ */
+
+#ifndef _fatal_h
+#define _fatal_h
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
- */
-
- void
- gmx_fatal_collective(int f_errno,const char *file,int line,
- t_commrec *cr,gmx_domdec_t *dd,
- const char *fmt,...) GMX_ATTRIBUTE_NORETURN;
- /* As gmx_fatal, but only the master process prints the error message.
- * This should only be called one of the following two situations:
- * 1) On all nodes in cr->mpi_comm_mysim, with cr!=NULL,dd==NULL.
- * 2) On all nodes in dd->mpi_comm_all, with cr==NULL,dd!=NULL.
- * This will call MPI_Finalize instead of MPI_Abort when possible,
- * This is useful for handling errors in code that is executed identically
- * for all processes.
++#include "types/simple.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef __has_feature // Optional.
+#define __has_feature(x) 0 // Compatibility with non-clang compilers.
+#endif
+
+/* This documentation block seems to produce warnings with some Doxygen
+ * versions, so it's disabled for now. Maybe because the file itself
+ * is not documented. */
+/* \def GMX_ATTRIBUTE_NORETURN
+ * \brief
+ * Indicate that a function is not expected to return.
+ *
+ * WARNING: In general this flag should not be used for compiler
+ * optimizations, since set_gmx_error_handler can be set to a
+ * handler which does not quit.
+ */
+#ifndef GMX_ATTRIBUTE_NORETURN
+#if __has_feature(attribute_analyzer_noreturn)
+#define GMX_ATTRIBUTE_NORETURN __attribute__((analyzer_noreturn))
+#else
+#define GMX_ATTRIBUTE_NORETURN
+#endif
+#endif
+
+void
+_where(const char *file,int line);
+#define where() _where(__FILE__,__LINE__)
+/* Prints filename and line to stdlog and only on amba memvail */
+
+void
+_set_fatal_tmp_file(const char *fn, const char *file, int line);
+#define set_fatal_tmp_file(fn) _set_fatal_tmp_file(fn,__FILE__,__LINE__)
+/* set filename to be removed when fatal_error is called */
+
+void
+_unset_fatal_tmp_file(const char *fn, const char *file, int line);
+#define unset_fatal_tmp_file(fn) _unset_fatal_tmp_file(fn,__FILE__,__LINE__)
+/* unsets filename to be removed */
+
+void
+gmx_fatal(int fatal_errno,const char *file,int line,const char *fmt,...) GMX_ATTRIBUTE_NORETURN;
+#define FARGS 0,__FILE__,__LINE__
+/*
+ * Routine gmx_fatal prints
+ *
+ * "fatal error file %s line %s \n\t "
+ *
+ * followed by the string specified by fmt and supplied parameters. If
+ * errno is 0, only the message and arguments are printed. If errno is
+ * a legal system errno or -1, a perror like message is printed after the
+ * first message, if errno is -1, the last system errno will be used.
+ * The format of fmt is that like printf etc, only %d, %x, %c, %f, %g and %s
+ * are allowed as format specifiers.
+ *
++ * In case all MPI processes want to stop with the same fatal error,
++ * use gmx_fatal_collective, declared in gmx_fatal_collective.h,
++ * to avoid having as many error messages as processes.
++ *
+ * Tip of the week:
+ * call this function using the FARGS macro:
+ * gmx_fatal(FARGS,fmt,...)
++ *
+ */
+
+void
+gmx_fatal_set_log_file(FILE *fp);
+/* Set the log file for printing error messages */
+
+void
+_invalid_case(const char *fn,int line);
+#define invalid_case() _invalid_case(__FILE__,__LINE__)
+/* Issue a warning stating 'Invalid case in switch' */
+
+void _unexpected_eof(const char *fn,int line,const char *srcfn,int srcline);
+#define unexpected_eof(fn,line) _unexpected_eof(fn,line,__FILE__,__LINE__)
+
+/*
+ * Functions can write to this file for debug info
+ * Before writing to it, it should be checked whether
+ * the file is not NULL:
+ * if (debug) fprintf(debug,"%s","Hallo");
+ */
+extern FILE *debug;
+extern gmx_bool gmx_debug_at;
+
+void init_debug (const int dbglevel,const char *dbgfile);
+
+gmx_bool bDebugMode(void);
+/* Return TRUE when the program was started in debug mode */
+
+#if (defined __sgi && defined USE_SGI_FPE)
+void doexceptions(void);
+/* Set exception handlers for debugging */
+#endif
+
+ /* warn_str is allowed to be NULL.
+ */
+ void _range_check(int n,int n_min,int n_max,const char *warn_str,
+ const char *var,
+ const char *file,int line);
+
+#define range_check_mesg(n,n_min,n_max,str) _range_check(n,n_min,n_max,str,#n,__FILE__,__LINE__)
+ /* Range check will terminate with an error message if not
+ * n E [ n_min, n_max >
+ * That is n_min is inclusive but not n_max.
+ */
+
+#define range_check(n,n_min,n_max) _range_check(n,n_min,n_max,NULL,#n,__FILE__,__LINE__)
+ /* Range check will terminate with an error message if not
+ * n E [ n_min, n_max >
+ * That is n_min is inclusive but not n_max.
+ */
+
+ char *gmx_strerror(const char *key);
+ /* Return error message corresponding to the key.
+ * Maybe a multi-line message.
+ * The messages are stored in src/gmxlib/fatal.c
+ */
+
+ void _gmx_error(const char *key,const char *msg,const char *file,int line) GMX_ATTRIBUTE_NORETURN;
+#define gmx_error(key,msg) _gmx_error(key,msg,__FILE__,__LINE__)
+ /* Error msg of type key is generated and the program is
+ * terminated unless and error handle is set (see below)
+ */
+
+ /* Some common error types */
+#define gmx_bug(msg) gmx_error("bug",msg)
+#define gmx_call(msg) gmx_error("call",msg)
+#define gmx_comm(msg) gmx_error("comm",msg)
+#define gmx_file(msg) gmx_error("file",msg)
+#define gmx_cmd(msg) gmx_error("cmd",msg)
+#define gmx_impl(msg) gmx_error("impl",msg)
+#define gmx_incons(msg) gmx_error("incons",msg)
+#define gmx_input(msg) gmx_error("input",msg)
+#define gmx_mem(msg) gmx_error("mem",msg)
+#define gmx_open(fn) gmx_error("open",fn)
+
+void
+set_gmx_error_handler(void (*func)(const char *msg));
+/* An error function will be called that terminates the program
+ * with a fatal error, unless you override it with another function.
+ * i.e.:
+ * set_gmx_error_handler(my_func);
+ * where my_func is a function that takes a string as an argument.
+ * The string may be a multi-line string.
+ */
+
+void gmx_warning(const char *fmt,...);
+/* Print a warning message to stderr.
+ * The format of fmt is that like printf etc, only %d, %x, %c, %f, %g and %s
+ * are allowed as format specifiers.
+ * The message string should NOT start with "WARNING"
+ * and should NOT end with a newline.
+ */
+
++
+#ifdef __cplusplus
+ }
+#endif
+
+#endif /* _fatal_h */
--- /dev/null
- real gmx_erf(real x);
- real gmx_erfc(real x);
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gromacs Runs On Most of All Computer Systems
+ */
+
+#ifndef _maths_h
+#define _maths_h
+
+#include <math.h>
+#include "types/simple.h"
+#include "typedefs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+#ifndef M_PI_2
+#define M_PI_2 1.57079632679489661923
+#endif
+
+#ifndef M_2PI
+#define M_2PI 6.28318530717958647692
+#endif
+
+#ifndef M_SQRT2
+#define M_SQRT2 sqrt(2.0)
+#endif
+
+#ifndef M_1_PI
+#define M_1_PI 0.31830988618379067154
+#endif
+
+int gmx_nint(real a);
+real sign(real x,real y);
+
+int gmx_nint(real a);
+real sign(real x,real y);
+real cuberoot (real a);
++double gmx_erfd(double x);
++double gmx_erfcd(double x);
++float gmx_erff(float x);
++float gmx_erfcf(float x);
++#ifdef GMX_DOUBLE
++#define gmx_erf(x) gmx_erfd(x)
++#define gmx_erfc(x) gmx_erfcd(x)
++#else
++#define gmx_erf(x) gmx_erff(x)
++#define gmx_erfc(x) gmx_erfcf(x)
++#endif
+
+gmx_bool gmx_isfinite(real x);
+
+/*! \brief Check if two numbers are within a tolerance
+ *
+ * This routine checks if the relative difference between two numbers is
+ * approximately within the given tolerance, defined as
+ * fabs(f1-f2)<=tolerance*fabs(f1+f2).
+ *
+ * To check if two floating-point numbers are almost identical, use this routine
+ * with the tolerance GMX_REAL_EPS, or GMX_DOUBLE_EPS if the check should be
+ * done in double regardless of Gromacs precision.
+ *
+ * To check if two algorithms produce similar results you will normally need
+ * to relax the tolerance significantly since many operations (e.g. summation)
+ * accumulate floating point errors.
+ *
+ * \param f1 First number to compare
+ * \param f2 Second number to compare
+ * \param tol Tolerance to use
+ *
+ * \return 1 if the relative difference is within tolerance, 0 if not.
+ */
+static int
+gmx_within_tol(double f1,
+ double f2,
+ double tol)
+{
+ /* The or-equal is important - otherwise we return false if f1==f2==0 */
+ if( fabs(f1-f2) <= tol*0.5*(fabs(f1)+fabs(f2)) )
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+
+
+/**
+ * Check if a number is smaller than some preset safe minimum
+ * value, currently defined as GMX_REAL_MIN/GMX_REAL_EPS.
+ *
+ * If a number is smaller than this value we risk numerical overflow
+ * if any number larger than 1.0/GMX_REAL_EPS is divided by it.
+ *
+ * \return 1 if 'almost' numerically zero, 0 otherwise.
+ */
+static int
+gmx_numzero(double a)
+{
+ return gmx_within_tol(a,0.0,GMX_REAL_MIN/GMX_REAL_EPS);
+}
+
+
+static real
+gmx_log2(real x)
+{
+ const real iclog2 = 1.0/log( 2.0 );
+
+ return log( x ) * iclog2;
+}
+
+/*! /brief Multiply two large ints
+ *
+ * Returns true when overflow did not occur.
+ */
+gmx_bool
+check_int_multiply_for_overflow(gmx_large_int_t a,
+ gmx_large_int_t b,
+ gmx_large_int_t *result);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _maths_h */
--- /dev/null
- unsigned nelem,size_t elsize,size_t alignment);
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gromacs Runs On Most of All Computer Systems
+ */
+
+#ifndef _smalloc_h
+#define _smalloc_h
+
+#include <stdlib.h>
+
+/*
+ * Memory allocation routines in gromacs:
+ *
+ * If an allocation fails, the program is halted by means of the
+ * fatal_error routine, which outputs source file and line number
+ * and the name of the variable involved.
+ *
+ * Macro's which can be used:
+ *
+ * snew(ptr,nelem)
+ * Allocates memory for nelem elements and returns this in ptr.
+ * The allocated memory is initialized to zeros.
+ *
+ * srenew(ptr,nelem)
+ * Reallocates memory for nelem elements and returns this in ptr.
+ *
+ * smalloc(ptr,size)
+ * Allocates memory for size bytes and returns this in ptr.
+ *
+ * scalloc(ptr,nelem,elsize)
+ * Allocates memory for nelem elements of size elsize and returns
+ * this in ptr.
+ *
+ * srealloc(ptr,size)
+ * Reallocates memory for size bytes and returns this in ptr.
+ *
+ * sfree(ptr)
+ * Frees memory referenced by ptr.
+ *
+ * snew_aligned(ptr,nelem,alignment)
+ * Allocates memory for nelem elements and returns this in ptr.
+ * The allocated memory is initialized to zeroes.
+ * alignment=n will constrain ptr to be n-byte aligned.
+ * This pointer should only be freed with sfree_aligned, since
+ * it may not be the value returned by the underlying malloc.
+ *
+ * sfree_aligned(ptr)
+ * Frees aligned memory referenced by ptr.
+ *
+ ****************************************************************************
+ *
+ * Functions which are used by the macro's:
+ *
+ * extern void *save_malloc(char *name,char *file,int line,int size);
+ * Like alloc, returns a pointer to the allocated space, uses name, file
+ * and line to generate an error message when allocation failed.
+ *
+ * extern void *save_calloc(char *name,char *file,int line,
+ * size_t nelem,size_t elsize);
+ * Like calloc, returns a pointer to the allocated space, uses name, file
+ * and line to generate an error message when allocation failed.
+ *
+ * extern void *save_realloc(char *name,char *file,int line,
+ * void *ptr,size_t size);
+ * Like realloc, returns a pointer to the allocated space, uses name, file
+ * and line to generate an error message when allocation failed.
+ * If ptr equals NULL, malloc is called instead of realloc, in this way
+ * it is possible to combine first and later allocations.
+ *
+ * extern void save_free(char *name,char *file,int line, void *ptr);
+ * Like free, uses name, file and line to generate an error message when
+ * the free failed.
+ *
+ * extern size_t maxavail();
+ * Returns the maximum available allocation unit, by applying a binary
+ * search on the largest block of memory available. After allocation
+ * it invokes free to restore the original state. So it is important
+ * that free can undo the effect of a malloc.
+ *
+ * extern size_t memavail();
+ * Returns the total of available allocation unit, by applying maxavail
+ * until no space is left, it then frees all allocated space and returns
+ * the sum of the previously allocated space. As mentioned with maxavail,
+ * it is important that free can undo the effect of a malloc.
+ *
+ * extern void *save_malloc_aligned(char *name,char *file,int line,size_t size,size_t alignment);
+ * Like alloc, returns a pointer to the allocated space, uses name, file
+ * and line to generate an error message when allocation failed.
+ * The returned pointer will be n-byte aligned, where n=alignment.
+ * The pointer should only be freed with a call to save_free.
+ *
+ * extern void save_free_aligned(char *name,char *file,int line, void *ptr);
+ * Like free, uses name, file and line to generate an error message when
+ * the free failed. This function is intended to be called for
+ * pointers allocated with save_malloc_aligned, and may not work
+ * on normal pointers.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void *save_malloc(const char *name,const char *file,int line,size_t size);
+void *save_calloc(const char *name,const char *file,int line,
+ size_t nelem,size_t elsize);
+void *save_realloc(const char *name,const char *file,int line,
+ void *ptr,size_t nelem,size_t elsize);
+void save_free(const char *name,const char *file,int line, void *ptr);
+size_t maxavail(void);
+size_t memavail(void);
+
+/* Aligned-memory counterparts */
+
++void *save_malloc_aligned(const char *name,const char *file,int line,
++ unsigned nelem,size_t elsize,size_t alignment);
+void *save_calloc_aligned(const char *name,const char *file,int line,
++ unsigned nelem,size_t elsize,size_t alignment);
+void save_free_aligned(const char *name,const char *file,int line, void *ptr);
+
+#ifdef __cplusplus
+}
+
+/* Use of sizeof(T) in _snew() and _srenew() can cause obscure bugs if
+ * several files define distinct data structures with identical names and
+ * allocate memory for them using the macros below.
+ * For this reason, the size of an element is passed as a parameter.
+ *
+ * The C versions work fine in such cases, but when compiled with a C++
+ * compiler (and if the compiler does not inline the calls), the linker cannot
+ * tell that data structures with identical names are actually different and
+ * links calls to these template functions incorrectly, which can result in
+ * allocation of an incorrect amount of memory if the element size is computed
+ * within the function. Even with the size passed as a parameter, incorrect
+ * linkage will occur, but as the type is now only present in the cast, it
+ * should not cause problems.
+ */
+template <typename T>
+void _snew(const char *name, const char *file, int line,
+ T *&ptr, size_t nelem, size_t elsize)
+{
+ ptr = (T *)save_calloc(name, file, line, nelem, elsize);
+}
+template <typename T>
+void _srenew(const char *name, const char *file, int line,
+ T *&ptr, size_t nelem, size_t elsize)
+{
+ ptr = (T *)save_realloc(name, file, line, ptr, nelem, elsize);
+}
+template <typename T>
+void _smalloc(const char *name, const char *file, int line, T *&ptr, size_t size)
+{
+ ptr = (T *)save_malloc(name, file, line, size);
+}
+template <typename T>
+void _srealloc(const char *name, const char *file, int line, T *&ptr, size_t size)
+{
+ ptr = (T *)save_realloc(name, file, line, ptr, size, sizeof(char));
+}
+template <typename T>
+void _snew_aligned(const char *name, const char *file, int line,
+ T *&ptr, size_t nelem, size_t elsize,size_t alignment)
+{
+ ptr = (T *)save_calloc_aligned(name, file, line, nelem, elsize, alignment);
+}
+
+#define snew(ptr,nelem) _snew(#ptr,__FILE__,__LINE__,(ptr),(nelem),sizeof(*(ptr)))
+#define srenew(ptr,nelem) _srenew(#ptr,__FILE__,__LINE__,(ptr),(nelem),sizeof(*(ptr)))
+#define smalloc(ptr, size) _smalloc(#ptr,__FILE__,__LINE__,(ptr),(size))
+#define srealloc(ptr, size) _srealloc(#ptr,__FILE__,__LINE__,(ptr),(size))
+#define snew_aligned(ptr,nelem,alignment) _snew_aligned(#ptr,__FILE__,__LINE__,(ptr),(nelem),sizeof(*(ptr)),alignment)
+
+#else /* __cplusplus */
+
+/* These macros work in C, not in C++ */
+#define snew(ptr,nelem) (ptr)=save_calloc(#ptr,__FILE__,__LINE__,\
+ (nelem),sizeof(*(ptr)))
+#define srenew(ptr,nelem) (ptr)=save_realloc(#ptr,__FILE__,__LINE__,\
+ (ptr),(nelem),sizeof(*(ptr)))
+#define smalloc(ptr,size) (ptr)=save_malloc(#ptr,__FILE__,__LINE__,size)
+#define scalloc(ptr,nelem,elsize)\
+ (ptr)=save_calloc(#ptr,__FILE__,__LINE__,nelem,elsize)
+#define srealloc(ptr,size) (ptr)=save_realloc(#ptr,__FILE__,__LINE__,\
+ (ptr),size,1)
+#define snew_aligned(ptr,nelem,alignment) (ptr)=save_calloc_aligned(#ptr,__FILE__,__LINE__,(nelem),sizeof(*(ptr)),alignment)
+#endif /* __cplusplus */
+
+#define sfree(ptr) save_free(#ptr,__FILE__,__LINE__,(ptr))
+
+/* call this ONLY with a pointer obtained through snew_aligned or
+ smalloc_aligned: */
+#define sfree_aligned(ptr) save_free_aligned(#ptr,__FILE__,__LINE__,(ptr))
+
+#ifdef __cplusplus
+
+#include "../utility/common.h"
+
+namespace gmx
+{
+
+/*! \brief
+ * Stripped-down version of scoped_ptr that uses sfree().
+ *
+ * Currently only implements constructor from a pointer value and destructor;
+ * other operations can be added if they become necessary.
+ *
+ * This is currently in smalloc.h, as this header also declares sfree().
+ * If more flexible guards/smart pointers are needed for C pointers, this class
+ * should be moved to a separate header under src/gromacs/utility/ together
+ * with that more flexible implementation.
+ * Currently, boost::shared_ptr is used in a few locations, but is not suitable
+ * for all cases. A scoped_ptr with deleter support would be a general enough
+ * implementation for all uses. C++11 unique_ptr has this, but for non-C++11
+ * support we need something else.
+ *
+ * Methods in this class do not throw.
+ */
+class scoped_ptr_sfree
+{
+ public:
+ /*! \brief
+ * Initializes a scoped_ptr that frees \p ptr on scope exit.
+ *
+ * \param[in] ptr Pointer to use for initialization.
+ */
+ explicit scoped_ptr_sfree(void *ptr) : ptr_(ptr) {}
+ //! Frees the pointer passed to the constructor.
+ ~scoped_ptr_sfree() { sfree(ptr_); }
+
+ private:
+ void *ptr_;
+
+ GMX_DISALLOW_COPY_AND_ASSIGN(scoped_ptr_sfree);
+};
+
+} // namespace gmx
+#endif /* __cplusplus */
+
+#endif /* _smalloc_h */
--- /dev/null
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gromacs Runs On Most of All Computer Systems
+ */
+
+#ifndef _sysstuff_h
+#define _sysstuff_h
+
+#ifndef _386_
+#include <stdlib.h>
+#endif
+#include <stdio.h>
+#include <errno.h>
+#include <signal.h>
++#include "gromacs/version.h"
+#include <limits.h>
+#include <time.h>
+
+#endif /* _sysstuff_h */
--- /dev/null
- int j0; /* j-cell start */
- int j1; /* j-cell end */
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GRoups of Organic Molecules in ACtion for Science
+ */
+#ifndef _commrec_h
+#define _commrec_h
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#else
+#ifdef GMX_THREAD_MPI
+#include "../thread_mpi/tmpi.h"
+#include "../thread_mpi/mpi_bindings.h"
+#else
+typedef void* MPI_Comm;
+typedef void* MPI_Request;
+typedef void* MPI_Group;
+#define MPI_COMM_NULL NULL
+#endif
+#endif
+
+#include "idef.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define DD_MAXZONE 8
+#define DD_MAXIZONE 4
+
+typedef struct gmx_domdec_master *gmx_domdec_master_p_t;
+
+typedef struct {
- int *ga2la_vsite;
++ int j0; /* j-zone start */
++ int j1; /* j-zone end */
+ int cg1; /* i-charge-group end */
+ int jcg0; /* j-charge-group start */
+ int jcg1; /* j-charge-group end */
+ ivec shift0; /* Minimum shifts to consider */
+ ivec shift1; /* Maximum shifts to consider */
+} gmx_domdec_ns_ranges_t;
+
++typedef struct {
++ rvec x0; /* Zone lower corner in triclinic coordinates */
++ rvec x1; /* Zone upper corner in triclinic coordinates */
++ rvec bb_x0; /* Zone bounding box lower corner in Cartesian coords */
++ rvec bb_x1; /* Zone bounding box upper corner in Cartesian coords */
++} gmx_domdec_zone_size_t;
++
+typedef struct {
+ /* The number of zones including the home zone */
+ int n;
+ /* The shift of the zones with respect to the home zone */
+ ivec shift[DD_MAXZONE];
+ /* The charge group boundaries for the zones */
+ int cg_range[DD_MAXZONE+1];
+ /* The number of neighbor search zones with i-particles */
+ int nizone;
+ /* The neighbor search charge group ranges for each i-zone */
+ gmx_domdec_ns_ranges_t izone[DD_MAXIZONE];
++ /* Boundaries of the zones */
++ gmx_domdec_zone_size_t size[DD_MAXZONE];
++ /* The cg density of the home zone */
++ real dens_zone0;
+} gmx_domdec_zones_t;
+
+typedef struct gmx_ga2la *gmx_ga2la_t;
+
++typedef struct gmx_hash *gmx_hash_t;
++
+typedef struct gmx_reverse_top *gmx_reverse_top_p_t;
+
+typedef struct gmx_domdec_constraints *gmx_domdec_constraints_p_t;
+
+typedef struct gmx_domdec_specat_comm *gmx_domdec_specat_comm_p_t;
+
+typedef struct gmx_domdec_comm *gmx_domdec_comm_p_t;
+
+typedef struct gmx_pme_comm_n_box *gmx_pme_comm_n_box_p_t;
+
+typedef struct {
+ int npbcdim;
+ int nboundeddim;
+ rvec box0;
+ rvec box_size;
+ /* Tells if the box is skewed for each of the three cartesian directions */
+ ivec tric_dir;
+ rvec skew_fac;
+ /* Orthogonal vectors for triclinic cells, Cartesian index */
+ rvec v[DIM][DIM];
+ /* Normal vectors for the cells walls */
+ rvec normal[DIM];
+} gmx_ddbox_t;
+
+
+typedef struct {
+ /* these buffers are used as destination buffers if MPI_IN_PLACE isn't
+ supported.*/
+ int *ibuf; /* for ints */
+ int ibuf_alloc;
+
+ gmx_large_int_t *libuf;
+ int libuf_alloc;
+
+ float *fbuf; /* for floats */
+ int fbuf_alloc;
+
+ double *dbuf; /* for doubles */
+ int dbuf_alloc;
+} mpi_in_place_buf_t;
+
+
+typedef struct {
+ /* The DD particle-particle nodes only */
+ /* The communication setup within the communicator, all
+ * defined in dd->comm in domdec.c
+ */
+ int nnodes;
+ MPI_Comm mpi_comm_all;
+ /* Use MPI_Sendrecv communication instead of non-blocking calls */
+ gmx_bool bSendRecv2;
+ /* The local DD cell index and rank */
+ ivec ci;
+ int rank;
+ ivec master_ci;
+ int masterrank;
+ /* Communication with the PME only nodes */
+ int pme_nodeid;
+ gmx_bool pme_receive_vir_ener;
+ gmx_pme_comm_n_box_p_t cnb;
+ int nreq_pme;
+ MPI_Request req_pme[4];
+
+
+ /* The communication setup, identical for each cell, cartesian index */
+ ivec nc;
+ int ndim;
+ ivec dim; /* indexed by 0 to ndim */
+ gmx_bool bGridJump;
+
+ /* PBC from dim 0 to npbcdim */
+ int npbcdim;
+
+ /* Screw PBC? */
+ gmx_bool bScrewPBC;
+
+ /* Forward and backward neighboring cells, indexed by 0 to ndim */
+ int neighbor[DIM][2];
+
+ /* Only available on the master node */
+ gmx_domdec_master_p_t ma;
+
+ /* Are there inter charge group constraints */
+ gmx_bool bInterCGcons;
++ gmx_bool bInterCGsettles;
+
+ /* Global atom number to interaction list */
+ gmx_reverse_top_p_t reverse_top;
+ int nbonded_global;
+ int nbonded_local;
+
+ /* The number of inter charge-group exclusions */
+ int n_intercg_excl;
+
+ /* Vsite stuff */
++ gmx_hash_t ga2la_vsite;
+ gmx_domdec_specat_comm_p_t vsite_comm;
+
+ /* Constraint stuff */
+ gmx_domdec_constraints_p_t constraints;
+ gmx_domdec_specat_comm_p_t constraint_comm;
+
+ /* The local to global charge group index and local cg to local atom index */
+ int ncg_home;
+ int ncg_tot;
+ int *index_gl;
+ int *cgindex;
+ int cg_nalloc;
+ /* Local atom to local cg index, only for special cases */
+ int *la2lc;
+ int la2lc_nalloc;
+
+ /* The number of home atoms */
+ int nat_home;
+ /* The total number of atoms: home and received zones */
+ int nat_tot;
+ /* Index from the local atoms to the global atoms */
+ int *gatindex;
+ int gatindex_nalloc;
+
+ /* Global atom number to local atom number list */
+ gmx_ga2la_t ga2la;
+
+ /* Communication stuff */
+ gmx_domdec_comm_p_t comm;
+
+ /* The partitioning count, to keep track of the state */
+ gmx_large_int_t ddp_count;
+
+
+ /* gmx_pme_recv_f buffer */
+ int pme_recv_f_alloc;
+ rvec *pme_recv_f_buf;
+
+} gmx_domdec_t;
+
+typedef struct gmx_partdec *gmx_partdec_p_t;
+
+typedef struct {
+ int nsim;
+ int sim;
+ MPI_Group mpi_group_masters;
+ MPI_Comm mpi_comm_masters;
+ /* these buffers are used as destination buffers if MPI_IN_PLACE isn't
+ supported.*/
+ mpi_in_place_buf_t *mpb;
+} gmx_multisim_t;
+
+#define DUTY_PP (1<<0)
+#define DUTY_PME (1<<1)
+
+typedef struct {
+ int bUse;
+ MPI_Comm comm_intra;
+ int rank_intra;
+ MPI_Comm comm_inter;
+
+} gmx_nodecomm_t;
+
+typedef struct {
+ int dummy;
+} gmx_commrec_thread_t;
+
+typedef struct {
+ /* The nodeids in one sim are numbered sequentially from 0.
+ * All communication within some simulation should happen
+ * in mpi_comm_mysim, or its subset mpi_comm_mygroup.
+ */
+ int sim_nodeid,nnodes,npmenodes;
+
+ /* thread numbers: */
+ /* Not used yet: int threadid, nthreads; */
+ /* The nodeid in the PP/PME, PP or PME group */
+ int nodeid;
+ MPI_Comm mpi_comm_mysim;
+ MPI_Comm mpi_comm_mygroup;
+
++ /* intra-node stuff */
++ int nodeid_intra; /* ID over all intra nodes */
++ int nodeid_group_intra; /* ID within my group (separate 0-n IDs for PP/PME-only nodes) */
++ int nnodes_intra; /* total number of intra nodes */
++ int nnodes_pp_intra; /* total number of PP intra nodes */
++
+#ifdef GMX_THREAD_SHM_FDECOMP
+ gmx_commrec_thread_t thread;
+#endif
+
+ gmx_nodecomm_t nc;
+
+ /* For domain decomposition */
+ gmx_domdec_t *dd;
+
+ /* For particle decomposition */
+ gmx_partdec_p_t pd;
+
+ /* The duties of this node, see the defines above */
+ int duty;
+
+ gmx_multisim_t *ms;
+
+ /* these buffers are used as destination buffers if MPI_IN_PLACE isn't
+ supported.*/
+ mpi_in_place_buf_t *mpb;
+} t_commrec;
+
+#define MASTERNODE(cr) (((cr)->nodeid == 0) || !PAR(cr))
+ /* #define MASTERTHREAD(cr) ((cr)->threadid == 0) */
+ /* #define MASTER(cr) (MASTERNODE(cr) && MASTERTHREAD(cr)) */
+#define MASTER(cr) MASTERNODE(cr)
+#define SIMMASTER(cr) ((MASTER(cr) && ((cr)->duty & DUTY_PP)) || !PAR(cr))
+#define NODEPAR(cr) ((cr)->nnodes > 1)
+ /* #define THREADPAR(cr) ((cr)->nthreads > 1) */
+ /* #define PAR(cr) (NODEPAR(cr) || THREADPAR(cr)) */
+#define PAR(cr) NODEPAR(cr)
+#define RANK(cr,nodeid) (nodeid)
+#define MASTERRANK(cr) (0)
+
+#define DOMAINDECOMP(cr) (((cr)->dd != NULL) && PAR(cr))
+#define DDMASTER(dd) ((dd)->rank == (dd)->masterrank)
+
+#define PARTDECOMP(cr) ((cr)->pd != NULL)
+
+#define MULTISIM(cr) ((cr)->ms)
+#define MSRANK(ms,nodeid) (nodeid)
+#define MASTERSIM(ms) ((ms)->sim == 0)
+
+/* The master of all (the node that prints the remaining run time etc.) */
+#define MULTIMASTER(cr) (SIMMASTER(cr) && (!MULTISIM(cr) || MASTERSIM((cr)->ms)))
+
+#ifdef __cplusplus
+}
+#endif
+#endif
--- /dev/null
- #define EI_DYNAMICS(e) ((e) == eiMD || EI_SD(e) || (e) == eiBD || EI_VV(e))
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GRoups of Organic Molecules in ACtion for Science
+ */
+
++#ifndef ENUMS_H_
++#define ENUMS_H_
++
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* note: these enums should correspond to the names in gmxlib/names.c */
+
+enum {
+ epbcXYZ, epbcNONE, epbcXY, epbcSCREW, epbcNR
+};
+
+enum {
+ etcNO, etcBERENDSEN, etcNOSEHOOVER, etcYES, etcANDERSEN, etcANDERSENMASSIVE, etcVRESCALE, etcNR
+}; /* yes is an alias for berendsen */
+
+#define ETC_ANDERSEN(e) (((e) == etcANDERSENMASSIVE) || ((e) == etcANDERSEN))
+
+enum {
+ epcNO, epcBERENDSEN, epcPARRINELLORAHMAN, epcISOTROPIC, epcMTTK, epcNR
+}; /* isotropic is an alias for berendsen */
+
+/* trotter decomposition extended variable parts */
+enum {
+ etrtNONE, etrtNHC, etrtBAROV, etrtBARONHC, etrtNHC2, etrtBAROV2, etrtBARONHC2,
+ etrtVELOCITY1, etrtVELOCITY2, etrtPOSITION, etrtSKIPALL, etrtNR
+};
+
+/* sequenced parts of the trotter decomposition */
+enum {
+ ettTSEQ0, ettTSEQ1, ettTSEQ2, ettTSEQ3, ettTSEQ4, ettTSEQMAX
+};
+
+enum {
+ epctISOTROPIC, epctSEMIISOTROPIC, epctANISOTROPIC,
+ epctSURFACETENSION, epctNR
+};
+
+enum {
+ erscNO, erscALL, erscCOM, erscNR
+};
+
++enum {
++ ecutsGROUP, ecutsVERLET, ecutsNR
++};
++
++/* Coulomb / VdW interaction modifiers.
++ * grompp replaces eintmodPOTSHIFT_VERLET by eintmodPOTSHIFT or eintmodNONE.
++ */
++enum {
++ eintmodPOTSHIFT_VERLET, eintmodPOTSHIFT, eintmodNONE, eintmodNR
++};
++
+/*
+ * eelNOTUSED1 used to be GB, but to enable generalized born with different
+ * forms of electrostatics (RF, switch, etc.) in the future it is now selected
+ * separately (through the implicit_solvent option).
+ */
+enum {
+ eelCUT, eelRF, eelGRF, eelPME, eelEWALD, eelP3M_AD,
+ eelPOISSON, eelSWITCH, eelSHIFT, eelUSER, eelGB_NOTUSED, eelRF_NEC, eelENCADSHIFT,
+ eelPMEUSER, eelPMESWITCH, eelPMEUSERSWITCH, eelRF_ZERO, eelNR
+};
+
+/* Ewald geometry */
+enum {
+ eewg3D, eewg3DC, eewgNR
+};
+
+#define EEL_RF(e) ((e) == eelRF || (e) == eelGRF || (e) == eelRF_NEC || (e) == eelRF_ZERO )
+
+#define EEL_PME(e) ((e) == eelPME || (e) == eelPMESWITCH || (e) == eelPMEUSER || (e) == eelPMEUSERSWITCH || (e) == eelP3M_AD)
+#define EEL_FULL(e) (EEL_PME(e) || (e) == eelPOISSON || (e) == eelEWALD)
+
+#define EEL_SWITCHED(e) ((e) == eelSWITCH || (e) == eelSHIFT || (e) == eelENCADSHIFT || (e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH)
+
++#define EEL_USER(e) ((e) == eelUSER || (e) == eelPMEUSER || (e) == (eelPMESWITCH))
++
+#define EEL_IS_ZERO_AT_CUTOFF(e) (EEL_SWITCHED(e) || (e) == eelRF_ZERO)
+
+#define EEL_MIGHT_BE_ZERO_AT_CUTOFF(e) (EEL_IS_ZERO_AT_CUTOFF(e) || (e) == eelUSER || (e) == eelPMEUSER)
+
+enum {
+ evdwCUT, evdwSWITCH, evdwSHIFT, evdwUSER, evdwENCADSHIFT, evdwNR
+};
+
+#define EVDW_SWITCHED(e) ((e) == evdwSWITCH || (e) == evdwSHIFT || (e) == evdwENCADSHIFT)
+
+#define EVDW_IS_ZERO_AT_CUTOFF(e) EVDW_SWITCHED(e)
+
+#define EVDW_MIGHT_BE_ZERO_AT_CUTOFF(e) (EVDW_IS_ZERO_AT_CUTOFF(e) || (e) == evdwUSER)
+
+enum {
+ ensGRID, ensSIMPLE, ensNR
+};
+
+/* eiVV is normal velocity verlet -- eiVVAK uses 1/2*(KE(t-dt/2)+KE(t+dt/2)) as the kinetic energy, and the half step kinetic
+ energy for temperature control */
+
+enum {
+ eiMD, eiSteep, eiCG, eiBD, eiSD2, eiNM, eiLBFGS, eiTPI, eiTPIC, eiSD1, eiVV, eiVVAK, eiNR
+};
+#define EI_VV(e) ((e) == eiVV || (e) == eiVVAK)
++#define EI_MD(e) ((e) == eiMD || EI_VV(e))
+#define EI_SD(e) ((e) == eiSD1 || (e) == eiSD2)
+#define EI_RANDOM(e) (EI_SD(e) || (e) == eiBD)
+/*above integrators may not conserve momenta*/
- #define EI_STATE_VELOCITY(e) ((e) == eiMD || EI_VV(e) || EI_SD(e))
++#define EI_DYNAMICS(e) (EI_MD(e) || EI_SD(e) || (e) == eiBD)
+#define EI_ENERGY_MINIMIZATION(e) ((e) == eiSteep || (e) == eiCG || (e) == eiLBFGS)
+#define EI_TPI(e) ((e) == eiTPI || (e) == eiTPIC)
+
++#define EI_STATE_VELOCITY(e) (EI_MD(e) || EI_SD(e))
+
+enum {
+ econtLINCS, econtSHAKE, econtNR
+};
+
+enum {
+ edrNone, edrSimple, edrEnsemble, edrNR
+};
+
+enum {
+ edrwConservative, edrwEqual, edrwNR
+};
+
+/* Combination rule things */
+enum {
+ eCOMB_NONE, eCOMB_GEOMETRIC, eCOMB_ARITHMETIC, eCOMB_GEOM_SIG_EPS, eCOMB_NR
+};
+
+/* NBF selection */
+enum {
+ eNBF_NONE, eNBF_LJ, eNBF_BHAM, eNBF_NR
+};
+
+/* simulated tempering methods */
+enum {
+ esimtempGEOMETRIC, esimtempEXPONENTIAL, esimtempLINEAR, esimtempNR
+};
+/* FEP selection */
+enum {
+ efepNO, efepYES, efepSTATIC, efepSLOWGROWTH, efepEXPANDED, efepNR
+};
+ /* if efepNO, there are no evaluations at other states.
+ if efepYES, treated equivalently to efepSTATIC.
+ if efepSTATIC, then lambdas do not change during the simulation.
+ if efepSLOWGROWTH, then the states change monotonically throughout the simulation.
+ if efepEXPANDED, then expanded ensemble simulations are occurring.
+ */
+
+/* FEP coupling types */
+enum {
+ efptFEP,efptMASS,efptCOUL,efptVDW,efptBONDED,efptRESTRAINT,efptTEMPERATURE,efptNR
+};
+
+/* How the lambda weights are calculated:
+ elamstatsMETROPOLIS = using the metropolis criteria
+ elamstatsBARKER = using the Barker criteria for transition weights - also called unoptimized Bennett
+ elamstatsMINVAR = using Barker + minimum variance for weights
+ elamstatsWL = Wang-Landau (using visitation counts)
+ elamstatsWWL = Weighted Wang-Landau (using optimized gibbs weighted visitation counts)
+*/
+enum {
+ elamstatsNO, elamstatsMETROPOLIS, elamstatsBARKER, elamstatsMINVAR, elamstatsWL, elamstatsWWL, elamstatsNR
+};
+
+#define ELAMSTATS_EXPANDED(e) ((e) > elamstatsNO)
+
+#define EWL(e) ((e) == elamstatsWL || (e) == elamstatsWWL)
+
+/* How moves in lambda are calculated:
+ elmovemcMETROPOLIS - using the Metropolis criteria, and 50% up and down
+ elmovemcBARKER - using the Barker criteria, and 50% up and down
+ elmovemcGIBBS - computing the transition using the marginalized probabilities of the lambdas
+ elmovemcMETGIBBS - computing the transition using the metropolized version of Gibbs (Monte Carlo Strategies in Scientific computing, Liu, p. 134)
+*/
+enum {
+ elmcmoveNO,elmcmoveMETROPOLIS, elmcmoveBARKER, elmcmoveGIBBS, elmcmoveMETGIBBS, elmcmoveNR
+};
+
+/* how we decide whether weights have reached equilibrium
+ elmceqNO - never stop, weights keep going
+ elmceqYES - fix the weights from the beginning; no movement
+ elmceqWLDELTA - stop when the WL-delta falls below a certain level
+ elmceqNUMATLAM - stop when we have a certain number of samples at every step
+ elmceqSTEPS - stop when we've run a certain total number of steps
+ elmceqSAMPLES - stop when we've run a certain total number of samples
+ elmceqRATIO - stop when the ratio of samples (lowest to highest) is sufficiently large
+*/
+enum {
+ elmceqNO,elmceqYES,elmceqWLDELTA,elmceqNUMATLAM,elmceqSTEPS,elmceqSAMPLES,elmceqRATIO,elmceqNR
+};
+
+/* separate_dhdl_file selection */
+enum
+{
+ /* NOTE: YES is the first one. Do NOT interpret this one as a gmx_bool */
+ esepdhdlfileYES, esepdhdlfileNO, esepdhdlfileNR
+};
+
+/* dhdl_derivatives selection */
+enum
+{
+ /* NOTE: YES is the first one. Do NOT interpret this one as a gmx_bool */
+ edhdlderivativesYES, edhdlderivativesNO, edhdlderivativesNR
+};
+
+/* Solvent model */
+enum {
+ esolNO, esolSPC, esolTIP4P, esolNR
+};
+
+/* Dispersion correction */
+enum {
+ edispcNO, edispcEnerPres, edispcEner, edispcAllEnerPres, edispcAllEner, edispcNR
+};
+
+/* Shell types, for completion stuff */
+enum {
+ eshellCSH, eshellBASH, eshellZSH, eshellNR
+};
+
+/* Center of mass motion selection */
+enum {
+ ecmLINEAR, ecmANGULAR, ecmNO, ecmNR
+};
+
+/* New version of simulated annealing */
+enum {
+ eannNO, eannSINGLE, eannPERIODIC, eannNR
+};
+
+/* Implicit solvent algorithms */
+enum {
+ eisNO, eisGBSA, eisNR
+};
+
+/* Algorithms for calculating GB radii */
+enum {
+ egbSTILL, egbHCT, egbOBC, egbNR
+};
+
+enum {
+ esaAPPROX, esaNO, esaSTILL, esaNR
+};
+
+/* Wall types */
+enum {
+ ewt93, ewt104, ewtTABLE, ewt126, ewtNR
+};
+
+/* Pull stuff */
+enum {
+ epullNO, epullUMBRELLA, epullCONSTRAINT, epullCONST_F, epullNR
+};
+
+enum {
+ epullgDIST, epullgDIR, epullgCYL, epullgPOS, epullgDIRPBC, epullgNR
+};
+
+#define PULL_CYL(pull) ((pull)->eGeom == epullgCYL)
+
+/* Enforced rotation groups */
+enum {
+ erotgISO , erotgISOPF ,
+ erotgPM , erotgPMPF ,
+ erotgRM , erotgRMPF ,
+ erotgRM2 , erotgRM2PF ,
+ erotgFLEX , erotgFLEXT ,
+ erotgFLEX2, erotgFLEX2T,
+ erotgNR
+};
+
+enum {
+ erotgFitRMSD, erotgFitNORM, erotgFitPOT, erotgFitNR
+};
+
+/* QMMM */
+enum {
+ eQMmethodAM1, eQMmethodPM3, eQMmethodRHF,
+ eQMmethodUHF, eQMmethodDFT, eQMmethodB3LYP, eQMmethodMP2, eQMmethodCASSCF, eQMmethodB3LYPLAN,
+ eQMmethodDIRECT, eQMmethodNR
+};
+
+enum {
+ eQMbasisSTO3G, eQMbasisSTO3G2, eQMbasis321G,
+ eQMbasis321Gp, eQMbasis321dGp, eQMbasis621G,
+ eQMbasis631G, eQMbasis631Gp, eQMbasis631dGp,
+ eQMbasis6311G, eQMbasisNR
+};
+
+enum {
+ eQMMMschemenormal,eQMMMschemeoniom,eQMMMschemeNR
+};
+
+enum {
+ eMultentOptName, eMultentOptNo, eMultentOptLast, eMultentOptNR
+};
+
+/* flat-bottom posres geometries */
+enum {
+ efbposresZERO, efbposresSPHERE, efbposresCYLINDER, efbposresX, efbposresY, efbposresZ,
+ efbposresNR
+};
+
+enum {
+ eAdressOff,eAdressConst, eAdressXSplit, eAdressSphere, eAdressNR
+};
+
+enum {
+ eAdressICOff, eAdressICThermoForce, eAdressICNR
+};
+
+enum {
+ eAdressSITEcom,eAdressSITEcog, eAdressSITEatom, eAdressSITEatomatom, eAdressSITENR
+};
+
+#ifdef __cplusplus
+}
+#endif
+
++#endif /* ENUMS_H_ */
--- /dev/null
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GRoups of Organic Molecules in ACtion for Science
+ */
+
+
+#ifndef _idef_h
+#define _idef_h
+
+#include "simple.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* check kernel/toppush.c when you change these numbers */
+#define MAXATOMLIST 6
+#define MAXFORCEPARAM 12
+#define NR_RBDIHS 6
+#define NR_FOURDIHS 4
+
+typedef atom_id t_iatom;
+
+/* this MUST correspond to the
+ t_interaction_function[F_NRE] in gmxlib/ifunc.c */
+enum {
+ F_BONDS,
+ F_G96BONDS,
+ F_MORSE,
+ F_CUBICBONDS,
+ F_CONNBONDS,
+ F_HARMONIC,
+ F_FENEBONDS,
+ F_TABBONDS,
+ F_TABBONDSNC,
+ F_RESTRBONDS,
+ F_ANGLES,
+ F_G96ANGLES,
+ F_LINEAR_ANGLES,
+ F_CROSS_BOND_BONDS,
+ F_CROSS_BOND_ANGLES,
+ F_UREY_BRADLEY,
+ F_QUARTIC_ANGLES,
+ F_TABANGLES,
+ F_PDIHS,
+ F_RBDIHS,
+ F_FOURDIHS,
+ F_IDIHS,
+ F_PIDIHS,
+ F_TABDIHS,
+ F_CMAP,
+ F_GB12,
+ F_GB13,
+ F_GB14,
+ F_GBPOL,
+ F_NPSOLVATION,
+ F_LJ14,
+ F_COUL14,
+ F_LJC14_Q,
+ F_LJC_PAIRS_NB,
+ F_LJ,
+ F_BHAM,
+ F_LJ_LR,
+ F_BHAM_LR,
+ F_DISPCORR,
+ F_COUL_SR,
+ F_COUL_LR,
+ F_RF_EXCL,
+ F_COUL_RECIP,
+ F_DPD,
+ F_POLARIZATION,
+ F_WATER_POL,
+ F_THOLE_POL,
+ F_ANHARM_POL,
+ F_POSRES,
+ F_FBPOSRES,
+ F_DISRES,
+ F_DISRESVIOL,
+ F_ORIRES,
+ F_ORIRESDEV,
+ F_ANGRES,
+ F_ANGRESZ,
+ F_DIHRES,
+ F_DIHRESVIOL,
+ F_CONSTR,
+ F_CONSTRNC,
+ F_SETTLE,
+ F_VSITE2,
+ F_VSITE3,
+ F_VSITE3FD,
+ F_VSITE3FAD,
+ F_VSITE3OUT,
+ F_VSITE4FD,
+ F_VSITE4FDN,
+ F_VSITEN,
+ F_COM_PULL,
+ F_EQM,
+ F_EPOT,
+ F_EKIN,
+ F_ETOT,
+ F_ECONSERVED,
+ F_TEMP,
+ F_VTEMP,
+ F_PDISPCORR,
+ F_PRES,
+ F_DHDL_CON,
+ F_DVDL,
+ F_DKDL,
+ F_DVDL_COUL,
+ F_DVDL_VDW,
+ F_DVDL_BONDED,
+ F_DVDL_RESTRAINT,
+ F_DVDL_TEMPERATURE, /* not calculated for now, but should just be the energy (NVT) or enthalpy (NPT), or 0 (NVE) */
+ F_NRE /* This number is for the total number of energies */
+};
+
+#define IS_RESTRAINT_TYPE(ifunc) (((ifunc==F_POSRES) || (ifunc==F_DISRES) || (ifunc==F_RESTRBONDS) || (ifunc==F_DISRESVIOL) || (ifunc==F_ORIRES) || (ifunc==F_ORIRESDEV) || (ifunc==F_ANGRES) || (ifunc == F_ANGRESZ) || (ifunc==F_DIHRES)))
+
++/* A macro for checking if ftype is an explicit pair-listed LJ or COULOMB
++ * interaction type:
++ * bonded LJ (usually 1-4), or special listed non-bonded for FEP.
++ */
++#define IS_LISTED_LJ_C(ftype) ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB)
++
+typedef union
+{
+ /* Some parameters have A and B values for free energy calculations.
+ * The B values are not used for regular simulations of course.
+ * Free Energy for nonbondeds can be computed by changing the atom type.
+ * The harmonic type is used for all harmonic potentials:
+ * bonds, angles and improper dihedrals
+ */
+ struct {real a,b,c; } bham;
+ struct {real rA,krA,rB,krB; } harmonic;
+ struct {real klinA,aA,klinB,aB; } linangle;
+ struct {real lowA,up1A,up2A,kA,lowB,up1B,up2B,kB; } restraint;
+ /* No free energy supported for cubic bonds, FENE, WPOL or cross terms */
+ struct {real b0,kb,kcub; } cubic;
+ struct {real bm,kb; } fene;
+ struct {real r1e,r2e,krr; } cross_bb;
+ struct {real r1e,r2e,r3e,krt; } cross_ba;
+ struct {real thetaA,kthetaA,r13A,kUBA,thetaB,kthetaB,r13B,kUBB;} u_b;
+ struct {real theta,c[5]; } qangle;
+ struct {real alpha; } polarize;
+ struct {real alpha,drcut,khyp; } anharm_polarize;
+ struct {real al_x,al_y,al_z,rOH,rHH,rOD; } wpol;
+ struct {real a,alpha1,alpha2,rfac; } thole;
+ struct {real c6,c12; } lj;
+ struct {real c6A,c12A,c6B,c12B; } lj14;
+ struct {real fqq,qi,qj,c6,c12; } ljc14;
+ struct {real qi,qj,c6,c12; } ljcnb;
+ /* Proper dihedrals can not have different multiplicity when
+ * doing free energy calculations, because the potential would not
+ * be periodic anymore.
+ */
+ struct {real phiA,cpA;int mult;real phiB,cpB; } pdihs;
+ struct {real dA,dB; } constr;
+ /* Settle can not be used for Free energy calculations of water bond geometry.
+ * Use shake (or lincs) instead if you have to change the water bonds.
+ */
+ struct {real doh,dhh; } settle;
+ struct {real b0A,cbA,betaA,b0B,cbB,betaB; } morse;
+ struct {real pos0A[DIM],fcA[DIM],pos0B[DIM],fcB[DIM]; } posres;
+ struct {real pos0[DIM],r,k; int geom; } fbposres;
+ struct {real rbcA[NR_RBDIHS], rbcB[NR_RBDIHS]; } rbdihs;
+ struct {real a,b,c,d,e,f; } vsite;
+ struct {int n; real a; } vsiten;
+ /* NOTE: npair is only set after reading the tpx file */
+ struct {real low,up1,up2,kfac;int type,label,npair; } disres;
+ struct {real phiA,dphiA,kfacA,phiB,dphiB,kfacB; } dihres;
+ struct {int ex,power,label; real c,obs,kfac; } orires;
+ struct {int table;real kA;real kB; } tab;
+ struct {real sar,st,pi,gbr,bmlt; } gb;
+ struct {int cmapA,cmapB; } cmap;
+ struct {real buf[MAXFORCEPARAM]; } generic; /* Conversion */
+} t_iparams;
+
+typedef int t_functype;
+
+/*
+ * The nonperturbed/perturbed interactions are now separated (sorted) in the
+ * ilist, such that the first 0..(nr_nonperturbed-1) ones are exactly that, and
+ * the remaining ones from nr_nonperturbed..(nr-1) are perturbed bonded
+ * interactions.
+ */
+typedef struct
+{
+ int nr;
+ int nr_nonperturbed;
+ t_iatom *iatoms;
+ int nalloc;
+} t_ilist;
+
+/*
+ * The struct t_ilist defines a list of atoms with their interactions.
+ * General field description:
+ * int nr
+ * the size (nr elements) of the interactions array (iatoms[]).
+ * t_iatom *iatoms
+ * specifies which atoms are involved in an interaction of a certain
+ * type. The layout of this array is as follows:
+ *
+ * +-----+---+---+---+-----+---+---+-----+---+---+---+-----+---+---+...
+ * |type1|at1|at2|at3|type2|at1|at2|type1|at1|at2|at3|type3|at1|at2|
+ * +-----+---+---+---+-----+---+---+-----+---+---+---+-----+---+---+...
+ *
+ * So for interaction type type1 3 atoms are needed, and for type2 and
+ * type3 only 2. The type identifier is used to select the function to
+ * calculate the interaction and its actual parameters. This type
+ * identifier is an index in a params[] and functype[] array.
+ */
+
+typedef struct
+{
+ real *cmap; /* Has length 4*grid_spacing*grid_spacing, */
+ /* there are 4 entries for each cmap type (V,dVdx,dVdy,d2dVdxdy) */
+} cmapdata_t;
+
+typedef struct
+{
+ int ngrid; /* Number of allocated cmap (cmapdata_t ) grids */
+ int grid_spacing; /* Grid spacing */
+ cmapdata_t *cmapdata; /* Pointer to grid with actual, pre-interpolated data */
+} gmx_cmap_t;
+
+
+typedef struct
+{
+ int ntypes;
+ int atnr;
+ t_functype *functype;
+ t_iparams *iparams;
+ double reppow; /* The repulsion power for VdW: C12*r^-reppow */
+ real fudgeQQ; /* The scaling factor for Coulomb 1-4: f*q1*q2 */
+ gmx_cmap_t cmap_grid; /* The dihedral correction maps */
+} gmx_ffparams_t;
+
+enum {
+ ilsortUNKNOWN, ilsortNO_FE, ilsortFE_UNSORTED, ilsortFE_SORTED
+};
+
+typedef struct
+{
+ int ntypes;
+ int atnr;
+ t_functype *functype;
+ t_iparams *iparams;
+ real fudgeQQ;
+ gmx_cmap_t cmap_grid;
+ t_iparams *iparams_posres,*iparams_fbposres;
+ int iparams_posres_nalloc,iparams_fbposres_nalloc;
+
+ t_ilist il[F_NRE];
+ int ilsort;
+} t_idef;
+
+/*
+ * The struct t_idef defines all the interactions for the complete
+ * simulation. The structure is setup in such a way that the multinode
+ * version of the program can use it as easy as the single node version.
+ * General field description:
+ * int ntypes
+ * defines the number of elements in functype[] and param[].
+ * int nodeid
+ * the node id (if parallel machines)
+ * int atnr
+ * the number of atomtypes
+ * t_functype *functype
+ * array of length ntypes, defines for every force type what type of
+ * function to use. Every "bond" with the same function but different
+ * force parameters is a different force type. The type identifier in the
+ * forceatoms[] array is an index in this array.
+ * t_iparams *iparams
+ * array of length ntypes, defines the parameters for every interaction
+ * type. The type identifier in the actual interaction list
+ * (ilist[ftype].iatoms[]) is an index in this array.
+ * gmx_cmap_t cmap_grid
+ * the grid for the dihedral pair correction maps.
+ * t_iparams *iparams_posres, *iparams_fbposres
+ * defines the parameters for position restraints only.
+ * Position restraints are the only interactions that have different
+ * parameters (reference positions) for different molecules
+ * of the same type. ilist[F_POSRES].iatoms[] is an index in this array.
+ * t_ilist il[F_NRE]
+ * The list of interactions for each type. Note that some,
+ * such as LJ and COUL will have 0 entries.
+ */
+
+typedef struct {
+ int n; /* n+1 is the number of points */
+ real scale; /* distance between two points */
+ real *tab; /* the actual tables, per point there are 4 numbers */
+} bondedtable_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
--- /dev/null
--- /dev/null
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
++ *
++ * This source code is part of
++ *
++ * G R O M A C S
++ *
++ * GROningen MAchine for Chemical Simulations
++ *
++ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
++ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
++ * Copyright (c) 2001-2012, The GROMACS development team,
++ * check out http://www.gromacs.org for more information.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * If you want to redistribute modifications, please consider that
++ * scientific software is very special. Version control is crucial -
++ * bugs must be traceable. We will be happy to consider code for
++ * inclusion in the official distribution, but derived work must not
++ * be called official GROMACS. Details are found in the README & COPYING
++ * files - if they are missing, get the official version at www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the papers on the package - you can find them in the top README file.
++ *
++ * For more info, check our website at http://www.gromacs.org
++ *
++ * And Hey:
++ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
++ */
++
++#ifndef NB_VERLET_H
++#define NB_VERLET_H
++
++#include "nbnxn_pairlist.h"
++#include "nbnxn_cuda_types_ext.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/*! Nonbonded NxN kernel types: plain C, SSE/AVX, GPU CUDA, GPU emulation, etc */
++enum { nbkNotSet = 0,
++ nbk4x4_PlainC,
++ nbk4xN_X86_SIMD128,
++ nbk4xN_X86_SIMD256,
++ nbk8x8x8_CUDA,
++ nbk8x8x8_PlainC };
++
++/* Atom locality indicator: local, non-local, all, used for calls to:
++ gridding, pair-search, force calculation, x/f buffer operations */
++enum { eatLocal = 0, eatNonlocal = 1, eatAll };
++
++#define LOCAL_A(x) ((x) == eatLocal)
++#define NONLOCAL_A(x) ((x) == eatNonlocal)
++#define LOCAL_OR_NONLOCAL_A(x) (LOCAL_A(x) || NONLOCAL_A(x))
++
++/* Interaction locality indicator (used in pair-list search/calculations):
++ - local interactions require local atom data and affect local output only;
++ - non-local interactions require both local and non-local atom data and
++ affect both local- and non-local output. */
++enum { eintLocal = 0, eintNonlocal = 1 };
++
++#define LOCAL_I(x) ((x) == eintLocal)
++#define NONLOCAL_I(x) ((x) == eintNonlocal)
++
++enum { enbvClearFNo, enbvClearFYes };
++
++typedef struct {
++ nbnxn_pairlist_set_t nbl_lists; /* pair list(s) */
++ nbnxn_atomdata_t *nbat; /* atom data */
++ int kernel_type; /* non-bonded kernel - see enum above */
++} nonbonded_verlet_group_t;
++
++/* non-bonded data structure with Verlet-type cut-off */
++typedef struct {
++ nbnxn_search_t nbs; /* n vs n atom pair searching data */
++ int ngrp; /* number of interaction groups */
++ nonbonded_verlet_group_t grp[2];/* local and non-local interaction group */
++
++ gmx_bool bUseGPU; /* TRUE when GPU acceleration is used */
++ nbnxn_cuda_ptr_t cu_nbv; /* pointer to CUDA nb verlet data */
++ int min_ci_balanced; /* pair list balancing parameter
++ used for the 8x8x8 CUDA kernels */
++} nonbonded_verlet_t;
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* NB_VERLET_H */
--- /dev/null
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GRoups of Organic Molecules in ACtion for Science
+ */
+#ifndef _types_nrnb_h
+#define _types_nrnb_h
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The nonbonded kernels are documented in gmxlib/nonbonded_kernels,
+ * but here's a lazy version of the numbering. The first position
+ * is the Coulomb interaction (0 for none), second is Van der Waals
+ * (again, 0 means no interaction), and the third is the water optimization
+ * (0 meaning no water optimization = standard atom-atom loop)
+ *
+ * value
+ * pos 1 2 3 4
+ * 1st Coul Normal,1/r Reaction-field Table Generalized born
+ * 2nd Vdw Lennard-Jones Buckingham Table n/a
+ * 3rd Water. opt SPC-other atom SPC-SPC TIP4p-other at. TIP4p-TIP4p
+ */
+
+#define eNR_NBKERNEL_NONE -1
+
+enum
+{
+ eNR_NBKERNEL010, eNR_NBKERNEL020, eNR_NBKERNEL030,
+ eNR_NBKERNEL100, eNR_NBKERNEL101, eNR_NBKERNEL102, eNR_NBKERNEL103, eNR_NBKERNEL104,
+ eNR_NBKERNEL110, eNR_NBKERNEL111, eNR_NBKERNEL112, eNR_NBKERNEL113, eNR_NBKERNEL114,
+ eNR_NBKERNEL120, eNR_NBKERNEL121, eNR_NBKERNEL122, eNR_NBKERNEL123, eNR_NBKERNEL124,
+ eNR_NBKERNEL130, eNR_NBKERNEL131, eNR_NBKERNEL132, eNR_NBKERNEL133, eNR_NBKERNEL134,
+ eNR_NBKERNEL200, eNR_NBKERNEL201, eNR_NBKERNEL202, eNR_NBKERNEL203, eNR_NBKERNEL204,
+ eNR_NBKERNEL210, eNR_NBKERNEL211, eNR_NBKERNEL212, eNR_NBKERNEL213, eNR_NBKERNEL214,
+ eNR_NBKERNEL220, eNR_NBKERNEL221, eNR_NBKERNEL222, eNR_NBKERNEL223, eNR_NBKERNEL224,
+ eNR_NBKERNEL230, eNR_NBKERNEL231, eNR_NBKERNEL232, eNR_NBKERNEL233, eNR_NBKERNEL234,
+ eNR_NBKERNEL300, eNR_NBKERNEL301, eNR_NBKERNEL302, eNR_NBKERNEL303, eNR_NBKERNEL304,
+ eNR_NBKERNEL310, eNR_NBKERNEL311, eNR_NBKERNEL312, eNR_NBKERNEL313, eNR_NBKERNEL314,
+ eNR_NBKERNEL320, eNR_NBKERNEL321, eNR_NBKERNEL322, eNR_NBKERNEL323, eNR_NBKERNEL324,
+ eNR_NBKERNEL330, eNR_NBKERNEL331, eNR_NBKERNEL332, eNR_NBKERNEL333, eNR_NBKERNEL334,
+ eNR_NBKERNEL400, eNR_NBKERNEL410, eNR_NBKERNEL430,
+ eNR_NBKERNEL010NF, eNR_NBKERNEL020NF, eNR_NBKERNEL030NF,
+ eNR_NBKERNEL100NF, eNR_NBKERNEL101NF, eNR_NBKERNEL102NF, eNR_NBKERNEL103NF, eNR_NBKERNEL104NF,
+ eNR_NBKERNEL110NF, eNR_NBKERNEL111NF, eNR_NBKERNEL112NF, eNR_NBKERNEL113NF, eNR_NBKERNEL114NF,
+ eNR_NBKERNEL120NF, eNR_NBKERNEL121NF, eNR_NBKERNEL122NF, eNR_NBKERNEL123NF, eNR_NBKERNEL124NF,
+ eNR_NBKERNEL130NF, eNR_NBKERNEL131NF, eNR_NBKERNEL132NF, eNR_NBKERNEL133NF, eNR_NBKERNEL134NF,
+ eNR_NBKERNEL200NF, eNR_NBKERNEL201NF, eNR_NBKERNEL202NF, eNR_NBKERNEL203NF, eNR_NBKERNEL204NF,
+ eNR_NBKERNEL210NF, eNR_NBKERNEL211NF, eNR_NBKERNEL212NF, eNR_NBKERNEL213NF, eNR_NBKERNEL214NF,
+ eNR_NBKERNEL220NF, eNR_NBKERNEL221NF, eNR_NBKERNEL222NF, eNR_NBKERNEL223NF, eNR_NBKERNEL224NF,
+ eNR_NBKERNEL230NF, eNR_NBKERNEL231NF, eNR_NBKERNEL232NF, eNR_NBKERNEL233NF, eNR_NBKERNEL234NF,
+ eNR_NBKERNEL300NF, eNR_NBKERNEL301NF, eNR_NBKERNEL302NF, eNR_NBKERNEL303NF, eNR_NBKERNEL304NF,
+ eNR_NBKERNEL310NF, eNR_NBKERNEL311NF, eNR_NBKERNEL312NF, eNR_NBKERNEL313NF, eNR_NBKERNEL314NF,
+ eNR_NBKERNEL320NF, eNR_NBKERNEL321NF, eNR_NBKERNEL322NF, eNR_NBKERNEL323NF, eNR_NBKERNEL324NF,
+ eNR_NBKERNEL330NF, eNR_NBKERNEL331NF, eNR_NBKERNEL332NF, eNR_NBKERNEL333NF, eNR_NBKERNEL334NF,
+ eNR_NBKERNEL400NF, eNR_NBKERNEL410NF, eNR_NBKERNEL430NF,
+ eNR_NBKERNEL_NR,
+ eNR_NBKERNEL_FREE_ENERGY = eNR_NBKERNEL_NR,
+ eNR_NBKERNEL_ALLVSALL,
+ eNR_NBKERNEL_ALLVSALLGB,
+ eNR_NBKERNEL_OUTER,
++ eNR_NBNXN_DIST2,
++ eNR_NBNXN_LJ_RF, eNR_NBNXN_LJ_RF_E,
++ eNR_NBNXN_LJ_TAB, eNR_NBNXN_LJ_TAB_E,
++ eNR_NBNXN_LJ, eNR_NBNXN_LJ_E,
++ eNR_NBNXN_RF, eNR_NBNXN_RF_E,
++ eNR_NBNXN_TAB, eNR_NBNXN_TAB_E,
+ eNR_NB14,
+ eNR_BORN_RADII_STILL, eNR_BORN_RADII_HCT_OBC,
+ eNR_BORN_CHAINRULE,
+ eNR_BORN_AVA_RADII_STILL, eNR_BORN_AVA_RADII_HCT_OBC,
+ eNR_BORN_AVA_CHAINRULE,
+ eNR_WEIGHTS, eNR_SPREADQ, eNR_SPREADQBSP,
+ eNR_GATHERF, eNR_GATHERFBSP, eNR_FFT,
+ eNR_CONV, eNR_SOLVEPME,eNR_NS, eNR_RESETX,
+ eNR_SHIFTX, eNR_CGCM, eNR_FSUM,
+ eNR_BONDS, eNR_G96BONDS, eNR_FENEBONDS,
+ eNR_TABBONDS, eNR_RESTRBONDS, eNR_LINEAR_ANGLES,
+ eNR_ANGLES, eNR_G96ANGLES, eNR_QANGLES,
+ eNR_TABANGLES, eNR_PROPER, eNR_IMPROPER,
+ eNR_RB, eNR_FOURDIH, eNR_TABDIHS,
+ eNR_DISRES, eNR_ORIRES, eNR_DIHRES,
+ eNR_POSRES, eNR_FBPOSRES,
+ eNR_ANGRES, eNR_ANGRESZ,
+ eNR_MORSE, eNR_CUBICBONDS, eNR_WALLS,
+ eNR_POLARIZE, eNR_ANHARM_POL,
+ eNR_WPOL, eNR_THOLE, eNR_VIRIAL,
+ eNR_UPDATE, eNR_EXTUPDATE, eNR_STOPCM,
+ eNR_PCOUPL, eNR_EKIN, eNR_LINCS,
+ eNR_LINCSMAT, eNR_SHAKE, eNR_CONSTR_V,
+ eNR_SHAKE_RIJ, eNR_CONSTR_VIR, eNR_SETTLE,
+ eNR_VSITE2, eNR_VSITE3, eNR_VSITE3FD,
+ eNR_VSITE3FAD, eNR_VSITE3OUT, eNR_VSITE4FD,
+ eNR_VSITE4FDN, eNR_VSITEN, eNR_GB,
+ eNR_CMAP,
+ eNRNB
+};
+
+
+typedef struct {
+ double n[eNRNB];
+} t_nrnb;
+
+
+typedef struct gmx_wallcycle *gmx_wallcycle_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null
- file(GLOB MDLIB_SOURCES *.c *.cpp)
++
++file(GLOB MDLIB_SOURCES *.c *.cpp nbnxn_kernels/*.c)
+if(GMX_FFT_FFTPACK)
+list(APPEND MDLIB_SOURCES ${CMAKE_SOURCE_DIR}/src/external/fftpack/fftpack.c)
+endif()
+set(MDLIB_SOURCES ${MDLIB_SOURCES} PARENT_SCOPE)
+if (BUILD_TESTING)
+ add_subdirectory(tests)
+endif (BUILD_TESTING)
++
++if(GMX_GPU)
++ add_subdirectory(nbnxn_cuda)
++ set(GMX_GPU_LIBRARIES ${GMX_GPU_LIBRARIES} nbnxn_cuda PARENT_SCOPE)
++endif()
--- /dev/null
- struct gmx_constr *constr,
- t_idef *idef,t_inputrec *ir,gmx_ekindata_t *ekind,
- t_commrec *cr,
- gmx_large_int_t step,int delta_step,
- t_mdatoms *md,
- rvec *x,rvec *xprime,rvec *min_proj,matrix box,
- real lambda,real *dvdlambda,
- rvec *v,tensor *vir,
- t_nrnb *nrnb,int econq,gmx_bool bPscal,real veta, real vetanew)
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "confio.h"
+#include "constr.h"
+#include "copyrite.h"
+#include "invblock.h"
+#include "main.h"
+#include "mdrun.h"
+#include "nrnb.h"
+#include "smalloc.h"
+#include "vec.h"
+#include "physics.h"
+#include "names.h"
+#include "txtdump.h"
+#include "domdec.h"
+#include "pdbio.h"
+#include "partdec.h"
+#include "splitter.h"
+#include "mtop_util.h"
+#include "gmxfio.h"
+#include "macros.h"
++#include "gmx_omp_nthreads.h"
+
+typedef struct gmx_constr {
+ int ncon_tot; /* The total number of constraints */
+ int nflexcon; /* The number of flexible constraints */
+ int n_at2con_mt; /* The size of at2con = #moltypes */
+ t_blocka *at2con_mt; /* A list of atoms to constraints */
++ int n_at2settle_mt; /* The size of at2settle = #moltypes */
++ int **at2settle_mt; /* A list of atoms to settles */
++ gmx_bool bInterCGsettles;
+ gmx_lincsdata_t lincsd; /* LINCS data */
+ gmx_shakedata_t shaked; /* SHAKE data */
+ gmx_settledata_t settled; /* SETTLE data */
+ int nblocks; /* The number of SHAKE blocks */
+ int *sblock; /* The SHAKE blocks */
+ int sblock_nalloc;/* The allocation size of sblock */
+ real *lagr; /* Lagrange multipliers for SHAKE */
+ int lagr_nalloc; /* The allocation size of lagr */
+ int maxwarn; /* The maximum number of warnings */
+ int warncount_lincs;
+ int warncount_settle;
+ gmx_edsam_t ed; /* The essential dynamics data */
+
++ tensor *rmdr_th; /* Thread local working data */
++ int *settle_error; /* Thread local working data */
++
+ gmx_mtop_t *warn_mtop; /* Only used for printing warnings */
+} t_gmx_constr;
+
+typedef struct {
+ atom_id iatom[3];
+ atom_id blocknr;
+} t_sortblock;
+
+static void *init_vetavars(t_vetavars *vars,
+ gmx_bool constr_deriv,
+ real veta,real vetanew, t_inputrec *ir, gmx_ekindata_t *ekind, gmx_bool bPscal)
+{
+ double g;
+ int i;
+
+ /* first, set the alpha integrator variable */
+ if ((ir->opts.nrdf[0] > 0) && bPscal)
+ {
+ vars->alpha = 1.0 + DIM/((double)ir->opts.nrdf[0]);
+ } else {
+ vars->alpha = 1.0;
+ }
+ g = 0.5*veta*ir->delta_t;
+ vars->rscale = exp(g)*series_sinhx(g);
+ g = -0.25*vars->alpha*veta*ir->delta_t;
+ vars->vscale = exp(g)*series_sinhx(g);
+ vars->rvscale = vars->vscale*vars->rscale;
+ vars->veta = vetanew;
+
+ if (constr_deriv)
+ {
+ snew(vars->vscale_nhc,ir->opts.ngtc);
+ if ((ekind==NULL) || (!bPscal))
+ {
+ for (i=0;i<ir->opts.ngtc;i++)
+ {
+ vars->vscale_nhc[i] = 1;
+ }
+ }
+ else
+ {
+ for (i=0;i<ir->opts.ngtc;i++)
+ {
+ vars->vscale_nhc[i] = ekind->tcstat[i].vscale_nhc;
+ }
+ }
+ }
+ else
+ {
+ vars->vscale_nhc = NULL;
+ }
+
+ return vars;
+}
+
+static void free_vetavars(t_vetavars *vars)
+{
+ if (vars->vscale_nhc != NULL)
+ {
+ sfree(vars->vscale_nhc);
+ }
+}
+
+static int pcomp(const void *p1, const void *p2)
+{
+ int db;
+ atom_id min1,min2,max1,max2;
+ t_sortblock *a1=(t_sortblock *)p1;
+ t_sortblock *a2=(t_sortblock *)p2;
+
+ db=a1->blocknr-a2->blocknr;
+
+ if (db != 0)
+ return db;
+
+ min1=min(a1->iatom[1],a1->iatom[2]);
+ max1=max(a1->iatom[1],a1->iatom[2]);
+ min2=min(a2->iatom[1],a2->iatom[2]);
+ max2=max(a2->iatom[1],a2->iatom[2]);
+
+ if (min1 == min2)
+ return max1-max2;
+ else
+ return min1-min2;
+}
+
+int n_flexible_constraints(struct gmx_constr *constr)
+{
+ int nflexcon;
+
+ if (constr)
+ nflexcon = constr->nflexcon;
+ else
+ nflexcon = 0;
+
+ return nflexcon;
+}
+
+void too_many_constraint_warnings(int eConstrAlg,int warncount)
+{
+ const char *abort="- aborting to avoid logfile runaway.\n"
+ "This normally happens when your system is not sufficiently equilibrated,"
+ "or if you are changing lambda too fast in free energy simulations.\n";
+
+ gmx_fatal(FARGS,
+ "Too many %s warnings (%d)\n"
+ "If you know what you are doing you can %s"
+ "set the environment variable GMX_MAXCONSTRWARN to -1,\n"
+ "but normally it is better to fix the problem",
+ (eConstrAlg == econtLINCS) ? "LINCS" : "SETTLE",warncount,
+ (eConstrAlg == econtLINCS) ?
+ "adjust the lincs warning threshold in your mdp file\nor " : "\n");
+}
+
+static void write_constr_pdb(const char *fn,const char *title,
+ gmx_mtop_t *mtop,
+ int start,int homenr,t_commrec *cr,
+ rvec x[],matrix box)
+{
+ char fname[STRLEN],format[STRLEN];
+ FILE *out;
+ int dd_ac0=0,dd_ac1=0,i,ii,resnr;
+ gmx_domdec_t *dd;
+ char *anm,*resnm;
+
+ dd = NULL;
+ if (PAR(cr))
+ {
+ sprintf(fname,"%s_n%d.pdb",fn,cr->sim_nodeid);
+ if (DOMAINDECOMP(cr))
+ {
+ dd = cr->dd;
+ dd_get_constraint_range(dd,&dd_ac0,&dd_ac1);
+ start = 0;
+ homenr = dd_ac1;
+ }
+ }
+ else
+ {
+ sprintf(fname,"%s.pdb",fn);
+ }
+ sprintf(format,"%s\n",get_pdbformat());
+
+ out = gmx_fio_fopen(fname,"w");
+
+ fprintf(out,"TITLE %s\n",title);
+ gmx_write_pdb_box(out,-1,box);
+ for(i=start; i<start+homenr; i++)
+ {
+ if (dd != NULL)
+ {
+ if (i >= dd->nat_home && i < dd_ac0)
+ {
+ continue;
+ }
+ ii = dd->gatindex[i];
+ }
+ else
+ {
+ ii = i;
+ }
+ gmx_mtop_atominfo_global(mtop,ii,&anm,&resnr,&resnm);
+ fprintf(out,format,"ATOM",(ii+1)%100000,
+ anm,resnm,' ',resnr%10000,' ',
+ 10*x[i][XX],10*x[i][YY],10*x[i][ZZ]);
+ }
+ fprintf(out,"TER\n");
+
+ gmx_fio_fclose(out);
+}
+
+static void dump_confs(FILE *fplog,gmx_large_int_t step,gmx_mtop_t *mtop,
+ int start,int homenr,t_commrec *cr,
+ rvec x[],rvec xprime[],matrix box)
+{
+ char buf[256],buf2[22];
+
+ char *env=getenv("GMX_SUPPRESS_DUMP");
+ if (env)
+ return;
+
+ sprintf(buf,"step%sb",gmx_step_str(step,buf2));
+ write_constr_pdb(buf,"initial coordinates",
+ mtop,start,homenr,cr,x,box);
+ sprintf(buf,"step%sc",gmx_step_str(step,buf2));
+ write_constr_pdb(buf,"coordinates after constraining",
+ mtop,start,homenr,cr,xprime,box);
+ if (fplog)
+ {
+ fprintf(fplog,"Wrote pdb files with previous and current coordinates\n");
+ }
+ fprintf(stderr,"Wrote pdb files with previous and current coordinates\n");
+}
+
+static void pr_sortblock(FILE *fp,const char *title,int nsb,t_sortblock sb[])
+{
+ int i;
+
+ fprintf(fp,"%s\n",title);
+ for(i=0; (i<nsb); i++)
+ fprintf(fp,"i: %5d, iatom: (%5d %5d %5d), blocknr: %5d\n",
+ i,sb[i].iatom[0],sb[i].iatom[1],sb[i].iatom[2],
+ sb[i].blocknr);
+}
+
+gmx_bool constrain(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
- int ncons,error;
++ struct gmx_constr *constr,
++ t_idef *idef,t_inputrec *ir,gmx_ekindata_t *ekind,
++ t_commrec *cr,
++ gmx_large_int_t step,int delta_step,
++ t_mdatoms *md,
++ rvec *x,rvec *xprime,rvec *min_proj,
++ gmx_bool bMolPBC,matrix box,
++ real lambda,real *dvdlambda,
++ rvec *v,tensor *vir,
++ t_nrnb *nrnb,int econq,gmx_bool bPscal,
++ real veta, real vetanew)
+{
+ gmx_bool bOK,bDump;
+ int start,homenr,nrend;
+ int i,j,d;
- t_pbc pbc;
++ int ncons,settle_error;
+ tensor rmdr;
+ rvec *vstor;
+ real invdt,vir_fac,t;
+ t_ilist *settle;
+ int nsettle;
- if (constr->lincsd)
++ t_pbc pbc,*pbc_null;
+ char buf[22];
+ t_vetavars vetavar;
++ int nth,th;
+
+ if (econq == econqForceDispl && !EI_ENERGY_MINIMIZATION(ir->eI))
+ {
+ gmx_incons("constrain called for forces displacements while not doing energy minimization, can not do this while the LINCS and SETTLE constraint connection matrices are mass weighted");
+ }
+
+ bOK = TRUE;
+ bDump = FALSE;
+
+ start = md->start;
+ homenr = md->homenr;
+ nrend = start+homenr;
+
+ /* set constants for pressure control integration */
+ init_vetavars(&vetavar,econq!=econqCoord,
+ veta,vetanew,ir,ekind,bPscal);
+
+ if (ir->delta_t == 0)
+ {
+ invdt = 0;
+ }
+ else
+ {
+ invdt = 1/ir->delta_t;
+ }
+
+ if (ir->efep != efepNO && EI_DYNAMICS(ir->eI))
+ {
+ /* Set the constraint lengths for the step at which this configuration
+ * is meant to be. The invmasses should not be changed.
+ */
+ lambda += delta_step*ir->fepvals->delta_lambda;
+ }
+
+ if (vir != NULL)
+ {
+ clear_mat(rmdr);
+ }
+
+ where();
- x,xprime,min_proj,box,lambda,dvdlambda,
++
++ settle = &idef->il[F_SETTLE];
++ nsettle = settle->nr/(1+NRAL(F_SETTLE));
++
++ if (nsettle > 0)
++ {
++ nth = gmx_omp_nthreads_get(emntSETTLE);
++ }
++ else
++ {
++ nth = 1;
++ }
++
++ if (nth > 1 && constr->rmdr_th == NULL)
++ {
++ snew(constr->rmdr_th,nth);
++ snew(constr->settle_error,nth);
++ }
++
++ settle_error = -1;
++
++ /* We do not need full pbc when constraints do not cross charge groups,
++ * i.e. when dd->constraint_comm==NULL.
++ * Note that PBC for constraints is different from PBC for bondeds.
++ * For constraints there is both forward and backward communication.
++ */
++ if (ir->ePBC != epbcNONE &&
++ (cr->dd || bMolPBC) && !(cr->dd && cr->dd->constraint_comm==NULL))
++ {
++ /* With pbc=screw the screw has been changed to a shift
++ * by the constraint coordinate communication routine,
++ * so that here we can use normal pbc.
++ */
++ pbc_null = set_pbc_dd(&pbc,ir->ePBC,cr->dd,FALSE,box);
++ }
++ else
++ {
++ pbc_null = NULL;
++ }
++
++ /* Communicate the coordinates required for the non-local constraints
++ * for LINCS and/or SETTLE.
++ */
++ if (cr->dd)
++ {
++ dd_move_x_constraints(cr->dd,box,x,xprime);
++ }
++ else if (PARTDECOMP(cr))
++ {
++ pd_move_x_constraints(cr,x,xprime);
++ }
++
++ if (constr->lincsd != NULL)
+ {
+ bOK = constrain_lincs(fplog,bLog,bEner,ir,step,constr->lincsd,md,cr,
- idef,ir,box,x,xprime,nrnb,
++ x,xprime,min_proj,
++ box,pbc_null,lambda,dvdlambda,
+ invdt,v,vir!=NULL,rmdr,
+ econq,nrnb,
+ constr->maxwarn,&constr->warncount_lincs);
+ if (!bOK && constr->maxwarn >= 0)
+ {
+ if (fplog != NULL)
+ {
+ fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
+ econstr_names[econtLINCS],gmx_step_str(step,buf));
+ }
+ bDump = TRUE;
+ }
+ }
+
+ if (constr->nblocks > 0)
+ {
+ switch (econq) {
+ case (econqCoord):
+ bOK = bshakef(fplog,constr->shaked,
+ homenr,md->invmass,constr->nblocks,constr->sblock,
- invdt,v,vir!=NULL,rmdr,constr->maxwarn>=0,econq,
- &vetavar);
++ idef,ir,x,xprime,nrnb,
+ constr->lagr,lambda,dvdlambda,
- idef,ir,box,x,min_proj,nrnb,
++ invdt,v,vir!=NULL,rmdr,constr->maxwarn>=0,econq,&vetavar);
+ break;
+ case (econqVeloc):
+ bOK = bshakef(fplog,constr->shaked,
+ homenr,md->invmass,constr->nblocks,constr->sblock,
- invdt,NULL,vir!=NULL,rmdr,constr->maxwarn>=0,econq,
- &vetavar);
++ idef,ir,x,min_proj,nrnb,
+ constr->lagr,lambda,dvdlambda,
-
++ invdt,NULL,vir!=NULL,rmdr,constr->maxwarn>=0,econq,&vetavar);
+ break;
+ default:
+ gmx_fatal(FARGS,"Internal error, SHAKE called for constraining something else than coordinates");
+ break;
+ }
-
- settle = &idef->il[F_SETTLE];
- if (settle->nr > 0)
++
+ if (!bOK && constr->maxwarn >= 0)
+ {
+ if (fplog != NULL)
+ {
+ fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
+ econstr_names[econtSHAKE],gmx_step_str(step,buf));
+ }
+ bDump = TRUE;
+ }
+ }
- nsettle = settle->nr/4;
-
++
++ if (nsettle > 0)
+ {
- csettle(constr->settled,
- nsettle,settle->iatoms,x[0],xprime[0],
- invdt,v?v[0]:NULL,vir!=NULL,rmdr,&error,&vetavar);
++ int calcvir_atom_end;
++
++ if (vir == NULL)
++ {
++ calcvir_atom_end = 0;
++ }
++ else
++ {
++ calcvir_atom_end = md->start + md->homenr;
++ }
++
+ switch (econq)
+ {
+ case econqCoord:
-
- bOK = (error < 0);
- if (!bOK && constr->maxwarn >= 0)
++#pragma omp parallel for num_threads(nth) schedule(static)
++ for(th=0; th<nth; th++)
++ {
++ int start_th,end_th;
++
++ if (th > 0)
++ {
++ clear_mat(constr->rmdr_th[th]);
++ }
++
++ start_th = (nsettle* th )/nth;
++ end_th = (nsettle*(th+1))/nth;
++ if (start_th >= 0 && end_th - start_th > 0)
++ {
++ csettle(constr->settled,
++ end_th-start_th,
++ settle->iatoms+start_th*(1+NRAL(F_SETTLE)),
++ pbc_null,
++ x[0],xprime[0],
++ invdt,v?v[0]:NULL,calcvir_atom_end,
++ th == 0 ? rmdr : constr->rmdr_th[th],
++ th == 0 ? &settle_error : &constr->settle_error[th],
++ &vetavar);
++ }
++ }
+ inc_nrnb(nrnb,eNR_SETTLE,nsettle);
+ if (v != NULL)
+ {
+ inc_nrnb(nrnb,eNR_CONSTR_V,nsettle*3);
+ }
+ if (vir != NULL)
+ {
+ inc_nrnb(nrnb,eNR_CONSTR_VIR,nsettle*3);
+ }
- step,ddglatnr(cr->dd,settle->iatoms[error*4+1]));
++ break;
++ case econqVeloc:
++ case econqDeriv:
++ case econqForce:
++ case econqForceDispl:
++#pragma omp parallel for num_threads(nth) schedule(static)
++ for(th=0; th<nth; th++)
++ {
++ int start_th,end_th;
++
++ if (th > 0)
++ {
++ clear_mat(constr->rmdr_th[th]);
++ }
++
++ start_th = (nsettle* th )/nth;
++ end_th = (nsettle*(th+1))/nth;
++
++ if (start_th >= 0 && end_th - start_th > 0)
++ {
++ settle_proj(fplog,constr->settled,econq,
++ end_th-start_th,
++ settle->iatoms+start_th*(1+NRAL(F_SETTLE)),
++ pbc_null,
++ x,
++ xprime,min_proj,calcvir_atom_end,
++ th == 0 ? rmdr : constr->rmdr_th[th],
++ &vetavar);
++ }
++ }
++ /* This is an overestimate */
++ inc_nrnb(nrnb,eNR_SETTLE,nsettle);
++ break;
++ case econqDeriv_FlexCon:
++            /* Nothing to do, since there are no flexible constraints in settles */
++ break;
++ default:
++ gmx_incons("Unknown constraint quantity for settle");
++ }
++ }
++
++ if (settle->nr > 0)
++ {
++ /* Combine virial and error info of the other threads */
++ for(i=1; i<nth; i++)
++ {
++ m_add(rmdr,constr->rmdr_th[i],rmdr);
++ settle_error = constr->settle_error[i];
++ }
++
++ if (econq == econqCoord && settle_error >= 0)
++ {
++ bOK = FALSE;
++ if (constr->maxwarn >= 0)
+ {
+ char buf[256];
+ sprintf(buf,
+ "\nstep " gmx_large_int_pfmt ": Water molecule starting at atom %d can not be "
+ "settled.\nCheck for bad contacts and/or reduce the timestep if appropriate.\n",
- break;
- case econqVeloc:
- case econqDeriv:
- case econqForce:
- case econqForceDispl:
- settle_proj(fplog,constr->settled,econq,
- nsettle,settle->iatoms,x,
- xprime,min_proj,vir!=NULL,rmdr,&vetavar);
- /* This is an overestimate */
- inc_nrnb(nrnb,eNR_SETTLE,nsettle);
- break;
- case econqDeriv_FlexCon:
- /* Nothing to do, since the are no flexible constraints in settles */
- break;
- default:
- gmx_incons("Unknown constraint quantity for settle");
++ step,ddglatnr(cr->dd,settle->iatoms[settle_error*(1+NRAL(F_SETTLE))+1]));
+ if (fplog)
+ {
+ fprintf(fplog,"%s",buf);
+ }
+ fprintf(stderr,"%s",buf);
+ constr->warncount_settle++;
+ if (constr->warncount_settle > constr->maxwarn)
+ {
+ too_many_constraint_warnings(-1,constr->warncount_settle);
+ }
+ bDump = TRUE;
-
+ }
+ }
+ }
-
++
+ free_vetavars(&vetavar);
+
+ if (vir != NULL)
+ {
+ switch (econq)
+ {
+ case econqCoord:
+ vir_fac = 0.5/(ir->delta_t*ir->delta_t);
+ break;
+ case econqVeloc:
+ vir_fac = 0.5/ir->delta_t;
+ break;
+ case econqForce:
+ case econqForceDispl:
+ vir_fac = 0.5;
+ break;
+ default:
+ vir_fac = 0;
+ gmx_incons("Unsupported constraint quantity for virial");
+ }
+
+ if (EI_VV(ir->eI))
+ {
+ vir_fac *= 2; /* only constraining over half the distance here */
+ }
+ for(i=0; i<DIM; i++)
+ {
+ for(j=0; j<DIM; j++)
+ {
+ (*vir)[i][j] = vir_fac*rmdr[i][j];
+ }
+ }
+ }
+
+ if (bDump)
+ {
+ dump_confs(fplog,step,constr->warn_mtop,start,homenr,cr,x,xprime,box);
+ }
+
+ if (econq == econqCoord)
+ {
+ if (ir->ePull == epullCONSTRAINT)
+ {
+ if (EI_DYNAMICS(ir->eI))
+ {
+ t = ir->init_t + (step + delta_step)*ir->delta_t;
+ }
+ else
+ {
+ t = ir->init_t;
+ }
+ set_pbc(&pbc,ir->ePBC,box);
+ pull_constraint(ir->pull,md,&pbc,cr,ir->delta_t,t,x,xprime,v,*vir);
+ }
+ if (constr->ed && delta_step > 0)
+ {
+            /* apply the essential dynamics constraints here */
+ do_edsam(ir,step,md,cr,xprime,v,box,constr->ed);
+ }
+ }
+
+ return bOK;
+}
+
+real *constr_rmsd_data(struct gmx_constr *constr)
+{
+ if (constr->lincsd)
+ return lincs_rmsd_data(constr->lincsd);
+ else
+ return NULL;
+}
+
+real constr_rmsd(struct gmx_constr *constr,gmx_bool bSD2)
+{
+ if (constr->lincsd)
+ return lincs_rmsd(constr->lincsd,bSD2);
+ else
+ return 0;
+}
+
+static void make_shake_sblock_pd(struct gmx_constr *constr,
+ t_idef *idef,t_mdatoms *md)
+{
+ int i,j,m,ncons;
+ int bstart,bnr;
+ t_blocka sblocks;
+ t_sortblock *sb;
+ t_iatom *iatom;
+ atom_id *inv_sblock;
+
+ /* Since we are processing the local topology,
+ * the F_CONSTRNC ilist has been concatenated to the F_CONSTR ilist.
+ */
+ ncons = idef->il[F_CONSTR].nr/3;
+
+ init_blocka(&sblocks);
+ gen_sblocks(NULL,md->start,md->start+md->homenr,idef,&sblocks,FALSE);
+
+ /*
+ bstart=(idef->nodeid > 0) ? blocks->multinr[idef->nodeid-1] : 0;
+ nblocks=blocks->multinr[idef->nodeid] - bstart;
+ */
+ bstart = 0;
+ constr->nblocks = sblocks.nr;
+ if (debug)
+ fprintf(debug,"ncons: %d, bstart: %d, nblocks: %d\n",
+ ncons,bstart,constr->nblocks);
+
+ /* Calculate block number for each atom */
+ inv_sblock = make_invblocka(&sblocks,md->nr);
+
+ done_blocka(&sblocks);
+
+ /* Store the block number in temp array and
+ * sort the constraints in order of the sblock number
+ * and the atom numbers, really sorting a segment of the array!
+ */
+#ifdef DEBUGIDEF
+ pr_idef(fplog,0,"Before Sort",idef);
+#endif
+ iatom=idef->il[F_CONSTR].iatoms;
+ snew(sb,ncons);
+ for(i=0; (i<ncons); i++,iatom+=3) {
+ for(m=0; (m<3); m++)
+ sb[i].iatom[m] = iatom[m];
+ sb[i].blocknr = inv_sblock[iatom[1]];
+ }
+
+ /* Now sort the blocks */
+ if (debug) {
+ pr_sortblock(debug,"Before sorting",ncons,sb);
+ fprintf(debug,"Going to sort constraints\n");
+ }
+
+ qsort(sb,ncons,(size_t)sizeof(*sb),pcomp);
+
+ if (debug) {
+ pr_sortblock(debug,"After sorting",ncons,sb);
+ }
+
+ iatom=idef->il[F_CONSTR].iatoms;
+ for(i=0; (i<ncons); i++,iatom+=3)
+ for(m=0; (m<3); m++)
+ iatom[m]=sb[i].iatom[m];
+#ifdef DEBUGIDEF
+ pr_idef(fplog,0,"After Sort",idef);
+#endif
+
+ j=0;
+ snew(constr->sblock,constr->nblocks+1);
+ bnr=-2;
+ for(i=0; (i<ncons); i++) {
+ if (sb[i].blocknr != bnr) {
+ bnr=sb[i].blocknr;
+ constr->sblock[j++]=3*i;
+ }
+ }
+ /* Last block... */
+ constr->sblock[j++] = 3*ncons;
+
+ if (j != (constr->nblocks+1)) {
+ fprintf(stderr,"bstart: %d\n",bstart);
+ fprintf(stderr,"j: %d, nblocks: %d, ncons: %d\n",
+ j,constr->nblocks,ncons);
+ for(i=0; (i<ncons); i++)
+ fprintf(stderr,"i: %5d sb[i].blocknr: %5u\n",i,sb[i].blocknr);
+ for(j=0; (j<=constr->nblocks); j++)
+ fprintf(stderr,"sblock[%3d]=%5d\n",j,(int)constr->sblock[j]);
+ gmx_fatal(FARGS,"DEATH HORROR: "
+ "sblocks does not match idef->il[F_CONSTR]");
+ }
+ sfree(sb);
+ sfree(inv_sblock);
+}
+
+static void make_shake_sblock_dd(struct gmx_constr *constr,
+ t_ilist *ilcon,t_block *cgs,
+ gmx_domdec_t *dd)
+{
+ int ncons,c,cg;
+ t_iatom *iatom;
+
+ if (dd->ncg_home+1 > constr->sblock_nalloc) {
+ constr->sblock_nalloc = over_alloc_dd(dd->ncg_home+1);
+ srenew(constr->sblock,constr->sblock_nalloc);
+ }
+
+ ncons = ilcon->nr/3;
+ iatom = ilcon->iatoms;
+ constr->nblocks = 0;
+ cg = 0;
+ for(c=0; c<ncons; c++) {
+ if (c == 0 || iatom[1] >= cgs->index[cg+1]) {
+ constr->sblock[constr->nblocks++] = 3*c;
+ while (iatom[1] >= cgs->index[cg+1])
+ cg++;
+ }
+ iatom += 3;
+ }
+ constr->sblock[constr->nblocks] = 3*ncons;
+}
+
+t_blocka make_at2con(int start,int natoms,
+ t_ilist *ilist,t_iparams *iparams,
+ gmx_bool bDynamics,int *nflexiblecons)
+{
+ int *count,ncon,con,con_tot,nflexcon,ftype,i,a;
+ t_iatom *ia;
+ t_blocka at2con;
+ gmx_bool bFlexCon;
+
+ snew(count,natoms);
+ nflexcon = 0;
+ for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
+ ncon = ilist[ftype].nr/3;
+ ia = ilist[ftype].iatoms;
+ for(con=0; con<ncon; con++) {
+ bFlexCon = (iparams[ia[0]].constr.dA == 0 &&
+ iparams[ia[0]].constr.dB == 0);
+ if (bFlexCon)
+ nflexcon++;
+ if (bDynamics || !bFlexCon) {
+ for(i=1; i<3; i++) {
+ a = ia[i] - start;
+ count[a]++;
+ }
+ }
+ ia += 3;
+ }
+ }
+ *nflexiblecons = nflexcon;
+
+ at2con.nr = natoms;
+ at2con.nalloc_index = at2con.nr+1;
+ snew(at2con.index,at2con.nalloc_index);
+ at2con.index[0] = 0;
+ for(a=0; a<natoms; a++) {
+ at2con.index[a+1] = at2con.index[a] + count[a];
+ count[a] = 0;
+ }
+ at2con.nra = at2con.index[natoms];
+ at2con.nalloc_a = at2con.nra;
+ snew(at2con.a,at2con.nalloc_a);
+
+ /* The F_CONSTRNC constraints have constraint numbers
+ * that continue after the last F_CONSTR constraint.
+ */
+ con_tot = 0;
+ for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
+ ncon = ilist[ftype].nr/3;
+ ia = ilist[ftype].iatoms;
+ for(con=0; con<ncon; con++) {
+ bFlexCon = (iparams[ia[0]].constr.dA == 0 &&
+ iparams[ia[0]].constr.dB == 0);
+ if (bDynamics || !bFlexCon) {
+ for(i=1; i<3; i++) {
+ a = ia[i] - start;
+ at2con.a[at2con.index[a]+count[a]++] = con_tot;
+ }
+ }
+ con_tot++;
+ ia += 3;
+ }
+ }
+
+ sfree(count);
+
+ return at2con;
+}
+
++static int *make_at2settle(int natoms,const t_ilist *ilist)
++{
++ int *at2s;
++ int a,stride,s;
++
++ snew(at2s,natoms);
++ /* Set all to no settle */
++ for(a=0; a<natoms; a++)
++ {
++ at2s[a] = -1;
++ }
++
++ stride = 1 + NRAL(F_SETTLE);
++
++ for(s=0; s<ilist->nr; s+=stride)
++ {
++ at2s[ilist->iatoms[s+1]] = s/stride;
++ at2s[ilist->iatoms[s+2]] = s/stride;
++ at2s[ilist->iatoms[s+3]] = s/stride;
++ }
++
++ return at2s;
++}
++
+void set_constraints(struct gmx_constr *constr,
+ gmx_localtop_t *top,t_inputrec *ir,
+ t_mdatoms *md,t_commrec *cr)
+{
+ t_idef *idef;
+ int ncons;
+ t_ilist *settle;
+ int iO,iH;
+
+ idef = &top->idef;
+
+ if (constr->ncon_tot > 0)
+ {
+ /* We are using the local topology,
+ * so there are only F_CONSTR constraints.
+ */
+ ncons = idef->il[F_CONSTR].nr/3;
+
+ /* With DD we might also need to call LINCS with ncons=0 for
+ * communicating coordinates to other nodes that do have constraints.
+ */
+ if (ir->eConstrAlg == econtLINCS)
+ {
+ set_lincs(idef,md,EI_DYNAMICS(ir->eI),cr,constr->lincsd);
+ }
+ if (ir->eConstrAlg == econtSHAKE)
+ {
+ if (cr->dd)
+ {
+ make_shake_sblock_dd(constr,&idef->il[F_CONSTR],&top->cgs,cr->dd);
+ }
+ else
+ {
+ make_shake_sblock_pd(constr,idef,md);
+ }
+ if (ncons > constr->lagr_nalloc)
+ {
+ constr->lagr_nalloc = over_alloc_dd(ncons);
+ srenew(constr->lagr,constr->lagr_nalloc);
+ }
+ }
+ }
+
+ if (idef->il[F_SETTLE].nr > 0 && constr->settled == NULL)
+ {
+ settle = &idef->il[F_SETTLE];
+ iO = settle->iatoms[1];
+ iH = settle->iatoms[2];
+ constr->settled =
+ settle_init(md->massT[iO],md->massT[iH],
+ md->invmass[iO],md->invmass[iH],
+ idef->iparams[settle->iatoms[0]].settle.doh,
+ idef->iparams[settle->iatoms[0]].settle.dhh);
+ }
+
+ /* Make a selection of the local atoms for essential dynamics */
+ if (constr->ed && cr->dd)
+ {
+ dd_make_local_ed_indices(cr->dd,constr->ed);
+ }
+}
+
+static void constr_recur(t_blocka *at2con,
+ t_ilist *ilist,t_iparams *iparams,gmx_bool bTopB,
+ int at,int depth,int nc,int *path,
+ real r0,real r1,real *r2max,
+ int *count)
+{
+ int ncon1;
+ t_iatom *ia1,*ia2;
+ int c,con,a1;
+ gmx_bool bUse;
+ t_iatom *ia;
+ real len,rn0,rn1;
+
+ (*count)++;
+
+ ncon1 = ilist[F_CONSTR].nr/3;
+ ia1 = ilist[F_CONSTR].iatoms;
+ ia2 = ilist[F_CONSTRNC].iatoms;
+
+ /* Loop over all constraints connected to this atom */
+ for(c=at2con->index[at]; c<at2con->index[at+1]; c++) {
+ con = at2con->a[c];
+ /* Do not walk over already used constraints */
+ bUse = TRUE;
+ for(a1=0; a1<depth; a1++) {
+ if (con == path[a1])
+ bUse = FALSE;
+ }
+ if (bUse) {
+ ia = constr_iatomptr(ncon1,ia1,ia2,con);
+ /* Flexible constraints currently have length 0, which is incorrect */
+ if (!bTopB)
+ len = iparams[ia[0]].constr.dA;
+ else
+ len = iparams[ia[0]].constr.dB;
+ /* In the worst case the bond directions alternate */
+ if (nc % 2 == 0) {
+ rn0 = r0 + len;
+ rn1 = r1;
+ } else {
+ rn0 = r0;
+ rn1 = r1 + len;
+ }
+ /* Assume angles of 120 degrees between all bonds */
+ if (rn0*rn0 + rn1*rn1 + rn0*rn1 > *r2max) {
+ *r2max = rn0*rn0 + rn1*rn1 + r0*rn1;
+ if (debug) {
+ fprintf(debug,"Found longer constraint distance: r0 %5.3f r1 %5.3f rmax %5.3f\n", rn0,rn1,sqrt(*r2max));
+ for(a1=0; a1<depth; a1++)
+ fprintf(debug," %d %5.3f",
+ path[a1],
+ iparams[constr_iatomptr(ncon1,ia1,ia2,con)[0]].constr.dA);
+ fprintf(debug," %d %5.3f\n",con,len);
+ }
+ }
+ /* Limit the number of recursions to 1000*nc,
+ * so a call does not take more than a second,
+ * even for highly connected systems.
+ */
+ if (depth + 1 < nc && *count < 1000*nc) {
+ if (ia[1] == at)
+ a1 = ia[2];
+ else
+ a1 = ia[1];
+ /* Recursion */
+ path[depth] = con;
+ constr_recur(at2con,ilist,iparams,
+ bTopB,a1,depth+1,nc,path,rn0,rn1,r2max,count);
+ path[depth] = -1;
+ }
+ }
+ }
+}
+
+static real constr_r_max_moltype(FILE *fplog,
+ gmx_moltype_t *molt,t_iparams *iparams,
+ t_inputrec *ir)
+{
+ int natoms,nflexcon,*path,at,count;
+
+ t_blocka at2con;
+ real r0,r1,r2maxA,r2maxB,rmax,lam0,lam1;
+
+ if (molt->ilist[F_CONSTR].nr == 0 &&
+ molt->ilist[F_CONSTRNC].nr == 0) {
+ return 0;
+ }
+
+ natoms = molt->atoms.nr;
+
+ at2con = make_at2con(0,natoms,molt->ilist,iparams,
+ EI_DYNAMICS(ir->eI),&nflexcon);
+ snew(path,1+ir->nProjOrder);
+ for(at=0; at<1+ir->nProjOrder; at++)
+ path[at] = -1;
+
+ r2maxA = 0;
+ for(at=0; at<natoms; at++) {
+ r0 = 0;
+ r1 = 0;
+
+ count = 0;
+ constr_recur(&at2con,molt->ilist,iparams,
+ FALSE,at,0,1+ir->nProjOrder,path,r0,r1,&r2maxA,&count);
+ }
+ if (ir->efep == efepNO) {
+ rmax = sqrt(r2maxA);
+ } else {
+ r2maxB = 0;
+ for(at=0; at<natoms; at++) {
+ r0 = 0;
+ r1 = 0;
+ count = 0;
+ constr_recur(&at2con,molt->ilist,iparams,
+ TRUE,at,0,1+ir->nProjOrder,path,r0,r1,&r2maxB,&count);
+ }
+ lam0 = ir->fepvals->init_lambda;
+ if (EI_DYNAMICS(ir->eI))
+ lam0 += ir->init_step*ir->fepvals->delta_lambda;
+ rmax = (1 - lam0)*sqrt(r2maxA) + lam0*sqrt(r2maxB);
+ if (EI_DYNAMICS(ir->eI)) {
+ lam1 = ir->fepvals->init_lambda + (ir->init_step + ir->nsteps)*ir->fepvals->delta_lambda;
+ rmax = max(rmax,(1 - lam1)*sqrt(r2maxA) + lam1*sqrt(r2maxB));
+ }
+ }
+
+ done_blocka(&at2con);
+ sfree(path);
+
+ return rmax;
+}
+
+real constr_r_max(FILE *fplog,gmx_mtop_t *mtop,t_inputrec *ir)
+{
+ int mt;
+ real rmax;
+
+ rmax = 0;
+ for(mt=0; mt<mtop->nmoltype; mt++) {
+ rmax = max(rmax,
+ constr_r_max_moltype(fplog,&mtop->moltype[mt],
+ mtop->ffparams.iparams,ir));
+ }
+
+ if (fplog)
+ fprintf(fplog,"Maximum distance for %d constraints, at 120 deg. angles, all-trans: %.3f nm\n",1+ir->nProjOrder,rmax);
+
+ return rmax;
+}
+
+gmx_constr_t init_constraints(FILE *fplog,
+ gmx_mtop_t *mtop,t_inputrec *ir,
+ gmx_edsam_t ed,t_state *state,
+ t_commrec *cr)
+{
+ int ncon,nset,nmol,settle_type,i,natoms,mt,nflexcon;
+ struct gmx_constr *constr;
+ char *env;
+ t_ilist *ilist;
+ gmx_mtop_ilistloop_t iloop;
+
+ ncon =
+ gmx_mtop_ftype_count(mtop,F_CONSTR) +
+ gmx_mtop_ftype_count(mtop,F_CONSTRNC);
+ nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
+
+ if (ncon+nset == 0 && ir->ePull != epullCONSTRAINT && ed == NULL)
+ {
+ return NULL;
+ }
+
+ snew(constr,1);
+
+ constr->ncon_tot = ncon;
+ constr->nflexcon = 0;
+ if (ncon > 0)
+ {
+ constr->n_at2con_mt = mtop->nmoltype;
+ snew(constr->at2con_mt,constr->n_at2con_mt);
+ for(mt=0; mt<mtop->nmoltype; mt++)
+ {
+ constr->at2con_mt[mt] = make_at2con(0,mtop->moltype[mt].atoms.nr,
+ mtop->moltype[mt].ilist,
+ mtop->ffparams.iparams,
+ EI_DYNAMICS(ir->eI),&nflexcon);
+ for(i=0; i<mtop->nmolblock; i++)
+ {
+ if (mtop->molblock[i].type == mt)
+ {
+ constr->nflexcon += mtop->molblock[i].nmol*nflexcon;
+ }
+ }
+ }
+
+ if (constr->nflexcon > 0)
+ {
+ if (fplog)
+ {
+ fprintf(fplog,"There are %d flexible constraints\n",
+ constr->nflexcon);
+ if (ir->fc_stepsize == 0)
+ {
+ fprintf(fplog,"\n"
+ "WARNING: step size for flexible constraining = 0\n"
+ " All flexible constraints will be rigid.\n"
+ " Will try to keep all flexible constraints at their original length,\n"
+ " but the lengths may exhibit some drift.\n\n");
+ constr->nflexcon = 0;
+ }
+ }
+ if (constr->nflexcon > 0)
+ {
+ please_cite(fplog,"Hess2002");
+ }
+ }
+
+ if (ir->eConstrAlg == econtLINCS)
+ {
+ constr->lincsd = init_lincs(fplog,mtop,
+ constr->nflexcon,constr->at2con_mt,
+ DOMAINDECOMP(cr) && cr->dd->bInterCGcons,
+ ir->nLincsIter,ir->nProjOrder);
+ }
+
+ if (ir->eConstrAlg == econtSHAKE) {
+ if (DOMAINDECOMP(cr) && cr->dd->bInterCGcons)
+ {
+ gmx_fatal(FARGS,"SHAKE is not supported with domain decomposition and constraint that cross charge group boundaries, use LINCS");
+ }
+ if (constr->nflexcon)
+ {
+ gmx_fatal(FARGS,"For this system also velocities and/or forces need to be constrained, this can not be done with SHAKE, you should select LINCS");
+ }
+ please_cite(fplog,"Ryckaert77a");
+ if (ir->bShakeSOR)
+ {
+ please_cite(fplog,"Barth95a");
+ }
+
+ constr->shaked = shake_init();
+ }
+ }
+
+ if (nset > 0) {
+ please_cite(fplog,"Miyamoto92a");
- t_blocka *atom2constraints_moltype(gmx_constr_t constr)
++
++ constr->bInterCGsettles = inter_charge_group_settles(mtop);
++
+ /* Check that we have only one settle type */
+ settle_type = -1;
+ iloop = gmx_mtop_ilistloop_init(mtop);
+ while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol))
+ {
+ for (i=0; i<ilist[F_SETTLE].nr; i+=4)
+ {
+ if (settle_type == -1)
+ {
+ settle_type = ilist[F_SETTLE].iatoms[i];
+ }
+ else if (ilist[F_SETTLE].iatoms[i] != settle_type)
+ {
+ gmx_fatal(FARGS,
+ "The [molecules] section of your topology specifies more than one block of\n"
+ "a [moleculetype] with a [settles] block. Only one such is allowed. If you\n"
+ "are trying to partition your solvent into different *groups* (e.g. for\n"
+ "freezing, T-coupling, etc.) then you are using the wrong approach. Index\n"
+ "files specify groups. Otherwise, you may wish to change the least-used\n"
+ "block of molecules with SETTLE constraints into 3 normal constraints.");
+ }
+ }
+ }
++
++ constr->n_at2settle_mt = mtop->nmoltype;
++ snew(constr->at2settle_mt,constr->n_at2settle_mt);
++ for(mt=0; mt<mtop->nmoltype; mt++)
++ {
++ constr->at2settle_mt[mt] =
++ make_at2settle(mtop->moltype[mt].atoms.nr,
++ &mtop->moltype[mt].ilist[F_SETTLE]);
++ }
+ }
+
+ constr->maxwarn = 999;
+ env = getenv("GMX_MAXCONSTRWARN");
+ if (env)
+ {
+ constr->maxwarn = 0;
+ sscanf(env,"%d",&constr->maxwarn);
+ if (fplog)
+ {
+ fprintf(fplog,
+ "Setting the maximum number of constraint warnings to %d\n",
+ constr->maxwarn);
+ }
+ if (MASTER(cr))
+ {
+ fprintf(stderr,
+ "Setting the maximum number of constraint warnings to %d\n",
+ constr->maxwarn);
+ }
+ }
+ if (constr->maxwarn < 0 && fplog)
+ {
+ fprintf(fplog,"maxwarn < 0, will not stop on constraint errors\n");
+ }
+ constr->warncount_lincs = 0;
+ constr->warncount_settle = 0;
+
+ /* Initialize the essential dynamics sampling.
+ * Put the pointer to the ED struct in constr */
+ constr->ed = ed;
+ if (ed != NULL)
+ {
+ init_edsam(mtop,ir,cr,ed,state->x,state->box);
+ }
+
+ constr->warn_mtop = mtop;
+
+ return constr;
+}
+
- gmx_bool inter_charge_group_constraints(gmx_mtop_t *mtop)
++const t_blocka *atom2constraints_moltype(gmx_constr_t constr)
+{
+ return constr->at2con_mt;
+}
+
++const int **atom2settle_moltype(gmx_constr_t constr)
++{
++ return (const int **)constr->at2settle_mt;
++}
++
+
- const gmx_moltype_t *molt;
- const t_block *cgs;
- const t_ilist *il;
- int mb;
- int nat,*at2cg,cg,a,ftype,i;
- gmx_bool bInterCG;
-
- bInterCG = FALSE;
- for(mb=0; mb<mtop->nmolblock && !bInterCG; mb++) {
- molt = &mtop->moltype[mtop->molblock[mb].type];
-
- if (molt->ilist[F_CONSTR].nr > 0 ||
- molt->ilist[F_CONSTRNC].nr > 0) {
- cgs = &molt->cgs;
- snew(at2cg,molt->atoms.nr);
- for(cg=0; cg<cgs->nr; cg++) {
- for(a=cgs->index[cg]; a<cgs->index[cg+1]; a++)
- at2cg[a] = cg;
- }
-
- for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
- il = &molt->ilist[ftype];
- for(i=0; i<il->nr && !bInterCG; i+=3) {
- if (at2cg[il->iatoms[i+1]] != at2cg[il->iatoms[i+2]])
- bInterCG = TRUE;
- }
- }
- sfree(at2cg);
++gmx_bool inter_charge_group_constraints(const gmx_mtop_t *mtop)
+{
- }
++ const gmx_moltype_t *molt;
++ const t_block *cgs;
++ const t_ilist *il;
++ int mb;
++ int nat,*at2cg,cg,a,ftype,i;
++ gmx_bool bInterCG;
++
++ bInterCG = FALSE;
++ for(mb=0; mb<mtop->nmolblock && !bInterCG; mb++)
++ {
++ molt = &mtop->moltype[mtop->molblock[mb].type];
++
++ if (molt->ilist[F_CONSTR].nr > 0 ||
++ molt->ilist[F_CONSTRNC].nr > 0 ||
++ molt->ilist[F_SETTLE].nr > 0)
++ {
++ cgs = &molt->cgs;
++ snew(at2cg,molt->atoms.nr);
++ for(cg=0; cg<cgs->nr; cg++)
++ {
++ for(a=cgs->index[cg]; a<cgs->index[cg+1]; a++)
++ at2cg[a] = cg;
++ }
++
++ for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++)
++ {
++ il = &molt->ilist[ftype];
++ for(i=0; i<il->nr && !bInterCG; i+=1+NRAL(ftype))
++ {
++ if (at2cg[il->iatoms[i+1]] != at2cg[il->iatoms[i+2]])
++ {
++ bInterCG = TRUE;
++ }
++ }
++ }
++
++ sfree(at2cg);
++ }
++ }
++
++ return bInterCG;
++}
++
++gmx_bool inter_charge_group_settles(const gmx_mtop_t *mtop)
++{
++ const gmx_moltype_t *molt;
++ const t_block *cgs;
++ const t_ilist *il;
++ int mb;
++ int nat,*at2cg,cg,a,ftype,i;
++ gmx_bool bInterCG;
++
++ bInterCG = FALSE;
++ for(mb=0; mb<mtop->nmolblock && !bInterCG; mb++)
++ {
++ molt = &mtop->moltype[mtop->molblock[mb].type];
++
++ if (molt->ilist[F_SETTLE].nr > 0)
++ {
++ cgs = &molt->cgs;
++ snew(at2cg,molt->atoms.nr);
++ for(cg=0; cg<cgs->nr; cg++)
++ {
++ for(a=cgs->index[cg]; a<cgs->index[cg+1]; a++)
++ at2cg[a] = cg;
++ }
++
++ for(ftype=F_SETTLE; ftype<=F_SETTLE; ftype++)
++ {
++ il = &molt->ilist[ftype];
++ for(i=0; i<il->nr && !bInterCG; i+=1+NRAL(F_SETTLE))
++ {
++ if (at2cg[il->iatoms[i+1]] != at2cg[il->iatoms[i+2]] ||
++ at2cg[il->iatoms[i+1]] != at2cg[il->iatoms[i+3]])
++ {
++ bInterCG = TRUE;
++ }
++ }
++ }
++
++ sfree(at2cg);
++ }
+ }
- return bInterCG;
+
++ return bInterCG;
+}
+
+/* helper functions for andersen temperature control, because the
+ * gmx_constr construct is only defined in constr.c. Return the list
+ * of blocks (get_sblock) and the number of blocks (get_nblocks). */
+
+extern int *get_sblock(struct gmx_constr *constr)
+{
+ return constr->sblock;
+}
+
+extern int get_nblocks(struct gmx_constr *constr)
+{
+ return constr->nblocks;
+}
--- /dev/null
- gmx_cgsort_t *sort1,*sort2;
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This file is part of Gromacs Copyright (c) 1991-2008
+ * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <time.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include "typedefs.h"
+#include "smalloc.h"
++#include "gmx_fatal.h"
++#include "gmx_fatal_collective.h"
+#include "vec.h"
+#include "domdec.h"
+#include "domdec_network.h"
+#include "nrnb.h"
+#include "pbc.h"
+#include "chargegroup.h"
+#include "constr.h"
+#include "mdatoms.h"
+#include "names.h"
+#include "pdbio.h"
+#include "futil.h"
+#include "force.h"
+#include "pme.h"
+#include "pull.h"
+#include "pull_rotation.h"
+#include "gmx_wallcycle.h"
+#include "mdrun.h"
+#include "nsgrid.h"
+#include "shellfc.h"
+#include "mtop_util.h"
+#include "gmxfio.h"
+#include "gmx_ga2la.h"
+#include "gmx_sort.h"
+#include "macros.h"
++#include "nbnxn_search.h"
++#include "bondf.h"
++#include "gmx_omp_nthreads.h"
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#define DDRANK(dd,rank) (rank)
+#define DDMASTERRANK(dd) (dd->masterrank)
+
+typedef struct gmx_domdec_master
+{
+ /* The cell boundaries */
+ real **cell_x;
+ /* The global charge group division */
+ int *ncg; /* Number of home charge groups for each node */
+ int *index; /* Index of nnodes+1 into cg */
+ int *cg; /* Global charge group index */
+ int *nat; /* Number of home atoms for each node. */
+ int *ibuf; /* Buffer for communication */
+ rvec *vbuf; /* Buffer for state scattering and gathering */
+} gmx_domdec_master_t;
+
+typedef struct
+{
+ /* The numbers of charge groups to send and receive for each cell
+ * that requires communication, the last entry contains the total
+ * number of atoms that needs to be communicated.
+ */
+ int nsend[DD_MAXIZONE+2];
+ int nrecv[DD_MAXIZONE+2];
+ /* The charge groups to send */
+ int *index;
+ int nalloc;
+ /* The atom range for non-in-place communication */
+ int cell2at0[DD_MAXIZONE];
+ int cell2at1[DD_MAXIZONE];
+} gmx_domdec_ind_t;
+
+typedef struct
+{
+ int np; /* Number of grid pulses in this dimension */
+ int np_dlb; /* For dlb, for use with edlbAUTO */
+ gmx_domdec_ind_t *ind; /* The indices to communicate, size np */
+ int np_nalloc;
+ gmx_bool bInPlace; /* Can we communicate in place? */
+} gmx_domdec_comm_dim_t;
+
+typedef struct
+{
+ gmx_bool *bCellMin; /* Temp. var.: is this cell size at the limit */
+ real *cell_f; /* State var.: cell boundaries, box relative */
+ real *old_cell_f; /* Temp. var.: old cell size */
+ real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
+ real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
+ real *bound_min; /* Temp. var.: lower limit for cell boundary */
+ real *bound_max; /* Temp. var.: upper limit for cell boundary */
+ gmx_bool bLimited; /* State var.: is DLB limited in this dim and row */
+ real *buf_ncd; /* Temp. var. */
+} gmx_domdec_root_t;
+
+#define DD_NLOAD_MAX 9
+
+/* Here floats are accurate enough, since these variables
+ * only influence the load balancing, not the actual MD results.
+ */
+typedef struct
+{
+ int nload;
+ float *load;
+ float sum;
+ float max;
+ float sum_m;
+ float cvol_min;
+ float mdf;
+ float pme;
+ int flags;
+} gmx_domdec_load_t;
+
+typedef struct
+{
+ int nsc;
+ int ind_gl;
+ int ind;
+} gmx_cgsort_t;
+
+typedef struct
+{
- /* Communication buffer for general use */
++ gmx_cgsort_t *sort;
++ gmx_cgsort_t *sort2;
+ int sort_nalloc;
+ gmx_cgsort_t *sort_new;
+ int sort_new_nalloc;
+ int *ibuf;
+ int ibuf_nalloc;
+} gmx_domdec_sort_t;
+
+typedef struct
+{
+ rvec *v;
+ int nalloc;
+} vec_rvec_t;
+
+/* This enum determines the order of the coordinates.
+ * ddnatHOME and ddnatZONE should be first and second,
+ * the others can be ordered as wanted.
+ */
+enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
+
+enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
+const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
+
+typedef struct
+{
+ int dim; /* The dimension */
+ gmx_bool dim_match;/* Tells if DD and PME dims match */
+ int nslab; /* The number of PME slabs in this dimension */
+ real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB */
+ int *pp_min; /* The minimum pp node location, size nslab */
+ int *pp_max; /* The maximum pp node location,size nslab */
+ int maxshift; /* The maximum shift for coordinate redistribution in PME */
+} gmx_ddpme_t;
+
+typedef struct
+{
+ real min0; /* The minimum bottom of this zone */
+ real max1; /* The maximum top of this zone */
++ real min1; /* The minimum top of this zone */
+ real mch0; /* The maximum bottom communicaton height for this zone */
+ real mch1; /* The maximum top communicaton height for this zone */
+ real p1_0; /* The bottom value of the first cell in this zone */
+ real p1_1; /* The top value of the first cell in this zone */
+} gmx_ddzone_t;
+
++typedef struct
++{
++ gmx_domdec_ind_t ind;
++ int *ibuf;
++ int ibuf_nalloc;
++ vec_rvec_t vbuf;
++ int nsend;
++ int nat;
++ int nsend_zone;
++} dd_comm_setup_work_t;
++
+typedef struct gmx_domdec_comm
+{
+ /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
+ * unless stated otherwise.
+ */
+
+ /* The number of decomposition dimensions for PME, 0: no PME */
+ int npmedecompdim;
+ /* The number of nodes doing PME (PP/PME or only PME) */
+ int npmenodes;
+ int npmenodes_x;
+ int npmenodes_y;
+ /* The communication setup including the PME only nodes */
+ gmx_bool bCartesianPP_PME;
+ ivec ntot;
+ int cartpmedim;
+ int *pmenodes; /* size npmenodes */
+ int *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
+ * but with bCartesianPP_PME */
+ gmx_ddpme_t ddpme[2];
+
+ /* The DD particle-particle nodes only */
+ gmx_bool bCartesianPP;
+ int *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
+
+ /* The global charge groups */
+ t_block cgs_gl;
+
+ /* Should we sort the cgs */
+ int nstSortCG;
+ gmx_domdec_sort_t *sort;
+
++ /* Are there charge groups? */
++ gmx_bool bCGs;
++
+ /* Are there bonded and multi-body interactions between charge groups? */
+ gmx_bool bInterCGBondeds;
+ gmx_bool bInterCGMultiBody;
+
+ /* Data for the optional bonded interaction atom communication range */
+ gmx_bool bBondComm;
+ t_blocka *cglink;
+ char *bLocalCG;
+
+ /* The DLB option */
+ int eDLB;
+ /* Are we actually using DLB? */
+ gmx_bool bDynLoadBal;
+
+ /* Cell sizes for static load balancing, first index cartesian */
+ real **slb_frac;
+
+ /* The width of the communicated boundaries */
+ real cutoff_mbody;
+ real cutoff;
+ /* The minimum cell size (including triclinic correction) */
+ rvec cellsize_min;
+ /* For dlb, for use with edlbAUTO */
+ rvec cellsize_min_dlb;
+ /* The lower limit for the DD cell size with DLB */
+ real cellsize_limit;
+ /* Effectively no NB cut-off limit with DLB for systems without PBC? */
+ gmx_bool bVacDLBNoLimit;
+
+ /* tric_dir is only stored here because dd_get_ns_ranges needs it */
+ ivec tric_dir;
+ /* box0 and box_size are required with dim's without pbc and -gcom */
+ rvec box0;
+ rvec box_size;
+
+ /* The cell boundaries */
+ rvec cell_x0;
+ rvec cell_x1;
+
+ /* The old location of the cell boundaries, to check cg displacements */
+ rvec old_cell_x0;
+ rvec old_cell_x1;
+
+ /* The communication setup and charge group boundaries for the zones */
+ gmx_domdec_zones_t zones;
+
+ /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
+ * cell boundaries of neighboring cells for dynamic load balancing.
+ */
+ gmx_ddzone_t zone_d1[2];
+ gmx_ddzone_t zone_d2[2][2];
+
+ /* The coordinate/force communication setup and indices */
+ gmx_domdec_comm_dim_t cd[DIM];
+ /* The maximum number of cells to communicate with in one dimension */
+ int maxpulse;
+
+ /* Which cg distribution is stored on the master node */
+ int master_cg_ddp_count;
+
+ /* The number of cg's received from the direct neighbors */
+ int zone_ncg1[DD_MAXZONE];
+
+ /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
+ int nat[ddnatNR];
++
++ /* Array for signalling if atoms have moved to another domain */
++ int *moved;
++ int moved_nalloc;
+
+ /* Communication buffer for general use */
+ int *buf_int;
+ int nalloc_int;
+
-
++ /* Communication buffer for general use */
+ vec_rvec_t vbuf;
- gmx_large_int_t globalcomm_step;
++
++ /* Temporary storage for thread parallel communication setup */
++ int nth;
++ dd_comm_setup_work_t *dth;
++
+ /* Communication buffers only used with multiple grid pulses */
+ int *buf_int2;
+ int nalloc_int2;
+ vec_rvec_t vbuf2;
+
+ /* Communication buffers for local redistribution */
+ int **cggl_flag;
+ int cggl_flag_nalloc[DIM*2];
+ rvec **cgcm_state;
+ int cgcm_state_nalloc[DIM*2];
+
+ /* Cell sizes for dynamic load balancing */
+ gmx_domdec_root_t **root;
+ real *cell_f_row;
+ real cell_f0[DIM];
+ real cell_f1[DIM];
+ real cell_f_max0[DIM];
+ real cell_f_min1[DIM];
+
+ /* Stuff for load communication */
+ gmx_bool bRecordLoad;
+ gmx_domdec_load_t *load;
+#ifdef GMX_MPI
+ MPI_Comm *mpi_comm_load;
+#endif
+
+ /* Maximum DLB scaling per load balancing step in percent */
+ int dlb_scale_lim;
+
+ /* Cycle counters */
+ float cycl[ddCyclNr];
+ int cycl_n[ddCyclNr];
+ float cycl_max[ddCyclNr];
+ /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
+ int eFlop;
+ double flop;
+ int flop_n;
+ /* Have often have did we have load measurements */
+ int n_load_have;
+ /* Have often have we collected the load measurements */
+ int n_load_collect;
+
+ /* Statistics */
+ double sum_nat[ddnatNR-ddnatZONE];
+ int ndecomp;
+ int nload;
+ double load_step;
+ double load_sum;
+ double load_max;
+ ivec load_lim;
+ double load_mdf;
+ double load_pme;
+
+ /* The last partition step */
- rvec vbuf_s[5*2],vbuf_r[5*2];
++ gmx_large_int_t partition_step;
+
+ /* Debugging */
+ int nstDDDump;
+ int nstDDDumpGrid;
+ int DD_debug;
+} gmx_domdec_comm_t;
+
+/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
+#define DD_CGIBS 2
+
+/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
+#define DD_FLAG_NRCG 65535
+#define DD_FLAG_FW(d) (1<<(16+(d)*2))
+#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
+
+/* Zone permutation required to obtain consecutive charge groups
+ * for neighbor searching.
+ */
+static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
+
+/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
+ * components see only j zones with that component 0.
+ */
+
+/* The DD zone order */
+static const ivec dd_zo[DD_MAXZONE] =
+ {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
+
+/* The 3D setup */
+#define dd_z3n 8
+#define dd_zp3n 4
+static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
+
+/* The 2D setup */
+#define dd_z2n 4
+#define dd_zp2n 2
+static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
+
+/* The 1D setup */
+#define dd_z1n 2
+#define dd_zp1n 1
+static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
+
+/* Factors used to avoid problems due to rounding issues */
+#define DD_CELL_MARGIN 1.0001
+#define DD_CELL_MARGIN2 1.00005
+/* Factor to account for pressure scaling during nstlist steps */
+#define DD_PRES_SCALE_MARGIN 1.02
+
+/* Allowed performance loss before we DLB or warn */
+#define DD_PERF_LOSS 0.05
+
+#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
+
+/* Use separate MPI send and receive commands
+ * when nnodes <= GMX_DD_NNODES_SENDRECV.
+ * This saves memory (and some copying for small nnodes).
+ * For high parallelization scatter and gather calls are used.
+ */
+#define GMX_DD_NNODES_SENDRECV 4
+
+
+/*
+#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
+
+static void index2xyz(ivec nc,int ind,ivec xyz)
+{
+ xyz[XX] = ind % nc[XX];
+ xyz[YY] = (ind / nc[XX]) % nc[YY];
+ xyz[ZZ] = ind / (nc[YY]*nc[XX]);
+}
+*/
+
+/* This order is required to minimize the coordinate communication in PME
+ * which uses decomposition in the x direction.
+ */
+#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
+
+static void ddindex2xyz(ivec nc,int ind,ivec xyz)
+{
+ xyz[XX] = ind / (nc[YY]*nc[ZZ]);
+ xyz[YY] = (ind / nc[ZZ]) % nc[YY];
+ xyz[ZZ] = ind % nc[ZZ];
+}
+
+static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
+{
+ int ddindex;
+ int ddnodeid=-1;
+
+ ddindex = dd_index(dd->nc,c);
+ if (dd->comm->bCartesianPP_PME)
+ {
+ ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
+ }
+ else if (dd->comm->bCartesianPP)
+ {
+#ifdef GMX_MPI
+ MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
+#endif
+ }
+ else
+ {
+ ddnodeid = ddindex;
+ }
+
+ return ddnodeid;
+}
+
+static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
+{
+ return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
+}
+
+int ddglatnr(gmx_domdec_t *dd,int i)
+{
+ int atnr;
+
+ if (dd == NULL)
+ {
+ atnr = i + 1;
+ }
+ else
+ {
+ if (i >= dd->comm->nat[ddnatNR-1])
+ {
+ gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
+ }
+ atnr = dd->gatindex[i] + 1;
+ }
+
+ return atnr;
+}
+
+t_block *dd_charge_groups_global(gmx_domdec_t *dd)
+{
+ return &dd->comm->cgs_gl;
+}
+
+static void vec_rvec_init(vec_rvec_t *v)
+{
+ v->nalloc = 0;
+ v->v = NULL;
+}
+
+static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
+{
+ if (n > v->nalloc)
+ {
+ v->nalloc = over_alloc_dd(n);
+ srenew(v->v,v->nalloc);
+ }
+}
+
+void dd_store_state(gmx_domdec_t *dd,t_state *state)
+{
+ int i;
+
+ if (state->ddp_count != dd->ddp_count)
+ {
+ gmx_incons("The state does not the domain decomposition state");
+ }
+
+ state->ncg_gl = dd->ncg_home;
+ if (state->ncg_gl > state->cg_gl_nalloc)
+ {
+ state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
+ srenew(state->cg_gl,state->cg_gl_nalloc);
+ }
+ for(i=0; i<state->ncg_gl; i++)
+ {
+ state->cg_gl[i] = dd->index_gl[i];
+ }
+
+ state->ddp_count_cg_gl = dd->ddp_count;
+}
+
+gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
+{
+ return &dd->comm->zones;
+}
+
+void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
+ int *jcg0,int *jcg1,ivec shift0,ivec shift1)
+{
+ gmx_domdec_zones_t *zones;
+ int izone,d,dim;
+
+ zones = &dd->comm->zones;
+
+ izone = 0;
+ while (icg >= zones->izone[izone].cg1)
+ {
+ izone++;
+ }
+
+ if (izone == 0)
+ {
+ *jcg0 = icg;
+ }
+ else if (izone < zones->nizone)
+ {
+ *jcg0 = zones->izone[izone].jcg0;
+ }
+ else
+ {
+ gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
+ icg,izone,zones->nizone);
+ }
+
+ *jcg1 = zones->izone[izone].jcg1;
+
+ for(d=0; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
+ shift0[dim] = zones->izone[izone].shift0[dim];
+ shift1[dim] = zones->izone[izone].shift1[dim];
+ if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
+ {
+ /* A conservative approach, this can be optimized */
+ shift0[dim] -= 1;
+ shift1[dim] += 1;
+ }
+ }
+}
+
+int dd_natoms_vsite(gmx_domdec_t *dd)
+{
+ return dd->comm->nat[ddnatVSITE];
+}
+
+void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
+{
+ *at_start = dd->comm->nat[ddnatCON-1];
+ *at_end = dd->comm->nat[ddnatCON];
+}
+
+void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
+{
+ int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
+ int *index,*cgindex;
+ gmx_domdec_comm_t *comm;
+ gmx_domdec_comm_dim_t *cd;
+ gmx_domdec_ind_t *ind;
+ rvec shift={0,0,0},*buf,*rbuf;
+ gmx_bool bPBC,bScrew;
+
+ comm = dd->comm;
+
+ cgindex = dd->cgindex;
+
+ buf = comm->vbuf.v;
+
+ nzone = 1;
+ nat_tot = dd->nat_home;
+ for(d=0; d<dd->ndim; d++)
+ {
+ bPBC = (dd->ci[dd->dim[d]] == 0);
+ bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
+ if (bPBC)
+ {
+ copy_rvec(box[dd->dim[d]],shift);
+ }
+ cd = &comm->cd[d];
+ for(p=0; p<cd->np; p++)
+ {
+ ind = &cd->ind[p];
+ index = ind->index;
+ n = 0;
+ if (!bPBC)
+ {
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ copy_rvec(x[j],buf[n]);
+ n++;
+ }
+ }
+ }
+ else if (!bScrew)
+ {
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ /* We need to shift the coordinates */
+ rvec_add(x[j],shift,buf[n]);
+ n++;
+ }
+ }
+ }
+ else
+ {
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ /* Shift x */
+ buf[n][XX] = x[j][XX] + shift[XX];
+ /* Rotate y and z.
+ * This operation requires a special shift force
+ * treatment, which is performed in calc_vir.
+ */
+ buf[n][YY] = box[YY][YY] - x[j][YY];
+ buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
+ n++;
+ }
+ }
+ }
+
+ if (cd->bInPlace)
+ {
+ rbuf = x + nat_tot;
+ }
+ else
+ {
+ rbuf = comm->vbuf2.v;
+ }
+ /* Send and receive the coordinates */
+ dd_sendrecv_rvec(dd, d, dddirBackward,
+ buf, ind->nsend[nzone+1],
+ rbuf, ind->nrecv[nzone+1]);
+ if (!cd->bInPlace)
+ {
+ j = 0;
+ for(zone=0; zone<nzone; zone++)
+ {
+ for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
+ {
+ copy_rvec(rbuf[j],x[i]);
+ j++;
+ }
+ }
+ }
+ nat_tot += ind->nrecv[nzone+1];
+ }
+ nzone += nzone;
+ }
+}
+
+void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
+{
+ int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
+ int *index,*cgindex;
+ gmx_domdec_comm_t *comm;
+ gmx_domdec_comm_dim_t *cd;
+ gmx_domdec_ind_t *ind;
+ rvec *buf,*sbuf;
+ ivec vis;
+ int is;
+ gmx_bool bPBC,bScrew;
+
+ comm = dd->comm;
+
+ cgindex = dd->cgindex;
+
+ buf = comm->vbuf.v;
+
+ n = 0;
+ nzone = comm->zones.n/2;
+ nat_tot = dd->nat_tot;
+ for(d=dd->ndim-1; d>=0; d--)
+ {
+ bPBC = (dd->ci[dd->dim[d]] == 0);
+ bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
+ if (fshift == NULL && !bScrew)
+ {
+ bPBC = FALSE;
+ }
+ /* Determine which shift vector we need */
+ clear_ivec(vis);
+ vis[dd->dim[d]] = 1;
+ is = IVEC2IS(vis);
+
+ cd = &comm->cd[d];
+ for(p=cd->np-1; p>=0; p--) {
+ ind = &cd->ind[p];
+ nat_tot -= ind->nrecv[nzone+1];
+ if (cd->bInPlace)
+ {
+ sbuf = f + nat_tot;
+ }
+ else
+ {
+ sbuf = comm->vbuf2.v;
+ j = 0;
+ for(zone=0; zone<nzone; zone++)
+ {
+ for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
+ {
+ copy_rvec(f[i],sbuf[j]);
+ j++;
+ }
+ }
+ }
+ /* Communicate the forces */
+ dd_sendrecv_rvec(dd, d, dddirForward,
+ sbuf, ind->nrecv[nzone+1],
+ buf, ind->nsend[nzone+1]);
+ index = ind->index;
+ /* Add the received forces */
+ n = 0;
+ if (!bPBC)
+ {
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ rvec_inc(f[j],buf[n]);
+ n++;
+ }
+ }
+ }
+ else if (!bScrew)
+ {
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ rvec_inc(f[j],buf[n]);
+ /* Add this force to the shift force */
+ rvec_inc(fshift[is],buf[n]);
+ n++;
+ }
+ }
+ }
+ else
+ {
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ /* Rotate the force */
+ f[j][XX] += buf[n][XX];
+ f[j][YY] -= buf[n][YY];
+ f[j][ZZ] -= buf[n][ZZ];
+ if (fshift)
+ {
+ /* Add this force to the shift force */
+ rvec_inc(fshift[is],buf[n]);
+ }
+ n++;
+ }
+ }
+ }
+ }
+ nzone /= 2;
+ }
+}
+
+void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
+{
+ int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
+ int *index,*cgindex;
+ gmx_domdec_comm_t *comm;
+ gmx_domdec_comm_dim_t *cd;
+ gmx_domdec_ind_t *ind;
+ real *buf,*rbuf;
+
+ comm = dd->comm;
+
+ cgindex = dd->cgindex;
+
+ buf = &comm->vbuf.v[0][0];
+
+ nzone = 1;
+ nat_tot = dd->nat_home;
+ for(d=0; d<dd->ndim; d++)
+ {
+ cd = &comm->cd[d];
+ for(p=0; p<cd->np; p++)
+ {
+ ind = &cd->ind[p];
+ index = ind->index;
+ n = 0;
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ buf[n] = v[j];
+ n++;
+ }
+ }
+
+ if (cd->bInPlace)
+ {
+ rbuf = v + nat_tot;
+ }
+ else
+ {
+ rbuf = &comm->vbuf2.v[0][0];
+ }
+ /* Send and receive the coordinates */
+ dd_sendrecv_real(dd, d, dddirBackward,
+ buf, ind->nsend[nzone+1],
+ rbuf, ind->nrecv[nzone+1]);
+ if (!cd->bInPlace)
+ {
+ j = 0;
+ for(zone=0; zone<nzone; zone++)
+ {
+ for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
+ {
+ v[i] = rbuf[j];
+ j++;
+ }
+ }
+ }
+ nat_tot += ind->nrecv[nzone+1];
+ }
+ nzone += nzone;
+ }
+}
+
+void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
+{
+ int nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
+ int *index,*cgindex;
+ gmx_domdec_comm_t *comm;
+ gmx_domdec_comm_dim_t *cd;
+ gmx_domdec_ind_t *ind;
+ real *buf,*sbuf;
+
+ comm = dd->comm;
+
+ cgindex = dd->cgindex;
+
+ buf = &comm->vbuf.v[0][0];
+
+ n = 0;
+ nzone = comm->zones.n/2;
+ nat_tot = dd->nat_tot;
+ for(d=dd->ndim-1; d>=0; d--)
+ {
+ cd = &comm->cd[d];
+ for(p=cd->np-1; p>=0; p--) {
+ ind = &cd->ind[p];
+ nat_tot -= ind->nrecv[nzone+1];
+ if (cd->bInPlace)
+ {
+ sbuf = v + nat_tot;
+ }
+ else
+ {
+ sbuf = &comm->vbuf2.v[0][0];
+ j = 0;
+ for(zone=0; zone<nzone; zone++)
+ {
+ for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
+ {
+ sbuf[j] = v[i];
+ j++;
+ }
+ }
+ }
+ /* Communicate the forces */
+ dd_sendrecv_real(dd, d, dddirForward,
+ sbuf, ind->nrecv[nzone+1],
+ buf, ind->nsend[nzone+1]);
+ index = ind->index;
+ /* Add the received forces */
+ n = 0;
+ for(i=0; i<ind->nsend[nzone]; i++)
+ {
+ at0 = cgindex[index[i]];
+ at1 = cgindex[index[i]+1];
+ for(j=at0; j<at1; j++)
+ {
+ v[j] += buf[n];
+ n++;
+ }
+ }
+ }
+ nzone /= 2;
+ }
+}
+
+static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
+{
+ fprintf(fp,"zone d0 %d d1 %d d2 %d min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
+ d,i,j,
+ zone->min0,zone->max1,
+ zone->mch0,zone->mch0,
+ zone->p1_0,zone->p1_1);
+}
+
++
++#define DDZONECOMM_MAXZONE 5
++#define DDZONECOMM_BUFSIZE 3
++
+static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
+ int ddimind,int direction,
+ gmx_ddzone_t *buf_s,int n_s,
+ gmx_ddzone_t *buf_r,int n_r)
+{
- vbuf_s[i*2 ][0] = buf_s[i].min0;
- vbuf_s[i*2 ][1] = buf_s[i].max1;
- vbuf_s[i*2 ][2] = buf_s[i].mch0;
- vbuf_s[i*2+1][0] = buf_s[i].mch1;
- vbuf_s[i*2+1][1] = buf_s[i].p1_0;
- vbuf_s[i*2+1][2] = buf_s[i].p1_1;
++#define ZBS DDZONECOMM_BUFSIZE
++ rvec vbuf_s[DDZONECOMM_MAXZONE*ZBS];
++ rvec vbuf_r[DDZONECOMM_MAXZONE*ZBS];
+ int i;
+
+ for(i=0; i<n_s; i++)
+ {
- vbuf_s, n_s*2,
- vbuf_r, n_r*2);
++ vbuf_s[i*ZBS ][0] = buf_s[i].min0;
++ vbuf_s[i*ZBS ][1] = buf_s[i].max1;
++ vbuf_s[i*ZBS ][2] = buf_s[i].min1;
++ vbuf_s[i*ZBS+1][0] = buf_s[i].mch0;
++ vbuf_s[i*ZBS+1][1] = buf_s[i].mch1;
++ vbuf_s[i*ZBS+1][2] = 0;
++ vbuf_s[i*ZBS+2][0] = buf_s[i].p1_0;
++ vbuf_s[i*ZBS+2][1] = buf_s[i].p1_1;
++ vbuf_s[i*ZBS+2][2] = 0;
+ }
+
+ dd_sendrecv_rvec(dd, ddimind, direction,
- buf_r[i].min0 = vbuf_r[i*2 ][0];
- buf_r[i].max1 = vbuf_r[i*2 ][1];
- buf_r[i].mch0 = vbuf_r[i*2 ][2];
- buf_r[i].mch1 = vbuf_r[i*2+1][0];
- buf_r[i].p1_0 = vbuf_r[i*2+1][1];
- buf_r[i].p1_1 = vbuf_r[i*2+1][2];
++ vbuf_s, n_s*ZBS,
++ vbuf_r, n_r*ZBS);
+
+ for(i=0; i<n_r; i++)
+ {
- gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
++ buf_r[i].min0 = vbuf_r[i*ZBS ][0];
++ buf_r[i].max1 = vbuf_r[i*ZBS ][1];
++ buf_r[i].min1 = vbuf_r[i*ZBS ][2];
++ buf_r[i].mch0 = vbuf_r[i*ZBS+1][0];
++ buf_r[i].mch1 = vbuf_r[i*ZBS+1][1];
++ buf_r[i].p1_0 = vbuf_r[i*ZBS+2][0];
++ buf_r[i].p1_1 = vbuf_r[i*ZBS+2][1];
+ }
++
++#undef ZBS
+}
+
+static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
+ rvec cell_ns_x0,rvec cell_ns_x1)
+{
+ int d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
- extr_s[d][2] = 0;
++ gmx_ddzone_t *zp;
++ gmx_ddzone_t buf_s[DDZONECOMM_MAXZONE];
++ gmx_ddzone_t buf_r[DDZONECOMM_MAXZONE];
++ gmx_ddzone_t buf_e[DDZONECOMM_MAXZONE];
+ rvec extr_s[2],extr_r[2];
+ rvec dh;
+ real dist_d,c=0,det;
+ gmx_domdec_comm_t *comm;
+ gmx_bool bPBC,bUse;
+
+ comm = dd->comm;
+
+ for(d=1; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
+ zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
+ zp->min0 = cell_ns_x0[dim];
+ zp->max1 = cell_ns_x1[dim];
++ zp->min1 = cell_ns_x1[dim];
+ zp->mch0 = cell_ns_x0[dim];
+ zp->mch1 = cell_ns_x1[dim];
+ zp->p1_0 = cell_ns_x0[dim];
+ zp->p1_1 = cell_ns_x1[dim];
+ }
+
+ for(d=dd->ndim-2; d>=0; d--)
+ {
+ dim = dd->dim[d];
+ bPBC = (dim < ddbox->npbcdim);
+
+ /* Use an rvec to store two reals */
+ extr_s[d][0] = comm->cell_f0[d+1];
+ extr_s[d][1] = comm->cell_f1[d+1];
- static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
- {
- if (debug)
- {
- fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
- }
- fr->cg_nalloc = over_alloc_dd(nalloc);
- srenew(fr->cg_cm,fr->cg_nalloc);
- srenew(fr->cginfo,fr->cg_nalloc);
- }
-
++ extr_s[d][2] = comm->cell_f1[d+1];
+
+ pos = 0;
+ /* Store the extremes in the backward sending buffer,
+     * so they get updated separately from the forward communication.
+ */
+ for(d1=d; d1<dd->ndim-1; d1++)
+ {
+ /* We invert the order to be able to use the same loop for buf_e */
+ buf_s[pos].min0 = extr_s[d1][1];
+ buf_s[pos].max1 = extr_s[d1][0];
++ buf_s[pos].min1 = extr_s[d1][2];
+ buf_s[pos].mch0 = 0;
+ buf_s[pos].mch1 = 0;
+ /* Store the cell corner of the dimension we communicate along */
+ buf_s[pos].p1_0 = comm->cell_x0[dim];
+ buf_s[pos].p1_1 = 0;
+ pos++;
+ }
+
+ buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
+ pos++;
+
+ if (dd->ndim == 3 && d == 0)
+ {
+ buf_s[pos] = comm->zone_d2[0][1];
+ pos++;
+ buf_s[pos] = comm->zone_d1[0];
+ pos++;
+ }
+
+ /* We only need to communicate the extremes
+ * in the forward direction
+ */
+ npulse = comm->cd[d].np;
+ if (bPBC)
+ {
+ /* Take the minimum to avoid double communication */
+ npulse_min = min(npulse,dd->nc[dim]-1-npulse);
+ }
+ else
+ {
+ /* Without PBC we should really not communicate over
+ * the boundaries, but implementing that complicates
+ * the communication setup and therefore we simply
+ * do all communication, but ignore some data.
+ */
+ npulse_min = npulse;
+ }
+ for(p=0; p<npulse_min; p++)
+ {
+ /* Communicate the extremes forward */
+ bUse = (bPBC || dd->ci[dim] > 0);
+
+ dd_sendrecv_rvec(dd, d, dddirForward,
+ extr_s+d, dd->ndim-d-1,
+ extr_r+d, dd->ndim-d-1);
+
+ if (bUse)
+ {
+ for(d1=d; d1<dd->ndim-1; d1++)
+ {
+ extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
+ extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
++ extr_s[d1][2] = min(extr_s[d1][2],extr_r[d1][2]);
+ }
+ }
+ }
+
+ buf_size = pos;
+ for(p=0; p<npulse; p++)
+ {
+ /* Communicate all the zone information backward */
+ bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
+
+ dd_sendrecv_ddzone(dd, d, dddirBackward,
+ buf_s, buf_size,
+ buf_r, buf_size);
+
+ clear_rvec(dh);
+ if (p > 0)
+ {
+ for(d1=d+1; d1<dd->ndim; d1++)
+ {
+ /* Determine the decrease of maximum required
+ * communication height along d1 due to the distance along d,
+ * this avoids a lot of useless atom communication.
+ */
+ dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
+
+ if (ddbox->tric_dir[dim])
+ {
+ /* c is the off-diagonal coupling between the cell planes
+ * along directions d and d1.
+ */
+ c = ddbox->v[dim][dd->dim[d1]][dim];
+ }
+ else
+ {
+ c = 0;
+ }
+ det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
+ if (det > 0)
+ {
+ dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
+ }
+ else
+ {
+ /* A negative value signals out of range */
+ dh[d1] = -1;
+ }
+ }
+ }
+
+ /* Accumulate the extremes over all pulses */
+ for(i=0; i<buf_size; i++)
+ {
+ if (p == 0)
+ {
+ buf_e[i] = buf_r[i];
+ }
+ else
+ {
+ if (bUse)
+ {
+ buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
+ buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
++ buf_e[i].min1 = min(buf_e[i].min1,buf_r[i].min1);
+ }
+
+ if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
+ {
+ d1 = 1;
+ }
+ else
+ {
+ d1 = d + 1;
+ }
+ if (bUse && dh[d1] >= 0)
+ {
+ buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
+ buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
+ }
+ }
+ /* Copy the received buffer to the send buffer,
+ * to pass the data through with the next pulse.
+ */
+ buf_s[i] = buf_r[i];
+ }
+ if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
+ (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
+ {
+ /* Store the extremes */
+ pos = 0;
+
+ for(d1=d; d1<dd->ndim-1; d1++)
+ {
+ extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
+ extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
++ extr_s[d1][2] = min(extr_s[d1][2],buf_e[pos].min1);
+ pos++;
+ }
+
+ if (d == 1 || (d == 0 && dd->ndim == 3))
+ {
+ for(i=d; i<2; i++)
+ {
+ comm->zone_d2[1-d][i] = buf_e[pos];
+ pos++;
+ }
+ }
+ if (d == 0)
+ {
+ comm->zone_d1[1] = buf_e[pos];
+ pos++;
+ }
+ }
+ }
+ }
+
+ if (dd->ndim >= 2)
+ {
+ dim = dd->dim[1];
+ for(i=0; i<2; i++)
+ {
+ if (debug)
+ {
+ print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
+ }
+ cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
+ cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
+ }
+ }
+ if (dd->ndim >= 3)
+ {
+ dim = dd->dim[2];
+ for(i=0; i<2; i++)
+ {
+ for(j=0; j<2; j++)
+ {
+ if (debug)
+ {
+ print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
+ }
+ cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
+ cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
+ }
+ }
+ }
+ for(d=1; d<dd->ndim; d++)
+ {
+ comm->cell_f_max0[d] = extr_s[d-1][0];
+ comm->cell_f_min1[d] = extr_s[d-1][1];
+ if (debug)
+ {
+ fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
+ d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
+ }
+ }
+}
+
+static void dd_collect_cg(gmx_domdec_t *dd,
+ t_state *state_local)
+{
+ gmx_domdec_master_t *ma=NULL;
+ int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
+ t_block *cgs_gl;
+
+ if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
+ {
+ /* The master has the correct distribution */
+ return;
+ }
+
+ if (state_local->ddp_count == dd->ddp_count)
+ {
+ ncg_home = dd->ncg_home;
+ cg = dd->index_gl;
+ nat_home = dd->nat_home;
+ }
+ else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
+ {
+ cgs_gl = &dd->comm->cgs_gl;
+
+ ncg_home = state_local->ncg_gl;
+ cg = state_local->cg_gl;
+ nat_home = 0;
+ for(i=0; i<ncg_home; i++)
+ {
+ nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
+ }
+ }
+ else
+ {
+ gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
+ }
+
+ buf2[0] = dd->ncg_home;
+ buf2[1] = dd->nat_home;
+ if (DDMASTER(dd))
+ {
+ ma = dd->ma;
+ ibuf = ma->ibuf;
+ }
+ else
+ {
+ ibuf = NULL;
+ }
+ /* Collect the charge group and atom counts on the master */
+ dd_gather(dd,2*sizeof(int),buf2,ibuf);
+
+ if (DDMASTER(dd))
+ {
+ ma->index[0] = 0;
+ for(i=0; i<dd->nnodes; i++)
+ {
+ ma->ncg[i] = ma->ibuf[2*i];
+ ma->nat[i] = ma->ibuf[2*i+1];
+ ma->index[i+1] = ma->index[i] + ma->ncg[i];
+
+ }
+ /* Make byte counts and indices */
+ for(i=0; i<dd->nnodes; i++)
+ {
+ ma->ibuf[i] = ma->ncg[i]*sizeof(int);
+ ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
+ }
+ if (debug)
+ {
+ fprintf(debug,"Initial charge group distribution: ");
+ for(i=0; i<dd->nnodes; i++)
+ fprintf(debug," %d",ma->ncg[i]);
+ fprintf(debug,"\n");
+ }
+ }
+
+ /* Collect the charge group indices on the master */
+ dd_gatherv(dd,
+ dd->ncg_home*sizeof(int),dd->index_gl,
+ DDMASTER(dd) ? ma->ibuf : NULL,
+ DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
+ DDMASTER(dd) ? ma->cg : NULL);
+
+ dd->comm->master_cg_ddp_count = state_local->ddp_count;
+}
+
+static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
+ rvec *lv,rvec *v)
+{
+ gmx_domdec_master_t *ma;
+ int n,i,c,a,nalloc=0;
+ rvec *buf=NULL;
+ t_block *cgs_gl;
+
+ ma = dd->ma;
+
+ if (!DDMASTER(dd))
+ {
+#ifdef GMX_MPI
+ MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
+ dd->rank,dd->mpi_comm_all);
+#endif
+ } else {
+ /* Copy the master coordinates to the global array */
+ cgs_gl = &dd->comm->cgs_gl;
+
+ n = DDMASTERRANK(dd);
+ a = 0;
+ for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ {
+ for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
+ {
+ copy_rvec(lv[a++],v[c]);
+ }
+ }
+
+ for(n=0; n<dd->nnodes; n++)
+ {
+ if (n != dd->rank)
+ {
+ if (ma->nat[n] > nalloc)
+ {
+ nalloc = over_alloc_dd(ma->nat[n]);
+ srenew(buf,nalloc);
+ }
+#ifdef GMX_MPI
+ MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
+ n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
+#endif
+ a = 0;
+ for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ {
+ for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
+ {
+ copy_rvec(buf[a++],v[c]);
+ }
+ }
+ }
+ }
+ sfree(buf);
+ }
+}
+
+static void get_commbuffer_counts(gmx_domdec_t *dd,
+ int **counts,int **disps)
+{
+ gmx_domdec_master_t *ma;
+ int n;
+
+ ma = dd->ma;
+
+    /* Make the rvec count and displacement arrays */
+ *counts = ma->ibuf;
+ *disps = ma->ibuf + dd->nnodes;
+ for(n=0; n<dd->nnodes; n++)
+ {
+ (*counts)[n] = ma->nat[n]*sizeof(rvec);
+ (*disps)[n] = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
+ }
+}
+
+static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
+ rvec *lv,rvec *v)
+{
+ gmx_domdec_master_t *ma;
+ int *rcounts=NULL,*disps=NULL;
+ int n,i,c,a;
+ rvec *buf=NULL;
+ t_block *cgs_gl;
+
+ ma = dd->ma;
+
+ if (DDMASTER(dd))
+ {
+ get_commbuffer_counts(dd,&rcounts,&disps);
+
+ buf = ma->vbuf;
+ }
+
+ dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
+
+ if (DDMASTER(dd))
+ {
+ cgs_gl = &dd->comm->cgs_gl;
+
+ a = 0;
+ for(n=0; n<dd->nnodes; n++)
+ {
+ for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ {
+ for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
+ {
+ copy_rvec(buf[a++],v[c]);
+ }
+ }
+ }
+ }
+}
+
+void dd_collect_vec(gmx_domdec_t *dd,
+ t_state *state_local,rvec *lv,rvec *v)
+{
+ gmx_domdec_master_t *ma;
+ int n,i,c,a,nalloc=0;
+ rvec *buf=NULL;
+
+ dd_collect_cg(dd,state_local);
+
+ if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
+ {
+ dd_collect_vec_sendrecv(dd,lv,v);
+ }
+ else
+ {
+ dd_collect_vec_gatherv(dd,lv,v);
+ }
+}
+
+
+void dd_collect_state(gmx_domdec_t *dd,
+ t_state *state_local,t_state *state)
+{
+ int est,i,j,nh;
+
+ nh = state->nhchainlength;
+
+ if (DDMASTER(dd))
+ {
+ for (i=0;i<efptNR;i++) {
+ state->lambda[i] = state_local->lambda[i];
+ }
+ state->fep_state = state_local->fep_state;
+ state->veta = state_local->veta;
+ state->vol0 = state_local->vol0;
+ copy_mat(state_local->box,state->box);
+ copy_mat(state_local->boxv,state->boxv);
+ copy_mat(state_local->svir_prev,state->svir_prev);
+ copy_mat(state_local->fvir_prev,state->fvir_prev);
+ copy_mat(state_local->pres_prev,state->pres_prev);
+
+
+ for(i=0; i<state_local->ngtc; i++)
+ {
+ for(j=0; j<nh; j++) {
+ state->nosehoover_xi[i*nh+j] = state_local->nosehoover_xi[i*nh+j];
+ state->nosehoover_vxi[i*nh+j] = state_local->nosehoover_vxi[i*nh+j];
+ }
+ state->therm_integral[i] = state_local->therm_integral[i];
+ }
+ for(i=0; i<state_local->nnhpres; i++)
+ {
+ for(j=0; j<nh; j++) {
+ state->nhpres_xi[i*nh+j] = state_local->nhpres_xi[i*nh+j];
+ state->nhpres_vxi[i*nh+j] = state_local->nhpres_vxi[i*nh+j];
+ }
+ }
+ }
+ for(est=0; est<estNR; est++)
+ {
+ if (EST_DISTR(est) && (state_local->flags & (1<<est)))
+ {
+ switch (est) {
+ case estX:
+ dd_collect_vec(dd,state_local,state_local->x,state->x);
+ break;
+ case estV:
+ dd_collect_vec(dd,state_local,state_local->v,state->v);
+ break;
+ case estSDX:
+ dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
+ break;
+ case estCGP:
+ dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
+ break;
+ case estLD_RNG:
+ if (state->nrngi == 1)
+ {
+ if (DDMASTER(dd))
+ {
+ for(i=0; i<state_local->nrng; i++)
+ {
+ state->ld_rng[i] = state_local->ld_rng[i];
+ }
+ }
+ }
+ else
+ {
+ dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
+ state_local->ld_rng,state->ld_rng);
+ }
+ break;
+ case estLD_RNGI:
+ if (state->nrngi == 1)
+ {
+ if (DDMASTER(dd))
+ {
+ state->ld_rngi[0] = state_local->ld_rngi[0];
+ }
+ }
+ else
+ {
+ dd_gather(dd,sizeof(state->ld_rngi[0]),
+ state_local->ld_rngi,state->ld_rngi);
+ }
+ break;
+ case estDISRE_INITF:
+ case estDISRE_RM3TAV:
+ case estORIRE_INITF:
+ case estORIRE_DTAV:
+ break;
+ default:
+ gmx_incons("Unknown state entry encountered in dd_collect_state");
+ }
+ }
+ }
+}
+
- static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
+static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
+{
+ int est;
+
+ if (debug)
+ {
+ fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
+ }
+
+ state->nalloc = over_alloc_dd(nalloc);
+
+ for(est=0; est<estNR; est++)
+ {
+ if (EST_DISTR(est) && (state->flags & (1<<est)))
+ {
+ switch(est) {
+ case estX:
+ srenew(state->x,state->nalloc);
+ break;
+ case estV:
+ srenew(state->v,state->nalloc);
+ break;
+ case estSDX:
+ srenew(state->sd_X,state->nalloc);
+ break;
+ case estCGP:
+ srenew(state->cg_p,state->nalloc);
+ break;
+ case estLD_RNG:
+ case estLD_RNGI:
+ case estDISRE_INITF:
+ case estDISRE_RM3TAV:
+ case estORIRE_INITF:
+ case estORIRE_DTAV:
+ /* No reallocation required */
+ break;
+ default:
+ gmx_incons("Unknown state entry encountered in dd_realloc_state");
+ }
+ }
+ }
+
+ if (f != NULL)
+ {
+ srenew(*f,state->nalloc);
+ }
+}
+
++static void dd_check_alloc_ncg(t_forcerec *fr,t_state *state,rvec **f,
++ int nalloc)
++{
++ if (nalloc > fr->cg_nalloc)
++ {
++ if (debug)
++ {
++ fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
++ }
++ fr->cg_nalloc = over_alloc_dd(nalloc);
++ srenew(fr->cginfo,fr->cg_nalloc);
++ if (fr->cutoff_scheme == ecutsGROUP)
++ {
++ srenew(fr->cg_cm,fr->cg_nalloc);
++ }
++ }
++ if (fr->cutoff_scheme == ecutsVERLET && nalloc > state->nalloc)
++ {
++ /* We don't use charge groups, we use x in state to set up
++ * the atom communication.
++ */
++ dd_realloc_state(state,f,nalloc);
++ }
++}
++
+static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
+ rvec *v,rvec *lv)
+{
+ gmx_domdec_master_t *ma;
+ int n,i,c,a,nalloc=0;
+ rvec *buf=NULL;
+
+ if (DDMASTER(dd))
+ {
+ ma = dd->ma;
+
+ for(n=0; n<dd->nnodes; n++)
+ {
+ if (n != dd->rank)
+ {
+ if (ma->nat[n] > nalloc)
+ {
+ nalloc = over_alloc_dd(ma->nat[n]);
+ srenew(buf,nalloc);
+ }
+ /* Use lv as a temporary buffer */
+ a = 0;
+ for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ {
+ for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
+ {
+ copy_rvec(v[c],buf[a++]);
+ }
+ }
+ if (a != ma->nat[n])
+ {
+ gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
+ a,ma->nat[n]);
+ }
+
+#ifdef GMX_MPI
+ MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
+ DDRANK(dd,n),n,dd->mpi_comm_all);
+#endif
+ }
+ }
+ sfree(buf);
+ n = DDMASTERRANK(dd);
+ a = 0;
+ for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ {
+ for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
+ {
+ copy_rvec(v[c],lv[a++]);
+ }
+ }
+ }
+ else
+ {
+#ifdef GMX_MPI
+ MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
+ MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
+#endif
+ }
+}
+
+static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
+ rvec *v,rvec *lv)
+{
+ gmx_domdec_master_t *ma;
+ int *scounts=NULL,*disps=NULL;
+ int n,i,c,a,nalloc=0;
+ rvec *buf=NULL;
+
+ if (DDMASTER(dd))
+ {
+ ma = dd->ma;
+
+ get_commbuffer_counts(dd,&scounts,&disps);
+
+ buf = ma->vbuf;
+ a = 0;
+ for(n=0; n<dd->nnodes; n++)
+ {
+ for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ {
+ for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
+ {
+ copy_rvec(v[c],buf[a++]);
+ }
+ }
+ }
+ }
+
+ dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
+}
+
+static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
+{
+ if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
+ {
+ dd_distribute_vec_sendrecv(dd,cgs,v,lv);
+ }
+ else
+ {
+ dd_distribute_vec_scatterv(dd,cgs,v,lv);
+ }
+}
+
+static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
+ t_state *state,t_state *state_local,
+ rvec **f)
+{
+ int i,j,nh;
+
+ nh = state->nhchainlength;
+
+ if (DDMASTER(dd))
+ {
+ for(i=0;i<efptNR;i++)
+ {
+ state_local->lambda[i] = state->lambda[i];
+ }
+ state_local->fep_state = state->fep_state;
+ state_local->veta = state->veta;
+ state_local->vol0 = state->vol0;
+ copy_mat(state->box,state_local->box);
+ copy_mat(state->box_rel,state_local->box_rel);
+ copy_mat(state->boxv,state_local->boxv);
+ copy_mat(state->svir_prev,state_local->svir_prev);
+ copy_mat(state->fvir_prev,state_local->fvir_prev);
+ for(i=0; i<state_local->ngtc; i++)
+ {
+ for(j=0; j<nh; j++) {
+ state_local->nosehoover_xi[i*nh+j] = state->nosehoover_xi[i*nh+j];
+ state_local->nosehoover_vxi[i*nh+j] = state->nosehoover_vxi[i*nh+j];
+ }
+ state_local->therm_integral[i] = state->therm_integral[i];
+ }
+ for(i=0; i<state_local->nnhpres; i++)
+ {
+ for(j=0; j<nh; j++) {
+ state_local->nhpres_xi[i*nh+j] = state->nhpres_xi[i*nh+j];
+ state_local->nhpres_vxi[i*nh+j] = state->nhpres_vxi[i*nh+j];
+ }
+ }
+ }
+ dd_bcast(dd,((efptNR)*sizeof(real)),state_local->lambda);
+ dd_bcast(dd,sizeof(int),&state_local->fep_state);
+ dd_bcast(dd,sizeof(real),&state_local->veta);
+ dd_bcast(dd,sizeof(real),&state_local->vol0);
+ dd_bcast(dd,sizeof(state_local->box),state_local->box);
+ dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
+ dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
+ dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
+ dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
+ dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
+ dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
+ dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
+ dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
+ dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
+
+ if (dd->nat_home > state_local->nalloc)
+ {
+ dd_realloc_state(state_local,f,dd->nat_home);
+ }
+ for(i=0; i<estNR; i++)
+ {
+ if (EST_DISTR(i) && (state_local->flags & (1<<i)))
+ {
+ switch (i) {
+ case estX:
+ dd_distribute_vec(dd,cgs,state->x,state_local->x);
+ break;
+ case estV:
+ dd_distribute_vec(dd,cgs,state->v,state_local->v);
+ break;
+ case estSDX:
+ dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
+ break;
+ case estCGP:
+ dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
+ break;
+ case estLD_RNG:
+ if (state->nrngi == 1)
+ {
+ dd_bcastc(dd,
+ state_local->nrng*sizeof(state_local->ld_rng[0]),
+ state->ld_rng,state_local->ld_rng);
+ }
+ else
+ {
+ dd_scatter(dd,
+ state_local->nrng*sizeof(state_local->ld_rng[0]),
+ state->ld_rng,state_local->ld_rng);
+ }
+ break;
+ case estLD_RNGI:
+ if (state->nrngi == 1)
+ {
+ dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
+ state->ld_rngi,state_local->ld_rngi);
+ }
+ else
+ {
+ dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
+ state->ld_rngi,state_local->ld_rngi);
+ }
+ break;
+ case estDISRE_INITF:
+ case estDISRE_RM3TAV:
+ case estORIRE_INITF:
+ case estORIRE_DTAV:
+ /* Not implemented yet */
+ break;
+ default:
+ gmx_incons("Unknown state entry encountered in dd_distribute_state");
+ }
+ }
+ }
+}
+
+static char dim2char(int dim)
+{
+ char c='?';
+
+ switch (dim)
+ {
+ case XX: c = 'X'; break;
+ case YY: c = 'Y'; break;
+ case ZZ: c = 'Z'; break;
+ default: gmx_fatal(FARGS,"Unknown dim %d",dim);
+ }
+
+ return c;
+}
+
+static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
+ gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
+{
+ rvec grid_s[2],*grid_r=NULL,cx,r;
+ char fname[STRLEN],format[STRLEN],buf[22];
+ FILE *out;
+ int a,i,d,z,y,x;
+ matrix tric;
+ real vol;
+
+ copy_rvec(dd->comm->cell_x0,grid_s[0]);
+ copy_rvec(dd->comm->cell_x1,grid_s[1]);
+
+ if (DDMASTER(dd))
+ {
+ snew(grid_r,2*dd->nnodes);
+ }
+
+ dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
+
+ if (DDMASTER(dd))
+ {
+ for(d=0; d<DIM; d++)
+ {
+ for(i=0; i<DIM; i++)
+ {
+ if (d == i)
+ {
+ tric[d][i] = 1;
+ }
+ else
+ {
+ if (d < ddbox->npbcdim && dd->nc[d] > 1)
+ {
+ tric[d][i] = box[i][d]/box[i][i];
+ }
+ else
+ {
+ tric[d][i] = 0;
+ }
+ }
+ }
+ }
+ sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
+ sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
+ out = gmx_fio_fopen(fname,"w");
+ gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
+ a = 1;
+ for(i=0; i<dd->nnodes; i++)
+ {
+ vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
+ for(d=0; d<DIM; d++)
+ {
+ vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
+ }
+ for(z=0; z<2; z++)
+ {
+ for(y=0; y<2; y++)
+ {
+ for(x=0; x<2; x++)
+ {
+ cx[XX] = grid_r[i*2+x][XX];
+ cx[YY] = grid_r[i*2+y][YY];
+ cx[ZZ] = grid_r[i*2+z][ZZ];
+ mvmul(tric,cx,r);
+ fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
+ 10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
+ }
+ }
+ }
+ for(d=0; d<DIM; d++)
+ {
+ for(x=0; x<4; x++)
+ {
+ switch(d)
+ {
+ case 0: y = 1 + i*8 + 2*x; break;
+ case 1: y = 1 + i*8 + 2*x - (x % 2); break;
+ case 2: y = 1 + i*8 + x; break;
+ }
+ fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
+ }
+ }
+ }
+ gmx_fio_fclose(out);
+ sfree(grid_r);
+ }
+}
+
+void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
+ gmx_mtop_t *mtop,t_commrec *cr,
+ int natoms,rvec x[],matrix box)
+{
+ char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
+ FILE *out;
+ int i,ii,resnr,c;
+ char *atomname,*resname;
+ real b;
+ gmx_domdec_t *dd;
+
+ dd = cr->dd;
+ if (natoms == -1)
+ {
+ natoms = dd->comm->nat[ddnatVSITE];
+ }
+
+ sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
+
+ sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
+ sprintf(format4,"%s%s\n",get_pdbformat4(),"%6.2f%6.2f");
+
+ out = gmx_fio_fopen(fname,"w");
+
+ fprintf(out,"TITLE %s\n",title);
+ gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
+ for(i=0; i<natoms; i++)
+ {
+ ii = dd->gatindex[i];
+ gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
+ if (i < dd->comm->nat[ddnatZONE])
+ {
+ c = 0;
+ while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
+ {
+ c++;
+ }
+ b = c;
+ }
+ else if (i < dd->comm->nat[ddnatVSITE])
+ {
+ b = dd->comm->zones.n;
+ }
+ else
+ {
+ b = dd->comm->zones.n + 1;
+ }
+ fprintf(out,strlen(atomname)<4 ? format : format4,
+ "ATOM",(ii+1)%100000,
+ atomname,resname,' ',resnr%10000,' ',
+ 10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
+ }
+ fprintf(out,"TER\n");
+
+ gmx_fio_fclose(out);
+}
+
+real dd_cutoff_mbody(gmx_domdec_t *dd)
+{
+ gmx_domdec_comm_t *comm;
+ int di;
+ real r;
+
+ comm = dd->comm;
+
+ r = -1;
+ if (comm->bInterCGBondeds)
+ {
+ if (comm->cutoff_mbody > 0)
+ {
+ r = comm->cutoff_mbody;
+ }
+ else
+ {
+ /* cutoff_mbody=0 means we do not have DLB */
+ r = comm->cellsize_min[dd->dim[0]];
+ for(di=1; di<dd->ndim; di++)
+ {
+ r = min(r,comm->cellsize_min[dd->dim[di]]);
+ }
+ if (comm->bBondComm)
+ {
+ r = max(r,comm->cutoff_mbody);
+ }
+ else
+ {
+ r = min(r,comm->cutoff);
+ }
+ }
+ }
+
+ return r;
+}
+
+real dd_cutoff_twobody(gmx_domdec_t *dd)
+{
+ real r_mb;
+
+ r_mb = dd_cutoff_mbody(dd);
+
+ return max(dd->comm->cutoff,r_mb);
+}
+
+
+static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
+{
+ int nc,ntot;
+
+ nc = dd->nc[dd->comm->cartpmedim];
+ ntot = dd->comm->ntot[dd->comm->cartpmedim];
+ copy_ivec(coord,coord_pme);
+ coord_pme[dd->comm->cartpmedim] =
+ nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
+}
+
+static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
+{
+ /* Here we assign a PME node to communicate with this DD node
+ * by assuming that the major index of both is x.
+ * We add cr->npmenodes/2 to obtain an even distribution.
+ */
+ return (ddindex*npme + npme/2)/ndd;
+}
+
+static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
+{
+ return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
+}
+
+static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
+{
+ return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
+}
+
+static int *dd_pmenodes(t_commrec *cr)
+{
+ int *pmenodes;
+ int n,i,p0,p1;
+
+ snew(pmenodes,cr->npmenodes);
+ n = 0;
+ for(i=0; i<cr->dd->nnodes; i++) {
+ p0 = cr_ddindex2pmeindex(cr,i);
+ p1 = cr_ddindex2pmeindex(cr,i+1);
+ if (i+1 == cr->dd->nnodes || p1 > p0) {
+ if (debug)
+ fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
+ pmenodes[n] = i + 1 + n;
+ n++;
+ }
+ }
+
+ return pmenodes;
+}
+
+static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
+{
+ gmx_domdec_t *dd;
+ ivec coords,coords_pme,nc;
+ int slab;
+
+ dd = cr->dd;
+ /*
+ if (dd->comm->bCartesian) {
+ gmx_ddindex2xyz(dd->nc,ddindex,coords);
+ dd_coords2pmecoords(dd,coords,coords_pme);
+ copy_ivec(dd->ntot,nc);
+ nc[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
+ coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
+
+ slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
+ } else {
+ slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
+ }
+ */
+ coords[XX] = x;
+ coords[YY] = y;
+ coords[ZZ] = z;
+ slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
+
+ return slab;
+}
+
+static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
+{
+ gmx_domdec_comm_t *comm;
+ ivec coords;
+ int ddindex,nodeid=-1;
+
+ comm = cr->dd->comm;
+
+ coords[XX] = x;
+ coords[YY] = y;
+ coords[ZZ] = z;
+ if (comm->bCartesianPP_PME)
+ {
+#ifdef GMX_MPI
+ MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
+#endif
+ }
+ else
+ {
+ ddindex = dd_index(cr->dd->nc,coords);
+ if (comm->bCartesianPP)
+ {
+ nodeid = comm->ddindex2simnodeid[ddindex];
+ }
+ else
+ {
+ if (comm->pmenodes)
+ {
+ nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
+ }
+ else
+ {
+ nodeid = ddindex;
+ }
+ }
+ }
+
+ return nodeid;
+}
+
+static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
+{
+ gmx_domdec_t *dd;
+ gmx_domdec_comm_t *comm;
+ ivec coord,coord_pme;
+ int i;
+ int pmenode=-1;
+
+ dd = cr->dd;
+ comm = dd->comm;
+
+ /* This assumes a uniform x domain decomposition grid cell size */
+ if (comm->bCartesianPP_PME)
+ {
+#ifdef GMX_MPI
+ MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
+ if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
+ {
+ /* This is a PP node */
+ dd_cart_coord2pmecoord(dd,coord,coord_pme);
+ MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
+ }
+#endif
+ }
+ else if (comm->bCartesianPP)
+ {
+ if (sim_nodeid < dd->nnodes)
+ {
+ pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
+ }
+ }
+ else
+ {
+ /* This assumes DD cells with identical x coordinates
+ * are numbered sequentially.
+ */
+ if (dd->comm->pmenodes == NULL)
+ {
+ if (sim_nodeid < dd->nnodes)
+ {
+ /* The DD index equals the nodeid */
+ pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
+ }
+ }
+ else
+ {
+ i = 0;
+ while (sim_nodeid > dd->comm->pmenodes[i])
+ {
+ i++;
+ }
+ if (sim_nodeid < dd->comm->pmenodes[i])
+ {
+ pmenode = dd->comm->pmenodes[i];
+ }
+ }
+ }
+
+ return pmenode;
+}
+
+gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
+{
+ gmx_bool bPMEOnlyNode;
+
+ if (DOMAINDECOMP(cr))
+ {
+ bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
+ }
+ else
+ {
+ bPMEOnlyNode = FALSE;
+ }
+
+ return bPMEOnlyNode;
+}
+
+void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
+ int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
+{
+ gmx_domdec_t *dd;
+ int x,y,z;
+ ivec coord,coord_pme;
+
+ dd = cr->dd;
+
+ snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
+
+ *nmy_ddnodes = 0;
+ for(x=0; x<dd->nc[XX]; x++)
+ {
+ for(y=0; y<dd->nc[YY]; y++)
+ {
+ for(z=0; z<dd->nc[ZZ]; z++)
+ {
+ if (dd->comm->bCartesianPP_PME)
+ {
+ coord[XX] = x;
+ coord[YY] = y;
+ coord[ZZ] = z;
+ dd_cart_coord2pmecoord(dd,coord,coord_pme);
+ if (dd->ci[XX] == coord_pme[XX] &&
+ dd->ci[YY] == coord_pme[YY] &&
+ dd->ci[ZZ] == coord_pme[ZZ])
+ (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
+ }
+ else
+ {
+ /* The slab corresponds to the nodeid in the PME group */
+ if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
+ {
+ (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
+ }
+ }
+ }
+ }
+ }
+
+ /* The last PP-only node is the peer node */
+ *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
+
+ if (debug)
+ {
+ fprintf(debug,"Receive coordinates from PP nodes:");
+ for(x=0; x<*nmy_ddnodes; x++)
+ {
+ fprintf(debug," %d",(*my_ddnodes)[x]);
+ }
+ fprintf(debug,"\n");
+ }
+}
+
+static gmx_bool receive_vir_ener(t_commrec *cr)
+{
+ gmx_domdec_comm_t *comm;
+ int pmenode,coords[DIM],rank;
+ gmx_bool bReceive;
+
+ bReceive = TRUE;
+ if (cr->npmenodes < cr->dd->nnodes)
+ {
+ comm = cr->dd->comm;
+ if (comm->bCartesianPP_PME)
+ {
+ pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
+#ifdef GMX_MPI
+ MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
+ coords[comm->cartpmedim]++;
+ if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
+ {
+ MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
+ if (dd_simnode2pmenode(cr,rank) == pmenode)
+ {
+ /* This is not the last PP node for pmenode */
+ bReceive = FALSE;
+ }
+ }
+#endif
+ }
+ else
+ {
+ pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
+ if (cr->sim_nodeid+1 < cr->nnodes &&
+ dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
+ {
+ /* This is not the last PP node for pmenode */
+ bReceive = FALSE;
+ }
+ }
+ }
+
+ return bReceive;
+}
+
+static void set_zones_ncg_home(gmx_domdec_t *dd)
+{
+ gmx_domdec_zones_t *zones;
+ int i;
+
+ zones = &dd->comm->zones;
+
+ zones->cg_range[0] = 0;
+ for(i=1; i<zones->n+1; i++)
+ {
+ zones->cg_range[i] = dd->ncg_home;
+ }
+}
+
- static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
++static void rebuild_cgindex(gmx_domdec_t *dd,
++ const int *gcgs_index,t_state *state)
+{
+ int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;
+
+ ind = state->cg_gl;
+ dd_cg_gl = dd->index_gl;
+ cgindex = dd->cgindex;
+ nat = 0;
+ cgindex[0] = nat;
+ for(i=0; i<state->ncg_gl; i++)
+ {
+ cgindex[i] = nat;
+ cg_gl = ind[i];
+ dd_cg_gl[i] = cg_gl;
+ nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
+ }
+ cgindex[i] = nat;
+
+ dd->ncg_home = state->ncg_gl;
+ dd->nat_home = nat;
+
+ set_zones_ncg_home(dd);
+}
+
+static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
+{
+ while (cg >= cginfo_mb->cg_end)
+ {
+ cginfo_mb++;
+ }
+
+ return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
+}
+
+static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
+ t_forcerec *fr,char *bLocalCG)
+{
+ cginfo_mb_t *cginfo_mb;
+ int *cginfo;
+ int cg;
+
+ if (fr != NULL)
+ {
+ cginfo_mb = fr->cginfo_mb;
+ cginfo = fr->cginfo;
+
+ for(cg=cg0; cg<cg1; cg++)
+ {
+ cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
+ }
+ }
+
+ if (bLocalCG != NULL)
+ {
+ for(cg=cg0; cg<cg1; cg++)
+ {
+ bLocalCG[index_gl[cg]] = TRUE;
+ }
+ }
+}
+
- int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
++static void make_dd_indices(gmx_domdec_t *dd,
++ const int *gcgs_index,int cg_start)
+{
- for(cg=cg0; cg<zone2cg[zone+1]; cg++)
++ int nzone,zone,zone1,cg0,cg1,cg1_p1,cg,cg_gl,a,a_gl;
+ int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
+ gmx_ga2la_t *ga2la;
+ char *bLocalCG;
++ gmx_bool bCGs;
+
+ bLocalCG = dd->comm->bLocalCG;
+
+ if (dd->nat_tot > dd->gatindex_nalloc)
+ {
+ dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
+ srenew(dd->gatindex,dd->gatindex_nalloc);
+ }
+
+ nzone = dd->comm->zones.n;
+ zone2cg = dd->comm->zones.cg_range;
+ zone_ncg1 = dd->comm->zone_ncg1;
+ index_gl = dd->index_gl;
+ gatindex = dd->gatindex;
++ bCGs = dd->comm->bCGs;
+
+ if (zone2cg[1] != dd->ncg_home)
+ {
+ gmx_incons("dd->ncg_zone is not up to date");
+ }
+
+ /* Make the local to global and global to local atom index */
+ a = dd->cgindex[cg_start];
+ for(zone=0; zone<nzone; zone++)
+ {
+ if (zone == 0)
+ {
+ cg0 = cg_start;
+ }
+ else
+ {
+ cg0 = zone2cg[zone];
+ }
- if (cg - cg0 >= zone_ncg1[zone])
++ cg1 = zone2cg[zone+1];
++ cg1_p1 = cg0 + zone_ncg1[zone];
++
++ for(cg=cg0; cg<cg1; cg++)
+ {
+ zone1 = zone;
- /* Signal that this cg is from more than one zone away */
++ if (cg >= cg1_p1)
+ {
- for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
++ /* Signal that this cg is from more than one pulse away */
+ zone1 += nzone;
+ }
+ cg_gl = index_gl[cg];
- gatindex[a] = a_gl;
- ga2la_set(dd->ga2la,a_gl,a,zone1);
++ if (bCGs)
++ {
++ for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
++ {
++ gatindex[a] = a_gl;
++ ga2la_set(dd->ga2la,a_gl,a,zone1);
++ a++;
++ }
++ }
++ else
+ {
- static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
++ gatindex[a] = cg_gl;
++ ga2la_set(dd->ga2la,cg_gl,a,zone1);
+ a++;
+ }
+ }
+ }
+}
+
+static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
+ const char *where)
+{
+ int ncg,i,ngl,nerr;
+
+ nerr = 0;
+ if (bLocalCG == NULL)
+ {
+ return nerr;
+ }
+ for(i=0; i<dd->ncg_tot; i++)
+ {
+ if (!bLocalCG[dd->index_gl[i]])
+ {
+ fprintf(stderr,
+ "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
+ nerr++;
+ }
+ }
+ ngl = 0;
+ for(i=0; i<ncg_sys; i++)
+ {
+ if (bLocalCG[i])
+ {
+ ngl++;
+ }
+ }
+ if (ngl != dd->ncg_tot)
+ {
+ fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
+ nerr++;
+ }
+
+ return nerr;
+}
+
+static void check_index_consistency(gmx_domdec_t *dd,
+ int natoms_sys,int ncg_sys,
+ const char *where)
+{
+ int nerr,ngl,i,a,cell;
+ int *have;
+
+ nerr = 0;
+
+ if (dd->comm->DD_debug > 1)
+ {
+ snew(have,natoms_sys);
+ for(a=0; a<dd->nat_tot; a++)
+ {
+ if (have[dd->gatindex[a]] > 0)
+ {
+ fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
+ }
+ else
+ {
+ have[dd->gatindex[a]] = a + 1;
+ }
+ }
+ sfree(have);
+ }
+
+ snew(have,dd->nat_tot);
+
+ ngl = 0;
+ for(i=0; i<natoms_sys; i++)
+ {
+ if (ga2la_get(dd->ga2la,i,&a,&cell))
+ {
+ if (a >= dd->nat_tot)
+ {
+ fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
+ nerr++;
+ }
+ else
+ {
+ have[a] = 1;
+ if (dd->gatindex[a] != i)
+ {
+ fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
+ nerr++;
+ }
+ }
+ ngl++;
+ }
+ }
+ if (ngl != dd->nat_tot)
+ {
+ fprintf(stderr,
+ "DD node %d, %s: %d global atom indices, %d local atoms\n",
+ dd->rank,where,ngl,dd->nat_tot);
+ }
+ for(a=0; a<dd->nat_tot; a++)
+ {
+ if (have[a] == 0)
+ {
+ fprintf(stderr,
+ "DD node %d, %s: local atom %d, global %d has no global index\n",
+ dd->rank,where,a+1,dd->gatindex[a]+1);
+ }
+ }
+ sfree(have);
+
+ nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
+
+ if (nerr > 0) {
+ gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
+ dd->rank,where,nerr);
+ }
+}
+
+static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
+{
+ int i;
+ char *bLocalCG;
+
+ if (a_start == 0)
+ {
+ /* Clear the whole list without searching */
+ ga2la_clear(dd->ga2la);
+ }
+ else
+ {
+ for(i=a_start; i<dd->nat_tot; i++)
+ {
+ ga2la_del(dd->ga2la,dd->gatindex[i]);
+ }
+ }
+
+ bLocalCG = dd->comm->bLocalCG;
+ if (bLocalCG)
+ {
+ for(i=cg_start; i<dd->ncg_tot; i++)
+ {
+ bLocalCG[dd->index_gl[i]] = FALSE;
+ }
+ }
+
+ dd_clear_local_vsite_indices(dd);
+
+ if (dd->constraints)
+ {
+ dd_clear_local_constraint_indices(dd);
+ }
+}
+
- comm->cutoff/comm->cd[dim_ind].np);
++static real grid_jump_limit(gmx_domdec_comm_t *comm,real cutoff,
++ int dim_ind)
+{
+ real grid_jump_limit;
+
+ /* The distance between the boundaries of cells at distance
+ * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
+ * and by the fact that cells should not be shifted by more than
+ * half their size, such that cg's only shift by one cell
+ * at redecomposition.
+ */
+ grid_jump_limit = comm->cellsize_limit;
+ if (!comm->bVacDLBNoLimit)
+ {
+ grid_jump_limit = max(grid_jump_limit,
- static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
++ cutoff/comm->cd[dim_ind].np);
+ }
+
+ return grid_jump_limit;
+}
+
-
++static gmx_bool check_grid_jump(gmx_large_int_t step,
++ gmx_domdec_t *dd,
++ real cutoff,
++ gmx_ddbox_t *ddbox,
++ gmx_bool bFatal)
+{
+ gmx_domdec_comm_t *comm;
+ int d,dim;
+ real limit,bfac;
- limit = grid_jump_limit(comm,d);
++ gmx_bool bInvalid;
++
++ bInvalid = FALSE;
++
+ comm = dd->comm;
+
+ for(d=1; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
- char buf[22];
- gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
- gmx_step_str(step,buf),
- dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
++ limit = grid_jump_limit(comm,cutoff,d);
+ bfac = ddbox->box_size[dim];
+ if (ddbox->tric_dir[dim])
+ {
+ bfac *= ddbox->skew_fac[dim];
+ }
+ if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac < limit ||
+ (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
+ {
- dist_min_f_hard = grid_jump_limit(comm,d)/ddbox->box_size[dim];
- dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
++ bInvalid = TRUE;
++
++ if (bFatal)
++ {
++ char buf[22];
++
++ /* This error should never be triggered under normal
++ * circumstances, but you never know ...
++ */
++ gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d. This should not have happened. Running with less nodes might avoid this issue.",
++ gmx_step_str(step,buf),
++ dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
++ }
+ }
+ }
++
++ return bInvalid;
+}
+
+static int dd_load_count(gmx_domdec_comm_t *comm)
+{
+ return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
+}
+
+static float dd_force_load(gmx_domdec_comm_t *comm)
+{
+ float load;
+
+ if (comm->eFlop)
+ {
+ load = comm->flop;
+ if (comm->eFlop > 1)
+ {
+ load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
+ }
+ }
+ else
+ {
+ load = comm->cycl[ddCyclF];
+ if (comm->cycl_n[ddCyclF] > 1)
+ {
+ /* Subtract the maximum of the last n cycle counts
+ * to get rid of possible high counts due to other soures,
+ * for instance system activity, that would otherwise
+ * affect the dynamic load balancing.
+ */
+ load -= comm->cycl_max[ddCyclF];
+ }
+ }
+
+ return load;
+}
+
+static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
+{
+ gmx_domdec_comm_t *comm;
+ int i;
+
+ comm = dd->comm;
+
+ snew(*dim_f,dd->nc[dim]+1);
+ (*dim_f)[0] = 0;
+ for(i=1; i<dd->nc[dim]; i++)
+ {
+ if (comm->slb_frac[dim])
+ {
+ (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
+ }
+ else
+ {
+ (*dim_f)[i] = (real)i/(real)dd->nc[dim];
+ }
+ }
+ (*dim_f)[dd->nc[dim]] = 1;
+}
+
+static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
+{
+ int pmeindex,slab,nso,i;
+ ivec xyz;
+
+ if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
+ {
+ ddpme->dim = YY;
+ }
+ else
+ {
+ ddpme->dim = dimind;
+ }
+ ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
+
+ ddpme->nslab = (ddpme->dim == 0 ?
+ dd->comm->npmenodes_x :
+ dd->comm->npmenodes_y);
+
+ if (ddpme->nslab <= 1)
+ {
+ return;
+ }
+
+ nso = dd->comm->npmenodes/ddpme->nslab;
+ /* Determine for each PME slab the PP location range for dimension dim */
+ snew(ddpme->pp_min,ddpme->nslab);
+ snew(ddpme->pp_max,ddpme->nslab);
+ for(slab=0; slab<ddpme->nslab; slab++) {
+ ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
+ ddpme->pp_max[slab] = 0;
+ }
+ for(i=0; i<dd->nnodes; i++) {
+ ddindex2xyz(dd->nc,i,xyz);
+ /* For y only use our y/z slab.
+ * This assumes that the PME x grid size matches the DD grid size.
+ */
+ if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
+ pmeindex = ddindex2pmeindex(dd,i);
+ if (dimind == 0) {
+ slab = pmeindex/nso;
+ } else {
+ slab = pmeindex % ddpme->nslab;
+ }
+ ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
+ ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
+ }
+ }
+
+ set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
+}
+
+int dd_pme_maxshift_x(gmx_domdec_t *dd)
+{
+ if (dd->comm->ddpme[0].dim == XX)
+ {
+ return dd->comm->ddpme[0].maxshift;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+int dd_pme_maxshift_y(gmx_domdec_t *dd)
+{
+ if (dd->comm->ddpme[0].dim == YY)
+ {
+ return dd->comm->ddpme[0].maxshift;
+ }
+ else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
+ {
+ return dd->comm->ddpme[1].maxshift;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
+ gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
+{
+ gmx_domdec_comm_t *comm;
+ int nc,ns,s;
+ int *xmin,*xmax;
+ real range,pme_boundary;
+ int sh;
+
+ comm = dd->comm;
+ nc = dd->nc[ddpme->dim];
+ ns = ddpme->nslab;
+
+ if (!ddpme->dim_match)
+ {
+ /* PP decomposition is not along dim: the worst situation */
+ sh = ns/2;
+ }
+ else if (ns <= 3 || (bUniform && ns == nc))
+ {
+ /* The optimal situation */
+ sh = 1;
+ }
+ else
+ {
+ /* We need to check for all pme nodes which nodes they
+ * could possibly need to communicate with.
+ */
+ xmin = ddpme->pp_min;
+ xmax = ddpme->pp_max;
+ /* Allow for atoms to be maximally 2/3 times the cut-off
+ * out of their DD cell. This is a reasonable balance between
+ * between performance and support for most charge-group/cut-off
+ * combinations.
+ */
+ range = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
+ /* Avoid extra communication when we are exactly at a boundary */
+ range *= 0.999;
+
+ sh = 1;
+ for(s=0; s<ns; s++)
+ {
+ /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
+ pme_boundary = (real)s/ns;
+ while (sh+1 < ns &&
+ ((s-(sh+1) >= 0 &&
+ cell_f[xmax[s-(sh+1) ]+1] + range > pme_boundary) ||
+ (s-(sh+1) < 0 &&
+ cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
+ {
+ sh++;
+ }
+ pme_boundary = (real)(s+1)/ns;
+ while (sh+1 < ns &&
+ ((s+(sh+1) < ns &&
+ cell_f[xmin[s+(sh+1) ] ] - range < pme_boundary) ||
+ (s+(sh+1) >= ns &&
+ cell_f[xmin[s+(sh+1)-ns] ] + 1 - range < pme_boundary)))
+ {
+ sh++;
+ }
+ }
+ }
+
+ ddpme->maxshift = sh;
+
+ if (debug)
+ {
+ fprintf(debug,"PME slab communication range for dim %d is %d\n",
+ ddpme->dim,ddpme->maxshift);
+ }
+}
+
+static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
+{
+ int d,dim;
+
+ for(d=0; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
+ if (dim < ddbox->nboundeddim &&
+ ddbox->box_size[dim]*ddbox->skew_fac[dim] <
+ dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
+ {
+ gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
+ dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
+ dd->nc[dim],dd->comm->cellsize_limit);
+ }
+ }
+}
+
+static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
+ gmx_bool bMaster,ivec npulse)
+{
+ gmx_domdec_comm_t *comm;
+ int d,j;
+ rvec cellsize_min;
+ real *cell_x,cell_dx,cellsize;
+
+ comm = dd->comm;
+
+ for(d=0; d<DIM; d++)
+ {
+ cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
+ npulse[d] = 1;
+ if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
+ {
+ /* Uniform grid */
+ cell_dx = ddbox->box_size[d]/dd->nc[d];
+ if (bMaster)
+ {
+ for(j=0; j<dd->nc[d]+1; j++)
+ {
+ dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
+ }
+ }
+ else
+ {
+ comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d] )*cell_dx;
+ comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
+ }
+ cellsize = cell_dx*ddbox->skew_fac[d];
+ while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
+ {
+ npulse[d]++;
+ }
+ cellsize_min[d] = cellsize;
+ }
+ else
+ {
+ /* Statically load balanced grid */
+ /* Also when we are not doing a master distribution we determine
+ * all cell borders in a loop to obtain identical values
+ * to the master distribution case and to determine npulse.
+ */
+ if (bMaster)
+ {
+ cell_x = dd->ma->cell_x[d];
+ }
+ else
+ {
+ snew(cell_x,dd->nc[d]+1);
+ }
+ cell_x[0] = ddbox->box0[d];
+ for(j=0; j<dd->nc[d]; j++)
+ {
+ cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
+ cell_x[j+1] = cell_x[j] + cell_dx;
+ cellsize = cell_dx*ddbox->skew_fac[d];
+ while (cellsize*npulse[d] < comm->cutoff &&
+ npulse[d] < dd->nc[d]-1)
+ {
+ npulse[d]++;
+ }
+ cellsize_min[d] = min(cellsize_min[d],cellsize);
+ }
+ if (!bMaster)
+ {
+ comm->cell_x0[d] = cell_x[dd->ci[d]];
+ comm->cell_x1[d] = cell_x[dd->ci[d]+1];
+ sfree(cell_x);
+ }
+ }
+ /* The following limitation is to avoid that a cell would receive
+ * some of its own home charge groups back over the periodic boundary.
+ * Double charge groups cause trouble with the global indices.
+ */
+ if (d < ddbox->npbcdim &&
+ dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
+ {
+ gmx_fatal_collective(FARGS,NULL,dd,
+ "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
+ dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
+ comm->cutoff,
+ dd->nc[d],dd->nc[d],
+ dd->nnodes > dd->nc[d] ? "cells" : "processors");
+ }
+ }
+
+ if (!comm->bDynLoadBal)
+ {
+ copy_rvec(cellsize_min,comm->cellsize_min);
+ }
+
+ for(d=0; d<comm->npmedecompdim; d++)
+ {
+ set_pme_maxshift(dd,&comm->ddpme[d],
+ comm->slb_frac[dd->dim[d]]==NULL,ddbox,
+ comm->ddpme[d].slb_dim_f);
+ }
+}
+
+
+static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
+ int d,int dim,gmx_domdec_root_t *root,
+ gmx_ddbox_t *ddbox,
+ gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
+{
+ gmx_domdec_comm_t *comm;
+ int ncd,i,j,nmin,nmin_old;
+ gmx_bool bLimLo,bLimHi;
+ real *cell_size;
+ real fac,halfway,cellsize_limit_f_i,region_size;
+ gmx_bool bPBC,bLastHi=FALSE;
+ int nrange[]={range[0],range[1]};
+
+ region_size= root->cell_f[range[1]]-root->cell_f[range[0]];
+
+ comm = dd->comm;
+
+ ncd = dd->nc[dim];
+
+ bPBC = (dim < ddbox->npbcdim);
+
+ cell_size = root->buf_ncd;
+
+ if (debug)
+ {
+ fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
+ }
+
+ /* First we need to check if the scaling does not make cells
+ * smaller than the smallest allowed size.
+ * We need to do this iteratively, since if a cell is too small,
+ * it needs to be enlarged, which makes all the other cells smaller,
+ * which could in turn make another cell smaller than allowed.
+ */
+ for(i=range[0]; i<range[1]; i++)
+ {
+ root->bCellMin[i] = FALSE;
+ }
+ nmin = 0;
+ do
+ {
+ nmin_old = nmin;
+ /* We need the total for normalization */
+ fac = 0;
+ for(i=range[0]; i<range[1]; i++)
+ {
+ if (root->bCellMin[i] == FALSE)
+ {
+ fac += cell_size[i];
+ }
+ }
+ fac = ( region_size - nmin*cellsize_limit_f)/fac; /* substracting cells already set to cellsize_limit_f */
+ /* Determine the cell boundaries */
+ for(i=range[0]; i<range[1]; i++)
+ {
+ if (root->bCellMin[i] == FALSE)
+ {
+ cell_size[i] *= fac;
+ if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
+ {
+ cellsize_limit_f_i = 0;
+ }
+ else
+ {
+ cellsize_limit_f_i = cellsize_limit_f;
+ }
+ if (cell_size[i] < cellsize_limit_f_i)
+ {
+ root->bCellMin[i] = TRUE;
+ cell_size[i] = cellsize_limit_f_i;
+ nmin++;
+ }
+ }
+ root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
+ }
+ }
+ while (nmin > nmin_old);
+
+ i=range[1]-1;
+ cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
+ /* For this check we should not use DD_CELL_MARGIN,
+ * but a slightly smaller factor,
+ * since rounding could get use below the limit.
+ */
+ if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
+ {
+ char buf[22];
+ gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
+ gmx_step_str(step,buf),
+ dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
+ ncd,comm->cellsize_min[dim]);
+ }
+
+ root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
+
+ if (!bUniform)
+ {
+ /* Check if the boundary did not displace more than halfway
+ * each of the cells it bounds, as this could cause problems,
+ * especially when the differences between cell sizes are large.
+ * If changes are applied, they will not make cells smaller
+ * than the cut-off, as we check all the boundaries which
+ * might be affected by a change and if the old state was ok,
+ * the cells will at most be shrunk back to their old size.
+ */
+ for(i=range[0]+1; i<range[1]; i++)
+ {
+ halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
+ if (root->cell_f[i] < halfway)
+ {
+ root->cell_f[i] = halfway;
+ /* Check if the change also causes shifts of the next boundaries */
+ for(j=i+1; j<range[1]; j++)
+ {
+ if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
+ root->cell_f[j] = root->cell_f[j-1] + cellsize_limit_f;
+ }
+ }
+ halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
+ if (root->cell_f[i] > halfway)
+ {
+ root->cell_f[i] = halfway;
+ /* Check if the change also causes shifts of the next boundaries */
+ for(j=i-1; j>=range[0]+1; j--)
+ {
+ if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
+ root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
+ }
+ }
+ }
+ }
+
+ /* nrange is defined as [lower, upper) range for new call to enforce_limits */
+ /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
+ * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
+ * for a and b nrange is used */
+ if (d > 0)
+ {
+ /* Take care of the staggering of the cell boundaries */
+ if (bUniform)
+ {
+ for(i=range[0]; i<range[1]; i++)
+ {
+ root->cell_f_max0[i] = root->cell_f[i];
+ root->cell_f_min1[i] = root->cell_f[i+1];
+ }
+ }
+ else
+ {
+ for(i=range[0]+1; i<range[1]; i++)
+ {
+ bLimLo = (root->cell_f[i] < root->bound_min[i]);
+ bLimHi = (root->cell_f[i] > root->bound_max[i]);
+ if (bLimLo && bLimHi)
+ {
+ /* Both limits violated, try the best we can */
+ /* For this case we split the original range (range) in two parts and care about the other limitiations in the next iteration. */
+ root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
+ nrange[0]=range[0];
+ nrange[1]=i;
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+
+ nrange[0]=i;
+ nrange[1]=range[1];
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+
+ return;
+ }
+ else if (bLimLo)
+ {
+ /* root->cell_f[i] = root->bound_min[i]; */
+ nrange[1]=i; /* only store violation location. There could be a LimLo violation following with an higher index */
+ bLastHi=FALSE;
+ }
+ else if (bLimHi && !bLastHi)
+ {
+ bLastHi=TRUE;
+ if (nrange[1] < range[1]) /* found a LimLo before */
+ {
+ root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ nrange[0]=nrange[1];
+ }
+ root->cell_f[i] = root->bound_max[i];
+ nrange[1]=i;
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ nrange[0]=i;
+ nrange[1]=range[1];
+ }
+ }
+ if (nrange[1] < range[1]) /* found last a LimLo */
+ {
+ root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ nrange[0]=nrange[1];
+ nrange[1]=range[1];
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ }
+ else if (nrange[0] > range[0]) /* found at least one LimHi */
+ {
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ }
+ }
+ }
+}
+
+
+static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
+ int d,int dim,gmx_domdec_root_t *root,
+ gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ gmx_bool bUniform,gmx_large_int_t step)
+{
+ gmx_domdec_comm_t *comm;
+ int ncd,d1,i,j,pos;
+ real *cell_size;
+ real load_aver,load_i,imbalance,change,change_max,sc;
+ real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
+ real change_limit;
+ real relax = 0.5;
+ gmx_bool bPBC;
+ int range[] = { 0, 0 };
+
+ comm = dd->comm;
+
+ /* Convert the maximum change from the input percentage to a fraction */
+ change_limit = comm->dlb_scale_lim*0.01;
+
+ ncd = dd->nc[dim];
+
+ bPBC = (dim < ddbox->npbcdim);
+
+ cell_size = root->buf_ncd;
+
+ /* Store the original boundaries */
+ for(i=0; i<ncd+1; i++)
+ {
+ root->old_cell_f[i] = root->cell_f[i];
+ }
+ if (bUniform) {
+ for(i=0; i<ncd; i++)
+ {
+ cell_size[i] = 1.0/ncd;
+ }
+ }
+ else if (dd_load_count(comm))
+ {
+ load_aver = comm->load[d].sum_m/ncd;
+ change_max = 0;
+ for(i=0; i<ncd; i++)
+ {
+ /* Determine the relative imbalance of cell i */
+ load_i = comm->load[d].load[i*comm->load[d].nload+2];
+ imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
+ /* Determine the change of the cell size using underrelaxation */
+ change = -relax*imbalance;
+ change_max = max(change_max,max(change,-change));
+ }
+ /* Limit the amount of scaling.
+ * We need to use the same rescaling for all cells in one row,
+ * otherwise the load balancing might not converge.
+ */
+ sc = relax;
+ if (change_max > change_limit)
+ {
+ sc *= change_limit/change_max;
+ }
+ for(i=0; i<ncd; i++)
+ {
+ /* Determine the relative imbalance of cell i */
+ load_i = comm->load[d].load[i*comm->load[d].nload+2];
+ imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
+ /* Determine the change of the cell size using underrelaxation */
+ change = -sc*imbalance;
+ cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
+ }
+ }
+
+ cellsize_limit_f = comm->cellsize_min[dim]/ddbox->box_size[dim];
+ cellsize_limit_f *= DD_CELL_MARGIN;
- check_grid_jump(step,dd,ddbox);
++ dist_min_f_hard = grid_jump_limit(comm,comm->cutoff,d)/ddbox->box_size[dim];
++ dist_min_f = dist_min_f_hard * DD_CELL_MARGIN;
+ if (ddbox->tric_dir[dim])
+ {
+ cellsize_limit_f /= ddbox->skew_fac[dim];
+ dist_min_f /= ddbox->skew_fac[dim];
+ }
+ if (bDynamicBox && d > 0)
+ {
+ dist_min_f *= DD_PRES_SCALE_MARGIN;
+ }
+ if (d > 0 && !bUniform)
+ {
+ /* Make sure that the grid is not shifted too much */
+ for(i=1; i<ncd; i++) {
+ if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard)
+ {
+ gmx_incons("Inconsistent DD boundary staggering limits!");
+ }
+ root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
+ space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
+ if (space > 0) {
+ root->bound_min[i] += 0.5*space;
+ }
+ root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
+ space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
+ if (space < 0) {
+ root->bound_max[i] += 0.5*space;
+ }
+ if (debug)
+ {
+ fprintf(debug,
+ "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
+ d,i,
+ root->cell_f_max0[i-1] + dist_min_f,
+ root->bound_min[i],root->cell_f[i],root->bound_max[i],
+ root->cell_f_min1[i] - dist_min_f);
+ }
+ }
+ }
+ range[1]=ncd;
+ root->cell_f[0] = 0;
+ root->cell_f[ncd] = 1;
+ dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
+
+
+ /* After the checks above, the cells should obey the cut-off
+ * restrictions, but it does not hurt to check.
+ */
+ for(i=0; i<ncd; i++)
+ {
+ if (debug)
+ {
+ fprintf(debug,"Relative bounds dim %d cell %d: %f %f\n",
+ dim,i,root->cell_f[i],root->cell_f[i+1]);
+ }
+
+ if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
+ root->cell_f[i+1] - root->cell_f[i] <
+ cellsize_limit_f/DD_CELL_MARGIN)
+ {
+ char buf[22];
+ fprintf(stderr,
+ "\nWARNING step %s: direction %c, cell %d too small: %f\n",
+ gmx_step_str(step,buf),dim2char(dim),i,
+ (root->cell_f[i+1] - root->cell_f[i])
+ *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
+ }
+ }
+
+ pos = ncd + 1;
+ /* Store the cell boundaries of the lower dimensions at the end */
+ for(d1=0; d1<d; d1++)
+ {
+ root->cell_f[pos++] = comm->cell_f0[d1];
+ root->cell_f[pos++] = comm->cell_f1[d1];
+ }
+
+ if (d < comm->npmedecompdim)
+ {
+ /* The master determines the maximum shift for
+ * the coordinate communication between separate PME nodes.
+ */
+ set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
+ }
+ root->cell_f[pos++] = comm->ddpme[0].maxshift;
+ if (d >= 1)
+ {
+ root->cell_f[pos++] = comm->ddpme[1].maxshift;
+ }
+}
+
+static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
+ gmx_ddbox_t *ddbox,int dimind)
+{
+ gmx_domdec_comm_t *comm;
+ int dim;
+
+ comm = dd->comm;
+
+ /* Set the cell dimensions */
+ dim = dd->dim[dimind];
+ comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
+ comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
+ if (dim >= ddbox->nboundeddim)
+ {
+ comm->cell_x0[dim] += ddbox->box0[dim];
+ comm->cell_x1[dim] += ddbox->box0[dim];
+ }
+}
+
+static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
+ int d,int dim,real *cell_f_row,
+ gmx_ddbox_t *ddbox)
+{
+ gmx_domdec_comm_t *comm;
+ int d1,dim1,pos;
+
+ comm = dd->comm;
+
+#ifdef GMX_MPI
+ /* Each node would only need to know two fractions,
+ * but it is probably cheaper to broadcast the whole array.
+ */
+ MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
+ 0,comm->mpi_comm_load[d]);
+#endif
+ /* Copy the fractions for this dimension from the buffer */
+ comm->cell_f0[d] = cell_f_row[dd->ci[dim] ];
+ comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
+ /* The whole array was communicated, so set the buffer position */
+ pos = dd->nc[dim] + 1;
+ for(d1=0; d1<=d; d1++)
+ {
+ if (d1 < d)
+ {
+ /* Copy the cell fractions of the lower dimensions */
+ comm->cell_f0[d1] = cell_f_row[pos++];
+ comm->cell_f1[d1] = cell_f_row[pos++];
+ }
+ relative_to_absolute_cell_bounds(dd,ddbox,d1);
+ }
+ /* Convert the communicated shift from float to int */
+ comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
+ if (d >= 1)
+ {
+ comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
+ }
+}
+
+static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
+ gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ gmx_bool bUniform,gmx_large_int_t step)
+{
+ gmx_domdec_comm_t *comm;
+ int d,dim,d1;
+ gmx_bool bRowMember,bRowRoot;
+ real *cell_f_row;
+
+ comm = dd->comm;
+
+ for(d=0; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
+ bRowMember = TRUE;
+ bRowRoot = TRUE;
+ for(d1=d; d1<dd->ndim; d1++)
+ {
+ if (dd->ci[dd->dim[d1]] > 0)
+ {
+ if (d1 > d)
+ {
+ bRowMember = FALSE;
+ }
+ bRowRoot = FALSE;
+ }
+ }
+ if (bRowMember)
+ {
+ if (bRowRoot)
+ {
+ set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
+ ddbox,bDynamicBox,bUniform,step);
+ cell_f_row = comm->root[d]->cell_f;
+ }
+ else
+ {
+ cell_f_row = comm->cell_f_row;
+ }
+ distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
+ }
+ }
+}
+
+static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
+{
+ int d;
+
+ /* This function assumes the box is static and should therefore
+ * not be called when the box has changed since the last
+ * call to dd_partition_system.
+ */
+ for(d=0; d<dd->ndim; d++)
+ {
+ relative_to_absolute_cell_bounds(dd,ddbox,d);
+ }
+}
+
+
+
+static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
+ gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
+ gmx_wallcycle_t wcycle)
+{
+ gmx_domdec_comm_t *comm;
+ int dim;
+
+ comm = dd->comm;
+
+ if (bDoDLB)
+ {
+ wallcycle_start(wcycle,ewcDDCOMMBOUND);
+ set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
+ wallcycle_stop(wcycle,ewcDDCOMMBOUND);
+ }
+ else if (bDynamicBox)
+ {
+ set_dd_cell_sizes_dlb_nochange(dd,ddbox);
+ }
+
+ /* Set the dimensions for which no DD is used */
+ for(dim=0; dim<DIM; dim++) {
+ if (dd->nc[dim] == 1) {
+ comm->cell_x0[dim] = 0;
+ comm->cell_x1[dim] = ddbox->box_size[dim];
+ if (dim >= ddbox->nboundeddim)
+ {
+ comm->cell_x0[dim] += ddbox->box0[dim];
+ comm->cell_x1[dim] += ddbox->box0[dim];
+ }
+ }
+ }
+}
+
+static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
+{
+ int d,np,i;
+ gmx_domdec_comm_dim_t *cd;
+
+ for(d=0; d<dd->ndim; d++)
+ {
+ cd = &dd->comm->cd[d];
+ np = npulse[dd->dim[d]];
+ if (np > cd->np_nalloc)
+ {
+ if (debug)
+ {
+ fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
+ dim2char(dd->dim[d]),np);
+ }
+ if (DDMASTER(dd) && cd->np_nalloc > 0)
+ {
+ fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
+ }
+ srenew(cd->ind,np);
+ for(i=cd->np_nalloc; i<np; i++)
+ {
+ cd->ind[i].index = NULL;
+ cd->ind[i].nalloc = 0;
+ }
+ cd->np_nalloc = np;
+ }
+ cd->np = np;
+ }
+}
+
+
+static void set_dd_cell_sizes(gmx_domdec_t *dd,
+ gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
+ gmx_wallcycle_t wcycle)
+{
+ gmx_domdec_comm_t *comm;
+ int d;
+ ivec npulse;
+
+ comm = dd->comm;
+
+ /* Copy the old cell boundaries for the cg displacement check */
+ copy_rvec(comm->cell_x0,comm->old_cell_x0);
+ copy_rvec(comm->cell_x1,comm->old_cell_x1);
+
+ if (comm->bDynLoadBal)
+ {
+ if (DDMASTER(dd))
+ {
+ check_box_size(dd,ddbox);
+ }
+ set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
+ }
+ else
+ {
+ set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
+ realloc_comm_ind(dd,npulse);
+ }
+
+ if (debug)
+ {
+ for(d=0; d<DIM; d++)
+ {
+ fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
+ d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
+ }
+ }
+}
+
+static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
+ gmx_ddbox_t *ddbox,
+ rvec cell_ns_x0,rvec cell_ns_x1,
+ gmx_large_int_t step)
+{
+ gmx_domdec_comm_t *comm;
+ int dim_ind,dim;
+
+ comm = dd->comm;
+
+ for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
+ {
+ dim = dd->dim[dim_ind];
+
+ /* Without PBC we don't have restrictions on the outer cells */
+ if (!(dim >= ddbox->npbcdim &&
+ (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
+ comm->bDynLoadBal &&
+ (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
+ comm->cellsize_min[dim])
+ {
+ char buf[22];
+ gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
+ gmx_step_str(step,buf),dim2char(dim),
+ comm->cell_x1[dim] - comm->cell_x0[dim],
+ ddbox->skew_fac[dim],
+ dd->comm->cellsize_min[dim],
+ dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
+ }
+ }
+
+ if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
+ {
+ /* Communicate the boundaries and update cell_ns_x0/1 */
+ dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
+ if (dd->bGridJump && dd->ndim > 1)
+ {
- * Here we set it to -1.
- * fill_grid will change it from -1 to 4*grid->ncells.
++ check_grid_jump(step,dd,dd->comm->cutoff,ddbox,TRUE);
+ }
+ }
+}
+
+static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
+{
+ if (YY < npbcdim)
+ {
+ tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
+ }
+ else
+ {
+ tcm[YY][XX] = 0;
+ }
+ if (ZZ < npbcdim)
+ {
+ tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
+ tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
+ }
+ else
+ {
+ tcm[ZZ][XX] = 0;
+ tcm[ZZ][YY] = 0;
+ }
+}
+
+static void check_screw_box(matrix box)
+{
+ /* Mathematical limitation */
+ if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
+ {
+ gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
+ }
+
+ /* Limitation due to the asymmetry of the eighth shell method */
+ if (box[ZZ][YY] != 0)
+ {
+ gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
+ }
+}
+
+static void distribute_cg(FILE *fplog,gmx_large_int_t step,
+ matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
+ gmx_domdec_t *dd)
+{
+ gmx_domdec_master_t *ma;
+ int **tmp_ind=NULL,*tmp_nalloc=NULL;
+ int i,icg,j,k,k0,k1,d,npbcdim;
+ matrix tcm;
+ rvec box_size,cg_cm;
+ ivec ind;
+ real nrcg,inv_ncg,pos_d;
+ atom_id *cgindex;
+ gmx_bool bUnbounded,bScrew;
+
+ ma = dd->ma;
+
+ if (tmp_ind == NULL)
+ {
+ snew(tmp_nalloc,dd->nnodes);
+ snew(tmp_ind,dd->nnodes);
+ for(i=0; i<dd->nnodes; i++)
+ {
+ tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
+ snew(tmp_ind[i],tmp_nalloc[i]);
+ }
+ }
+
+ /* Clear the count */
+ for(i=0; i<dd->nnodes; i++)
+ {
+ ma->ncg[i] = 0;
+ ma->nat[i] = 0;
+ }
+
+ make_tric_corr_matrix(dd->npbcdim,box,tcm);
+
+ cgindex = cgs->index;
+
+ /* Compute the center of geometry for all charge groups */
+ for(icg=0; icg<cgs->nr; icg++)
+ {
+ k0 = cgindex[icg];
+ k1 = cgindex[icg+1];
+ nrcg = k1 - k0;
+ if (nrcg == 1)
+ {
+ copy_rvec(pos[k0],cg_cm);
+ }
+ else
+ {
+ inv_ncg = 1.0/nrcg;
+
+ clear_rvec(cg_cm);
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_inc(cg_cm,pos[k]);
+ }
+ for(d=0; (d<DIM); d++)
+ {
+ cg_cm[d] *= inv_ncg;
+ }
+ }
+ /* Put the charge group in the box and determine the cell index */
+ for(d=DIM-1; d>=0; d--) {
+ pos_d = cg_cm[d];
+ if (d < dd->npbcdim)
+ {
+ bScrew = (dd->bScrewPBC && d == XX);
+ if (tric_dir[d] && dd->nc[d] > 1)
+ {
+ /* Use triclinic coordintates for this dimension */
+ for(j=d+1; j<DIM; j++)
+ {
+ pos_d += cg_cm[j]*tcm[j][d];
+ }
+ }
+ while(pos_d >= box[d][d])
+ {
+ pos_d -= box[d][d];
+ rvec_dec(cg_cm,box[d]);
+ if (bScrew)
+ {
+ cg_cm[YY] = box[YY][YY] - cg_cm[YY];
+ cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
+ }
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_dec(pos[k],box[d]);
+ if (bScrew)
+ {
+ pos[k][YY] = box[YY][YY] - pos[k][YY];
+ pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
+ }
+ }
+ }
+ while(pos_d < 0)
+ {
+ pos_d += box[d][d];
+ rvec_inc(cg_cm,box[d]);
+ if (bScrew)
+ {
+ cg_cm[YY] = box[YY][YY] - cg_cm[YY];
+ cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
+ }
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_inc(pos[k],box[d]);
+ if (bScrew) {
+ pos[k][YY] = box[YY][YY] - pos[k][YY];
+ pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
+ }
+ }
+ }
+ }
+ /* This could be done more efficiently */
+ ind[d] = 0;
+ while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
+ {
+ ind[d]++;
+ }
+ }
+ i = dd_index(dd->nc,ind);
+ if (ma->ncg[i] == tmp_nalloc[i])
+ {
+ tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
+ srenew(tmp_ind[i],tmp_nalloc[i]);
+ }
+ tmp_ind[i][ma->ncg[i]] = icg;
+ ma->ncg[i]++;
+ ma->nat[i] += cgindex[icg+1] - cgindex[icg];
+ }
+
+ k1 = 0;
+ for(i=0; i<dd->nnodes; i++)
+ {
+ ma->index[i] = k1;
+ for(k=0; k<ma->ncg[i]; k++)
+ {
+ ma->cg[k1++] = tmp_ind[i][k];
+ }
+ }
+ ma->index[dd->nnodes] = k1;
+
+ for(i=0; i<dd->nnodes; i++)
+ {
+ sfree(tmp_ind[i]);
+ }
+ sfree(tmp_ind);
+ sfree(tmp_nalloc);
+
+ if (fplog)
+ {
+ char buf[22];
+ fprintf(fplog,"Charge group distribution at step %s:",
+ gmx_step_str(step,buf));
+ for(i=0; i<dd->nnodes; i++)
+ {
+ fprintf(fplog," %d",ma->ncg[i]);
+ }
+ fprintf(fplog,"\n");
+ }
+}
+
+static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
+ t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
+ rvec pos[])
+{
+ gmx_domdec_master_t *ma=NULL;
+ ivec npulse;
+ int i,cg_gl;
+ int *ibuf,buf2[2] = { 0, 0 };
+ gmx_bool bMaster = DDMASTER(dd);
+ if (bMaster)
+ {
+ ma = dd->ma;
+
+ if (dd->bScrewPBC)
+ {
+ check_screw_box(box);
+ }
+
+ set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
+
+ distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
+ for(i=0; i<dd->nnodes; i++)
+ {
+ ma->ibuf[2*i] = ma->ncg[i];
+ ma->ibuf[2*i+1] = ma->nat[i];
+ }
+ ibuf = ma->ibuf;
+ }
+ else
+ {
+ ibuf = NULL;
+ }
+ dd_scatter(dd,2*sizeof(int),ibuf,buf2);
+
+ dd->ncg_home = buf2[0];
+ dd->nat_home = buf2[1];
+ dd->ncg_tot = dd->ncg_home;
+ dd->nat_tot = dd->nat_home;
+ if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
+ {
+ dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
+ srenew(dd->index_gl,dd->cg_nalloc);
+ srenew(dd->cgindex,dd->cg_nalloc+1);
+ }
+ if (bMaster)
+ {
+ for(i=0; i<dd->nnodes; i++)
+ {
+ ma->ibuf[i] = ma->ncg[i]*sizeof(int);
+ ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
+ }
+ }
+
+ dd_scatterv(dd,
+ DDMASTER(dd) ? ma->ibuf : NULL,
+ DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
+ DDMASTER(dd) ? ma->cg : NULL,
+ dd->ncg_home*sizeof(int),dd->index_gl);
+
+ /* Determine the home charge group sizes */
+ dd->cgindex[0] = 0;
+ for(i=0; i<dd->ncg_home; i++)
+ {
+ cg_gl = dd->index_gl[i];
+ dd->cgindex[i+1] =
+ dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
+ }
+
+ if (debug)
+ {
+ fprintf(debug,"Home charge groups:\n");
+ for(i=0; i<dd->ncg_home; i++)
+ {
+ fprintf(debug," %d",dd->index_gl[i]);
+ if (i % 10 == 9)
+ fprintf(debug,"\n");
+ }
+ fprintf(debug,"\n");
+ }
+}
+
+static int compact_and_copy_vec_at(int ncg,int *move,
+ int *cgindex,
+ int nvec,int vec,
+ rvec *src,gmx_domdec_comm_t *comm,
+ gmx_bool bCompact)
+{
+ int m,icg,i,i0,i1,nrcg;
+ int home_pos;
+ int pos_vec[DIM*2];
+
+ home_pos = 0;
+
+ for(m=0; m<DIM*2; m++)
+ {
+ pos_vec[m] = 0;
+ }
+
+ i0 = 0;
+ for(icg=0; icg<ncg; icg++)
+ {
+ i1 = cgindex[icg+1];
+ m = move[icg];
+ if (m == -1)
+ {
+ if (bCompact)
+ {
+ /* Compact the home array in place */
+ for(i=i0; i<i1; i++)
+ {
+ copy_rvec(src[i],src[home_pos++]);
+ }
+ }
+ }
+ else
+ {
+ /* Copy to the communication buffer */
+ nrcg = i1 - i0;
+ pos_vec[m] += 1 + vec*nrcg;
+ for(i=i0; i<i1; i++)
+ {
+ copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
+ }
+ pos_vec[m] += (nvec - vec - 1)*nrcg;
+ }
+ if (!bCompact)
+ {
+ home_pos += i1 - i0;
+ }
+ i0 = i1;
+ }
+
+ return home_pos;
+}
+
+static int compact_and_copy_vec_cg(int ncg,int *move,
+ int *cgindex,
+ int nvec,rvec *src,gmx_domdec_comm_t *comm,
+ gmx_bool bCompact)
+{
+ int m,icg,i0,i1,nrcg;
+ int home_pos;
+ int pos_vec[DIM*2];
+
+ home_pos = 0;
+
+ for(m=0; m<DIM*2; m++)
+ {
+ pos_vec[m] = 0;
+ }
+
+ i0 = 0;
+ for(icg=0; icg<ncg; icg++)
+ {
+ i1 = cgindex[icg+1];
+ m = move[icg];
+ if (m == -1)
+ {
+ if (bCompact)
+ {
+ /* Compact the home array in place */
+ copy_rvec(src[icg],src[home_pos++]);
+ }
+ }
+ else
+ {
+ nrcg = i1 - i0;
+ /* Copy to the communication buffer */
+ copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
+ pos_vec[m] += 1 + nrcg*nvec;
+ }
+ i0 = i1;
+ }
+ if (!bCompact)
+ {
+ home_pos = ncg;
+ }
+
+ return home_pos;
+}
+
+static int compact_ind(int ncg,int *move,
+ int *index_gl,int *cgindex,
+ int *gatindex,
+ gmx_ga2la_t ga2la,char *bLocalCG,
+ int *cginfo)
+{
+ int cg,nat,a0,a1,a,a_gl;
+ int home_pos;
+
+ home_pos = 0;
+ nat = 0;
+ for(cg=0; cg<ncg; cg++)
+ {
+ a0 = cgindex[cg];
+ a1 = cgindex[cg+1];
+ if (move[cg] == -1)
+ {
+ /* Compact the home arrays in place.
+ * Anything that can be done here avoids access to global arrays.
+ */
+ cgindex[home_pos] = nat;
+ for(a=a0; a<a1; a++)
+ {
+ a_gl = gatindex[a];
+ gatindex[nat] = a_gl;
+ /* The cell number stays 0, so we don't need to set it */
+ ga2la_change_la(ga2la,a_gl,nat);
+ nat++;
+ }
+ index_gl[home_pos] = index_gl[cg];
+ cginfo[home_pos] = cginfo[cg];
+ /* The charge group remains local, so bLocalCG does not change */
+ home_pos++;
+ }
+ else
+ {
+ /* Clear the global indices */
+ for(a=a0; a<a1; a++)
+ {
+ ga2la_del(ga2la,gatindex[a]);
+ }
+ if (bLocalCG)
+ {
+ bLocalCG[index_gl[cg]] = FALSE;
+ }
+ }
+ }
+ cgindex[home_pos] = nat;
+
+ return home_pos;
+}
+
+static void clear_and_mark_ind(int ncg,int *move,
+ int *index_gl,int *cgindex,int *gatindex,
+ gmx_ga2la_t ga2la,char *bLocalCG,
+ int *cell_index)
+{
+ int cg,a0,a1,a;
+
+ for(cg=0; cg<ncg; cg++)
+ {
+ if (move[cg] >= 0)
+ {
+ a0 = cgindex[cg];
+ a1 = cgindex[cg+1];
+ /* Clear the global indices */
+ for(a=a0; a<a1; a++)
+ {
+ ga2la_del(ga2la,gatindex[a]);
+ }
+ if (bLocalCG)
+ {
+ bLocalCG[index_gl[cg]] = FALSE;
+ }
+ /* Signal that this cg has moved using the ns cell index.
- static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
- gmx_domdec_t *dd,ivec tric_dir,
- t_state *state,rvec **f,
- t_forcerec *fr,t_mdatoms *md,
- gmx_bool bCompact,
- t_nrnb *nrnb)
++ * Here we set it to -1. fill_grid will change it
++ * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
+ */
+ cell_index[cg] = -1;
+ }
+ }
+}
+
+static void print_cg_move(FILE *fplog,
+ gmx_domdec_t *dd,
+ gmx_large_int_t step,int cg,int dim,int dir,
+ gmx_bool bHaveLimitdAndCMOld,real limitd,
+ rvec cm_old,rvec cm_new,real pos_d)
+{
+ gmx_domdec_comm_t *comm;
+ char buf[22];
+
+ comm = dd->comm;
+
+ fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
+ if (bHaveLimitdAndCMOld)
+ {
+ fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
+ ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
+ }
+ else
+ {
+        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition in direction %c\n",
+ ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
+ }
+ fprintf(fplog,"distance out of cell %f\n",
+ dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
+ if (bHaveLimitdAndCMOld)
+ {
+ fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
+ cm_old[XX],cm_old[YY],cm_old[ZZ]);
+ }
+ fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
+ cm_new[XX],cm_new[YY],cm_new[ZZ]);
+ fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
+ dim2char(dim),
+ comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
+ fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
+ dim2char(dim),
+ comm->cell_x0[dim],comm->cell_x1[dim]);
+}
+
+static void cg_move_error(FILE *fplog,
+ gmx_domdec_t *dd,
+ gmx_large_int_t step,int cg,int dim,int dir,
+ gmx_bool bHaveLimitdAndCMOld,real limitd,
+ rvec cm_old,rvec cm_new,real pos_d)
+{
+ if (fplog)
+ {
+ print_cg_move(fplog, dd,step,cg,dim,dir,
+ bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
+ }
+ print_cg_move(stderr,dd,step,cg,dim,dir,
+ bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
+ gmx_fatal(FARGS,
+ "A charge group moved too far between two domain decomposition steps\n"
+ "This usually means that your system is not well equilibrated");
+}
+
+static void rotate_state_atom(t_state *state,int a)
+{
+ int est;
+
+ for(est=0; est<estNR; est++)
+ {
+ if (EST_DISTR(est) && (state->flags & (1<<est))) {
+ switch (est) {
+ case estX:
+ /* Rotate the complete state; for a rectangular box only */
+ state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
+ state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
+ break;
+ case estV:
+ state->v[a][YY] = -state->v[a][YY];
+ state->v[a][ZZ] = -state->v[a][ZZ];
+ break;
+ case estSDX:
+ state->sd_X[a][YY] = -state->sd_X[a][YY];
+ state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
+ break;
+ case estCGP:
+ state->cg_p[a][YY] = -state->cg_p[a][YY];
+ state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
+ break;
+ case estDISRE_INITF:
+ case estDISRE_RM3TAV:
+ case estORIRE_INITF:
+ case estORIRE_DTAV:
+ /* These are distances, so not affected by rotation */
+ break;
+ default:
+ gmx_incons("Unknown state entry encountered in rotate_state_atom");
+ }
+ }
+ }
+}
+
- int *move;
++static int *get_moved(gmx_domdec_comm_t *comm,int natoms)
++{
++ if (natoms > comm->moved_nalloc)
++ {
++ /* Contents should be preserved here */
++ comm->moved_nalloc = over_alloc_dd(natoms);
++ srenew(comm->moved,comm->moved_nalloc);
++ }
++
++ return comm->moved;
++}
++
++static void calc_cg_move(FILE *fplog,gmx_large_int_t step,
++ gmx_domdec_t *dd,
++ t_state *state,
++ ivec tric_dir,matrix tcm,
++ rvec cell_x0,rvec cell_x1,
++ rvec limitd,rvec limit0,rvec limit1,
++ const int *cgindex,
++ int cg_start,int cg_end,
++ rvec *cg_cm,
++ int *move)
+{
- int ncg[DIM*2],nat[DIM*2];
+ int npbcdim;
- int sbuf[2],rbuf[2];
- int home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
+ int c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
+ int mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
- gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
+ int flag;
- matrix tcm;
- rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
- atom_id *cgindex;
- cginfo_mb_t *cginfo_mb;
- gmx_domdec_comm_t *comm;
-
- if (dd->bScrewPBC)
- {
- check_screw_box(state->box);
- }
-
- comm = dd->comm;
- cg_cm = fr->cg_cm;
-
- for(i=0; i<estNR; i++)
- {
- if (EST_DISTR(i))
- {
- switch (i)
- {
- case estX: /* Always present */ break;
- case estV: bV = (state->flags & (1<<i)); break;
- case estSDX: bSDX = (state->flags & (1<<i)); break;
- case estCGP: bCGP = (state->flags & (1<<i)); break;
- case estLD_RNG:
- case estLD_RNGI:
- case estDISRE_INITF:
- case estDISRE_RM3TAV:
- case estORIRE_INITF:
- case estORIRE_DTAV:
- /* No processing required */
- break;
- default:
- gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
- }
- }
- }
-
- if (dd->ncg_tot > comm->nalloc_int)
- {
- comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
- srenew(comm->buf_int,comm->nalloc_int);
- }
- move = comm->buf_int;
-
- /* Clear the count */
- for(c=0; c<dd->ndim*2; c++)
- {
- ncg[c] = 0;
- nat[c] = 0;
- }
+ gmx_bool bScrew;
+ ivec dev;
+ real inv_ncg,pos_d;
- for(d=0; (d<DIM); d++)
- {
- limitd[d] = dd->comm->cellsize_min[d];
- if (d >= npbcdim && dd->ci[d] == 0)
- {
- cell_x0[d] = -GMX_FLOAT_MAX;
- }
- else
- {
- cell_x0[d] = comm->cell_x0[d];
- }
- if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
- {
- cell_x1[d] = GMX_FLOAT_MAX;
- }
- else
- {
- cell_x1[d] = comm->cell_x1[d];
- }
- if (d < npbcdim)
- {
- limit0[d] = comm->old_cell_x0[d] - limitd[d];
- limit1[d] = comm->old_cell_x1[d] + limitd[d];
- }
- else
- {
- /* We check after communication if a charge group moved
- * more than one cell. Set the pre-comm check limit to float_max.
- */
- limit0[d] = -GMX_FLOAT_MAX;
- limit1[d] = GMX_FLOAT_MAX;
- }
- }
-
- make_tric_corr_matrix(npbcdim,state->box,tcm);
-
- cgindex = dd->cgindex;
-
- /* Compute the center of geometry for all home charge groups
- * and put them in the box and determine where they should go.
- */
- for(cg=0; cg<dd->ncg_home; cg++)
++ rvec cm_new;
+
+ npbcdim = dd->npbcdim;
+
- move[cg] = mc;
- if (mc >= 0)
- {
- if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
- {
- comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
- srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
- }
- comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
- /* We store the cg size in the lower 16 bits
- * and the place where the charge group should go
- * in the next 6 bits. This saves some communication volume.
- */
- comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
- ncg[mc] += 1;
- nat[mc] += nrcg;
- }
++ for(cg=cg_start; cg<cg_end; cg++)
+ {
+ k0 = cgindex[cg];
+ k1 = cgindex[cg+1];
+ nrcg = k1 - k0;
+ if (nrcg == 1)
+ {
+ copy_rvec(state->x[k0],cm_new);
+ }
+ else
+ {
+ inv_ncg = 1.0/nrcg;
+
+ clear_rvec(cm_new);
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_inc(cm_new,state->x[k]);
+ }
+ for(d=0; (d<DIM); d++)
+ {
+ cm_new[d] = inv_ncg*cm_new[d];
+ }
+ }
+
+ clear_ivec(dev);
+ /* Do pbc and check DD cell boundary crossings */
+ for(d=DIM-1; d>=0; d--)
+ {
+ if (dd->nc[d] > 1)
+ {
+ bScrew = (dd->bScrewPBC && d == XX);
+ /* Determine the location of this cg in lattice coordinates */
+ pos_d = cm_new[d];
+ if (tric_dir[d])
+ {
+ for(d2=d+1; d2<DIM; d2++)
+ {
+ pos_d += cm_new[d2]*tcm[d2][d];
+ }
+ }
+ /* Put the charge group in the triclinic unit-cell */
+ if (pos_d >= cell_x1[d])
+ {
+ if (pos_d >= limit1[d])
+ {
+ cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
+ cg_cm[cg],cm_new,pos_d);
+ }
+ dev[d] = 1;
+ if (dd->ci[d] == dd->nc[d] - 1)
+ {
+ rvec_dec(cm_new,state->box[d]);
+ if (bScrew)
+ {
+ cm_new[YY] = state->box[YY][YY] - cm_new[YY];
+ cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
+ }
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_dec(state->x[k],state->box[d]);
+ if (bScrew)
+ {
+ rotate_state_atom(state,k);
+ }
+ }
+ }
+ }
+ else if (pos_d < cell_x0[d])
+ {
+ if (pos_d < limit0[d])
+ {
+ cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
+ cg_cm[cg],cm_new,pos_d);
+ }
+ dev[d] = -1;
+ if (dd->ci[d] == 0)
+ {
+ rvec_inc(cm_new,state->box[d]);
+ if (bScrew)
+ {
+ cm_new[YY] = state->box[YY][YY] - cm_new[YY];
+ cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
+ }
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_inc(state->x[k],state->box[d]);
+ if (bScrew)
+ {
+ rotate_state_atom(state,k);
+ }
+ }
+ }
+ }
+ }
+ else if (d < npbcdim)
+ {
+ /* Put the charge group in the rectangular unit-cell */
+ while (cm_new[d] >= state->box[d][d])
+ {
+ rvec_dec(cm_new,state->box[d]);
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_dec(state->x[k],state->box[d]);
+ }
+ }
+ while (cm_new[d] < 0)
+ {
+ rvec_inc(cm_new,state->box[d]);
+ for(k=k0; (k<k1); k++)
+ {
+ rvec_inc(state->x[k],state->box[d]);
+ }
+ }
+ }
+ }
+
+ copy_rvec(cm_new,cg_cm[cg]);
+
+ /* Determine where this cg should go */
+ flag = 0;
+ mc = -1;
+ for(d=0; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
+ if (dev[dim] == 1)
+ {
+ flag |= DD_FLAG_FW(d);
+ if (mc == -1)
+ {
+ mc = d*2;
+ }
+ }
+ else if (dev[dim] == -1)
+ {
+ flag |= DD_FLAG_BW(d);
+ if (mc == -1) {
+ if (dd->nc[dim] > 2)
+ {
+ mc = d*2 + 1;
+ }
+ else
+ {
+ mc = d*2;
+ }
+ }
+ }
+ }
- inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
- inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
-
- nvec = 1;
- if (bV)
- {
- nvec++;
- }
- if (bSDX)
++ /* Temporarily store the flag in move */
++ move[cg] = mc + flag;
+ }
++}
++
++static void dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
++ gmx_domdec_t *dd,ivec tric_dir,
++ t_state *state,rvec **f,
++ t_forcerec *fr,t_mdatoms *md,
++ gmx_bool bCompact,
++ t_nrnb *nrnb,
++ int *ncg_stay_home,
++ int *ncg_moved)
++{
++ int *move;
++ int npbcdim;
++ int ncg[DIM*2],nat[DIM*2];
++ int c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
++ int mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
++ int sbuf[2],rbuf[2];
++ int home_pos_cg,home_pos_at,buf_pos;
++ int flag;
++ gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
++ gmx_bool bScrew;
++ ivec dev;
++ real inv_ncg,pos_d;
++ matrix tcm;
++ rvec *cg_cm=NULL,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
++ atom_id *cgindex;
++ cginfo_mb_t *cginfo_mb;
++ gmx_domdec_comm_t *comm;
++ int *moved;
++ int nthread,thread;
+
- nvec++;
++ if (dd->bScrewPBC)
+ {
- if (bCGP)
++ check_screw_box(state->box);
+ }
- nvec++;
++
++ comm = dd->comm;
++ if (fr->cutoff_scheme == ecutsGROUP)
+ {
- /* Make sure the communication buffers are large enough */
- for(mc=0; mc<dd->ndim*2; mc++)
++ cg_cm = fr->cg_cm;
+ }
+
- nvr = ncg[mc] + nat[mc]*nvec;
- if (nvr > comm->cgcm_state_nalloc[mc])
++ for(i=0; i<estNR; i++)
+ {
- comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
- srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
++ if (EST_DISTR(i))
+ {
- /* Recalculating cg_cm might be cheaper than communicating,
- * but that could give rise to rounding issues.
- */
- home_pos_cg =
- compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
- nvec,cg_cm,comm,bCompact);
-
- vec = 0;
- home_pos_at =
- compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
- nvec,vec++,state->x,comm,bCompact);
- if (bV)
- {
- compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
- nvec,vec++,state->v,comm,bCompact);
- }
- if (bSDX)
- {
- compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
- nvec,vec++,state->sd_X,comm,bCompact);
- }
- if (bCGP)
++ switch (i)
++ {
++ case estX: /* Always present */ break;
++ case estV: bV = (state->flags & (1<<i)); break;
++ case estSDX: bSDX = (state->flags & (1<<i)); break;
++ case estCGP: bCGP = (state->flags & (1<<i)); break;
++ case estLD_RNG:
++ case estLD_RNGI:
++ case estDISRE_INITF:
++ case estDISRE_RM3TAV:
++ case estORIRE_INITF:
++ case estORIRE_DTAV:
++ /* No processing required */
++ break;
++ default:
++ gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
++ }
+ }
+ }
+
- compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
- nvec,vec++,state->cg_p,comm,bCompact);
++ if (dd->ncg_tot > comm->nalloc_int)
+ {
- if (bCompact)
++ comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
++ srenew(comm->buf_int,comm->nalloc_int);
+ }
++ move = comm->buf_int;
+
- compact_ind(dd->ncg_home,move,
- dd->index_gl,dd->cgindex,dd->gatindex,
- dd->ga2la,comm->bLocalCG,
- fr->cginfo);
++ /* Clear the count */
++ for(c=0; c<dd->ndim*2; c++)
+ {
- fr->ns.grid->cell_index);
++ ncg[c] = 0;
++ nat[c] = 0;
++ }
++
++ npbcdim = dd->npbcdim;
++
++ for(d=0; (d<DIM); d++)
++ {
++ limitd[d] = dd->comm->cellsize_min[d];
++ if (d >= npbcdim && dd->ci[d] == 0)
++ {
++ cell_x0[d] = -GMX_FLOAT_MAX;
++ }
++ else
++ {
++ cell_x0[d] = comm->cell_x0[d];
++ }
++ if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
++ {
++ cell_x1[d] = GMX_FLOAT_MAX;
++ }
++ else
++ {
++ cell_x1[d] = comm->cell_x1[d];
++ }
++ if (d < npbcdim)
++ {
++ limit0[d] = comm->old_cell_x0[d] - limitd[d];
++ limit1[d] = comm->old_cell_x1[d] + limitd[d];
++ }
++ else
++ {
++ /* We check after communication if a charge group moved
++ * more than one cell. Set the pre-comm check limit to float_max.
++ */
++ limit0[d] = -GMX_FLOAT_MAX;
++ limit1[d] = GMX_FLOAT_MAX;
++ }
++ }
++
++ make_tric_corr_matrix(npbcdim,state->box,tcm);
++
++ cgindex = dd->cgindex;
++
++ nthread = gmx_omp_nthreads_get(emntDomdec);
++
++ /* Compute the center of geometry for all home charge groups
++ * and put them in the box and determine where they should go.
++ */
++#pragma omp parallel for num_threads(nthread) schedule(static)
++ for(thread=0; thread<nthread; thread++)
++ {
++ calc_cg_move(fplog,step,dd,state,tric_dir,tcm,
++ cell_x0,cell_x1,limitd,limit0,limit1,
++ cgindex,
++ ( thread *dd->ncg_home)/nthread,
++ ((thread+1)*dd->ncg_home)/nthread,
++ fr->cutoff_scheme==ecutsGROUP ? cg_cm : state->x,
++ move);
++ }
++
++ for(cg=0; cg<dd->ncg_home; cg++)
++ {
++ if (move[cg] >= 0)
++ {
++ mc = move[cg];
++ flag = mc & ~DD_FLAG_NRCG;
++ mc = mc & DD_FLAG_NRCG;
++ move[cg] = mc;
++
++ if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
++ {
++ comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
++ srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
++ }
++ comm->cggl_flag[mc][ncg[mc]*DD_CGIBS ] = dd->index_gl[cg];
++ /* We store the cg size in the lower 16 bits
++ * and the place where the charge group should go
++ * in the next 6 bits. This saves some communication volume.
++ */
++ nrcg = cgindex[cg+1] - cgindex[cg];
++ comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
++ ncg[mc] += 1;
++ nat[mc] += nrcg;
++ }
++ }
++
++ inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
++ inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
++
++ *ncg_moved = 0;
++ for(i=0; i<dd->ndim*2; i++)
++ {
++ *ncg_moved += ncg[i];
++ }
++
++ nvec = 1;
++ if (bV)
++ {
++ nvec++;
++ }
++ if (bSDX)
++ {
++ nvec++;
++ }
++ if (bCGP)
++ {
++ nvec++;
++ }
++
++ /* Make sure the communication buffers are large enough */
++ for(mc=0; mc<dd->ndim*2; mc++)
++ {
++ nvr = ncg[mc] + nat[mc]*nvec;
++ if (nvr > comm->cgcm_state_nalloc[mc])
++ {
++ comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
++ srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
++ }
++ }
++
++ switch (fr->cutoff_scheme)
++ {
++ case ecutsGROUP:
++ /* Recalculating cg_cm might be cheaper than communicating,
++ * but that could give rise to rounding issues.
++ */
++ home_pos_cg =
++ compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
++ nvec,cg_cm,comm,bCompact);
++ break;
++ case ecutsVERLET:
++ /* Without charge groups we send the moved atom coordinates
++ * over twice. This is so the code below can be used without
++         * many conditionals both with and without charge groups.
++ */
++ home_pos_cg =
++ compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
++ nvec,state->x,comm,FALSE);
++ if (bCompact)
++ {
++ home_pos_cg -= *ncg_moved;
++ }
++ break;
++ default:
++ gmx_incons("unimplemented");
++ home_pos_cg = 0;
++ }
++
++ vec = 0;
++ home_pos_at =
++ compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
++ nvec,vec++,state->x,comm,bCompact);
++ if (bV)
++ {
++ compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
++ nvec,vec++,state->v,comm,bCompact);
++ }
++ if (bSDX)
++ {
++ compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
++ nvec,vec++,state->sd_X,comm,bCompact);
++ }
++ if (bCGP)
++ {
++ compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
++ nvec,vec++,state->cg_p,comm,bCompact);
++ }
++
++ if (bCompact)
++ {
++ compact_ind(dd->ncg_home,move,
++ dd->index_gl,dd->cgindex,dd->gatindex,
++ dd->ga2la,comm->bLocalCG,
++ fr->cginfo);
+ }
+ else
+ {
++ if (fr->cutoff_scheme == ecutsVERLET)
++ {
++ moved = get_moved(comm,dd->ncg_home);
++
++ for(k=0; k<dd->ncg_home; k++)
++ {
++ moved[k] = 0;
++ }
++ }
++ else
++ {
++ moved = fr->ns.grid->cell_index;
++ }
++
+ clear_and_mark_ind(dd->ncg_home,move,
+ dd->index_gl,dd->cgindex,dd->gatindex,
+ dd->ga2la,comm->bLocalCG,
- ncg_stay_home = home_pos_cg;
++ moved);
+ }
+
+ cginfo_mb = fr->cginfo_mb;
+
- if (home_pos_cg >= fr->cg_nalloc)
++ *ncg_stay_home = home_pos_cg;
+ for(d=0; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
+ ncg_recv = 0;
+ nat_recv = 0;
+ nvr = 0;
+ for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
+ {
+ cdd = d*2 + dir;
+ /* Communicate the cg and atom counts */
+ sbuf[0] = ncg[cdd];
+ sbuf[1] = nat[cdd];
+ if (debug)
+ {
+ fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
+ d,dir,sbuf[0],sbuf[1]);
+ }
+ dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
+
+ if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
+ {
+ comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
+ srenew(comm->buf_int,comm->nalloc_int);
+ }
+
+ /* Communicate the charge group indices, sizes and flags */
+ dd_sendrecv_int(dd, d, dir,
+ comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
+ comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
+
+ nvs = ncg[cdd] + nat[cdd]*nvec;
+ i = rbuf[0] + rbuf[1] *nvec;
+ vec_rvec_check_alloc(&comm->vbuf,nvr+i);
+
+ /* Communicate cgcm and state */
+ dd_sendrecv_rvec(dd, d, dir,
+ comm->cgcm_state[cdd], nvs,
+ comm->vbuf.v+nvr, i);
+ ncg_recv += rbuf[0];
+ nat_recv += rbuf[1];
+ nvr += i;
+ }
+
+ /* Process the received charge groups */
+ buf_pos = 0;
+ for(cg=0; cg<ncg_recv; cg++)
+ {
+ flag = comm->buf_int[cg*DD_CGIBS+1];
+
+ if (dim >= npbcdim && dd->nc[dim] > 2)
+ {
+ /* No pbc in this dim and more than one domain boundary.
+ * We do a separate check if a charge group didn't move too far.
+ */
+ if (((flag & DD_FLAG_FW(d)) &&
+ comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
+ ((flag & DD_FLAG_BW(d)) &&
+ comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
+ {
+ cg_move_error(fplog,dd,step,cg,dim,
+ (flag & DD_FLAG_FW(d)) ? 1 : 0,
+ FALSE,0,
+ comm->vbuf.v[buf_pos],
+ comm->vbuf.v[buf_pos],
+ comm->vbuf.v[buf_pos][dim]);
+ }
+ }
+
+ mc = -1;
+ if (d < dd->ndim-1)
+ {
+ /* Check which direction this cg should go */
+ for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
+ {
+ if (dd->bGridJump)
+ {
+ /* The cell boundaries for dimension d2 are not equal
+ * for each cell row of the lower dimension(s),
+ * therefore we might need to redetermine where
+ * this cg should go.
+ */
+ dim2 = dd->dim[d2];
+ /* If this cg crosses the box boundary in dimension d2
+ * we can use the communicated flag, so we do not
+ * have to worry about pbc.
+ */
+ if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
+ (flag & DD_FLAG_FW(d2))) ||
+ (dd->ci[dim2] == 0 &&
+ (flag & DD_FLAG_BW(d2)))))
+ {
+ /* Clear the two flags for this dimension */
+ flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
+ /* Determine the location of this cg
+ * in lattice coordinates
+ */
+ pos_d = comm->vbuf.v[buf_pos][dim2];
+ if (tric_dir[dim2])
+ {
+ for(d3=dim2+1; d3<DIM; d3++)
+ {
+ pos_d +=
+ comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
+ }
+ }
+                    /* Check if we are not at the box edge.
+ * pbc is only handled in the first step above,
+ * but this check could move over pbc while
+ * the first step did not due to different rounding.
+ */
+ if (pos_d >= cell_x1[dim2] &&
+ dd->ci[dim2] != dd->nc[dim2]-1)
+ {
+ flag |= DD_FLAG_FW(d2);
+ }
+ else if (pos_d < cell_x0[dim2] &&
+ dd->ci[dim2] != 0)
+ {
+ flag |= DD_FLAG_BW(d2);
+ }
+ comm->buf_int[cg*DD_CGIBS+1] = flag;
+ }
+ }
+ /* Set to which neighboring cell this cg should go */
+ if (flag & DD_FLAG_FW(d2))
+ {
+ mc = d2*2;
+ }
+ else if (flag & DD_FLAG_BW(d2))
+ {
+ if (dd->nc[dd->dim[d2]] > 2)
+ {
+ mc = d2*2+1;
+ }
+ else
+ {
+ mc = d2*2;
+ }
+ }
+ }
+ }
+
+ nrcg = flag & DD_FLAG_NRCG;
+ if (mc == -1)
+ {
+ if (home_pos_cg+1 > dd->cg_nalloc)
+ {
+ dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
+ srenew(dd->index_gl,dd->cg_nalloc);
+ srenew(dd->cgindex,dd->cg_nalloc+1);
+ }
+ /* Set the global charge group index and size */
+ dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
+ dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
+ /* Copy the state from the buffer */
- dd_realloc_fr_cg(fr,home_pos_cg+1);
++ dd_check_alloc_ncg(fr,state,f,home_pos_cg+1);
++ if (fr->cutoff_scheme == ecutsGROUP)
+ {
- copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
+ cg_cm = fr->cg_cm;
++ copy_rvec(comm->vbuf.v[buf_pos],cg_cm[home_pos_cg]);
+ }
- fprintf(debug,"Finished repartitioning\n");
++ buf_pos++;
++
+ /* Set the cginfo */
+ fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
+ dd->index_gl[home_pos_cg]);
+ if (comm->bLocalCG)
+ {
+ comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
+ }
+
+ if (home_pos_at+nrcg > state->nalloc)
+ {
+ dd_realloc_state(state,f,home_pos_at+nrcg);
+ }
+ for(i=0; i<nrcg; i++)
+ {
+ copy_rvec(comm->vbuf.v[buf_pos++],
+ state->x[home_pos_at+i]);
+ }
+ if (bV)
+ {
+ for(i=0; i<nrcg; i++)
+ {
+ copy_rvec(comm->vbuf.v[buf_pos++],
+ state->v[home_pos_at+i]);
+ }
+ }
+ if (bSDX)
+ {
+ for(i=0; i<nrcg; i++)
+ {
+ copy_rvec(comm->vbuf.v[buf_pos++],
+ state->sd_X[home_pos_at+i]);
+ }
+ }
+ if (bCGP)
+ {
+ for(i=0; i<nrcg; i++)
+ {
+ copy_rvec(comm->vbuf.v[buf_pos++],
+ state->cg_p[home_pos_at+i]);
+ }
+ }
+ home_pos_cg += 1;
+ home_pos_at += nrcg;
+ }
+ else
+ {
+ /* Reallocate the buffers if necessary */
+ if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
+ {
+ comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
+ srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
+ }
+ nvr = ncg[mc] + nat[mc]*nvec;
+ if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
+ {
+ comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
+ srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
+ }
+ /* Copy from the receive to the send buffers */
+ memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
+ comm->buf_int + cg*DD_CGIBS,
+ DD_CGIBS*sizeof(int));
+ memcpy(comm->cgcm_state[mc][nvr],
+ comm->vbuf.v[buf_pos],
+ (1+nrcg*nvec)*sizeof(rvec));
+ buf_pos += 1 + nrcg*nvec;
+ ncg[mc] += 1;
+ nat[mc] += nrcg;
+ }
+ }
+ }
+
+ /* With sorting (!bCompact) the indices are now only partially up to date
+ * and ncg_home and nat_home are not the real count, since there are
+ * "holes" in the arrays for the charge groups that moved to neighbors.
+ */
++ if (fr->cutoff_scheme == ecutsVERLET)
++ {
++ moved = get_moved(comm,home_pos_cg);
++
++ for(i=dd->ncg_home; i<home_pos_cg; i++)
++ {
++ moved[i] = 0;
++ }
++ }
+ dd->ncg_home = home_pos_cg;
+ dd->nat_home = home_pos_at;
+
+ if (debug)
+ {
-
- return ncg_stay_home;
++ fprintf(debug,
++ "Finished repartitioning: cgs moved out %d, new home %d\n",
++ *ncg_moved,dd->ncg_home-*ncg_moved);
++
+ }
- static float dd_pme_f_ratio(gmx_domdec_t *dd)
+}
+
+void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
+{
+ dd->comm->cycl[ddCycl] += cycles;
+ dd->comm->cycl_n[ddCycl]++;
+ if (cycles > dd->comm->cycl_max[ddCycl])
+ {
+ dd->comm->cycl_max[ddCycl] = cycles;
+ }
+}
+
+static double force_flop_count(t_nrnb *nrnb)
+{
+ int i;
+ double sum;
+ const char *name;
+
+ sum = 0;
+ for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
+ {
+ /* To get closer to the real timings, we half the count
+ * for the normal loops and again half it for water loops.
+ */
+ name = nrnb_str(i);
+ if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
+ {
+ sum += nrnb->n[i]*0.25*cost_nrnb(i);
+ }
+ else
+ {
+ sum += nrnb->n[i]*0.50*cost_nrnb(i);
+ }
+ }
+ for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
+ {
+ name = nrnb_str(i);
+ if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
+ sum += nrnb->n[i]*cost_nrnb(i);
+ }
+ for(i=eNR_BONDS; i<=eNR_WALLS; i++)
+ {
+ sum += nrnb->n[i]*cost_nrnb(i);
+ }
+
+ return sum;
+}
+
+void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
+{
+ if (dd->comm->eFlop)
+ {
+ dd->comm->flop -= force_flop_count(nrnb);
+ }
+}
+void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
+{
+ if (dd->comm->eFlop)
+ {
+ dd->comm->flop += force_flop_count(nrnb);
+ dd->comm->flop_n++;
+ }
+}
+
+static void clear_dd_cycle_counts(gmx_domdec_t *dd)
+{
+ int i;
+
+ for(i=0; i<ddCyclNr; i++)
+ {
+ dd->comm->cycl[i] = 0;
+ dd->comm->cycl_n[i] = 0;
+ dd->comm->cycl_max[i] = 0;
+ }
+ dd->comm->flop = 0;
+ dd->comm->flop_n = 0;
+}
+
/* Gather the load measurements of all DD ranks.
 *
 * Working from the last decomposition dimension backwards, each rank
 * packs its local measurements (or the already-reduced results of the
 * next dimension) into sbuf[] and gathers them at the root rank of its
 * row communicator. The layout of sbuf[] depends on whether dynamic
 * load balancing (dd->bGridJump) and separate PME ranks (bSepPME) are
 * active, and MUST match the unpacking loop below — do not reorder.
 * Finally the DD master accumulates the run totals used by
 * print_dd_load_av().
 */
static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
{
    gmx_domdec_comm_t *comm;
    gmx_domdec_load_t *load;
    gmx_domdec_root_t *root=NULL;
    int d,dim,cid,i,pos;   /* NOTE(review): cid appears unused here */
    float cell_frac=0,sbuf[DD_NLOAD_MAX];
    gmx_bool bSepPME;

    if (debug)
    {
        fprintf(debug,"get_load_distribution start\n");
    }

    wallcycle_start(wcycle,ewcDDCOMMLOAD);

    comm = dd->comm;

    /* This rank sends force data to a PME-only rank iff pme_nodeid >= 0 */
    bSepPME = (dd->pme_nodeid >= 0);

    for(d=dd->ndim-1; d>=0; d--)
    {
        dim = dd->dim[d];
        /* Check if we participate in the communication in this dimension */
        if (d == dd->ndim-1 ||
            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
        {
            load = &comm->load[d];
            if (dd->bGridJump)
            {
                /* Relative size of the home cell along this dimension */
                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
            }
            pos = 0;
            if (d == dd->ndim-1)
            {
                /* Innermost dimension: pack the local measurements.
                 * sum and max both start out as the local force load.
                 */
                sbuf[pos++] = dd_force_load(comm);
                sbuf[pos++] = sbuf[0];
                if (dd->bGridJump)
                {
                    sbuf[pos++] = sbuf[0];
                    sbuf[pos++] = cell_frac;
                    if (d > 0)
                    {
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    }
                }
                if (bSepPME)
                {
                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
                    sbuf[pos++] = comm->cycl[ddCyclPME];
                }
            }
            else
            {
                /* Outer dimension: forward the already-reduced results
                 * of dimension d+1.
                 */
                sbuf[pos++] = comm->load[d+1].sum;
                sbuf[pos++] = comm->load[d+1].max;
                if (dd->bGridJump)
                {
                    sbuf[pos++] = comm->load[d+1].sum_m;
                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
                    sbuf[pos++] = comm->load[d+1].flags;
                    if (d > 0)
                    {
                        sbuf[pos++] = comm->cell_f_max0[d];
                        sbuf[pos++] = comm->cell_f_min1[d];
                    }
                }
                if (bSepPME)
                {
                    sbuf[pos++] = comm->load[d+1].mdf;
                    sbuf[pos++] = comm->load[d+1].pme;
                }
            }
            load->nload = pos;
            /* Communicate a row in DD direction d.
             * The communicators are setup such that the root always has rank 0.
             * The floats are sent as raw bytes; every rank packs the same
             * number of entries, so the fixed receive count is safe.
             */
#ifdef GMX_MPI
            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
                       load->load,load->nload*sizeof(float),MPI_BYTE,
                       0,comm->mpi_comm_load[d]);
#endif
            if (dd->ci[dim] == dd->master_ci[dim])
            {
                /* We are the root, process this row.
                 * The unpacking order below mirrors the packing above.
                 */
                if (comm->bDynLoadBal)
                {
                    root = comm->root[d];
                }
                load->sum = 0;
                load->max = 0;
                load->sum_m = 0;
                load->cvol_min = 1;
                load->flags = 0;
                load->mdf = 0;
                load->pme = 0;
                pos = 0;
                for(i=0; i<dd->nc[dim]; i++)
                {
                    load->sum += load->load[pos++];
                    load->max = max(load->max,load->load[pos]);
                    pos++;
                    if (dd->bGridJump)
                    {
                        if (root->bLimited)
                        {
                            /* This direction could not be load balanced
                             * properly, therefore we need to use the maximum
                             * instead of the average load.
                             */
                            load->sum_m = max(load->sum_m,load->load[pos]);
                        }
                        else
                        {
                            load->sum_m += load->load[pos];
                        }
                        pos++;
                        load->cvol_min = min(load->cvol_min,load->load[pos]);
                        pos++;
                        if (d < dd->ndim-1)
                        {
                            load->flags = (int)(load->load[pos++] + 0.5);
                        }
                        if (d > 0)
                        {
                            root->cell_f_max0[i] = load->load[pos++];
                            root->cell_f_min1[i] = load->load[pos++];
                        }
                    }
                    if (bSepPME)
                    {
                        load->mdf = max(load->mdf,load->load[pos]);
                        pos++;
                        load->pme = max(load->pme,load->load[pos]);
                        pos++;
                    }
                }
                if (comm->bDynLoadBal && root->bLimited)
                {
                    /* Scale the "max" sum up to the row total and record
                     * which dimension limited the balancing.
                     */
                    load->sum_m *= dd->nc[dim];
                    load->flags |= (1<<d);
                }
            }
        }
    }

    if (DDMASTER(dd))
    {
        /* Accumulate the run totals reported at the end of the run */
        comm->nload      += dd_load_count(comm);
        comm->load_step  += comm->cycl[ddCyclStep];
        comm->load_sum   += comm->load[0].sum;
        comm->load_max   += comm->load[0].max;
        if (comm->bDynLoadBal)
        {
            for(d=0; d<dd->ndim; d++)
            {
                if (comm->load[0].flags & (1<<d))
                {
                    /* Count steps where DLB hit the cell size limit */
                    comm->load_lim[d]++;
                }
            }
        }
        if (bSepPME)
        {
            comm->load_mdf += comm->load[0].mdf;
            comm->load_pme += comm->load[0].pme;
        }
    }

    wallcycle_stop(wcycle,ewcDDCOMMLOAD);

    if (debug)
    {
        fprintf(debug,"get_load_distribution finished\n");
    }
}
+
+static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
+{
+ /* Return the relative performance loss on the total run time
+ * due to the force calculation load imbalance.
+ */
+ if (dd->comm->nload > 0)
+ {
+ return
+ (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
+ (dd->comm->load_step*dd->nnodes);
+ }
+ else
+ {
+ return 0;
+ }
+}
+
/* Print the average load balancing statistics collected over the run
 * to the log file and stderr (DD master only): force load imbalance,
 * time lost to imbalance, DLB limitation per dimension, and the
 * PME mesh/force balance, followed by advisory notes when the losses
 * exceed DD_PERF_LOSS. Messages are built in buf so the identical text
 * goes to both streams.
 */
static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
{
    char buf[STRLEN];   /* assumed large enough for these bounded messages */
    int npp,npme,nnodes,d,limp;
    float imbal,pme_f_ratio,lossf,lossp=0;
    gmx_bool bLim;
    gmx_domdec_comm_t *comm;

    comm = dd->comm;
    if (DDMASTER(dd) && comm->nload > 0)
    {
        npp    = dd->nnodes;
        /* PME ranks only exist when this rank sends to one (pme_nodeid >= 0) */
        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
        nnodes = npp + npme;
        imbal = comm->load_max*npp/comm->load_sum - 1;
        lossf = dd_force_imb_perf_loss(dd);
        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
        fprintf(fplog,"%s",buf);
        fprintf(stderr,"\n");
        fprintf(stderr,"%s",buf);
        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
        fprintf(fplog,"%s",buf);
        fprintf(stderr,"%s",buf);
        bLim = FALSE;
        if (comm->bDynLoadBal)
        {
            /* Report, per dimension, the percentage of steps in which
             * DLB was limited by the minimum cell size.
             * The +1/2 rounds to the nearest percent.
             */
            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
            for(d=0; d<dd->ndim; d++)
            {
                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
                if (limp >= 50)
                {
                    bLim = TRUE;
                }
            }
            sprintf(buf+strlen(buf),"\n");
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
        }
        if (npme > 0)
        {
            pme_f_ratio = comm->load_pme/comm->load_mdf;
            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
            /* Weight the loss by the fraction of ranks that are waiting:
             * PP ranks wait when PME is slower (lossp > 0), PME ranks
             * wait when PP is slower (lossp <= 0).
             */
            if (lossp <= 0)
            {
                lossp *= (float)npme/(float)nnodes;
            }
            else
            {
                lossp *= (float)npp/(float)nnodes;
            }
            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
            fprintf(fplog,"%s",buf);
            fprintf(stderr,"%s",buf);
        }
        fprintf(fplog,"\n");
        fprintf(stderr,"\n");

        if (lossf >= DD_PERF_LOSS)
        {
            sprintf(buf,
                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
                    "      in the domain decomposition.\n",lossf*100);
            if (!comm->bDynLoadBal)
            {
                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb.)\n");
            }
            else if (bLim)
            {
                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
            }
            fprintf(fplog,"%s\n",buf);
            fprintf(stderr,"%s\n",buf);
        }
        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
        {
            sprintf(buf,
                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
                    "      had %s work to do than the PP nodes.\n"
                    "      You might want to %s the number of PME nodes\n"
                    "      or %s the cut-off and the grid spacing.\n",
                    fabs(lossp*100),
                    (lossp < 0) ? "less" : "more",
                    (lossp < 0) ? "decrease" : "increase",
                    (lossp < 0) ? "decrease" : "increase");
            fprintf(fplog,"%s\n",buf);
            fprintf(stderr,"%s\n",buf);
        }
    }
}
+
+static float dd_vol_min(gmx_domdec_t *dd)
+{
+ return dd->comm->load[0].cvol_min*dd->nnodes;
+}
+
/* Bit mask of decomposition dimensions in which dynamic load balancing
 * was limited by the minimum cell size (bit d set for DD dimension d).
 * NOTE(review): callers use the return value as an int bit mask
 * (e.g. flags & (1<<d)) even though the declared type is gmx_bool,
 * so the raw flags value must be returned unchanged.
 */
static gmx_bool dd_load_flags(gmx_domdec_t *dd)
{
    return dd->comm->load[0].flags;
}
+
/* Force-load imbalance of the last measurement: maximum load over
 * average load, minus 1 (0 means perfectly balanced).
 */
static float dd_f_imbal(gmx_domdec_t *dd)
{
    return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
}
+
- return dd->comm->load[0].pme/dd->comm->load[0].mdf;
++float dd_pme_f_ratio(gmx_domdec_t *dd)
+{
- dd->bInterCGcons = inter_charge_group_constraints(mtop);
++ if (dd->comm->cycl_n[ddCyclPME] > 0)
++ {
++ return dd->comm->load[0].pme/dd->comm->load[0].mdf;
++ }
++ else
++ {
++ return -1.0;
++ }
+}
+
/* Write a one-shot DD load report for the given step to the log file:
 * an optional warning about DLB being limited by the minimum cell
 * size, the min/average cell volume ratio (when DLB is on), the force
 * imbalance, and the PME mesh/force ratio (when PME cycles exist).
 */
static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
{
    int flags,d;
    char buf[22];

    flags = dd_load_flags(dd);
    if (flags)
    {
        fprintf(fplog,
                "DD  load balancing is limited by minimum cell size in dimension");
        for(d=0; d<dd->ndim; d++)
        {
            if (flags & (1<<d))
            {
                fprintf(fplog," %c",dim2char(dd->dim[d]));
            }
        }
        fprintf(fplog,"\n");
    }
    fprintf(fplog,"DD  step %s",gmx_step_str(step,buf));
    if (dd->comm->bDynLoadBal)
    {
        /* '!' flags that the balancing was limited this interval */
        fprintf(fplog,"  vol min/aver %5.3f%c",
                dd_vol_min(dd),flags ? '!' : ' ');
    }
    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
    if (dd->comm->cycl_n[ddCyclPME])
    {
        fprintf(fplog,"  pme mesh/force %5.3f",dd_pme_f_ratio(dd));
    }
    fprintf(fplog,"\n\n");
}
+
/* Compact stderr version of dd_print_load() for verbose runs: cell
 * volume ratio (DLB only), force imbalance percent, and PME/force
 * ratio (when PME cycles exist). No trailing newline; the caller is
 * expected to complete the status line.
 */
static void dd_print_load_verbose(gmx_domdec_t *dd)
{
    if (dd->comm->bDynLoadBal)
    {
        fprintf(stderr,"vol %4.2f%c ",
                dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
    }
    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
    if (dd->comm->cycl_n[ddCyclPME])
    {
        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
    }
}
+
#ifdef GMX_MPI
/* Create the row communicator for load collection along DD dimension
 * dim_ind, for the row of cells that matches loc in all other
 * dimensions. Ranks in that row get color 0 in MPI_Comm_split; all
 * other ranks pass MPI_UNDEFINED and receive no communicator.
 * On row members this also allocates the DLB root bookkeeping (on the
 * row root) or the cell_f receive buffer (on non-root members), plus
 * the load gather buffer on the row root.
 */
static void make_load_communicator(gmx_domdec_t *dd, int dim_ind,ivec loc)
{
    MPI_Comm  c_row;
    int  dim, i, rank;
    ivec loc_c;
    gmx_domdec_root_t *root;
    gmx_bool bPartOfGroup = FALSE;

    dim = dd->dim[dim_ind];
    copy_ivec(loc,loc_c);
    /* Scan the whole row along dim to see if this rank belongs to it */
    for(i=0; i<dd->nc[dim]; i++)
    {
        loc_c[dim] = i;
        rank = dd_index(dd->nc,loc_c);
        if (rank == dd->rank)
        {
            /* This process is part of the group */
            bPartOfGroup = TRUE;
        }
    }
    MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup?0:MPI_UNDEFINED, dd->rank,
                   &c_row);
    if (bPartOfGroup)
    {
        dd->comm->mpi_comm_load[dim_ind] = c_row;
        if (dd->comm->eDLB != edlbNO)
        {
            if (dd->ci[dim] == dd->master_ci[dim])
            {
                /* This is the root process of this row */
                snew(dd->comm->root[dim_ind],1);
                root = dd->comm->root[dim_ind];
                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
                snew(root->old_cell_f,dd->nc[dim]+1);
                snew(root->bCellMin,dd->nc[dim]);
                if (dim_ind > 0)
                {
                    snew(root->cell_f_max0,dd->nc[dim]);
                    snew(root->cell_f_min1,dd->nc[dim]);
                    snew(root->bound_min,dd->nc[dim]);
                    snew(root->bound_max,dd->nc[dim]);
                }
                snew(root->buf_ncd,dd->nc[dim]);
            }
            else
            {
                /* This is not a root process, we only need to receive cell_f */
                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
            }
        }
        if (dd->ci[dim] == dd->master_ci[dim])
        {
            /* Buffer for gathering up to DD_NLOAD_MAX floats per cell */
            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
        }
    }
}
#endif
+
/* Create the load-collection communicators for every DD dimension:
 * one row communicator along dim 0, one per slice for dim 1, and one
 * per (dim0,dim1) pair for dim 2. Every MPI_Comm_split inside
 * make_load_communicator is collective, so all ranks must execute the
 * same sequence of calls. A no-op without MPI.
 */
static void make_load_communicators(gmx_domdec_t *dd)
{
#ifdef GMX_MPI
  int  dim0,dim1,i,j;
  ivec loc;

  if (debug)
    fprintf(debug,"Making load communicators\n");

  snew(dd->comm->load,dd->ndim);
  snew(dd->comm->mpi_comm_load,dd->ndim);
  
  clear_ivec(loc);
  make_load_communicator(dd,0,loc);
  if (dd->ndim > 1) {
    dim0 = dd->dim[0];
    for(i=0; i<dd->nc[dim0]; i++) {
      loc[dim0] = i;
      make_load_communicator(dd,1,loc);
    }
  }
  if (dd->ndim > 2) {
    dim0 = dd->dim[0];
    for(i=0; i<dd->nc[dim0]; i++) {
      loc[dim0] = i;
      dim1 = dd->dim[1];
      for(j=0; j<dd->nc[dim1]; j++) {
	  loc[dim1] = j;
	  make_load_communicator(dd,2,loc);
      }
    }
  }

  if (debug)
    fprintf(debug,"Finished making load communicators\n");
#endif
}
+
/* Set up the DD grid topology on every rank: the forward/backward
 * neighbor ranks in each decomposed dimension, the communication
 * zones and their shifts, the interaction-zone (i-zone) pair ranges
 * with their allowed shift bounds, and finally the DLB root array and
 * the load communicators when those features are enabled.
 */
void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
{
    gmx_bool bZYX;   /* NOTE(review): declared but unused here */
    int  d,dim,i,j,m;
    ivec tmp,s;
    int  nzone,nzonep;
    ivec dd_zp[DD_MAXIZONE];
    gmx_domdec_zones_t *zones;
    gmx_domdec_ns_ranges_t *izone;

    /* Periodic neighbor ranks in each decomposed dimension */
    for(d=0; d<dd->ndim; d++)
    {
        dim = dd->dim[d];
        copy_ivec(dd->ci,tmp);
        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
        copy_ivec(dd->ci,tmp);
        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
        if (debug)
        {
            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
                    dd->rank,dim,
                    dd->neighbor[d][0],
                    dd->neighbor[d][1]);
        }
    }
    
    if (DDMASTER(dd))
    {
        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
	    dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
    }
    if (fplog)
    {
        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
                dd->ndim,
                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
    /* Pick the zone and i-zone tables that match the dimensionality */
    switch (dd->ndim)
    {
    case 3:
        nzone  = dd_z3n;
        nzonep = dd_zp3n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp3[i],dd_zp[i]);
        }
        break;
    case 2:
        nzone  = dd_z2n;
        nzonep = dd_zp2n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp2[i],dd_zp[i]);
        }
        break;
    case 1:
        nzone  = dd_z1n;
        nzonep = dd_zp1n;
        for(i=0; i<nzonep; i++)
        {
            copy_ivec(dd_zp1[i],dd_zp[i]);
        }
        break;
    default:
        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
        nzone = 0;
        nzonep = 0;
    }

    zones = &dd->comm->zones;

    /* Map the generic zone shifts onto the decomposed dimensions */
    for(i=0; i<nzone; i++)
    {
        m = 0;
        clear_ivec(zones->shift[i]);
        for(d=0; d<dd->ndim; d++)
        {
            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
        }
    }
    
    zones->n = nzone;
    for(i=0; i<nzone; i++)
    {
        for(d=0; d<DIM; d++)
        {
            s[d] = dd->ci[d] - zones->shift[i][d];
            if (s[d] < 0)
            {
                s[d] += dd->nc[d];
            }
            else if (s[d] >= dd->nc[d])
            {
                s[d] -= dd->nc[d];
            }
        }
    }
    /* NOTE(review): the loop above computes s but never stores or uses
     * it — possibly leftover code; confirm before removing.
     */
    zones->nizone = nzonep;
    for(i=0; i<zones->nizone; i++)
    {
        if (dd_zp[i][0] != i)
        {
            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
        }
        izone = &zones->izone[i];
        /* j0..j1 is the half-open range of j-zones this i-zone interacts with */
        izone->j0 = dd_zp[i][1];
        izone->j1 = dd_zp[i][2];
        for(dim=0; dim<DIM; dim++)
        {
            if (dd->nc[dim] == 1)
            {
                /* All shifts should be allowed */
                izone->shift0[dim] = -1;
                izone->shift1[dim] = 1;
            }
            else
            {
                /*
                  izone->shift0[d] = 0;
                  izone->shift1[d] = 0;
                  for(j=izone->j0; j<izone->j1; j++) {
                  if (dd->shift[j][d] > dd->shift[i][d])
                  izone->shift0[d] = -1;
                  if (dd->shift[j][d] < dd->shift[i][d])
                  izone->shift1[d] = 1;
                  }
                */
                
                int shift_diff;
                
                /* Assume the shift are not more than 1 cell */
                izone->shift0[dim] = 1;
                izone->shift1[dim] = -1;
                /* Narrow the bounds to the actual min/max shift
                 * difference over the interacting j-zones.
                 */
                for(j=izone->j0; j<izone->j1; j++)
                {
                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
                    if (shift_diff < izone->shift0[dim])
                    {
                        izone->shift0[dim] = shift_diff;
                    }
                    if (shift_diff > izone->shift1[dim])
                    {
                        izone->shift1[dim] = shift_diff;
                    }
                }
            }
        }
    }
    
    if (dd->comm->eDLB != edlbNO)
    {
        snew(dd->comm->root,dd->ndim);
    }
    
    if (dd->comm->bRecordLoad)
    {
        make_load_communicators(dd);
    }
}
+
/* Create/assign the particle-particle communicator and establish the
 * rank <-> Cartesian coordinate mapping plus the DD master rank, for
 * the three supported layouts: Cartesian PP+PME, Cartesian PP only,
 * and no Cartesian communicators at all.
 */
static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  i,rank,*buf;
    ivec periods;   /* only used in the MPI build below */
#ifdef GMX_MPI
    MPI_Comm comm_cart;
#endif
    
    dd = cr->dd;
    comm = dd->comm;
    
#ifdef GMX_MPI
    if (comm->bCartesianPP)
    {
        /* Set up cartesian communication for the particle-particle part */
        if (fplog)
        {
            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
        }
        
        /* Periodic in all dimensions, matching the periodic DD grid */
        for(i=0; i<DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
                        &comm_cart);
        /* We overwrite the old communicator with the new cartesian one */
        cr->mpi_comm_mygroup = comm_cart;
    }
    
    dd->mpi_comm_all = cr->mpi_comm_mygroup;
    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
    
    if (comm->bCartesianPP_PME)
    {
        /* Since we want to use the original cartesian setup for sim,
         * and not the one after split, we need to make an index.
         */
        snew(comm->ddindex2ddnodeid,dd->nnodes);
        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
        /* Each rank contributes its own entry; the sum fills the table */
        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
        /* Get the rank of the DD master,
         * above we made sure that the master node is a PP node.
         */
        if (MASTER(cr))
        {
            rank = dd->rank;
        }
        else
        {
            rank = 0;
        }
        /* Only the master contributes a non-zero value to the sum */
        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
    }
    else if (comm->bCartesianPP)
    {
        if (cr->npmenodes == 0)
        {
            /* The PP communicator is also
             * the communicator for this simulation
             */
            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
        }
        cr->nodeid = dd->rank;
        
        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
        
        /* We need to make an index to go from the coordinates
         * to the nodeid of this simulation.
         */
        snew(comm->ddindex2simnodeid,dd->nnodes);
        snew(buf,dd->nnodes);
        if (cr->duty & DUTY_PP)
        {
            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
        }
        /* Communicate the ddindex to simulation nodeid index */
        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
                      cr->mpi_comm_mysim);
        sfree(buf);
        
        /* Determine the master coordinates and rank.
         * The DD master should be the same node as the master of this sim.
         */
        for(i=0; i<dd->nnodes; i++)
        {
            if (comm->ddindex2simnodeid[i] == 0)
            {
                ddindex2xyz(dd->nc,i,dd->master_ci);
                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
            }
        }
        if (debug)
        {
            fprintf(debug,"The master rank is %d\n",dd->masterrank);
        }
    }
    else
    {
        /* No Cartesian communicators */
        /* We use the rank in dd->comm->all as DD index */
        ddindex2xyz(dd->nc,dd->rank,dd->ci);
        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
        dd->masterrank = 0;
        clear_ivec(dd->master_ci);
    }
#endif
  
    if (fplog)
    {
        fprintf(fplog,
                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
    if (debug)
    {
        fprintf(debug,
                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
    }
}
+
+static void receive_ddindex2simnodeid(t_commrec *cr)
+{
+ gmx_domdec_t *dd;
+
+ gmx_domdec_comm_t *comm;
+ int *buf;
+
+ dd = cr->dd;
+ comm = dd->comm;
+
+#ifdef GMX_MPI
+ if (!comm->bCartesianPP_PME && comm->bCartesianPP)
+ {
+ snew(comm->ddindex2simnodeid,dd->nnodes);
+ snew(buf,dd->nnodes);
+ if (cr->duty & DUTY_PP)
+ {
+ buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
+ }
+#ifdef GMX_MPI
+ /* Communicate the ddindex to simulation nodeid index */
+ MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
+ cr->mpi_comm_mysim);
+#endif
+ sfree(buf);
+ }
+#endif
+}
+
+static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
+ int ncg,int natoms)
+{
+ gmx_domdec_master_t *ma;
+ int i;
+
+ snew(ma,1);
+
+ snew(ma->ncg,dd->nnodes);
+ snew(ma->index,dd->nnodes+1);
+ snew(ma->cg,ncg);
+ snew(ma->nat,dd->nnodes);
+ snew(ma->ibuf,dd->nnodes*2);
+ snew(ma->cell_x,DIM);
+ for(i=0; i<DIM; i++)
+ {
+ snew(ma->cell_x[i],dd->nc[i]+1);
+ }
+
+ if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
+ {
+ ma->vbuf = NULL;
+ }
+ else
+ {
+ snew(ma->vbuf,natoms);
+ }
+
+ return ma;
+}
+
/* Split the simulation communicator into a PP group and a PME group.
 * When a Cartesian PP+PME layout is possible (the PME ranks divide
 * evenly into a y or z slab), a Cartesian communicator over the
 * extended grid is created first and ranks beyond the PP grid along
 * cartpmedim become PME ranks. Otherwise the split follows
 * dd_node_order (PP-first, interleaved, or plain). Sets cr->duty and
 * cr->mpi_comm_mygroup on every rank.
 */
static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
                               int reorder)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int  i,rank;
    gmx_bool bDiv[DIM];
    ivec periods;
#ifdef GMX_MPI
    MPI_Comm comm_cart;
#endif
    
    dd = cr->dd;
    comm = dd->comm;
    
    if (comm->bCartesianPP)
    {
        /* A PME slab along dimension i is possible when the PME ranks
         * spread evenly over the PP cells in that dimension.
         */
        for(i=1; i<DIM; i++)
        {
            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
        }
        if (bDiv[YY] || bDiv[ZZ])
        {
            comm->bCartesianPP_PME = TRUE;
            /* If we have 2D PME decomposition, which is always in x+y,
             * we stack the PME only nodes in z.
             * Otherwise we choose the direction that provides the thinnest slab
             * of PME only nodes as this will have the least effect
             * on the PP communication.
             * But for the PME communication the opposite might be better.
             */
            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
                             !bDiv[YY] ||
                             dd->nc[YY] > dd->nc[ZZ]))
            {
                comm->cartpmedim = ZZ;
            }
            else
            {
                comm->cartpmedim = YY;
            }
            /* Extend the grid along cartpmedim to make room for PME ranks */
            comm->ntot[comm->cartpmedim]
                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
        }
        else if (fplog)
        {
            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
            fprintf(fplog,
                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
        }
    }
    
#ifdef GMX_MPI
    if (comm->bCartesianPP_PME)
    {
        if (fplog)
        {
            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
        }
        
        for(i=0; i<DIM; i++)
        {
            periods[i] = TRUE;
        }
        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
                        &comm_cart);
        
        MPI_Comm_rank(comm_cart,&rank);
        if (MASTERNODE(cr) && rank != 0)
        {
            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
        }
        
        /* With this assignment we lose the link to the original
         * communicator, which will usually be MPI_COMM_WORLD,
         * unless we have multisim.
         */
        cr->mpi_comm_mysim = comm_cart;
        cr->sim_nodeid = rank;
        
        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
        
        if (fplog)
        {
            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
        }
        
        /* Coordinates inside the PP grid are PP ranks; the extension
         * along cartpmedim holds the PME ranks.
         */
        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
        {
            cr->duty = DUTY_PP;
        }
        if (cr->npmenodes == 0 ||
            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
        {
            cr->duty = DUTY_PME;
        }
        
        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       cr->duty,
                       dd_index(comm->ntot,dd->ci),
                       &cr->mpi_comm_mygroup);
    }
    else
    {
        switch (dd_node_order)
        {
        case ddnoPP_PME:
            if (fplog)
            {
                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
            }
            break;
        case ddnoINTERLEAVE:
            /* Interleave the PP-only and PME-only nodes,
             * as on clusters with dual-core machines this will double
             * the communication bandwidth of the PME processes
             * and thus speed up the PP <-> PME and inter PME communication.
             */
            if (fplog)
            {
                fprintf(fplog,"Interleaving PP and PME nodes\n");
            }
            comm->pmenodes = dd_pmenodes(cr);
            break;
        case ddnoCARTESIAN:
            break;
        default:
            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
        }
    
        /* A rank with no assigned PME partner is itself a PME rank */
        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
        {
            cr->duty = DUTY_PME;
        }
        else
        {
            cr->duty = DUTY_PP;
        }
        
        /* Split the sim communicator into PP and PME only nodes */
        MPI_Comm_split(cr->mpi_comm_mysim,
                       cr->duty,
                       cr->nodeid,
                       &cr->mpi_comm_mygroup);
        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
    }
#endif

    if (fplog)
    {
        fprintf(fplog,"This is a %s only node\n\n",
                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
    }
}
+
/* Top-level communicator setup for domain decomposition: optionally
 * split off PME-only ranks, create/assign the PP communicator on PP
 * ranks, record each PP rank's PME partner, and allocate the master
 * bookkeeping structure on the DD master.
 */
void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
{
    gmx_domdec_t *dd;
    gmx_domdec_comm_t *comm;
    int CartReorder;
    
    dd = cr->dd;
    comm = dd->comm;
    
    copy_ivec(dd->nc,comm->ntot);
    
    comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
    comm->bCartesianPP_PME = FALSE;
    
    /* Reorder the nodes by default. This might change the MPI ranks.
     * Real reordering is only supported on very few architectures,
     * Blue Gene is one of them.
     */
    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
    
    if (cr->npmenodes > 0)
    {
        /* Split the communicator into a PP and PME part */
        split_communicator(fplog,cr,dd_node_order,CartReorder);
        if (comm->bCartesianPP_PME)
        {
            /* We (possibly) reordered the nodes in split_communicator,
             * so it is no longer required in make_pp_communicator.
             */
            CartReorder = FALSE;
        }
    }
    else
    {
        /* All nodes do PP and PME */
#ifdef GMX_MPI    
        /* We do not require separate communicators */
        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
#endif
    }
    
    if (cr->duty & DUTY_PP)
    {
        /* Copy or make a new PP communicator */
        make_pp_communicator(fplog,cr,CartReorder);
    }
    else
    {
        receive_ddindex2simnodeid(cr);
    }
    
    if (!(cr->duty & DUTY_PME))
    {
        /* Set up the communication to our PME node */
        dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
        dd->pme_receive_vir_ener = receive_vir_ener(cr);
        if (debug)
        {
            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
                    dd->pme_nodeid,dd->pme_receive_vir_ener);
        }
    }
    else
    {
        /* PME-only ranks (and runs without separate PME) have no partner */
        dd->pme_nodeid = -1;
    }

    if (DDMASTER(dd))
    {
        dd->ma = init_gmx_domdec_master_t(dd,
                                          comm->cgs_gl.nr,
                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
    }
}
+
/* Parse the static load balancing fractions for one direction.
 * size_string holds nc whitespace-separated relative cell sizes
 * (e.g. from mdrun -ddcsx); they are normalized to sum to 1 and
 * returned as a freshly allocated array. Returns NULL when nc <= 1 or
 * no string was given (uniform cells). Fatal error on too few or
 * non-positive-parse entries.
 */
static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
{
    real *slb_frac,tot;
    int  i,n;
    double dbl;

    slb_frac = NULL;
    if (nc > 1 && size_string != NULL)
    {
        if (fplog)
        {
            fprintf(fplog,"Using static load balancing for the %s direction\n",
                    dir);
        }
        snew(slb_frac,nc);
        tot = 0;
        for (i=0; i<nc; i++)
        {
            dbl = 0;
            /* %n records how many chars were consumed so we can advance;
             * a failed parse leaves dbl at 0, which triggers the error.
             */
            sscanf(size_string,"%lf%n",&dbl,&n);
            if (dbl == 0)
            {
                gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
            }
            slb_frac[i] = dbl;
            size_string += n;
            tot += slb_frac[i];
        }
        /* Normalize */
        if (fplog)
        {
            fprintf(fplog,"Relative cell sizes:");
        }
        for (i=0; i<nc; i++)
        {
            slb_frac[i] /= tot;
            if (fplog)
            {
                fprintf(fplog," %5.3f",slb_frac[i]);
            }
        }
        if (fplog)
        {
            fprintf(fplog,"\n");
        }
    }
  
    return slb_frac;
}
+
/* Count the bonded interactions in the topology that involve more than
 * two atoms (these need special DD communication treatment). For each
 * interaction type the ilist stores (1 + NRAL) entries per interaction
 * (type index plus atoms), hence the division; counts are scaled by
 * the number of molecules per molecule block.
 */
static int multi_body_bondeds_count(gmx_mtop_t *mtop)
{
    int  n,nmol,ftype;
    gmx_mtop_ilistloop_t iloop;
    t_ilist *il;

    n = 0;
    iloop = gmx_mtop_ilistloop_init(mtop);
    while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
    {
        for(ftype=0; ftype<F_NRE; ftype++)
        {
            if ((interaction_function[ftype].flags & IF_BOND) &&
                NRAL(ftype) >  2)
            {
                n += nmol*il[ftype].nr/(1 + NRAL(ftype));
            }
        }
    }

    return n;
}
+
/* Read an integer tuning parameter from the environment.
 * Returns def when env_var is unset. When it is set but does not parse
 * as an integer, the value 1 is used (i.e. "enabled"). When fplog is
 * non-NULL the value found is reported to the log.
 */
static int dd_nst_env(FILE *fplog,const char *env_var,int def)
{
    const char *val;
    int nst;

    val = getenv(env_var);
    if (val == NULL)
    {
        return def;
    }

    /* Pre-set the fallback; a successful parse overwrites it */
    nst = 1;
    sscanf(val,"%d",&nst);

    if (fplog)
    {
        fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
                env_var,val,nst);
    }

    return nst;
}
+
+static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
+{
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"\n%s\n",warn_string);
+ }
+ if (fplog)
+ {
+ fprintf(fplog,"\n%s\n",warn_string);
+ }
+}
+
/* Verify that the input record is compatible with domain
 * decomposition; fatal error on hard incompatibilities (screw PBC in
 * y/z, simple neighbor search, nstlist=0) and a warning for angular
 * COM removal with PBC.
 */
static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
                                  t_inputrec *ir,FILE *fplog)
{
    /* Screw PBC only works when the decomposition is purely along x */
    if (ir->ePBC == epbcSCREW &&
        (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
    {
        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
    }

    if (ir->ns_type == ensSIMPLE)
    {
        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
    }

    if (ir->nstlist == 0)
    {
        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
    }

    if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
    {
        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
    }
}
+
+static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
+{
+ int di,d;
+ real r;
+
+ r = ddbox->box_size[XX];
+ for(di=0; di<dd->ndim; di++)
+ {
+ d = dd->dim[di];
+ /* Check using the initial average cell size */
+ r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
+ }
+
+ return r;
+}
+
/* Decide whether dynamic load balancing can be used for this run.
 * Parses dlb_opt ('a'uto/'n'o/'y'es) and downgrades to edlbNO for
 * reruns, non-dynamics integrators, missing cycle counters, or (for
 * auto) reproducible runs — warning the user where appropriate.
 * Returns the effective edlb* value.
 */
static int check_dlb_support(FILE *fplog,t_commrec *cr,
                             const char *dlb_opt,gmx_bool bRecordLoad,
                             unsigned long Flags,t_inputrec *ir)
{
    gmx_domdec_t *dd;   /* NOTE(review): declared but unused here */
    int  eDLB=-1;
    char buf[STRLEN];

    switch (dlb_opt[0])
    {
    case 'a': eDLB = edlbAUTO; break;
    case 'n': eDLB = edlbNO;   break;
    case 'y': eDLB = edlbYES;  break;
    default: gmx_incons("Unknown dlb_opt");
    }

    if (Flags & MD_RERUN)
    {
        return edlbNO;
    }

    if (!EI_DYNAMICS(ir->eI))
    {
        if (eDLB == edlbYES)
        {
            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
            dd_warning(cr,fplog,buf);
        }
            
        return edlbNO;
    }

    if (!bRecordLoad)
    {
        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");

        return edlbNO;
    }

    if (Flags & MD_REPRODUCIBLE)
    {
        switch (eDLB)
        {
			case edlbNO: 
				break;
			case edlbAUTO:
				dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
				eDLB = edlbNO;
				break;
			case edlbYES:
				dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
				break;
			default:
				gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
				break;
        }
    }

    return eDLB;
}
+
+static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
+{
+ int dim;
+
+ dd->ndim = 0;
+ if (getenv("GMX_DD_ORDER_ZYX") != NULL)
+ {
+ /* Decomposition order z,y,x */
+ if (fplog)
+ {
+ fprintf(fplog,"Using domain decomposition order z, y, x\n");
+ }
+ for(dim=DIM-1; dim>=0; dim--)
+ {
+ if (dd->nc[dim] > 1)
+ {
+ dd->dim[dd->ndim++] = dim;
+ }
+ }
+ }
+ else
+ {
+ /* Decomposition order x,y,z */
+ for(dim=0; dim<DIM; dim++)
+ {
+ if (dd->nc[dim] > 1)
+ {
+ dd->dim[dd->ndim++] = dim;
+ }
+ }
+ }
+}
+
+static gmx_domdec_comm_t *init_dd_comm()
+{
+ gmx_domdec_comm_t *comm;
+ int i;
+
+ snew(comm,1);
+ snew(comm->cggl_flag,DIM*2);
+ snew(comm->cgcm_state,DIM*2);
+ for(i=0; i<DIM*2; i++)
+ {
+ comm->cggl_flag_nalloc[i] = 0;
+ comm->cgcm_state_nalloc[i] = 0;
+ }
+
+ comm->nalloc_int = 0;
+ comm->buf_int = NULL;
+
+ vec_rvec_init(&comm->vbuf);
+
+ comm->n_load_have = 0;
+ comm->n_load_collect = 0;
+
+ for(i=0; i<ddnatNR-ddnatZONE; i++)
+ {
+ comm->sum_nat[i] = 0;
+ }
+ comm->ndecomp = 0;
+ comm->nload = 0;
+ comm->load_step = 0;
+ comm->load_sum = 0;
+ comm->load_max = 0;
+ clear_ivec(comm->load_lim);
+ comm->load_mdf = 0;
+ comm->load_pme = 0;
+
+ return comm;
+}
+
+gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
+ unsigned long Flags,
+ ivec nc,
+ real comm_distance_min,real rconstr,
+ const char *dlb_opt,real dlb_scale,
+ const char *sizex,const char *sizey,const char *sizez,
+ gmx_mtop_t *mtop,t_inputrec *ir,
+ matrix box,rvec *x,
+ gmx_ddbox_t *ddbox,
+ int *npme_x,int *npme_y)
+{
+ gmx_domdec_t *dd;
+ gmx_domdec_comm_t *comm;
+ int recload;
+ int d,i,j;
+ real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
+ gmx_bool bC;
+ char buf[STRLEN];
+
+ if (fplog)
+ {
+ fprintf(fplog,
+ "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
+ }
+
+ snew(dd,1);
+
+ dd->comm = init_dd_comm();
+ comm = dd->comm;
+ snew(comm->cggl_flag,DIM*2);
+ snew(comm->cgcm_state,DIM*2);
+
+ dd->npbcdim = ePBC2npbcdim(ir->ePBC);
+ dd->bScrewPBC = (ir->ePBC == epbcSCREW);
+
+ dd->bSendRecv2 = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
+ comm->dlb_scale_lim = dd_nst_env(fplog,"GMX_DLB_MAX",10);
+ comm->eFlop = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
+ recload = dd_nst_env(fplog,"GMX_DD_LOAD",1);
+ comm->nstSortCG = dd_nst_env(fplog,"GMX_DD_SORT",1);
+ comm->nstDDDump = dd_nst_env(fplog,"GMX_DD_DUMP",0);
+ comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
+ comm->DD_debug = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
+
+ dd->pme_recv_f_alloc = 0;
+ dd->pme_recv_f_buf = NULL;
+
+ if (dd->bSendRecv2 && fplog)
+ {
+ fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
+ }
+ if (comm->eFlop)
+ {
+ if (fplog)
+ {
+ fprintf(fplog,"Will load balance based on FLOP count\n");
+ }
+ if (comm->eFlop > 1)
+ {
+ srand(1+cr->nodeid);
+ }
+ comm->bRecordLoad = TRUE;
+ }
+ else
+ {
+ comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
+
+ }
+
+ comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
+
+ comm->bDynLoadBal = (comm->eDLB == edlbYES);
+ if (fplog)
+ {
+ fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
+ }
+ dd->bGridJump = comm->bDynLoadBal;
+
+ if (comm->nstSortCG)
+ {
+ if (fplog)
+ {
+ if (comm->nstSortCG == 1)
+ {
+ fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
+ }
+ else
+ {
+ fprintf(fplog,"Will sort the charge groups every %d steps\n",
+ comm->nstSortCG);
+ }
+ }
+ snew(comm->sort,1);
+ }
+ else
+ {
+ if (fplog)
+ {
+ fprintf(fplog,"Will not sort the charge groups\n");
+ }
+ }
++
++ comm->bCGs = (ncg_mtop(mtop) < mtop->natoms);
+
+ comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
+ if (comm->bInterCGBondeds)
+ {
+ comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
+ }
+ else
+ {
+ comm->bInterCGMultiBody = FALSE;
+ }
+
- comm->globalcomm_step = INT_MIN;
++ dd->bInterCGcons = inter_charge_group_constraints(mtop);
++ dd->bInterCGsettles = inter_charge_group_settles(mtop);
+
+ if (ir->rlistlong == 0)
+ {
+ /* Set the cut-off to some very large value,
+ * so we don't need if statements everywhere in the code.
+ * We use sqrt, since the cut-off is squared in some places.
+ */
+ comm->cutoff = GMX_CUTOFF_INF;
+ }
+ else
+ {
+ comm->cutoff = ir->rlistlong;
+ }
+ comm->cutoff_mbody = 0;
+
+ comm->cellsize_limit = 0;
+ comm->bBondComm = FALSE;
+
+ if (comm->bInterCGBondeds)
+ {
+ if (comm_distance_min > 0)
+ {
+ comm->cutoff_mbody = comm_distance_min;
+ if (Flags & MD_DDBONDCOMM)
+ {
+ comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
+ }
+ else
+ {
+ comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
+ }
+ r_bonded_limit = comm->cutoff_mbody;
+ }
+ else if (ir->bPeriodicMols)
+ {
+ /* Can not easily determine the required cut-off */
+ dd_warning(cr,fplog,"NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
+ comm->cutoff_mbody = comm->cutoff/2;
+ r_bonded_limit = comm->cutoff_mbody;
+ }
+ else
+ {
+ if (MASTER(cr))
+ {
+ dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
+ Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
+ }
+ gmx_bcast(sizeof(r_2b),&r_2b,cr);
+ gmx_bcast(sizeof(r_mb),&r_mb,cr);
+
+ /* We use an initial margin of 10% for the minimum cell size,
+ * except when we are just below the non-bonded cut-off.
+ */
+ if (Flags & MD_DDBONDCOMM)
+ {
+ if (max(r_2b,r_mb) > comm->cutoff)
+ {
+ r_bonded = max(r_2b,r_mb);
+ r_bonded_limit = 1.1*r_bonded;
+ comm->bBondComm = TRUE;
+ }
+ else
+ {
+ r_bonded = r_mb;
+ r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
+ }
+ /* We determine cutoff_mbody later */
+ }
+ else
+ {
+ /* No special bonded communication,
+ * simply increase the DD cut-off.
+ */
+ r_bonded_limit = 1.1*max(r_2b,r_mb);
+ comm->cutoff_mbody = r_bonded_limit;
+ comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
+ }
+ }
+ comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
+ if (fplog)
+ {
+ fprintf(fplog,
+ "Minimum cell size due to bonded interactions: %.3f nm\n",
+ comm->cellsize_limit);
+ }
+ }
+
+ if (dd->bInterCGcons && rconstr <= 0)
+ {
+ /* There is a cell size limit due to the constraints (P-LINCS) */
+ rconstr = constr_r_max(fplog,mtop,ir);
+ if (fplog)
+ {
+ fprintf(fplog,
+ "Estimated maximum distance required for P-LINCS: %.3f nm\n",
+ rconstr);
+ if (rconstr > comm->cellsize_limit)
+ {
+ fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
+ }
+ }
+ }
+ else if (rconstr > 0 && fplog)
+ {
+ /* Here we do not check for dd->bInterCGcons,
+ * because one can also set a cell size limit for virtual sites only
+ * and at this point we don't know yet if there are intercg v-sites.
+ */
+ fprintf(fplog,
+ "User supplied maximum distance required for P-LINCS: %.3f nm\n",
+ rconstr);
+ }
+ comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
+
+ comm->cgs_gl = gmx_mtop_global_cgs(mtop);
+
+ if (nc[XX] > 0)
+ {
+ copy_ivec(nc,dd->nc);
+ set_dd_dim(fplog,dd);
+ set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
+
+ if (cr->npmenodes == -1)
+ {
+ cr->npmenodes = 0;
+ }
+ acs = average_cellsize_min(dd,ddbox);
+ if (acs < comm->cellsize_limit)
+ {
+ if (fplog)
+ {
+ fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
+ }
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
+ acs,comm->cellsize_limit);
+ }
+ }
+ else
+ {
+ set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
+
+ /* We need to choose the optimal DD grid and possibly PME nodes */
+ limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
+ comm->eDLB!=edlbNO,dlb_scale,
+ comm->cellsize_limit,comm->cutoff,
+ comm->bInterCGBondeds,comm->bInterCGMultiBody);
+
+ if (dd->nc[XX] == 0)
+ {
+ bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
+ sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
+ !bC ? "-rdd" : "-rcon",
+ comm->eDLB!=edlbNO ? " or -dds" : "",
+ bC ? " or your LINCS settings" : "");
+
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
+ "%s\n"
+ "Look in the log file for details on the domain decomposition",
+ cr->nnodes-cr->npmenodes,limit,buf);
+ }
+ set_dd_dim(fplog,dd);
+ }
+
+ if (fplog)
+ {
+ fprintf(fplog,
+ "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
+ dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
+ }
+
+ dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
+ if (cr->nnodes - dd->nnodes != cr->npmenodes)
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
+ dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
+ }
+ if (cr->npmenodes > dd->nnodes)
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
+ }
+ if (cr->npmenodes > 0)
+ {
+ comm->npmenodes = cr->npmenodes;
+ }
+ else
+ {
+ comm->npmenodes = dd->nnodes;
+ }
+
+ if (EEL_PME(ir->coulombtype))
+ {
+ /* The following choices should match those
+ * in comm_cost_est in domdec_setup.c.
+ * Note that here the checks have to take into account
+ * that the decomposition might occur in a different order than xyz
+ * (for instance through the env.var. GMX_DD_ORDER_ZYX),
+ * in which case they will not match those in comm_cost_est,
+ * but since that is mainly for testing purposes that's fine.
+ */
+ if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
+ comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
+ getenv("GMX_PMEONEDD") == NULL)
+ {
+ comm->npmedecompdim = 2;
+ comm->npmenodes_x = dd->nc[XX];
+ comm->npmenodes_y = comm->npmenodes/comm->npmenodes_x;
+ }
+ else
+ {
+ /* In case nc is 1 in both x and y we could still choose to
+ * decompose pme in y instead of x, but we use x for simplicity.
+ */
+ comm->npmedecompdim = 1;
+ if (dd->dim[0] == YY)
+ {
+ comm->npmenodes_x = 1;
+ comm->npmenodes_y = comm->npmenodes;
+ }
+ else
+ {
+ comm->npmenodes_x = comm->npmenodes;
+ comm->npmenodes_y = 1;
+ }
+ }
+ if (fplog)
+ {
+ fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
+ comm->npmenodes_x,comm->npmenodes_y,1);
+ }
+ }
+ else
+ {
+ comm->npmedecompdim = 0;
+ comm->npmenodes_x = 0;
+ comm->npmenodes_y = 0;
+ }
+
+ /* Technically we don't need both of these,
+ * but it simplifies code not having to recalculate it.
+ */
+ *npme_x = comm->npmenodes_x;
+ *npme_y = comm->npmenodes_y;
+
+ snew(comm->slb_frac,DIM);
+ if (comm->eDLB == edlbNO)
+ {
+ comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
+ comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
+ comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
+ }
+
+ if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
+ {
+ if (comm->bBondComm || comm->eDLB != edlbNO)
+ {
+ /* Set the bonded communication distance to halfway
+ * the minimum and the maximum,
+ * since the extra communication cost is nearly zero.
+ */
+ acs = average_cellsize_min(dd,ddbox);
+ comm->cutoff_mbody = 0.5*(r_bonded + acs);
+ if (comm->eDLB != edlbNO)
+ {
+ /* Check if this does not limit the scaling */
+ comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
+ }
+ if (!comm->bBondComm)
+ {
+ /* Without bBondComm do not go beyond the n.b. cut-off */
+ comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
+ if (comm->cellsize_limit >= comm->cutoff)
+ {
+ /* We don't loose a lot of efficieny
+ * when increasing it to the n.b. cut-off.
+ * It can even be slightly faster, because we need
+ * less checks for the communication setup.
+ */
+ comm->cutoff_mbody = comm->cutoff;
+ }
+ }
+ /* Check if we did not end up below our original limit */
+ comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
+
+ if (comm->cutoff_mbody > comm->cellsize_limit)
+ {
+ comm->cellsize_limit = comm->cutoff_mbody;
+ }
+ }
+ /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
+ }
+
+ if (debug)
+ {
+ fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
+ "cellsize limit %f\n",
+ comm->bBondComm,comm->cellsize_limit);
+ }
+
+ if (MASTER(cr))
+ {
+ check_dd_restrictions(cr,dd,ir,fplog);
+ }
+
- int d,dim,npulse,npulse_d_max,npulse_d;
- gmx_bool bNoCutOff;
++ comm->partition_step = INT_MIN;
+ dd->ddp_count = 0;
+
+ clear_dd_cycle_counts(dd);
+
+ return dd;
+}
+
+static void set_dlb_limits(gmx_domdec_t *dd)
+
+{
+ int d;
+
+ for(d=0; d<dd->ndim; d++)
+ {
+ dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
+ dd->comm->cellsize_min[dd->dim[d]] =
+ dd->comm->cellsize_min_dlb[dd->dim[d]];
+ }
+}
+
+
+static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
+{
+ gmx_domdec_t *dd;
+ gmx_domdec_comm_t *comm;
+ real cellsize_min;
+ int d,nc,i;
+ char buf[STRLEN];
+
+ dd = cr->dd;
+ comm = dd->comm;
+
+ if (fplog)
+ {
+ fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
+ }
+
+ cellsize_min = comm->cellsize_min[dd->dim[0]];
+ for(d=1; d<dd->ndim; d++)
+ {
+ cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
+ }
+
+ if (cellsize_min < comm->cellsize_limit*1.05)
+ {
+ dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
+
+ /* Change DLB from "auto" to "no". */
+ comm->eDLB = edlbNO;
+
+ return;
+ }
+
+ dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
+ comm->bDynLoadBal = TRUE;
+ dd->bGridJump = TRUE;
+
+ set_dlb_limits(dd);
+
+ /* We can set the required cell size info here,
+ * so we do not need to communicate this.
+ * The grid is completely uniform.
+ */
+ for(d=0; d<dd->ndim; d++)
+ {
+ if (comm->root[d])
+ {
+ comm->load[d].sum_m = comm->load[d].sum;
+
+ nc = dd->nc[dd->dim[d]];
+ for(i=0; i<nc; i++)
+ {
+ comm->root[d]->cell_f[i] = i/(real)nc;
+ if (d > 0)
+ {
+ comm->root[d]->cell_f_max0[i] = i /(real)nc;
+ comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
+ }
+ }
+ comm->root[d]->cell_f[nc] = 1.0;
+ }
+ }
+}
+
+static char *init_bLocalCG(gmx_mtop_t *mtop)
+{
+ int ncg,cg;
+ char *bLocalCG;
+
+ ncg = ncg_mtop(mtop);
+ snew(bLocalCG,ncg);
+ for(cg=0; cg<ncg; cg++)
+ {
+ bLocalCG[cg] = FALSE;
+ }
+
+ return bLocalCG;
+}
+
+void dd_init_bondeds(FILE *fplog,
+ gmx_domdec_t *dd,gmx_mtop_t *mtop,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
+{
+ gmx_domdec_comm_t *comm;
+ gmx_bool bBondComm;
+ int d;
+
+ dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
+
+ comm = dd->comm;
+
+ if (comm->bBondComm)
+ {
+ /* Communicate atoms beyond the cut-off for bonded interactions */
+ comm = dd->comm;
+
+ comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
+
+ comm->bLocalCG = init_bLocalCG(mtop);
+ }
+ else
+ {
+ /* Only communicate atoms based on cut-off */
+ comm->cglink = NULL;
+ comm->bLocalCG = NULL;
+ }
+}
+
+static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
+ t_inputrec *ir,
+ gmx_bool bDynLoadBal,real dlb_scale,
+ gmx_ddbox_t *ddbox)
+{
+ gmx_domdec_comm_t *comm;
+ int d;
+ ivec np;
+ real limit,shrink;
+ char buf[64];
+
+ if (fplog == NULL)
+ {
+ return;
+ }
+
+ comm = dd->comm;
+
+ if (bDynLoadBal)
+ {
+ fprintf(fplog,"The maximum number of communication pulses is:");
+ for(d=0; d<dd->ndim; d++)
+ {
+ fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
+ }
+ fprintf(fplog,"\n");
+ fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
+ fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
+ fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
+ for(d=0; d<DIM; d++)
+ {
+ if (dd->nc[d] > 1)
+ {
+ if (d >= ddbox->npbcdim && dd->nc[d] == 2)
+ {
+ shrink = 0;
+ }
+ else
+ {
+ shrink =
+ comm->cellsize_min_dlb[d]/
+ (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
+ }
+ fprintf(fplog," %c %.2f",dim2char(d),shrink);
+ }
+ }
+ fprintf(fplog,"\n");
+ }
+ else
+ {
+ set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
+ fprintf(fplog,"The initial number of communication pulses is:");
+ for(d=0; d<dd->ndim; d++)
+ {
+ fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
+ }
+ fprintf(fplog,"\n");
+ fprintf(fplog,"The initial domain decomposition cell size is:");
+ for(d=0; d<DIM; d++) {
+ if (dd->nc[d] > 1)
+ {
+ fprintf(fplog," %c %.2f nm",
+ dim2char(d),dd->comm->cellsize_min[d]);
+ }
+ }
+ fprintf(fplog,"\n\n");
+ }
+
+ if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
+ {
+ fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
+ fprintf(fplog,"%40s %-7s %6.3f nm\n",
+ "non-bonded interactions","",comm->cutoff);
+
+ if (bDynLoadBal)
+ {
+ limit = dd->comm->cellsize_limit;
+ }
+ else
+ {
+ if (dynamic_dd_box(ddbox,ir))
+ {
+ fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
+ }
+ limit = dd->comm->cellsize_min[XX];
+ for(d=1; d<DIM; d++)
+ {
+ limit = min(limit,dd->comm->cellsize_min[d]);
+ }
+ }
+
+ if (comm->bInterCGBondeds)
+ {
+ fprintf(fplog,"%40s %-7s %6.3f nm\n",
+ "two-body bonded interactions","(-rdd)",
+ max(comm->cutoff,comm->cutoff_mbody));
+ fprintf(fplog,"%40s %-7s %6.3f nm\n",
+ "multi-body bonded interactions","(-rdd)",
+ (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
+ }
+ if (dd->vsite_comm)
+ {
+ fprintf(fplog,"%40s %-7s %6.3f nm\n",
+ "virtual site constructions","(-rcon)",limit);
+ }
+ if (dd->constraint_comm)
+ {
+ sprintf(buf,"atoms separated by up to %d constraints",
+ 1+ir->nProjOrder);
+ fprintf(fplog,"%40s %-7s %6.3f nm\n",
+ buf,"(-rcon)",limit);
+ }
+ fprintf(fplog,"\n");
+ }
+
+ fflush(fplog);
+}
+
++static void set_cell_limits_dlb(gmx_domdec_t *dd,
++ real dlb_scale,
++ const t_inputrec *ir,
++ const gmx_ddbox_t *ddbox)
++{
++ gmx_domdec_comm_t *comm;
++ int d,dim,npulse,npulse_d_max,npulse_d;
++ gmx_bool bNoCutOff;
++
++ comm = dd->comm;
++
++ bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
++
++ /* Determine the maximum number of comm. pulses in one dimension */
++
++ comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
++
++ /* Determine the maximum required number of grid pulses */
++ if (comm->cellsize_limit >= comm->cutoff)
++ {
++ /* Only a single pulse is required */
++ npulse = 1;
++ }
++ else if (!bNoCutOff && comm->cellsize_limit > 0)
++ {
++ /* We round down slightly here to avoid overhead due to the latency
++ * of extra communication calls when the cut-off
++ * would be only slightly longer than the cell size.
++ * Later cellsize_limit is redetermined,
++ * so we can not miss interactions due to this rounding.
++ */
++ npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
++ }
++ else
++ {
++ /* There is no cell size limit */
++ npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
++ }
++
++ if (!bNoCutOff && npulse > 1)
++ {
++ /* See if we can do with less pulses, based on dlb_scale */
++ npulse_d_max = 0;
++ for(d=0; d<dd->ndim; d++)
++ {
++ dim = dd->dim[d];
++ npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
++ /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
++ npulse_d_max = max(npulse_d_max,npulse_d);
++ }
++ npulse = min(npulse,npulse_d_max);
++ }
++
++ /* This env var can override npulse */
++ d = dd_nst_env(debug,"GMX_DD_NPULSE",0);
++ if (d > 0)
++ {
++ npulse = d;
++ }
++
++ comm->maxpulse = 1;
++ comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
++ for(d=0; d<dd->ndim; d++)
++ {
++ comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
++ comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
++ snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
++ comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
++ if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
++ {
++ comm->bVacDLBNoLimit = FALSE;
++ }
++ }
++
++ /* cellsize_limit is set for LINCS in init_domain_decomposition */
++ if (!comm->bVacDLBNoLimit)
++ {
++ comm->cellsize_limit = max(comm->cellsize_limit,
++ comm->cutoff/comm->maxpulse);
++ }
++ comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
++ /* Set the minimum cell size for each DD dimension */
++ for(d=0; d<dd->ndim; d++)
++ {
++ if (comm->bVacDLBNoLimit ||
++ comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
++ {
++ comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
++ }
++ else
++ {
++ comm->cellsize_min_dlb[dd->dim[d]] =
++ comm->cutoff/comm->cd[d].np_dlb;
++ }
++ }
++ if (comm->cutoff_mbody <= 0)
++ {
++ comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
++ }
++ if (comm->bDynLoadBal)
++ {
++ set_dlb_limits(dd);
++ }
++}
++
++gmx_bool dd_bonded_molpbc(gmx_domdec_t *dd,int ePBC)
++{
++ /* If each molecule is a single charge group
++ * or we use domain decomposition for each periodic dimension,
++ * we do not need to take pbc into account for the bonded interactions.
++ */
++ return (ePBC != epbcNONE && dd->comm->bInterCGBondeds &&
++ !(dd->nc[XX]>1 &&
++ dd->nc[YY]>1 &&
++ (dd->nc[ZZ]>1 || ePBC==epbcXY)));
++}
++
+void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
+ t_inputrec *ir,t_forcerec *fr,
+ gmx_ddbox_t *ddbox)
+{
+ gmx_domdec_comm_t *comm;
- bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
+ int natoms_tot;
+ real vol_frac;
+
+ comm = dd->comm;
+
-
- /* If each molecule is a single charge group
- * or we use domain decomposition for each periodic dimension,
- * we do not need to take pbc into account for the bonded interactions.
- */
- if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
- (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
- {
- fr->bMolPBC = FALSE;
- }
- else
- {
- fr->bMolPBC = TRUE;
- }
++ /* Initialize the thread data.
++ * This can not be done in init_domain_decomposition,
++ * as the numbers of threads is determined later.
++ */
++ comm->nth = gmx_omp_nthreads_get(emntDomdec);
++ if (comm->nth > 1)
++ {
++ snew(comm->dth,comm->nth);
++ }
+
+ if (EEL_PME(ir->coulombtype))
+ {
+ init_ddpme(dd,&comm->ddpme[0],0);
+ if (comm->npmedecompdim >= 2)
+ {
+ init_ddpme(dd,&comm->ddpme[1],1);
+ }
+ }
+ else
+ {
+ comm->npmenodes = 0;
+ if (dd->pme_nodeid >= 0)
+ {
+ gmx_fatal_collective(FARGS,NULL,dd,
+ "Can not have separate PME nodes without PME electrostatics");
+ }
+ }
- /* Determine the maximum number of comm. pulses in one dimension */
-
- comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
-
- /* Determine the maximum required number of grid pulses */
- if (comm->cellsize_limit >= comm->cutoff)
- {
- /* Only a single pulse is required */
- npulse = 1;
- }
- else if (!bNoCutOff && comm->cellsize_limit > 0)
- {
- /* We round down slightly here to avoid overhead due to the latency
- * of extra communication calls when the cut-off
- * would be only slightly longer than the cell size.
- * Later cellsize_limit is redetermined,
- * so we can not miss interactions due to this rounding.
- */
- npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
- }
- else
+
+ if (debug)
+ {
+ fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
+ }
+ if (comm->eDLB != edlbNO)
+ {
- /* There is no cell size limit */
- npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
++ set_cell_limits_dlb(dd,dlb_scale,ir,ddbox);
++ }
++
++ print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
++ if (comm->eDLB == edlbAUTO)
++ {
++ if (fplog)
+ {
-
- if (!bNoCutOff && npulse > 1)
- {
- /* See if we can do with less pulses, based on dlb_scale */
- npulse_d_max = 0;
- for(d=0; d<dd->ndim; d++)
- {
- dim = dd->dim[d];
- npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
- /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
- npulse_d_max = max(npulse_d_max,npulse_d);
- }
- npulse = min(npulse,npulse_d_max);
- }
-
- /* This env var can override npulse */
- d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
- if (d > 0)
- {
- npulse = d;
- }
-
- comm->maxpulse = 1;
- comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
- for(d=0; d<dd->ndim; d++)
- {
- comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
- comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
- snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
- comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
- if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
- {
- comm->bVacDLBNoLimit = FALSE;
- }
- }
-
- /* cellsize_limit is set for LINCS in init_domain_decomposition */
- if (!comm->bVacDLBNoLimit)
- {
- comm->cellsize_limit = max(comm->cellsize_limit,
- comm->cutoff/comm->maxpulse);
- }
- comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
- /* Set the minimum cell size for each DD dimension */
- for(d=0; d<dd->ndim; d++)
- {
- if (comm->bVacDLBNoLimit ||
- comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
- {
- comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
- }
- else
- {
- comm->cellsize_min_dlb[dd->dim[d]] =
- comm->cutoff/comm->cd[d].np_dlb;
- }
- }
- if (comm->cutoff_mbody <= 0)
- {
- comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
- }
- if (comm->bDynLoadBal)
- {
- set_dlb_limits(dd);
- }
- }
-
- print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
- if (comm->eDLB == edlbAUTO)
- {
- if (fplog)
- {
- fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
- }
- print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
- }
++ fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
+ }
- static void setup_dd_communication(gmx_domdec_t *dd,
- matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
- {
- int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
- int nzone,nzone_send,zone,zonei,cg0,cg1;
- int c,i,j,cg,cg_gl,nrcg;
- int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
- gmx_domdec_comm_t *comm;
- gmx_domdec_zones_t *zones;
- gmx_domdec_comm_dim_t *cd;
- gmx_domdec_ind_t *ind;
- cginfo_mb_t *cginfo_mb;
- gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
- real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
- rvec rb,rn;
- real corner[DIM][4],corner_round_0=0,corner_round_1[4];
- real bcorner[DIM],bcorner_round_1=0;
- ivec tric_dist;
- rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
- real skew_fac2_d,skew_fac_01;
- rvec sf2_round;
- int nsend,nat;
-
- if (debug)
- {
- fprintf(debug,"Setting up DD communication\n");
- }
-
- comm = dd->comm;
- cg_cm = fr->cg_cm;
-
- for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
- {
- dim = dd->dim[dim_ind];
-
- /* Check if we need to use triclinic distances */
- tric_dist[dim_ind] = 0;
- for(i=0; i<=dim_ind; i++)
- {
- if (ddbox->tric_dir[dd->dim[i]])
- {
- tric_dist[dim_ind] = 1;
- }
- }
- }
++ print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
++ }
+
+ if (ir->ePBC == epbcNONE)
+ {
+ vol_frac = 1 - 1/(double)dd->nnodes;
+ }
+ else
+ {
+ vol_frac =
+ (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
+ }
+ if (debug)
+ {
+ fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
+ }
+ natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
+
+ dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
+}
+
++gmx_bool change_dd_cutoff(t_commrec *cr,t_state *state,t_inputrec *ir,
++ real cutoff_req)
++{
++ gmx_domdec_t *dd;
++ gmx_ddbox_t ddbox;
++ int d,dim,np;
++ real inv_cell_size;
++ int LocallyLimited;
++
++ dd = cr->dd;
++
++ set_ddbox(dd,FALSE,cr,ir,state->box,
++ TRUE,&dd->comm->cgs_gl,state->x,&ddbox);
++
++ LocallyLimited = 0;
++
++ for(d=0; d<dd->ndim; d++)
++ {
++ dim = dd->dim[d];
++
++ inv_cell_size = DD_CELL_MARGIN*dd->nc[dim]/ddbox.box_size[dim];
++ if (dynamic_dd_box(&ddbox,ir))
++ {
++ inv_cell_size *= DD_PRES_SCALE_MARGIN;
++ }
++
++ np = 1 + (int)(cutoff_req*inv_cell_size*ddbox.skew_fac[dim]);
++
++ if (dd->comm->eDLB != edlbNO && dim < ddbox.npbcdim &&
++ dd->comm->cd[d].np_dlb > 0)
++ {
++ if (np > dd->comm->cd[d].np_dlb)
++ {
++ return FALSE;
++ }
++
++ /* If a current local cell size is smaller than the requested
++ * cut-off, we could still fix it, but this gets very complicated.
++ * Without fixing here, we might actually need more checks.
++ */
++ if ((dd->comm->cell_x1[dim] - dd->comm->cell_x0[dim])*ddbox.skew_fac[dim]*dd->comm->cd[d].np_dlb < cutoff_req)
++ {
++ LocallyLimited = 1;
++ }
++ }
++ }
++
++ if (dd->comm->eDLB != edlbNO)
++ {
++ if (check_grid_jump(0,dd,cutoff_req,&ddbox,FALSE))
++ {
++ LocallyLimited = 1;
++ }
++
++ gmx_sumi(1,&LocallyLimited,cr);
++
++ if (LocallyLimited > 0)
++ {
++ return FALSE;
++ }
++ }
++
++ dd->comm->cutoff = cutoff_req;
++
++ return TRUE;
++}
++
+static void merge_cg_buffers(int ncell,
+ gmx_domdec_comm_dim_t *cd, int pulse,
+ int *ncg_cell,
+ int *index_gl, int *recv_i,
+ rvec *cg_cm, rvec *recv_vr,
+ int *cgindex,
+ cginfo_mb_t *cginfo_mb,int *cginfo)
+{
+ gmx_domdec_ind_t *ind,*ind_p;
+ int p,cell,c,cg,cg0,cg1,cg_gl,nat;
+ int shift,shift_at;
+
+ ind = &cd->ind[pulse];
+
+ /* First correct the already stored data */
+ shift = ind->nrecv[ncell];
+ for(cell=ncell-1; cell>=0; cell--)
+ {
+ shift -= ind->nrecv[cell];
+ if (shift > 0)
+ {
+ /* Move the cg's present from previous grid pulses */
+ cg0 = ncg_cell[ncell+cell];
+ cg1 = ncg_cell[ncell+cell+1];
+ cgindex[cg1+shift] = cgindex[cg1];
+ for(cg=cg1-1; cg>=cg0; cg--)
+ {
+ index_gl[cg+shift] = index_gl[cg];
+ copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
+ cgindex[cg+shift] = cgindex[cg];
+ cginfo[cg+shift] = cginfo[cg];
+ }
+ /* Correct the already stored send indices for the shift */
+ for(p=1; p<=pulse; p++)
+ {
+ ind_p = &cd->ind[p];
+ cg0 = 0;
+ for(c=0; c<cell; c++)
+ {
+ cg0 += ind_p->nsend[c];
+ }
+ cg1 = cg0 + ind_p->nsend[cell];
+ for(cg=cg0; cg<cg1; cg++)
+ {
+ ind_p->index[cg] += shift;
+ }
+ }
+ }
+ }
+
+ /* Merge in the communicated buffers */
+ shift = 0;
+ shift_at = 0;
+ cg0 = 0;
+ for(cell=0; cell<ncell; cell++)
+ {
+ cg1 = ncg_cell[ncell+cell+1] + shift;
+ if (shift_at > 0)
+ {
+ /* Correct the old cg indices */
+ for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
+ {
+ cgindex[cg+1] += shift_at;
+ }
+ }
+ for(cg=0; cg<ind->nrecv[cell]; cg++)
+ {
+ /* Copy this charge group from the buffer */
+ index_gl[cg1] = recv_i[cg0];
+ copy_rvec(recv_vr[cg0],cg_cm[cg1]);
+ /* Add it to the cgindex */
+ cg_gl = index_gl[cg1];
+ cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
+ nat = GET_CGINFO_NATOMS(cginfo[cg1]);
+ cgindex[cg1+1] = cgindex[cg1] + nat;
+ cg0++;
+ cg1++;
+ shift_at += nat;
+ }
+ shift += ind->nrecv[cell];
+ ncg_cell[ncell+cell+1] = cg1;
+ }
+}
+
+static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
+ int nzone,int cg0,const int *cgindex)
+{
+ int cg,zone,p;
+
+ /* Store the atom block boundaries for easy copying of communication buffers
+ */
+ cg = cg0;
+ for(zone=0; zone<nzone; zone++)
+ {
+ for(p=0; p<cd->np; p++) {
+ cd->ind[p].cell2at0[zone] = cgindex[cg];
+ cg += cd->ind[p].nrecv[zone];
+ cd->ind[p].cell2at1[zone] = cgindex[cg];
+ }
+ }
+}
+
+static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
+{
+ int i;
+ gmx_bool bMiss;
+
+ bMiss = FALSE;
+ for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
+ {
+ if (!bLocalCG[link->a[i]])
+ {
+ bMiss = TRUE;
+ }
+ }
+
+ return bMiss;
+}
+
- bBondComm = comm->bBondComm;
++/* Domain corners for communication, a maximum of 4 i-zones see a j domain */
++typedef struct {
++ real c[DIM][4]; /* the corners for the non-bonded communication */
++ real cr0; /* corner for rounding */
++ real cr1[4]; /* corners for rounding */
++ real bc[DIM]; /* corners for bounded communication */
++ real bcr1; /* corner for rounding for bonded communication */
++} dd_corners_t;
+
- /* Do we need to determine extra distances for multi-body bondeds? */
- bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
-
- /* Do we need to determine extra distances for only two-body bondeds? */
- bDist2B = (bBondComm && !bDistMB);
++/* Determine the corners of the domain(s) we are communicating with */
++static void
++set_dd_corners(const gmx_domdec_t *dd,
++ int dim0, int dim1, int dim2,
++ gmx_bool bDistMB,
++ dd_corners_t *c)
++{
++ const gmx_domdec_comm_t *comm;
++ const gmx_domdec_zones_t *zones;
++ int i,j;
+
- r_comm2 = sqr(comm->cutoff);
- r_bcomm2 = sqr(comm->cutoff_mbody);
++ comm = dd->comm;
+
- if (debug)
- {
- fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
- }
++ zones = &comm->zones;
+
- zones = &comm->zones;
-
- dim0 = dd->dim[0];
++ /* Keep the compiler happy */
++ c->cr0 = 0;
++ c->bcr1 = 0;
+
- corner[0][0] = comm->cell_x0[dim0];
+ /* The first dimension is equal for all cells */
- bcorner[0] = corner[0][0];
++ c->c[0][0] = comm->cell_x0[dim0];
+ if (bDistMB)
+ {
- corner[1][0] = comm->cell_x0[dim1];
++ c->bc[0] = c->c[0][0];
+ }
+ if (dd->ndim >= 2)
+ {
+ dim1 = dd->dim[1];
+ /* This cell row is only seen from the first row */
- corner[1][1] = comm->cell_x0[dim1];
++ c->c[1][0] = comm->cell_x0[dim1];
+ /* All rows can see this row */
- corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
++ c->c[1][1] = comm->cell_x0[dim1];
+ if (dd->bGridJump)
+ {
- bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
++ c->c[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
+ if (bDistMB)
+ {
+ /* For the multi-body distance we need the maximum */
- corner_round_0 = comm->cell_x1[dim0];
++ c->bc[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
+ }
+ }
+ /* Set the upper-right corner for rounding */
- corner[2][j] = comm->cell_x0[dim2];
++ c->cr0 = comm->cell_x1[dim0];
+
+ if (dd->ndim >= 3)
+ {
+ dim2 = dd->dim[2];
+ for(j=0; j<4; j++)
+ {
- corner[2][j-4] =
- max(corner[2][j-4],
++ c->c[2][j] = comm->cell_x0[dim2];
+ }
+ if (dd->bGridJump)
+ {
+ /* Use the maximum of the i-cells that see a j-cell */
+ for(i=0; i<zones->nizone; i++)
+ {
+ for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
+ {
+ if (j >= 4)
+ {
- bcorner[2] = comm->cell_x0[dim2];
++ c->c[2][j-4] =
++ max(c->c[2][j-4],
+ comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
+ }
+ }
+ }
+ if (bDistMB)
+ {
+ /* For the multi-body distance we need the maximum */
- bcorner[2] = max(bcorner[2],
- comm->zone_d2[i][j].p1_0);
++ c->bc[2] = comm->cell_x0[dim2];
+ for(i=0; i<2; i++)
+ {
+ for(j=0; j<2; j++)
+ {
- corner_round_1[0] = comm->cell_x1[dim1];
- corner_round_1[3] = comm->cell_x1[dim1];
++ c->bc[2] = max(c->bc[2],comm->zone_d2[i][j].p1_0);
+ }
+ }
+ }
+ }
+
+ /* Set the upper-right corner for rounding */
+ /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
+ * Only cell (0,0,0) can see cell 7 (1,1,1)
+ */
- corner_round_1[0] = max(comm->cell_x1[dim1],
- comm->zone_d1[1].mch1);
++ c->cr1[0] = comm->cell_x1[dim1];
++ c->cr1[3] = comm->cell_x1[dim1];
+ if (dd->bGridJump)
+ {
- bcorner_round_1 = max(comm->cell_x1[dim1],
- comm->zone_d1[1].p1_1);
++ c->cr1[0] = max(comm->cell_x1[dim1],comm->zone_d1[1].mch1);
+ if (bDistMB)
+ {
+ /* For the multi-body distance we need the maximum */
- bScrew = (dd->bScrewPBC && dim == XX);
-
++ c->bcr1 = max(comm->cell_x1[dim1],comm->zone_d1[1].p1_1);
++ }
++ }
++ }
++ }
++}
++
++/* Determine which cg's we need to send in this pulse from this zone */
++static void
++get_zone_pulse_cgs(gmx_domdec_t *dd,
++ int zonei, int zone,
++ int cg0, int cg1,
++ const int *index_gl,
++ const int *cgindex,
++ int dim, int dim_ind,
++ int dim0, int dim1, int dim2,
++ real r_comm2, real r_bcomm2,
++ matrix box,
++ ivec tric_dist,
++ rvec *normal,
++ real skew_fac2_d, real skew_fac_01,
++ rvec *v_d, rvec *v_0, rvec *v_1,
++ const dd_corners_t *c,
++ rvec sf2_round,
++ gmx_bool bDistBonded,
++ gmx_bool bBondComm,
++ gmx_bool bDist2B,
++ gmx_bool bDistMB,
++ rvec *cg_cm,
++ int *cginfo,
++ gmx_domdec_ind_t *ind,
++ int **ibuf, int *ibuf_nalloc,
++ vec_rvec_t *vbuf,
++ int *nsend_ptr,
++ int *nat_ptr,
++ int *nsend_z_ptr)
++{
++ gmx_domdec_comm_t *comm;
++ gmx_bool bScrew;
++ gmx_bool bDistMB_pulse;
++ int cg,i;
++ real r2,rb2,r,tric_sh;
++ rvec rn,rb;
++ int dimd;
++ int nsend_z,nsend,nat;
++
++ comm = dd->comm;
++
++ bScrew = (dd->bScrewPBC && dim == XX);
++
++ bDistMB_pulse = (bDistMB && bDistBonded);
++
++ nsend_z = 0;
++ nsend = *nsend_ptr;
++ nat = *nat_ptr;
++
++ for(cg=cg0; cg<cg1; cg++)
++ {
++ r2 = 0;
++ rb2 = 0;
++ if (tric_dist[dim_ind] == 0)
++ {
++ /* Rectangular direction, easy */
++ r = cg_cm[cg][dim] - c->c[dim_ind][zone];
++ if (r > 0)
++ {
++ r2 += r*r;
++ }
++ if (bDistMB_pulse)
++ {
++ r = cg_cm[cg][dim] - c->bc[dim_ind];
++ if (r > 0)
++ {
++ rb2 += r*r;
++ }
++ }
++ /* Rounding gives at most a 16% reduction
++ * in communicated atoms
++ */
++ if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
++ {
++ r = cg_cm[cg][dim0] - c->cr0;
++ /* This is the first dimension, so always r >= 0 */
++ r2 += r*r;
++ if (bDistMB_pulse)
++ {
++ rb2 += r*r;
++ }
++ }
++ if (dim_ind == 2 && (zonei == 2 || zonei == 3))
++ {
++ r = cg_cm[cg][dim1] - c->cr1[zone];
++ if (r > 0)
++ {
++ r2 += r*r;
++ }
++ if (bDistMB_pulse)
++ {
++ r = cg_cm[cg][dim1] - c->bcr1;
++ if (r > 0)
++ {
++ rb2 += r*r;
++ }
++ }
++ }
++ }
++ else
++ {
++ /* Triclinic direction, more complicated */
++ clear_rvec(rn);
++ clear_rvec(rb);
++ /* Rounding, conservative as the skew_fac multiplication
++ * will slightly underestimate the distance.
++ */
++ if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
++ {
++ rn[dim0] = cg_cm[cg][dim0] - c->cr0;
++ for(i=dim0+1; i<DIM; i++)
++ {
++ rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
++ }
++ r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
++ if (bDistMB_pulse)
++ {
++ rb[dim0] = rn[dim0];
++ rb2 = r2;
++ }
++ /* Take care that the cell planes along dim0 might not
++ * be orthogonal to those along dim1 and dim2.
++ */
++ for(i=1; i<=dim_ind; i++)
++ {
++ dimd = dd->dim[i];
++ if (normal[dim0][dimd] > 0)
++ {
++ rn[dimd] -= rn[dim0]*normal[dim0][dimd];
++ if (bDistMB_pulse)
++ {
++ rb[dimd] -= rb[dim0]*normal[dim0][dimd];
++ }
++ }
++ }
++ }
++ if (dim_ind == 2 && (zonei == 2 || zonei == 3))
++ {
++ rn[dim1] += cg_cm[cg][dim1] - c->cr1[zone];
++ tric_sh = 0;
++ for(i=dim1+1; i<DIM; i++)
++ {
++ tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
++ }
++ rn[dim1] += tric_sh;
++ if (rn[dim1] > 0)
++ {
++ r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
++ /* Take care of coupling of the distances
++ * to the planes along dim0 and dim1 through dim2.
++ */
++ r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
++ /* Take care that the cell planes along dim1
++ * might not be orthogonal to that along dim2.
++ */
++ if (normal[dim1][dim2] > 0)
++ {
++ rn[dim2] -= rn[dim1]*normal[dim1][dim2];
++ }
++ }
++ if (bDistMB_pulse)
++ {
++ rb[dim1] +=
++ cg_cm[cg][dim1] - c->bcr1 + tric_sh;
++ if (rb[dim1] > 0)
++ {
++ rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
++ /* Take care of coupling of the distances
++ * to the planes along dim0 and dim1 through dim2.
++ */
++ rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
++ /* Take care that the cell planes along dim1
++ * might not be orthogonal to that along dim2.
++ */
++ if (normal[dim1][dim2] > 0)
++ {
++ rb[dim2] -= rb[dim1]*normal[dim1][dim2];
++ }
++ }
++ }
++ }
++ /* The distance along the communication direction */
++ rn[dim] += cg_cm[cg][dim] - c->c[dim_ind][zone];
++ tric_sh = 0;
++ for(i=dim+1; i<DIM; i++)
++ {
++ tric_sh -= cg_cm[cg][i]*v_d[i][dim];
++ }
++ rn[dim] += tric_sh;
++ if (rn[dim] > 0)
++ {
++ r2 += rn[dim]*rn[dim]*skew_fac2_d;
++ /* Take care of coupling of the distances
++ * to the planes along dim0 and dim1 through dim2.
++ */
++ if (dim_ind == 1 && zonei == 1)
++ {
++ r2 -= rn[dim0]*rn[dim]*skew_fac_01;
++ }
++ }
++ if (bDistMB_pulse)
++ {
++ clear_rvec(rb);
++ rb[dim] += cg_cm[cg][dim] - c->bc[dim_ind] + tric_sh;
++ if (rb[dim] > 0)
++ {
++ rb2 += rb[dim]*rb[dim]*skew_fac2_d;
++ /* Take care of coupling of the distances
++ * to the planes along dim0 and dim1 through dim2.
++ */
++ if (dim_ind == 1 && zonei == 1)
++ {
++ rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
++ }
++ }
++ }
++ }
++
++ if (r2 < r_comm2 ||
++ (bDistBonded &&
++ ((bDistMB && rb2 < r_bcomm2) ||
++ (bDist2B && r2 < r_bcomm2)) &&
++ (!bBondComm ||
++ (GET_CGINFO_BOND_INTER(cginfo[cg]) &&
++ missing_link(comm->cglink,index_gl[cg],
++ comm->bLocalCG)))))
++ {
++ /* Make an index to the local charge groups */
++ if (nsend+1 > ind->nalloc)
++ {
++ ind->nalloc = over_alloc_large(nsend+1);
++ srenew(ind->index,ind->nalloc);
++ }
++ if (nsend+1 > *ibuf_nalloc)
++ {
++ *ibuf_nalloc = over_alloc_large(nsend+1);
++ srenew(*ibuf,*ibuf_nalloc);
++ }
++ ind->index[nsend] = cg;
++ (*ibuf)[nsend] = index_gl[cg];
++ nsend_z++;
++ vec_rvec_check_alloc(vbuf,nsend+1);
++
++ if (dd->ci[dim] == 0)
++ {
++ /* Correct cg_cm for pbc */
++ rvec_add(cg_cm[cg],box[dim],vbuf->v[nsend]);
++ if (bScrew)
++ {
++ vbuf->v[nsend][YY] = box[YY][YY] - vbuf->v[nsend][YY];
++ vbuf->v[nsend][ZZ] = box[ZZ][ZZ] - vbuf->v[nsend][ZZ];
+ }
+ }
++ else
++ {
++ copy_rvec(cg_cm[cg],vbuf->v[nsend]);
++ }
++ nsend++;
++ nat += cgindex[cg+1] - cgindex[cg];
+ }
+ }
++
++ *nsend_ptr = nsend;
++ *nat_ptr = nat;
++ *nsend_z_ptr = nsend_z;
++}
++
++static void setup_dd_communication(gmx_domdec_t *dd,
++ matrix box,gmx_ddbox_t *ddbox,
++ t_forcerec *fr,t_state *state,rvec **f)
++{
++ int dim_ind,dim,dim0,dim1,dim2,dimd,p,nat_tot;
++ int nzone,nzone_send,zone,zonei,cg0,cg1;
++ int c,i,j,cg,cg_gl,nrcg;
++ int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
++ gmx_domdec_comm_t *comm;
++ gmx_domdec_zones_t *zones;
++ gmx_domdec_comm_dim_t *cd;
++ gmx_domdec_ind_t *ind;
++ cginfo_mb_t *cginfo_mb;
++ gmx_bool bBondComm,bDist2B,bDistMB,bDistBonded;
++ real r_mb,r_comm2,r_scomm2,r_bcomm2,r_0,r_1,r2inc,inv_ncg;
++ dd_corners_t corners;
++ ivec tric_dist;
++ rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
++ real skew_fac2_d,skew_fac_01;
++ rvec sf2_round;
++ int nsend,nat;
++ int th;
++
++ if (debug)
++ {
++ fprintf(debug,"Setting up DD communication\n");
++ }
++
++ comm = dd->comm;
++
++ switch (fr->cutoff_scheme)
++ {
++ case ecutsGROUP:
++ cg_cm = fr->cg_cm;
++ break;
++ case ecutsVERLET:
++ cg_cm = state->x;
++ break;
++ default:
++ gmx_incons("unimplemented");
++ cg_cm = NULL;
++ }
++
++ for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
++ {
++ dim = dd->dim[dim_ind];
++
++ /* Check if we need to use triclinic distances */
++ tric_dist[dim_ind] = 0;
++ for(i=0; i<=dim_ind; i++)
++ {
++ if (ddbox->tric_dir[dd->dim[i]])
++ {
++ tric_dist[dim_ind] = 1;
++ }
++ }
++ }
++
++ bBondComm = comm->bBondComm;
++
++ /* Do we need to determine extra distances for multi-body bondeds? */
++ bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
++
++ /* Do we need to determine extra distances for only two-body bondeds? */
++ bDist2B = (bBondComm && !bDistMB);
++
++ r_comm2 = sqr(comm->cutoff);
++ r_bcomm2 = sqr(comm->cutoff_mbody);
++
++ if (debug)
++ {
++ fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
++ }
++
++ zones = &comm->zones;
++
++ dim0 = dd->dim[0];
++ dim1 = (dd->ndim >= 2 ? dd->dim[1] : -1);
++ dim2 = (dd->ndim >= 3 ? dd->dim[2] : -1);
++
++ set_dd_corners(dd,dim0,dim1,dim2,bDistMB,&corners);
+
+ /* Triclinic stuff */
+ normal = ddbox->normal;
+ skew_fac_01 = 0;
+ if (dd->ndim >= 2)
+ {
+ v_0 = ddbox->v[dim0];
+ if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
+ {
+ /* Determine the coupling coefficient for the distances
+ * to the cell planes along dim0 and dim1 through dim2.
+ * This is required for correct rounding.
+ */
+ skew_fac_01 =
+ ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
+ if (debug)
+ {
+ fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
+ }
+ }
+ }
+ if (dd->ndim >= 3)
+ {
+ v_1 = ddbox->v[dim1];
+ }
+
+ zone_cg_range = zones->cg_range;
+ index_gl = dd->index_gl;
+ cgindex = dd->cgindex;
+ cginfo_mb = fr->cginfo_mb;
+
+ zone_cg_range[0] = 0;
+ zone_cg_range[1] = dd->ncg_home;
+ comm->zone_ncg1[0] = dd->ncg_home;
+ pos_cg = dd->ncg_home;
+
+ nat_tot = dd->nat_home;
+ nzone = 1;
+ for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
+ {
+ dim = dd->dim[dim_ind];
+ cd = &comm->cd[dim_ind];
+
+ if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
+ {
+ /* No pbc in this dimension, the first node should not comm. */
+ nzone_send = 0;
+ }
+ else
+ {
+ nzone_send = nzone;
+ }
+
- bDistBonded = ((bDistMB || bDist2B) && p == 0);
- bDistMB_pulse = (bDistMB && bDistBonded);
+ v_d = ddbox->v[dim];
+ skew_fac2_d = sqr(ddbox->skew_fac[dim]);
+
+ cd->bInPlace = TRUE;
+ for(p=0; p<cd->np; p++)
+ {
+ /* Only atoms communicated in the first pulse are used
+ * for multi-body bonded interactions or for bBondComm.
+ */
- ind->nsend[zone] = 0;
- for(cg=cg0; cg<cg1; cg++)
++ bDistBonded = ((bDistMB || bDist2B) && p == 0);
+
+ ind = &cd->ind[p];
+ nsend = 0;
+ nat = 0;
+ for(zone=0; zone<nzone_send; zone++)
+ {
+ if (tric_dist[dim_ind] && dim_ind > 0)
+ {
+ /* Determine slightly more optimized skew_fac's
+ * for rounding.
+ * This reduces the number of communicated atoms
+ * by about 10% for 3D DD of rhombic dodecahedra.
+ */
+ for(dimd=0; dimd<dim; dimd++)
+ {
+ sf2_round[dimd] = 1;
+ if (ddbox->tric_dir[dimd])
+ {
+ for(i=dd->dim[dimd]+1; i<DIM; i++)
+ {
+ /* If we are shifted in dimension i
+ * and the cell plane is tilted forward
+ * in dimension i, skip this coupling.
+ */
+ if (!(zones->shift[nzone+zone][i] &&
+ ddbox->v[dimd][i][dimd] >= 0))
+ {
+ sf2_round[dimd] +=
+ sqr(ddbox->v[dimd][i][dimd]);
+ }
+ }
+ sf2_round[dimd] = 1/sf2_round[dimd];
+ }
+ }
+ }
+
+ zonei = zone_perm[dim_ind][zone];
+ if (p == 0)
+ {
+ /* Here we permutate the zones to obtain a convenient order
+ * for neighbor searching
+ */
+ cg0 = zone_cg_range[zonei];
+ cg1 = zone_cg_range[zonei+1];
+ }
+ else
+ {
+ /* Look only at the cg's received in the previous grid pulse
+ */
+ cg1 = zone_cg_range[nzone+zone+1];
+ cg0 = cg1 - cd->ind[p-1].nrecv[zone];
+ }
- r2 = 0;
- rb2 = 0;
- if (tric_dist[dim_ind] == 0)
++
++#pragma omp parallel for num_threads(comm->nth) schedule(static)
++ for(th=0; th<comm->nth; th++)
+ {
- /* Rectangular direction, easy */
- r = cg_cm[cg][dim] - corner[dim_ind][zone];
- if (r > 0)
- {
- r2 += r*r;
- }
- if (bDistMB_pulse)
- {
- r = cg_cm[cg][dim] - bcorner[dim_ind];
- if (r > 0)
- {
- rb2 += r*r;
- }
- }
- /* Rounding gives at most a 16% reduction
- * in communicated atoms
- */
- if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
- {
- r = cg_cm[cg][dim0] - corner_round_0;
- /* This is the first dimension, so always r >= 0 */
- r2 += r*r;
- if (bDistMB_pulse)
- {
- rb2 += r*r;
- }
- }
- if (dim_ind == 2 && (zonei == 2 || zonei == 3))
- {
- r = cg_cm[cg][dim1] - corner_round_1[zone];
- if (r > 0)
- {
- r2 += r*r;
- }
- if (bDistMB_pulse)
- {
- r = cg_cm[cg][dim1] - bcorner_round_1;
- if (r > 0)
- {
- rb2 += r*r;
- }
- }
- }
++ gmx_domdec_ind_t *ind_p;
++ int **ibuf_p,*ibuf_nalloc_p;
++ vec_rvec_t *vbuf_p;
++ int *nsend_p,*nat_p;
++ int *nsend_zone_p;
++ int cg0_th,cg1_th;
++
++ if (th == 0)
+ {
- /* Triclinic direction, more complicated */
- clear_rvec(rn);
- clear_rvec(rb);
- /* Rounding, conservative as the skew_fac multiplication
- * will slightly underestimate the distance.
- */
- if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
- {
- rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
- for(i=dim0+1; i<DIM; i++)
- {
- rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
- }
- r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
- if (bDistMB_pulse)
- {
- rb[dim0] = rn[dim0];
- rb2 = r2;
- }
- /* Take care that the cell planes along dim0 might not
- * be orthogonal to those along dim1 and dim2.
- */
- for(i=1; i<=dim_ind; i++)
- {
- dimd = dd->dim[i];
- if (normal[dim0][dimd] > 0)
- {
- rn[dimd] -= rn[dim0]*normal[dim0][dimd];
- if (bDistMB_pulse)
- {
- rb[dimd] -= rb[dim0]*normal[dim0][dimd];
- }
- }
- }
- }
- if (dim_ind == 2 && (zonei == 2 || zonei == 3))
- {
- rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
- tric_sh = 0;
- for(i=dim1+1; i<DIM; i++)
- {
- tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
- }
- rn[dim1] += tric_sh;
- if (rn[dim1] > 0)
- {
- r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
- /* Take care of coupling of the distances
- * to the planes along dim0 and dim1 through dim2.
- */
- r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
- /* Take care that the cell planes along dim1
- * might not be orthogonal to that along dim2.
- */
- if (normal[dim1][dim2] > 0)
- {
- rn[dim2] -= rn[dim1]*normal[dim1][dim2];
- }
- }
- if (bDistMB_pulse)
- {
- rb[dim1] +=
- cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
- if (rb[dim1] > 0)
- {
- rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
- /* Take care of coupling of the distances
- * to the planes along dim0 and dim1 through dim2.
- */
- rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
- /* Take care that the cell planes along dim1
- * might not be orthogonal to that along dim2.
- */
- if (normal[dim1][dim2] > 0)
- {
- rb[dim2] -= rb[dim1]*normal[dim1][dim2];
- }
- }
- }
- }
- /* The distance along the communication direction */
- rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
- tric_sh = 0;
- for(i=dim+1; i<DIM; i++)
- {
- tric_sh -= cg_cm[cg][i]*v_d[i][dim];
- }
- rn[dim] += tric_sh;
- if (rn[dim] > 0)
- {
- r2 += rn[dim]*rn[dim]*skew_fac2_d;
- /* Take care of coupling of the distances
- * to the planes along dim0 and dim1 through dim2.
- */
- if (dim_ind == 1 && zonei == 1)
- {
- r2 -= rn[dim0]*rn[dim]*skew_fac_01;
- }
- }
- if (bDistMB_pulse)
- {
- clear_rvec(rb);
- rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
- if (rb[dim] > 0)
- {
- rb2 += rb[dim]*rb[dim]*skew_fac2_d;
- /* Take care of coupling of the distances
- * to the planes along dim0 and dim1 through dim2.
- */
- if (dim_ind == 1 && zonei == 1)
- {
- rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
- }
- }
- }
++ /* Thread 0 writes in the comm buffers */
++ ind_p = ind;
++ ibuf_p = &comm->buf_int;
++ ibuf_nalloc_p = &comm->nalloc_int;
++ vbuf_p = &comm->vbuf;
++ nsend_p = &nsend;
++ nat_p = &nat;
++ nsend_zone_p = &ind->nsend[zone];
+ }
+ else
+ {
- if (r2 < r_comm2 ||
- (bDistBonded &&
- ((bDistMB && rb2 < r_bcomm2) ||
- (bDist2B && r2 < r_bcomm2)) &&
- (!bBondComm ||
- (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
- missing_link(comm->cglink,index_gl[cg],
- comm->bLocalCG)))))
++ /* Other threads write into temp buffers */
++ ind_p = &comm->dth[th].ind;
++ ibuf_p = &comm->dth[th].ibuf;
++ ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc;
++ vbuf_p = &comm->dth[th].vbuf;
++ nsend_p = &comm->dth[th].nsend;
++ nat_p = &comm->dth[th].nat;
++ nsend_zone_p = &comm->dth[th].nsend_zone;
++
++ comm->dth[th].nsend = 0;
++ comm->dth[th].nat = 0;
++ comm->dth[th].nsend_zone = 0;
++ }
++
++ if (comm->nth == 1)
++ {
++ cg0_th = cg0;
++ cg1_th = cg1;
++ }
++ else
++ {
++ cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth;
++ cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth;
+ }
+
- /* Make an index to the local charge groups */
- if (nsend+1 > ind->nalloc)
- {
- ind->nalloc = over_alloc_large(nsend+1);
- srenew(ind->index,ind->nalloc);
- }
- if (nsend+1 > comm->nalloc_int)
- {
- comm->nalloc_int = over_alloc_large(nsend+1);
- srenew(comm->buf_int,comm->nalloc_int);
- }
- ind->index[nsend] = cg;
- comm->buf_int[nsend] = index_gl[cg];
- ind->nsend[zone]++;
- vec_rvec_check_alloc(&comm->vbuf,nsend+1);
++ /* Get the cg's for this pulse in this zone */
++ get_zone_pulse_cgs(dd,zonei,zone,cg0_th,cg1_th,
++ index_gl,cgindex,
++ dim,dim_ind,dim0,dim1,dim2,
++ r_comm2,r_bcomm2,
++ box,tric_dist,
++ normal,skew_fac2_d,skew_fac_01,
++ v_d,v_0,v_1,&corners,sf2_round,
++ bDistBonded,bBondComm,
++ bDist2B,bDistMB,
++ cg_cm,fr->cginfo,
++ ind_p,
++ ibuf_p,ibuf_nalloc_p,
++ vbuf_p,
++ nsend_p,nat_p,
++ nsend_zone_p);
++ }
++
++ /* Append data of threads>=1 to the communication buffers */
++ for(th=1; th<comm->nth; th++)
++ {
++ dd_comm_setup_work_t *dth;
++ int i,ns1;
++
++ dth = &comm->dth[th];
++
++ ns1 = nsend + dth->nsend_zone;
++ if (ns1 > ind->nalloc)
+ {
- if (dd->ci[dim] == 0)
- {
- /* Correct cg_cm for pbc */
- rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
- if (bScrew)
- {
- comm->vbuf.v[nsend][YY] =
- box[YY][YY]-comm->vbuf.v[nsend][YY];
- comm->vbuf.v[nsend][ZZ] =
- box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
- }
- }
- else
- {
- copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
- }
++ ind->nalloc = over_alloc_dd(ns1);
++ srenew(ind->index,ind->nalloc);
++ }
++ if (ns1 > comm->nalloc_int)
++ {
++ comm->nalloc_int = over_alloc_dd(ns1);
++ srenew(comm->buf_int,comm->nalloc_int);
++ }
++ if (ns1 > comm->vbuf.nalloc)
++ {
++ comm->vbuf.nalloc = over_alloc_dd(ns1);
++ srenew(comm->vbuf.v,comm->vbuf.nalloc);
++ }
+
- nat += cgindex[cg+1] - cgindex[cg];
++ for(i=0; i<dth->nsend_zone; i++)
++ {
++ ind->index[nsend] = dth->ind.index[i];
++ comm->buf_int[nsend] = dth->ibuf[i];
++ copy_rvec(dth->vbuf.v[i],
++ comm->vbuf.v[nsend]);
+ nsend++;
- if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
+ }
++ nat += dth->nat;
++ ind->nsend[zone] += dth->nsend_zone;
+ }
+ }
+ /* Clear the counts in case we do not have pbc */
+ for(zone=nzone_send; zone<nzone; zone++)
+ {
+ ind->nsend[zone] = 0;
+ }
+ ind->nsend[nzone] = nsend;
+ ind->nsend[nzone+1] = nat;
+ /* Communicate the number of cg's and atoms to receive */
+ dd_sendrecv_int(dd, dim_ind, dddirBackward,
+ ind->nsend, nzone+2,
+ ind->nrecv, nzone+2);
+
+ /* The rvec buffer is also required for atom buffers of size nsend
+ * in dd_move_x and dd_move_f.
+ */
+ vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
+
+ if (p > 0)
+ {
+ /* We can receive in place if only the last zone is not empty */
+ for(zone=0; zone<nzone-1; zone++)
+ {
+ if (ind->nrecv[zone] > 0)
+ {
+ cd->bInPlace = FALSE;
+ }
+ }
+ if (!cd->bInPlace)
+ {
+ /* The int buffer is only required here for the cg indices */
+ if (ind->nrecv[nzone] > comm->nalloc_int2)
+ {
+ comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
+ srenew(comm->buf_int2,comm->nalloc_int2);
+ }
+ /* The rvec buffer is also required for atom buffers
+ * of size nrecv in dd_move_x and dd_move_f.
+ */
+ i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
+ vec_rvec_check_alloc(&comm->vbuf2,i);
+ }
+ }
+
+ /* Make space for the global cg indices */
+ if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
+ || dd->cg_nalloc == 0)
+ {
+ dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
+ srenew(index_gl,dd->cg_nalloc);
+ srenew(cgindex,dd->cg_nalloc+1);
+ }
+ /* Communicate the global cg indices */
+ if (cd->bInPlace)
+ {
+ recv_i = index_gl + pos_cg;
+ }
+ else
+ {
+ recv_i = comm->buf_int2;
+ }
+ dd_sendrecv_int(dd, dim_ind, dddirBackward,
+ comm->buf_int, nsend,
+ recv_i, ind->nrecv[nzone]);
+
+ /* Make space for cg_cm */
- dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
++ dd_check_alloc_ncg(fr,state,f,pos_cg + ind->nrecv[nzone]);
++ if (fr->cutoff_scheme == ecutsGROUP)
+ {
- static void order_int_cg(int n,gmx_cgsort_t *sort,
+ cg_cm = fr->cg_cm;
+ }
++ else
++ {
++ cg_cm = state->x;
++ }
+ /* Communicate cg_cm */
+ if (cd->bInPlace)
+ {
+ recv_vr = cg_cm + pos_cg;
+ }
+ else
+ {
+ recv_vr = comm->vbuf2.v;
+ }
+ dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
+ comm->vbuf.v, nsend,
+ recv_vr, ind->nrecv[nzone]);
+
+ /* Make the charge group index */
+ if (cd->bInPlace)
+ {
+ zone = (p == 0 ? 0 : nzone - 1);
+ while (zone < nzone)
+ {
+ for(cg=0; cg<ind->nrecv[zone]; cg++)
+ {
+ cg_gl = index_gl[pos_cg];
+ fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
+ nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
+ cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
+ if (bBondComm)
+ {
+ /* Update the charge group presence,
+ * so we can use it in the next pass of the loop.
+ */
+ comm->bLocalCG[cg_gl] = TRUE;
+ }
+ pos_cg++;
+ }
+ if (p == 0)
+ {
+ comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
+ }
+ zone++;
+ zone_cg_range[nzone+zone] = pos_cg;
+ }
+ }
+ else
+ {
+ /* This part of the code is never executed with bBondComm. */
+ merge_cg_buffers(nzone,cd,p,zone_cg_range,
+ index_gl,recv_i,cg_cm,recv_vr,
+ cgindex,fr->cginfo_mb,fr->cginfo);
+ pos_cg += ind->nrecv[nzone];
+ }
+ nat_tot += ind->nrecv[nzone+1];
+ }
+ if (!cd->bInPlace)
+ {
+ /* Store the atom block for easy copying of communication buffers */
+ make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
+ }
+ nzone += nzone;
+ }
+ dd->index_gl = index_gl;
+ dd->cgindex = cgindex;
+
+ dd->ncg_tot = zone_cg_range[zones->n];
+ dd->nat_tot = nat_tot;
+ comm->nat[ddnatHOME] = dd->nat_home;
+ for(i=ddnatZONE; i<ddnatNR; i++)
+ {
+ comm->nat[i] = dd->nat_tot;
+ }
+
+ if (!bBondComm)
+ {
+ /* We don't need to update cginfo, since that was already done above.
+ * So we pass NULL for the forcerec.
+ */
+ dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
+ NULL,comm->bLocalCG);
+ }
+
+ if (debug)
+ {
+ fprintf(debug,"Finished setting up DD communication, zones:");
+ for(c=0; c<zones->n; c++)
+ {
+ fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
+ }
+ fprintf(debug,"\n");
+ }
+}
+
+static void set_cg_boundaries(gmx_domdec_zones_t *zones)
+{
+ int c;
+
+ for(c=0; c<zones->nizone; c++)
+ {
+ zones->izone[c].cg1 = zones->cg_range[c+1];
+ zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
+ zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
+ }
+}
+
++static void set_zones_size(gmx_domdec_t *dd,
++ matrix box,const gmx_ddbox_t *ddbox,
++ int zone_start,int zone_end)
++{
++ gmx_domdec_comm_t *comm;
++ gmx_domdec_zones_t *zones;
++ gmx_bool bDistMB;
++ int z,zi,zj0,zj1,d,dim;
++ real rcs,rcmbs;
++ int i,j;
++ real size_j,add_tric;
++ real vol;
++
++ comm = dd->comm;
++
++ zones = &comm->zones;
++
++ /* Do we need to determine extra distances for multi-body bondeds? */
++ bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
++
++ for(z=zone_start; z<zone_end; z++)
++ {
++ /* Copy cell limits to zone limits.
++ * Valid for non-DD dims and non-shifted dims.
++ */
++ copy_rvec(comm->cell_x0,zones->size[z].x0);
++ copy_rvec(comm->cell_x1,zones->size[z].x1);
++ }
++
++ for(d=0; d<dd->ndim; d++)
++ {
++ dim = dd->dim[d];
++
++ for(z=0; z<zones->n; z++)
++ {
++ /* With a staggered grid we have different sizes
++ * for non-shifted dimensions.
++ */
++ if (dd->bGridJump && zones->shift[z][dim] == 0)
++ {
++ if (d == 1)
++ {
++ zones->size[z].x0[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min0;
++ zones->size[z].x1[dim] = comm->zone_d1[zones->shift[z][dd->dim[d-1]]].max1;
++ }
++ else if (d == 2)
++ {
++ zones->size[z].x0[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min0;
++ zones->size[z].x1[dim] = comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].max1;
++ }
++ }
++ }
++
++ rcs = comm->cutoff;
++ rcmbs = comm->cutoff_mbody;
++ if (ddbox->tric_dir[dim])
++ {
++ rcs /= ddbox->skew_fac[dim];
++ rcmbs /= ddbox->skew_fac[dim];
++ }
++
++ /* Set the lower limit for the shifted zone dimensions */
++ for(z=zone_start; z<zone_end; z++)
++ {
++ if (zones->shift[z][dim] > 0)
++ {
++ dim = dd->dim[d];
++ if (!dd->bGridJump || d == 0)
++ {
++ zones->size[z].x0[dim] = comm->cell_x1[dim];
++ zones->size[z].x1[dim] = comm->cell_x1[dim] + rcs;
++ }
++ else
++ {
++ /* Here we take the lower limit of the zone from
++ * the lowest domain of the zone below.
++ */
++ if (z < 4)
++ {
++ zones->size[z].x0[dim] =
++ comm->zone_d1[zones->shift[z][dd->dim[d-1]]].min1;
++ }
++ else
++ {
++ if (d == 1)
++ {
++ zones->size[z].x0[dim] =
++ zones->size[zone_perm[2][z-4]].x0[dim];
++ }
++ else
++ {
++ zones->size[z].x0[dim] =
++ comm->zone_d2[zones->shift[z][dd->dim[d-2]]][zones->shift[z][dd->dim[d-1]]].min1;
++ }
++ }
++ /* A temporary limit, is updated below */
++ zones->size[z].x1[dim] = zones->size[z].x0[dim];
++
++ if (bDistMB)
++ {
++ for(zi=0; zi<zones->nizone; zi++)
++ {
++ if (zones->shift[zi][dim] == 0)
++ {
++ /* This takes the whole zone into account.
++ * With multiple pulses this will lead
++ * to a larger zone than strictly necessary.
++ */
++ zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
++ zones->size[zi].x1[dim]+rcmbs);
++ }
++ }
++ }
++ }
++ }
++ }
++
++ /* Loop over the i-zones to set the upper limit of each
++ * j-zone they see.
++ */
++ for(zi=0; zi<zones->nizone; zi++)
++ {
++ if (zones->shift[zi][dim] == 0)
++ {
++ for(z=zones->izone[zi].j0; z<zones->izone[zi].j1; z++)
++ {
++ if (zones->shift[z][dim] > 0)
++ {
++ zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
++ zones->size[zi].x1[dim]+rcs);
++ }
++ }
++ }
++ }
++ }
++
++ for(z=zone_start; z<zone_end; z++)
++ {
++ for(i=0; i<DIM; i++)
++ {
++ zones->size[z].bb_x0[i] = zones->size[z].x0[i];
++ zones->size[z].bb_x1[i] = zones->size[z].x1[i];
++
++ for(j=i+1; j<ddbox->npbcdim; j++)
++ {
++ /* With 1D domain decomposition the cg's are not in
++ * the triclinic box, but triclinic x-y and rectangular y-z.
++ */
++ if (box[j][i] != 0 &&
++ !(dd->ndim == 1 && i == YY && j == ZZ))
++ {
++ /* Correct for triclinic offset of the lower corner */
++ add_tric = zones->size[z].x0[j]*box[j][i]/box[j][j];
++ zones->size[z].bb_x0[i] += add_tric;
++ zones->size[z].bb_x1[i] += add_tric;
++
++ /* Correct for triclinic offset of the upper corner */
++ size_j = zones->size[z].x1[j] - zones->size[z].x0[j];
++ add_tric = size_j*box[j][i]/box[j][j];
++
++ if (box[j][i] < 0)
++ {
++ zones->size[z].bb_x0[i] += add_tric;
++ }
++ else
++ {
++ zones->size[z].bb_x1[i] += add_tric;
++ }
++ }
++ }
++ }
++ }
++
++ if (zone_start == 0)
++ {
++ vol = 1;
++ for(dim=0; dim<DIM; dim++)
++ {
++ vol *= zones->size[0].x1[dim] - zones->size[0].x0[dim];
++ }
++ zones->dens_zone0 = (zones->cg_range[1] - zones->cg_range[0])/vol;
++ }
++
++ if (debug)
++ {
++ for(z=zone_start; z<zone_end; z++)
++ {
++ fprintf(debug,"zone %d %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
++ z,
++ zones->size[z].x0[XX],zones->size[z].x1[XX],
++ zones->size[z].x0[YY],zones->size[z].x1[YY],
++ zones->size[z].x0[ZZ],zones->size[z].x1[ZZ]);
++ fprintf(debug,"zone %d bb %6.3f - %6.3f %6.3f - %6.3f %6.3f - %6.3f\n",
++ z,
++ zones->size[z].bb_x0[XX],zones->size[z].bb_x1[XX],
++ zones->size[z].bb_x0[YY],zones->size[z].bb_x1[YY],
++ zones->size[z].bb_x0[ZZ],zones->size[z].bb_x1[ZZ]);
++ }
++ }
++}
++
+static int comp_cgsort(const void *a,const void *b)
+{
+ int comp;
+
+ gmx_cgsort_t *cga,*cgb;
+ cga = (gmx_cgsort_t *)a;
+ cgb = (gmx_cgsort_t *)b;
+
+ comp = cga->nsc - cgb->nsc;
+ if (comp == 0)
+ {
+ comp = cga->ind_gl - cgb->ind_gl;
+ }
+
+ return comp;
+}
+
- static void order_vec_cg(int n,gmx_cgsort_t *sort,
++static void order_int_cg(int n,const gmx_cgsort_t *sort,
+ int *a,int *buf)
+{
+ int i;
+
+ /* Order the data */
+ for(i=0; i<n; i++)
+ {
+ buf[i] = a[sort[i].ind];
+ }
+
+ /* Copy back to the original array */
+ for(i=0; i<n; i++)
+ {
+ a[i] = buf[i];
+ }
+}
+
- static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
++static void order_vec_cg(int n,const gmx_cgsort_t *sort,
+ rvec *v,rvec *buf)
+{
+ int i;
+
+ /* Order the data */
+ for(i=0; i<n; i++)
+ {
+ copy_rvec(v[sort[i].ind],buf[i]);
+ }
+
+ /* Copy back to the original array */
+ for(i=0; i<n; i++)
+ {
+ copy_rvec(buf[i],v[i]);
+ }
+}
+
- static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
- rvec *cgcm,t_forcerec *fr,t_state *state,
- int ncg_home_old)
++static void order_vec_atom(int ncg,const int *cgindex,const gmx_cgsort_t *sort,
+ rvec *v,rvec *buf)
+{
+ int a,atot,cg,cg0,cg1,i;
+
++ if (cgindex == NULL)
++ {
++ /* Avoid the useless loop of the atoms within a cg */
++ order_vec_cg(ncg,sort,v,buf);
++
++ return;
++ }
++
+ /* Order the data */
+ a = 0;
+ for(cg=0; cg<ncg; cg++)
+ {
+ cg0 = cgindex[sort[cg].ind];
+ cg1 = cgindex[sort[cg].ind+1];
+ for(i=cg0; i<cg1; i++)
+ {
+ copy_rvec(v[i],buf[a]);
+ a++;
+ }
+ }
+ atot = a;
+
+ /* Copy back to the original array */
+ for(a=0; a<atot; a++)
+ {
+ copy_rvec(buf[a],v[a]);
+ }
+}
+
+static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
+ int nsort_new,gmx_cgsort_t *sort_new,
+ gmx_cgsort_t *sort1)
+{
+ int i1,i2,i_new;
+
+ /* The new indices are not very ordered, so we qsort them */
+ qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
+
+ /* sort2 is already ordered, so now we can merge the two arrays */
+ i1 = 0;
+ i2 = 0;
+ i_new = 0;
+ while(i2 < nsort2 || i_new < nsort_new)
+ {
+ if (i2 == nsort2)
+ {
+ sort1[i1++] = sort_new[i_new++];
+ }
+ else if (i_new == nsort_new)
+ {
+ sort1[i1++] = sort2[i2++];
+ }
+ else if (sort2[i2].nsc < sort_new[i_new].nsc ||
+ (sort2[i2].nsc == sort_new[i_new].nsc &&
+ sort2[i2].ind_gl < sort_new[i_new].ind_gl))
+ {
+ sort1[i1++] = sort2[i2++];
+ }
+ else
+ {
+ sort1[i1++] = sort_new[i_new++];
+ }
+ }
+}
+
- int ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
- rvec *vbuf;
-
++static int dd_sort_order(gmx_domdec_t *dd,t_forcerec *fr,int ncg_home_old)
+{
+ gmx_domdec_sort_t *sort;
+ gmx_cgsort_t *cgsort,*sort_i;
-
- if (dd->ncg_home > sort->sort_nalloc)
- {
- sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
- srenew(sort->sort1,sort->sort_nalloc);
- srenew(sort->sort2,sort->sort_nalloc);
- }
-
++ int ncg_new,nsort2,nsort_new,i,*a,moved,*ibuf;
++ int sort_last,sort_skip;
++
+ sort = dd->comm->sort;
- cell_index = fr->ns.grid->cell_index[i];
- if (cell_index != 4*fr->ns.grid->ncells)
++
++ a = fr->ns.grid->cell_index;
++
++ moved = NSGRID_SIGNAL_MOVED_FAC*fr->ns.grid->ncells;
++
+ if (ncg_home_old >= 0)
+ {
+ /* The charge groups that remained in the same ns grid cell
+ * are completely ordered. So we can sort efficiently by sorting
+ * the charge groups that did move into the stationary list.
+ */
+ ncg_new = 0;
+ nsort2 = 0;
+ nsort_new = 0;
+ for(i=0; i<dd->ncg_home; i++)
+ {
+ /* Check if this cg did not move to another node */
- if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
++ if (a[i] < moved)
+ {
- * and the global topology index
++ if (i >= ncg_home_old || a[i] != sort->sort[i].nsc)
+ {
+ /* This cg is new on this node or moved ns grid cell */
+ if (nsort_new >= sort->sort_new_nalloc)
+ {
+ sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
+ srenew(sort->sort_new,sort->sort_new_nalloc);
+ }
+ sort_i = &(sort->sort_new[nsort_new++]);
+ }
+ else
+ {
+ /* This cg did not move */
+ sort_i = &(sort->sort2[nsort2++]);
+ }
+ /* Sort on the ns grid cell indices
- sort_i->nsc = cell_index;
++ * and the global topology index.
++ * index_gl is irrelevant with cell ns,
++ * but we set it here anyhow to avoid a conditional.
+ */
- ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
++ sort_i->nsc = a[i];
+ sort_i->ind_gl = dd->index_gl[i];
+ sort_i->ind = i;
+ ncg_new++;
+ }
+ }
+ if (debug)
+ {
+ fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
+ nsort2,nsort_new);
+ }
+ /* Sort efficiently */
- cgsort = sort->sort1;
++ ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,
++ sort->sort);
+ }
+ else
+ {
- cgsort[i].nsc = fr->ns.grid->cell_index[i];
++ cgsort = sort->sort;
+ ncg_new = 0;
+ for(i=0; i<dd->ncg_home; i++)
+ {
+ /* Sort on the ns grid cell indices
+ * and the global topology index
+ */
- if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
++ cgsort[i].nsc = a[i];
+ cgsort[i].ind_gl = dd->index_gl[i];
+ cgsort[i].ind = i;
- cgsort = sort->sort1;
++ if (cgsort[i].nsc < moved)
+ {
+ ncg_new++;
+ }
+ }
+ if (debug)
+ {
+ fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
+ }
+ /* Determine the order of the charge groups using qsort */
+ qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
+ }
- order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
++
++ return ncg_new;
++}
++
++static int dd_sort_order_nbnxn(gmx_domdec_t *dd,t_forcerec *fr)
++{
++ gmx_cgsort_t *sort;
++ int ncg_new,i,*a,na;
++
++ sort = dd->comm->sort->sort;
++
++ nbnxn_get_atomorder(fr->nbv->nbs,&a,&na);
++
++ ncg_new = 0;
++ for(i=0; i<na; i++)
++ {
++ if (a[i] >= 0)
++ {
++ sort[ncg_new].ind = a[i];
++ ncg_new++;
++ }
++ }
++
++ return ncg_new;
++}
++
++static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
++ rvec *cgcm,t_forcerec *fr,t_state *state,
++ int ncg_home_old)
++{
++ gmx_domdec_sort_t *sort;
++ gmx_cgsort_t *cgsort,*sort_i;
++ int *cgindex;
++ int ncg_new,i,*ibuf,cgsize;
++ rvec *vbuf;
+
++ sort = dd->comm->sort;
++
++ if (dd->ncg_home > sort->sort_nalloc)
++ {
++ sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
++ srenew(sort->sort,sort->sort_nalloc);
++ srenew(sort->sort2,sort->sort_nalloc);
++ }
++ cgsort = sort->sort;
++
++ switch (fr->cutoff_scheme)
++ {
++ case ecutsGROUP:
++ ncg_new = dd_sort_order(dd,fr,ncg_home_old);
++ break;
++ case ecutsVERLET:
++ ncg_new = dd_sort_order_nbnxn(dd,fr);
++ break;
++ default:
++ gmx_incons("unimplemented");
++ ncg_new = 0;
++ }
++
+ /* We alloc with the old size, since cgindex is still old */
+ vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
+ vbuf = dd->comm->vbuf.v;
+
++ if (dd->comm->bCGs)
++ {
++ cgindex = dd->cgindex;
++ }
++ else
++ {
++ cgindex = NULL;
++ }
++
+ /* Remove the charge groups which are no longer at home here */
+ dd->ncg_home = ncg_new;
++ if (debug)
++ {
++ fprintf(debug,"Set the new home charge group count to %d\n",
++ dd->ncg_home);
++ }
+
+ /* Reorder the state */
+ for(i=0; i<estNR; i++)
+ {
+ if (EST_DISTR(i) && (state->flags & (1<<i)))
+ {
+ switch (i)
+ {
+ case estX:
- order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
++ order_vec_atom(dd->ncg_home,cgindex,cgsort,state->x,vbuf);
+ break;
+ case estV:
- order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
++ order_vec_atom(dd->ncg_home,cgindex,cgsort,state->v,vbuf);
+ break;
+ case estSDX:
- order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
++ order_vec_atom(dd->ncg_home,cgindex,cgsort,state->sd_X,vbuf);
+ break;
+ case estCGP:
- /* Reorder cgcm */
- order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
++ order_vec_atom(dd->ncg_home,cgindex,cgsort,state->cg_p,vbuf);
+ break;
+ case estLD_RNG:
+ case estLD_RNGI:
+ case estDISRE_INITF:
+ case estDISRE_RM3TAV:
+ case estORIRE_INITF:
+ case estORIRE_DTAV:
+ /* No ordering required */
+ break;
+ default:
+ gmx_incons("Unknown state entry encountered in dd_sort_state");
+ break;
+ }
+ }
+ }
- ibuf[0] = 0;
- for(i=0; i<dd->ncg_home; i++)
++ if (fr->cutoff_scheme == ecutsGROUP)
++ {
++ /* Reorder cgcm */
++ order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
++ }
+
+ if (dd->ncg_home+1 > sort->ibuf_nalloc)
+ {
+ sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
+ srenew(sort->ibuf,sort->ibuf_nalloc);
+ }
+ ibuf = sort->ibuf;
+ /* Reorder the global cg index */
+ order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
+ /* Reorder the cginfo */
+ order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
+ /* Rebuild the local cg index */
- cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
- ibuf[i+1] = ibuf[i] + cgsize;
++ if (dd->comm->bCGs)
+ {
- for(i=0; i<dd->ncg_home+1; i++)
++ ibuf[0] = 0;
++ for(i=0; i<dd->ncg_home; i++)
++ {
++ cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
++ ibuf[i+1] = ibuf[i] + cgsize;
++ }
++ for(i=0; i<dd->ncg_home+1; i++)
++ {
++ dd->cgindex[i] = ibuf[i];
++ }
+ }
- dd->cgindex[i] = ibuf[i];
++ else
+ {
-
- /* Copy the sorted ns cell indices back to the ns grid struct */
- for(i=0; i<dd->ncg_home; i++)
++ for(i=0; i<dd->ncg_home+1; i++)
++ {
++ dd->cgindex[i] = i;
++ }
+ }
+ /* Set the home atom number */
+ dd->nat_home = dd->cgindex[dd->ncg_home];
- fr->ns.grid->cell_index[i] = cgsort[i].nsc;
++
++ if (fr->cutoff_scheme == ecutsVERLET)
++ {
++ /* The atoms are now exactly in grid order, update the grid order */
++ nbnxn_set_atomorder(fr->nbv->nbs);
++ }
++ else
+ {
- fr->ns.grid->nr = dd->ncg_home;
++ /* Copy the sorted ns cell indices back to the ns grid struct */
++ for(i=0; i<dd->ncg_home; i++)
++ {
++ fr->ns.grid->cell_index[i] = cgsort[i].nsc;
++ }
++ fr->ns.grid->nr = dd->ncg_home;
+ }
- int i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
+}
+
+static void add_dd_statistics(gmx_domdec_t *dd)
+{
+ gmx_domdec_comm_t *comm;
+ int ddnat;
+
+ comm = dd->comm;
+
+ for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
+ {
+ comm->sum_nat[ddnat-ddnatZONE] +=
+ comm->nat[ddnat] - comm->nat[ddnat-1];
+ }
+ comm->ndecomp++;
+}
+
+void reset_dd_statistics_counters(gmx_domdec_t *dd)
+{
+ gmx_domdec_comm_t *comm;
+ int ddnat;
+
+ comm = dd->comm;
+
+ /* Reset all the statistics and counters for total run counting */
+ for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
+ {
+ comm->sum_nat[ddnat-ddnatZONE] = 0;
+ }
+ comm->ndecomp = 0;
+ comm->nload = 0;
+ comm->load_step = 0;
+ comm->load_sum = 0;
+ comm->load_max = 0;
+ clear_ivec(comm->load_lim);
+ comm->load_mdf = 0;
+ comm->load_pme = 0;
+}
+
+void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
+{
+ gmx_domdec_comm_t *comm;
+ int ddnat;
+ double av;
+
+ comm = cr->dd->comm;
+
+ gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
+
+ if (fplog == NULL)
+ {
+ return;
+ }
+
+ fprintf(fplog,"\n D O M A I N D E C O M P O S I T I O N S T A T I S T I C S\n\n");
+
+ for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
+ {
+ av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
+ switch(ddnat)
+ {
+ case ddnatZONE:
+ fprintf(fplog,
+ " av. #atoms communicated per step for force: %d x %.1f\n",
+ 2,av);
+ break;
+ case ddnatVSITE:
+ if (cr->dd->vsite_comm)
+ {
+ fprintf(fplog,
+ " av. #atoms communicated per step for vsites: %d x %.1f\n",
+ (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
+ av);
+ }
+ break;
+ case ddnatCON:
+ if (cr->dd->constraint_comm)
+ {
+ fprintf(fplog,
+ " av. #atoms communicated per step for LINCS: %d x %.1f\n",
+ 1 + ir->nLincsIter,av);
+ }
+ break;
+ default:
+ gmx_incons(" Unknown type for DD statistics");
+ }
+ }
+ fprintf(fplog,"\n");
+
+ if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
+ {
+ print_dd_load_av(fplog,cr->dd);
+ }
+}
+
+void dd_partition_system(FILE *fplog,
+ gmx_large_int_t step,
+ t_commrec *cr,
+ gmx_bool bMasterState,
+ int nstglobalcomm,
+ t_state *state_global,
+ gmx_mtop_t *top_global,
+ t_inputrec *ir,
+ t_state *state_local,
+ rvec **f,
+ t_mdatoms *mdatoms,
+ gmx_localtop_t *top_local,
+ t_forcerec *fr,
+ gmx_vsite_t *vsite,
+ gmx_shellfc_t shellfc,
+ gmx_constr_t constr,
+ t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle,
+ gmx_bool bVerbose)
+{
+ gmx_domdec_t *dd;
+ gmx_domdec_comm_t *comm;
+ gmx_ddbox_t ddbox={0};
+ t_block *cgs_gl;
+ gmx_large_int_t step_pcoupl;
+ rvec cell_ns_x0,cell_ns_x1;
- ivec ncells_old,np;
++ int i,j,n,cg0=0,ncg_home_old=-1,ncg_moved,nat_f_novirsum;
+ gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
+ gmx_bool bRedist,bSortCG,bResortAll;
- if (step_pcoupl >= comm->globalcomm_step)
++ ivec ncells_old={0,0,0},ncells_new={0,0,0},np;
+ real grid_density;
+ char sbuf[22];
+
+ dd = cr->dd;
+ comm = dd->comm;
+
+ bBoxChanged = (bMasterState || DEFORM(*ir));
+ if (ir->epc != epcNO)
+ {
+ /* With nstpcouple > 1 pressure coupling happens
+ * one step after calculating the pressure.
+ * Box scaling happens at the end of the MD step,
+ * after the DD partitioning.
+ * We therefore have to do DLB in the first partitioning
+ * after an MD step where P-coupling occurred.
+ * We need to determine the last step in which p-coupling occurred.
+ * MRS -- need to validate this for vv?
+ */
+ n = ir->nstpcouple;
+ if (n == 1)
+ {
+ step_pcoupl = step - 1;
+ }
+ else
+ {
+ step_pcoupl = ((step - 1)/n)*n + 1;
+ }
- bNStGlobalComm = (step >= comm->globalcomm_step + nstglobalcomm);
++ if (step_pcoupl >= comm->partition_step)
+ {
+ bBoxChanged = TRUE;
+ }
+ }
+
- if (dd->ncg_home > fr->cg_nalloc)
++ bNStGlobalComm = (step % nstglobalcomm == 0);
+
+ if (!comm->bDynLoadBal)
+ {
+ bDoDLB = FALSE;
+ }
+ else
+ {
+ /* Should we do dynamic load balancing this step?
+ * Since it requires (possibly expensive) global communication,
+ * we might want to do DLB less frequently.
+ */
+ if (bBoxChanged || ir->epc != epcNO)
+ {
+ bDoDLB = bBoxChanged;
+ }
+ else
+ {
+ bDoDLB = bNStGlobalComm;
+ }
+ }
+
+ /* Check if we have recorded loads on the nodes */
+ if (comm->bRecordLoad && dd_load_count(comm))
+ {
+ if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
+ {
+ /* Check if we should use DLB at the second partitioning
+ * and every 100 partitionings,
+ * so the extra communication cost is negligible.
+ */
+ n = max(100,nstglobalcomm);
+ bCheckDLB = (comm->n_load_collect == 0 ||
+ comm->n_load_have % n == n-1);
+ }
+ else
+ {
+ bCheckDLB = FALSE;
+ }
+
+ /* Print load every nstlog, first and last step to the log file */
+ bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
+ comm->n_load_collect == 0 ||
+ (ir->nsteps >= 0 &&
+ (step + ir->nstlist > ir->init_step + ir->nsteps)));
+
+ /* Avoid extra communication due to verbose screen output
+ * when nstglobalcomm is set.
+ */
+ if (bDoDLB || bLogLoad || bCheckDLB ||
+ (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
+ {
+ get_load_distribution(dd,wcycle);
+ if (DDMASTER(dd))
+ {
+ if (bLogLoad)
+ {
+ dd_print_load(fplog,dd,step-1);
+ }
+ if (bVerbose)
+ {
+ dd_print_load_verbose(dd);
+ }
+ }
+ comm->n_load_collect++;
+
+ if (bCheckDLB) {
+ /* Since the timings are node dependent, the master decides */
+ if (DDMASTER(dd))
+ {
+ bTurnOnDLB =
+ (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
+ if (debug)
+ {
+ fprintf(debug,"step %s, imb loss %f\n",
+ gmx_step_str(step,sbuf),
+ dd_force_imb_perf_loss(dd));
+ }
+ }
+ dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
+ if (bTurnOnDLB)
+ {
+ turn_on_dlb(fplog,cr,step);
+ bDoDLB = TRUE;
+ }
+ }
+ }
+ comm->n_load_have++;
+ }
+
+ cgs_gl = &comm->cgs_gl;
+
+ bRedist = FALSE;
+ if (bMasterState)
+ {
+ /* Clear the old state */
+ clear_dd_indices(dd,0,0);
+
+ set_ddbox(dd,bMasterState,cr,ir,state_global->box,
+ TRUE,cgs_gl,state_global->x,&ddbox);
+
+ get_cg_distribution(fplog,step,dd,cgs_gl,
+ state_global->box,&ddbox,state_global->x);
+
+ dd_distribute_state(dd,cgs_gl,
+ state_global,state_local,f);
+
+ dd_make_local_cgs(dd,&top_local->cgs);
+
- dd_realloc_fr_cg(fr,dd->ncg_home);
++ /* Ensure that we have space for the new distribution */
++ dd_check_alloc_ncg(fr,state_local,f,dd->ncg_home);
++
++ if (fr->cutoff_scheme == ecutsGROUP)
+ {
- calc_cgcm(fplog,0,dd->ncg_home,
- &top_local->cgs,state_local->x,fr->cg_cm);
++ calc_cgcm(fplog,0,dd->ncg_home,
++ &top_local->cgs,state_local->x,fr->cg_cm);
+ }
-
- /* Redetermine the cg COMs */
- calc_cgcm(fplog,0,dd->ncg_home,
- &top_local->cgs,state_local->x,fr->cg_cm);
+
+ inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
+
+ dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
+
+ cg0 = 0;
+ }
+ else if (state_local->ddp_count != dd->ddp_count)
+ {
+ if (state_local->ddp_count > dd->ddp_count)
+ {
+ gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
+ }
+
+ if (state_local->ddp_count_cg_gl != state_local->ddp_count)
+ {
+ gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
+ }
+
+ /* Clear the old state */
+ clear_dd_indices(dd,0,0);
+
+ /* Build the new indices */
+ rebuild_cgindex(dd,cgs_gl->index,state_local);
+ make_dd_indices(dd,cgs_gl->index,0);
- cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
- state_local,f,fr,mdatoms,
- !bSortCG,nrnb);
++
++ if (fr->cutoff_scheme == ecutsGROUP)
++ {
++ /* Redetermine the cg COMs */
++ calc_cgcm(fplog,0,dd->ncg_home,
++ &top_local->cgs,state_local->x,fr->cg_cm);
++ }
+
+ inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
+
+ dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
+
+ set_ddbox(dd,bMasterState,cr,ir,state_local->box,
+ TRUE,&top_local->cgs,state_local->x,&ddbox);
+
+ bRedist = comm->bDynLoadBal;
+ }
+ else
+ {
+ /* We have the full state, only redistribute the cgs */
+
+ /* Clear the non-home indices */
+ clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
+
+ /* Avoid global communication for dim's without pbc and -gcom */
+ if (!bNStGlobalComm)
+ {
+ copy_rvec(comm->box0 ,ddbox.box0 );
+ copy_rvec(comm->box_size,ddbox.box_size);
+ }
+ set_ddbox(dd,bMasterState,cr,ir,state_local->box,
+ bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
+
+ bBoxChanged = TRUE;
+ bRedist = TRUE;
+ }
+ /* For dim's without pbc and -gcom */
+ copy_rvec(ddbox.box0 ,comm->box0 );
+ copy_rvec(ddbox.box_size,comm->box_size);
+
+ set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
+ step,wcycle);
+
+ if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
+ {
+ write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
+ }
+
+ /* Check if we should sort the charge groups */
+ if (comm->nstSortCG > 0)
+ {
+ bSortCG = (bMasterState ||
+ (bRedist && (step % comm->nstSortCG == 0)));
+ }
+ else
+ {
+ bSortCG = FALSE;
+ }
+
+ ncg_home_old = dd->ncg_home;
+
++ ncg_moved = 0;
+ if (bRedist)
+ {
- get_nsgrid_boundaries(fr->ns.grid,dd,
- state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
++ wallcycle_sub_start(wcycle,ewcsDD_REDIST);
++
++ dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
++ state_local,f,fr,mdatoms,
++ !bSortCG,nrnb,&cg0,&ncg_moved);
++
++ wallcycle_sub_stop(wcycle,ewcsDD_REDIST);
+ }
+
- copy_ivec(fr->ns.grid->n,ncells_old);
- grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
- state_local->box,cell_ns_x0,cell_ns_x1,
- fr->rlistlong,grid_density);
++ get_nsgrid_boundaries(ddbox.nboundeddim,state_local->box,
++ dd,&ddbox,
++ &comm->cell_x0,&comm->cell_x1,
+ dd->ncg_home,fr->cg_cm,
+ cell_ns_x0,cell_ns_x1,&grid_density);
+
+ if (bBoxChanged)
+ {
+ comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
+ }
+
- fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
- 0,dd->ncg_home,fr->cg_cm);
-
++ switch (fr->cutoff_scheme)
++ {
++ case ecutsGROUP:
++ copy_ivec(fr->ns.grid->n,ncells_old);
++ grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
++ state_local->box,cell_ns_x0,cell_ns_x1,
++ fr->rlistlong,grid_density);
++ break;
++ case ecutsVERLET:
++ nbnxn_get_ncells(fr->nbv->nbs,&ncells_old[XX],&ncells_old[YY]);
++ break;
++ default:
++ gmx_incons("unimplemented");
++ }
+ /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
+ copy_ivec(ddbox.tric_dir,comm->tric_dir);
+
+ if (bSortCG)
+ {
++ wallcycle_sub_start(wcycle,ewcsDD_GRID);
++
+ /* Sort the state on charge group position.
+ * This enables exact restarts from this step.
+ * It also improves performance by about 15% with larger numbers
+ * of atoms per node.
+ */
+
+ /* Fill the ns grid with the home cell,
+ * so we can sort with the indices.
+ */
+ set_zones_ncg_home(dd);
- bResortAll = (bMasterState ||
- fr->ns.grid->n[XX] != ncells_old[XX] ||
- fr->ns.grid->n[YY] != ncells_old[YY] ||
- fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
++
++ switch (fr->cutoff_scheme)
++ {
++ case ecutsVERLET:
++ set_zones_size(dd,state_local->box,&ddbox,0,1);
++
++ nbnxn_put_on_grid(fr->nbv->nbs,fr->ePBC,state_local->box,
++ 0,
++ comm->zones.size[0].bb_x0,
++ comm->zones.size[0].bb_x1,
++ 0,dd->ncg_home,
++ comm->zones.dens_zone0,
++ fr->cginfo,
++ state_local->x,
++ ncg_moved,comm->moved,
++ fr->nbv->grp[eintLocal].kernel_type,
++ fr->nbv->grp[eintLocal].nbat);
++
++ nbnxn_get_ncells(fr->nbv->nbs,&ncells_new[XX],&ncells_new[YY]);
++ break;
++ case ecutsGROUP:
++ fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
++ 0,dd->ncg_home,fr->cg_cm);
++
++ copy_ivec(fr->ns.grid->n,ncells_new);
++ break;
++ default:
++ gmx_incons("unimplemented");
++ }
++
++ bResortAll = bMasterState;
++
+ /* Check if we can use the old order and ns grid cell indices
+ * of the charge groups to sort the charge groups efficiently.
+ */
- setup_dd_communication(dd,state_local->box,&ddbox,fr);
++ if (ncells_new[XX] != ncells_old[XX] ||
++ ncells_new[YY] != ncells_old[YY] ||
++ ncells_new[ZZ] != ncells_old[ZZ])
++ {
++ bResortAll = TRUE;
++ }
+
+ if (debug)
+ {
+ fprintf(debug,"Step %s, sorting the %d home charge groups\n",
+ gmx_step_str(step,sbuf),dd->ncg_home);
+ }
+ dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
+ bResortAll ? -1 : ncg_home_old);
+ /* Rebuild all the indices */
+ cg0 = 0;
+ ga2la_clear(dd->ga2la);
++
++ wallcycle_sub_stop(wcycle,ewcsDD_GRID);
+ }
++
++ wallcycle_sub_start(wcycle,ewcsDD_SETUPCOMM);
+
+ /* Setup up the communication and communicate the coordinates */
-
++ setup_dd_communication(dd,state_local->box,&ddbox,fr,state_local,f);
+
+ /* Set the indices */
+ make_dd_indices(dd,cgs_gl->index,cg0);
+
+ /* Set the charge group boundaries for neighbor searching */
+ set_cg_boundaries(&comm->zones);
- fr,vsite,top_global,top_local);
++
++ if (fr->cutoff_scheme == ecutsVERLET)
++ {
++ set_zones_size(dd,state_local->box,&ddbox,
++ bSortCG ? 1 : 0,comm->zones.n);
++ }
++
++ wallcycle_sub_stop(wcycle,ewcsDD_SETUPCOMM);
++
+ /*
+ write_dd_pdb("dd_home",step,"dump",top_global,cr,
+ -1,state_local->x,state_local->box);
+ */
++
++ wallcycle_sub_start(wcycle,ewcsDD_MAKETOP);
+
+ /* Extract a local topology from the global topology */
+ for(i=0; i<dd->ndim; i++)
+ {
+ np[dd->dim[i]] = comm->cd[i].np;
+ }
+ dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
+ comm->cellsize_min,np,
- if (dd->bInterCGcons)
++ fr,
++ fr->cutoff_scheme==ecutsGROUP ? fr->cg_cm : state_local->x,
++ vsite,top_global,top_local);
++
++ wallcycle_sub_stop(wcycle,ewcsDD_MAKETOP);
++
++ wallcycle_sub_start(wcycle,ewcsDD_MAKECONSTR);
+
+ /* Set up the special atom communication */
+ n = comm->nat[ddnatZONE];
+ for(i=ddnatZONE+1; i<ddnatNR; i++)
+ {
+ switch(i)
+ {
+ case ddnatVSITE:
+ if (vsite && vsite->n_intercg_vsite)
+ {
+ n = dd_make_local_vsites(dd,n,top_local->idef.il);
+ }
+ break;
+ case ddnatCON:
- n = dd_make_local_constraints(dd,n,top_global,
++ if (dd->bInterCGcons || dd->bInterCGsettles)
+ {
+ /* Only for inter-cg constraints we need special code */
- &top_local->idef.il[F_CONSTR]);
++ n = dd_make_local_constraints(dd,n,top_global,fr->cginfo,
+ constr,ir->nProjOrder,
-
++ top_local->idef.il);
+ }
+ break;
+ default:
+ gmx_incons("Unknown special atom type setup");
+ }
+ comm->nat[i] = n;
+ }
-
++
++ wallcycle_sub_stop(wcycle,ewcsDD_MAKECONSTR);
++
++ wallcycle_sub_start(wcycle,ewcsDD_TOPOTHER);
++
+ /* Make space for the extra coordinates for virtual site
+ * or constraint communication.
+ */
+ state_local->natoms = comm->nat[ddnatNR-1];
+ if (state_local->natoms > state_local->nalloc)
+ {
+ dd_realloc_state(state_local,f,state_local->natoms);
+ }
+
+ if (fr->bF_NoVirSum)
+ {
+ if (vsite && vsite->n_intercg_vsite)
+ {
+ nat_f_novirsum = comm->nat[ddnatVSITE];
+ }
+ else
+ {
+ if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
+ {
+ nat_f_novirsum = dd->nat_tot;
+ }
+ else
+ {
+ nat_f_novirsum = dd->nat_home;
+ }
+ }
+ }
+ else
+ {
+ nat_f_novirsum = 0;
+ }
+
+ /* Set the number of atoms required for the force calculation.
+ * Forces need to be constrained when using a twin-range setup
+ * or with energy minimization. For simple simulations we could
+ * avoid some allocation, zeroing and copying, but this is
+ * probably not worth the complications and checking.
+ */
+ forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
+ dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
+
+ /* We make the all mdatoms up to nat_tot_con.
+ * We could save some work by only setting invmass
+ * between nat_tot and nat_tot_con.
+ */
+ /* This call also sets the new number of home particles to dd->nat_home */
+ atoms2md(top_global,ir,
+ comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
+
+ /* Now we have the charges we can sort the FE interactions */
+ dd_sort_local_top(dd,mdatoms,top_local);
+
+ if (shellfc)
+ {
+ /* Make the local shell stuff, currently no communication is done */
+ make_local_shells(cr,mdatoms,shellfc);
+ }
+
+ if (ir->implicit_solvent)
+ {
+ make_local_gb(cr,fr->born,ir->gb_algorithm);
+ }
- if (bNStGlobalComm)
- {
- /* Store the global communication step */
- comm->globalcomm_step = step;
- }
++
++ init_bonded_thread_force_reduction(fr,&top_local->idef);
++
+ if (!(cr->duty & DUTY_PME))
+ {
+ /* Send the charges to our PME only node */
+ gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
+ mdatoms->chargeA,mdatoms->chargeB,
+ dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
+ }
+
+ if (constr)
+ {
+ set_constraints(constr,top_local,ir,mdatoms,cr);
+ }
+
+ if (ir->ePull != epullNO)
+ {
+ /* Update the local pull groups */
+ dd_make_local_pull_groups(dd,ir->pull,mdatoms);
+ }
+
+ if (ir->bRot)
+ {
+ /* Update the local rotation groups */
+ dd_make_local_rotation_groups(dd,ir->rot);
+ }
+
+
+ add_dd_statistics(dd);
+
+ /* Make sure we only count the cycles for this DD partitioning */
+ clear_dd_cycle_counts(dd);
+
+ /* Because the order of the atoms might have changed since
+ * the last vsite construction, we need to communicate the constructing
+ * atom coordinates again (for spreading the forces this MD step).
+ */
+ dd_move_x_vsites(dd,state_local->box,state_local->x);
++
++ wallcycle_sub_stop(wcycle,ewcsDD_TOPOTHER);
+
+ if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
+ {
+ dd_move_x(dd,state_local->box,state_local->x);
+ write_dd_pdb("dd_dump",step,"dump",top_global,cr,
+ -1,state_local->x,state_local->box);
+ }
+
++ /* Store the partitioning step */
++ comm->partition_step = step;
+
+ /* Increase the DD partitioning counter */
+ dd->ddp_count++;
+ /* The state currently matches this DD partitioning count, store it */
+ state_local->ddp_count = dd->ddp_count;
+ if (bMasterState)
+ {
+ /* The DD master node knows the complete cg distribution,
+ * store the count so we can possibly skip the cg info communication.
+ */
+ comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
+ }
+
+ if (comm->DD_debug > 0)
+ {
+ /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
+ check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
+ "after partitioning");
+ }
+}
--- /dev/null
- /* The atom indices we need from the surrounding cells */
- int nind_req;
- int *ind_req;
- int ind_req_nalloc;
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This file is part of Gromacs Copyright (c) 1991-2008
+ * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <assert.h>
+
+#include "smalloc.h"
+#include "vec.h"
+#include "constr.h"
+#include "domdec.h"
+#include "domdec_network.h"
+#include "mtop_util.h"
+#include "gmx_ga2la.h"
++#include "gmx_hash.h"
++#include "gmx_omp_nthreads.h"
++#include "macros.h"
+
+typedef struct {
+ int nsend;
+ int *a;
+ int a_nalloc;
+ int nrecv;
+} gmx_specatsend_t;
+
++typedef struct {
++ int *ind;
++ int nalloc;
++ int n;
++} ind_req_t;
++
+typedef struct gmx_domdec_specat_comm {
- int *ga2la;
+ /* The number of indices to receive during the setup */
+ int nreq[DIM][2][2];
+ /* The atoms to send */
+ gmx_specatsend_t spas[DIM][2];
+ gmx_bool *bSendAtom;
+ int bSendAtom_nalloc;
+ /* Send buffers */
+ int *ibuf;
+ int ibuf_nalloc;
+ rvec *vbuf;
+ int vbuf_nalloc;
+ rvec *vbuf2;
+ int vbuf2_nalloc;
+ /* The range in the local buffer(s) for received atoms */
+ int at_start;
+ int at_end;
++
++ /* The atom indices we need from the surrounding cells.
++ * We can gather the indices over nthread threads.
++ */
++ int nthread;
++ ind_req_t *ireq;
+} gmx_domdec_specat_comm_t;
+
+typedef struct gmx_domdec_constraints {
+ int *molb_con_offset;
+ int *molb_ncon_mol;
+ /* The fully local and connected constraints */
+ int ncon;
+ /* The global constraint number, only required for clearing gc_req */
+ int *con_gl;
+ int *con_nlocat;
+ int con_nalloc;
+ /* Boolean that tells if a global constraint index has been requested */
+ char *gc_req;
+ /* Global to local communicated constraint atom only index */
- for(i=dd->constraint_comm->at_start; i<dd->constraint_comm->at_end; i++)
- {
- dc->ga2la[dd->gatindex[i]] = -1;
- }
++ gmx_hash_t ga2la;
++
++ /* Multi-threading stuff */
++ int nthread;
++ t_ilist *ils;
+} gmx_domdec_constraints_t;
+
+
+static void dd_move_f_specat(gmx_domdec_t *dd,gmx_domdec_specat_comm_t *spac,
+ rvec *f,rvec *fshift)
+{
+ gmx_specatsend_t *spas;
+ rvec *vbuf;
+ int n,n0,n1,d,dim,dir,i;
+ ivec vis;
+ int is;
+ gmx_bool bPBC,bScrew;
+
+ n = spac->at_end;
+ for(d=dd->ndim-1; d>=0; d--)
+ {
+ dim = dd->dim[d];
+ if (dd->nc[dim] > 2)
+ {
+ /* Pulse the grid forward and backward */
+ spas = spac->spas[d];
+ n0 = spas[0].nrecv;
+ n1 = spas[1].nrecv;
+ n -= n1 + n0;
+ vbuf = spac->vbuf;
+ /* Send and receive the coordinates */
+ dd_sendrecv2_rvec(dd,d,
+ f+n+n1,n0,vbuf ,spas[0].nsend,
+ f+n ,n1,vbuf+spas[0].nsend,spas[1].nsend);
+ for(dir=0; dir<2; dir++)
+ {
+ bPBC = ((dir == 0 && dd->ci[dim] == 0) ||
+ (dir == 1 && dd->ci[dim] == dd->nc[dim]-1));
+ bScrew = (bPBC && dd->bScrewPBC && dim == XX);
+
+ spas = &spac->spas[d][dir];
+ /* Sum the buffer into the required forces */
+ if (!bPBC || (!bScrew && fshift == NULL))
+ {
+ for(i=0; i<spas->nsend; i++)
+ {
+ rvec_inc(f[spas->a[i]],*vbuf);
+ vbuf++;
+ }
+ }
+ else
+ {
+ clear_ivec(vis);
+ vis[dim] = (dir==0 ? 1 : -1);
+ is = IVEC2IS(vis);
+ if (!bScrew)
+ {
+ /* Sum and add to shift forces */
+ for(i=0; i<spas->nsend; i++)
+ {
+ rvec_inc(f[spas->a[i]],*vbuf);
+ rvec_inc(fshift[is],*vbuf);
+ vbuf++;
+ }
+ }
+ else
+ {
+ /* Rotate the forces */
+ for(i=0; i<spas->nsend; i++)
+ {
+ f[spas->a[i]][XX] += (*vbuf)[XX];
+ f[spas->a[i]][YY] -= (*vbuf)[YY];
+ f[spas->a[i]][ZZ] -= (*vbuf)[ZZ];
+ if (fshift)
+ {
+ rvec_inc(fshift[is],*vbuf);
+ }
+ vbuf++;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ /* Two cells, so we only need to communicate one way */
+ spas = &spac->spas[d][0];
+ n -= spas->nrecv;
+ /* Send and receive the coordinates */
+ dd_sendrecv_rvec(dd,d,dddirForward,
+ f+n,spas->nrecv,spac->vbuf,spas->nsend);
+ /* Sum the buffer into the required forces */
+ if (dd->bScrewPBC && dim == XX &&
+ (dd->ci[dim] == 0 ||
+ dd->ci[dim] == dd->nc[dim]-1))
+ {
+ for(i=0; i<spas->nsend; i++)
+ {
+ /* Rotate the force */
+ f[spas->a[i]][XX] += spac->vbuf[i][XX];
+ f[spas->a[i]][YY] -= spac->vbuf[i][YY];
+ f[spas->a[i]][ZZ] -= spac->vbuf[i][ZZ];
+ }
+ }
+ else
+ {
+ for(i=0; i<spas->nsend; i++)
+ {
+ rvec_inc(f[spas->a[i]],spac->vbuf[i]);
+ }
+ }
+ }
+ }
+}
+
+void dd_move_f_vsites(gmx_domdec_t *dd,rvec *f,rvec *fshift)
+{
+ if (dd->vsite_comm)
+ {
+ dd_move_f_specat(dd,dd->vsite_comm,f,fshift);
+ }
+}
+
+void dd_clear_f_vsites(gmx_domdec_t *dd,rvec *f)
+{
+ int i;
+
+ if (dd->vsite_comm)
+ {
+ for(i=dd->vsite_comm->at_start; i<dd->vsite_comm->at_end; i++)
+ {
+ clear_rvec(f[i]);
+ }
+ }
+}
+
+static void dd_move_x_specat(gmx_domdec_t *dd,gmx_domdec_specat_comm_t *spac,
+ matrix box,rvec *x0,rvec *x1)
+{
+ gmx_specatsend_t *spas;
+ rvec *x,*vbuf,*rbuf;
+ int nvec,v,n,nn,ns0,ns1,nr0,nr1,nr,d,dim,dir,i;
+ gmx_bool bPBC,bScrew=FALSE;
+ rvec shift={0,0,0};
+
+ nvec = 1;
+ if (x1)
+ {
+ nvec++;
+ }
+
+ n = spac->at_start;
+ for(d=0; d<dd->ndim; d++)
+ {
+ dim = dd->dim[d];
+ if (dd->nc[dim] > 2)
+ {
+ /* Pulse the grid forward and backward */
+ vbuf = spac->vbuf;
+ for(dir=0; dir<2; dir++)
+ {
+ if (dir == 0 && dd->ci[dim] == 0)
+ {
+ bPBC = TRUE;
+ bScrew = (dd->bScrewPBC && dim == XX);
+ copy_rvec(box[dim],shift);
+ }
+ else if (dir == 1 && dd->ci[dim] == dd->nc[dim]-1)
+ {
+ bPBC = TRUE;
+ bScrew = (dd->bScrewPBC && dim == XX);
+ for(i=0; i<DIM; i++)
+ {
+ shift[i] = -box[dim][i];
+ }
+ }
+ else
+ {
+ bPBC = FALSE;
+ bScrew = FALSE;
+ }
+ spas = &spac->spas[d][dir];
+ for(v=0; v<nvec; v++)
+ {
+ x = (v == 0 ? x0 : x1);
+ /* Copy the required coordinates to the send buffer */
+ if (!bPBC)
+ {
+ /* Only copy */
+ for(i=0; i<spas->nsend; i++)
+ {
+ copy_rvec(x[spas->a[i]],*vbuf);
+ vbuf++;
+ }
+ }
+ else if (!bScrew)
+ {
+ /* Shift coordinates */
+ for(i=0; i<spas->nsend; i++)
+ {
+ rvec_add(x[spas->a[i]],shift,*vbuf);
+ vbuf++;
+ }
+ }
+ else
+ {
+ /* Shift and rotate coordinates */
+ for(i=0; i<spas->nsend; i++)
+ {
+ (*vbuf)[XX] = x[spas->a[i]][XX] + shift[XX];
+ (*vbuf)[YY] = box[YY][YY] - x[spas->a[i]][YY] + shift[YY];
+ (*vbuf)[ZZ] = box[ZZ][ZZ] - x[spas->a[i]][ZZ] + shift[ZZ];
+ vbuf++;
+ }
+ }
+ }
+ }
+ /* Send and receive the coordinates */
+ spas = spac->spas[d];
+ ns0 = spas[0].nsend;
+ nr0 = spas[0].nrecv;
+ ns1 = spas[1].nsend;
+ nr1 = spas[1].nrecv;
+ if (nvec == 1)
+ {
+ dd_sendrecv2_rvec(dd,d,
+ spac->vbuf+ns0,ns1,x0+n ,nr1,
+ spac->vbuf ,ns0,x0+n+nr1,nr0);
+ }
+ else
+ {
+ /* Communicate both vectors in one buffer */
+ rbuf = spac->vbuf2;
+ dd_sendrecv2_rvec(dd,d,
+ spac->vbuf+2*ns0,2*ns1,rbuf ,2*nr1,
+ spac->vbuf ,2*ns0,rbuf+2*nr1,2*nr0);
+ /* Split the buffer into the two vectors */
+ nn = n;
+ for(dir=1; dir>=0; dir--)
+ {
+ nr = spas[dir].nrecv;
+ for(v=0; v<2; v++)
+ {
+ x = (v == 0 ? x0 : x1);
+ for(i=0; i<nr; i++)
+ {
+ copy_rvec(*rbuf,x[nn+i]);
+ rbuf++;
+ }
+ }
+ nn += nr;
+ }
+ }
+ n += nr0 + nr1;
+ }
+ else
+ {
+ spas = &spac->spas[d][0];
+ /* Copy the required coordinates to the send buffer */
+ vbuf = spac->vbuf;
+ for(v=0; v<nvec; v++)
+ {
+ x = (v == 0 ? x0 : x1);
+ if (dd->bScrewPBC && dim == XX &&
+ (dd->ci[XX] == 0 || dd->ci[XX] == dd->nc[XX]-1))
+ {
+ /* Here we only perform the rotation, the rest of the pbc
+ * is handled in the constraint or vsite routines.
+ */
+ for(i=0; i<spas->nsend; i++)
+ {
+ (*vbuf)[XX] = x[spas->a[i]][XX];
+ (*vbuf)[YY] = box[YY][YY] - x[spas->a[i]][YY];
+ (*vbuf)[ZZ] = box[ZZ][ZZ] - x[spas->a[i]][ZZ];
+ vbuf++;
+ }
+ }
+ else
+ {
+ for(i=0; i<spas->nsend; i++)
+ {
+ copy_rvec(x[spas->a[i]],*vbuf);
+ vbuf++;
+ }
+ }
+ }
+ /* Send and receive the coordinates */
+ if (nvec == 1)
+ {
+ dd_sendrecv_rvec(dd,d,dddirBackward,
+ spac->vbuf,spas->nsend,x0+n,spas->nrecv);
+ }
+ else
+ {
+ /* Communicate both vectors in one buffer */
+ rbuf = spac->vbuf2;
+ dd_sendrecv_rvec(dd,d,dddirBackward,
+ spac->vbuf,2*spas->nsend,rbuf,2*spas->nrecv);
+ /* Split the buffer into the two vectors */
+ nr = spas[0].nrecv;
+ for(v=0; v<2; v++)
+ {
+ x = (v == 0 ? x0 : x1);
+ for(i=0; i<nr; i++)
+ {
+ copy_rvec(*rbuf,x[n+i]);
+ rbuf++;
+ }
+ }
+ }
+ n += spas->nrecv;
+ }
+ }
+}
+
+void dd_move_x_constraints(gmx_domdec_t *dd,matrix box,rvec *x0,rvec *x1)
+{
+ if (dd->constraint_comm)
+ {
+ dd_move_x_specat(dd,dd->constraint_comm,box,x0,x1);
+ }
+}
+
+void dd_move_x_vsites(gmx_domdec_t *dd,matrix box,rvec *x)
+{
+ if (dd->vsite_comm)
+ {
+ dd_move_x_specat(dd,dd->vsite_comm,box,x,NULL);
+ }
+}
+
+int *dd_constraints_nlocalatoms(gmx_domdec_t *dd)
+{
+ if (dd->constraints)
+ {
+ return dd->constraints->con_nlocat;
+ }
+ else
+ {
+ return NULL;
+ }
+}
+
+void dd_clear_local_constraint_indices(gmx_domdec_t *dd)
+{
+ gmx_domdec_constraints_t *dc;
+ int i;
+
+ dc = dd->constraints;
+
+ for(i=0; i<dc->ncon; i++)
+ {
+ dc->gc_req[dc->con_gl[i]] = 0;
+ }
+
+ if (dd->constraint_comm)
+ {
- for(i=dd->vsite_comm->at_start; i<dd->vsite_comm->at_end; i++)
- {
- dd->ga2la_vsite[dd->gatindex[i]] = -1;
- }
++ gmx_hash_clear_and_optimize(dc->ga2la);
+ }
+}
+
+void dd_clear_local_vsite_indices(gmx_domdec_t *dd)
+{
+ int i;
+
+ if (dd->vsite_comm)
+ {
- int *ga2la_specat,
++ gmx_hash_clear_and_optimize(dd->ga2la_vsite);
+ }
+}
+
+static int setup_specat_communication(gmx_domdec_t *dd,
++ ind_req_t *ireq,
+ gmx_domdec_specat_comm_t *spac,
- int d,dim,ndir,dir,nr,ns,i,nrecv_local,n0,start,ireq,ind,buf[2];
++ gmx_hash_t ga2la_specat,
+ int at_start,
+ int vbuf_fac,
+ const char *specat_type,
+ const char *add_err)
+{
+ int nsend[2],nlast,nsend_zero[2]={0,0},*nsend_ptr;
- nsend[0] = spac->nind_req;
++ int d,dim,ndir,dir,nr,ns,i,nrecv_local,n0,start,indr,ind,buf[2];
+ int nat_tot_specat,nat_tot_prev,nalloc_old;
+ gmx_bool bPBC,bFirst;
+ gmx_specatsend_t *spas;
+
+ if (debug)
+ {
+ fprintf(debug,"Begin setup_specat_communication for %s\n",specat_type);
+ }
+
+ /* nsend[0]: the number of atoms requested by this node only,
+ * we communicate this for more efficient checks
+ * nsend[1]: the total number of requested atoms
+ */
- if (nlast+nr > spac->ind_req_nalloc)
++ nsend[0] = ireq->n;
+ nsend[1] = nsend[0];
+ nlast = nsend[1];
+ for(d=dd->ndim-1; d>=0; d--)
+ {
+ /* Pulse the grid forward and backward */
+ dim = dd->dim[d];
+ bPBC = (dim < dd->npbcdim);
+ if (dd->nc[dim] == 2)
+ {
+ /* Only 2 cells, so we only need to communicate once */
+ ndir = 1;
+ }
+ else
+ {
+ ndir = 2;
+ }
+ for(dir=0; dir<ndir; dir++)
+ {
+ if (!bPBC &&
+ dd->nc[dim] > 2 &&
+ ((dir == 0 && dd->ci[dim] == dd->nc[dim] - 1) ||
+ (dir == 1 && dd->ci[dim] == 0)))
+ {
+ /* No pbc: the first/last cell should not request atoms */
+ nsend_ptr = nsend_zero;
+ }
+ else
+ {
+ nsend_ptr = nsend;
+ }
+ /* Communicate the number of indices */
+ dd_sendrecv_int(dd,d,dir==0 ? dddirForward : dddirBackward,
+ nsend_ptr,2,spac->nreq[d][dir],2);
+ nr = spac->nreq[d][dir][1];
- spac->ind_req_nalloc = over_alloc_dd(nlast+nr);
- srenew(spac->ind_req,spac->ind_req_nalloc);
++ if (nlast+nr > ireq->nalloc)
+ {
- spac->ind_req,nsend_ptr[1],spac->ind_req+nlast,nr);
++ ireq->nalloc = over_alloc_dd(nlast+nr);
++ srenew(ireq->ind,ireq->nalloc);
+ }
+ /* Communicate the indices */
+ dd_sendrecv_int(dd,d,dir==0 ? dddirForward : dddirBackward,
- ireq = spac->ind_req[start+i];
++ ireq->ind,nsend_ptr[1],ireq->ind+nlast,nr);
+ nlast += nr;
+ }
+ nsend[1] = nlast;
+ }
+ if (debug)
+ {
+ fprintf(debug,"Communicated the counts\n");
+ }
+
+ /* Search for the requested atoms and communicate the indices we have */
+ nat_tot_specat = at_start;
+ nrecv_local = 0;
+ for(d=0; d<dd->ndim; d++)
+ {
+ bFirst = (d == 0);
+ /* Pulse the grid forward and backward */
+ if (dd->dim[d] >= dd->npbcdim || dd->nc[dd->dim[d]] > 2)
+ {
+ ndir = 2;
+ }
+ else
+ {
+ ndir = 1;
+ }
+ nat_tot_prev = nat_tot_specat;
+ for(dir=ndir-1; dir>=0; dir--)
+ {
+ if (nat_tot_specat > spac->bSendAtom_nalloc)
+ {
+ nalloc_old = spac->bSendAtom_nalloc;
+ spac->bSendAtom_nalloc = over_alloc_dd(nat_tot_specat);
+ srenew(spac->bSendAtom,spac->bSendAtom_nalloc);
+ for(i=nalloc_old; i<spac->bSendAtom_nalloc; i++)
+ {
+ spac->bSendAtom[i] = FALSE;
+ }
+ }
+ spas = &spac->spas[d][dir];
+ n0 = spac->nreq[d][dir][0];
+ nr = spac->nreq[d][dir][1];
+ if (debug)
+ {
+ fprintf(debug,"dim=%d, dir=%d, searching for %d atoms\n",
+ d,dir,nr);
+ }
+ start = nlast - nr;
+ spas->nsend = 0;
+ nsend[0] = 0;
+ for(i=0; i<nr; i++)
+ {
- if (!ga2la_get_home(dd->ga2la,ireq,&ind))
++ indr = ireq->ind[start+i];
+ ind = -1;
+ /* Check if this is a home atom and if so ind will be set */
- ind = ga2la_specat[ireq];
++ if (!ga2la_get_home(dd->ga2la,indr,&ind))
+ {
+ /* Search in the communicated atoms */
- spac->ibuf[spas->nsend] = ireq;
++ ind = gmx_hash_get_minone(ga2la_specat,indr);
+ }
+ if (ind >= 0)
+ {
+ if (i < n0 || !spac->bSendAtom[ind])
+ {
+ if (spas->nsend+1 > spas->a_nalloc)
+ {
+ spas->a_nalloc = over_alloc_large(spas->nsend+1);
+ srenew(spas->a,spas->a_nalloc);
+ }
+ /* Store the local index so we know which coordinates
+ * to send out later.
+ */
+ spas->a[spas->nsend] = ind;
+ spac->bSendAtom[ind] = TRUE;
+ if (spas->nsend+1 > spac->ibuf_nalloc)
+ {
+ spac->ibuf_nalloc = over_alloc_large(spas->nsend+1);
+ srenew(spac->ibuf,spac->ibuf_nalloc);
+ }
+ /* Store the global index so we can send it now */
- ga2la_specat[dd->gatindex[i]] = i;
++ spac->ibuf[spas->nsend] = indr;
+ if (i < n0)
+ {
+ nsend[0]++;
+ }
+ spas->nsend++;
+ }
+ }
+ }
+ nlast = start;
+ /* Clear the local flags */
+ for(i=0; i<spas->nsend; i++)
+ {
+ spac->bSendAtom[spas->a[i]] = FALSE;
+ }
+ /* Send and receive the number of indices to communicate */
+ nsend[1] = spas->nsend;
+ dd_sendrecv_int(dd,d,dir==0 ? dddirBackward : dddirForward,
+ nsend,2,buf,2);
+ if (debug)
+ {
+ fprintf(debug,"Send to node %d, %d (%d) indices, "
+ "receive from node %d, %d (%d) indices\n",
+ dd->neighbor[d][1-dir],nsend[1],nsend[0],
+ dd->neighbor[d][dir],buf[1],buf[0]);
+ if (gmx_debug_at)
+ {
+ for(i=0; i<spas->nsend; i++)
+ {
+ fprintf(debug," %d",spac->ibuf[i]+1);
+ }
+ fprintf(debug,"\n");
+ }
+ }
+ nrecv_local += buf[0];
+ spas->nrecv = buf[1];
+ if (nat_tot_specat + spas->nrecv > dd->gatindex_nalloc)
+ {
+ dd->gatindex_nalloc =
+ over_alloc_dd(nat_tot_specat + spas->nrecv);
+ srenew(dd->gatindex,dd->gatindex_nalloc);
+ }
+ /* Send and receive the indices */
+ dd_sendrecv_int(dd,d,dir==0 ? dddirBackward : dddirForward,
+ spac->ibuf,spas->nsend,
+ dd->gatindex+nat_tot_specat,spas->nrecv);
+ nat_tot_specat += spas->nrecv;
+ }
+
+ /* Allocate the x/f communication buffers */
+ ns = spac->spas[d][0].nsend;
+ nr = spac->spas[d][0].nrecv;
+ if (ndir == 2)
+ {
+ ns += spac->spas[d][1].nsend;
+ nr += spac->spas[d][1].nrecv;
+ }
+ if (vbuf_fac*ns > spac->vbuf_nalloc)
+ {
+ spac->vbuf_nalloc = over_alloc_dd(vbuf_fac*ns);
+ srenew(spac->vbuf,spac->vbuf_nalloc);
+ }
+ if (vbuf_fac == 2 && vbuf_fac*nr > spac->vbuf2_nalloc)
+ {
+ spac->vbuf2_nalloc = over_alloc_dd(vbuf_fac*nr);
+ srenew(spac->vbuf2,spac->vbuf2_nalloc);
+ }
+
+ /* Make a global to local index for the communication atoms */
+ for(i=nat_tot_prev; i<nat_tot_specat; i++)
+ {
- if (nrecv_local != spac->nind_req)
++ gmx_hash_change_or_set(ga2la_specat,dd->gatindex[i],i);
+ }
+ }
+
+ /* Check that in the end we got the number of atoms we asked for */
- spac->nind_req,nrecv_local,nat_tot_specat-at_start);
++ if (nrecv_local != ireq->n)
+ {
+ if (debug)
+ {
+ fprintf(debug,"Requested %d, received %d (tot recv %d)\n",
- for(i=0; i<spac->nind_req; i++)
++ ireq->n,nrecv_local,nat_tot_specat-at_start);
+ if (gmx_debug_at)
+ {
- ga2la_specat[spac->ind_req[i]]>=0 ? "" : "!",
- spac->ind_req[i]+1);
++ for(i=0; i<ireq->n; i++)
+ {
++ ind = gmx_hash_get_minone(ga2la_specat,ireq->ind[i]);
+ fprintf(debug," %s%d",
- for(i=0; i<spac->nind_req; i++)
++ (ind >= 0) ? "" : "!",
++ ireq->ind[i]+1);
+ }
+ fprintf(debug,"\n");
+ }
+ }
+ fprintf(stderr,"\nDD cell %d %d %d: Neighboring cells do not have atoms:",
+ dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
- if (ga2la_specat[spac->ind_req[i]] < 0)
++ for(i=0; i<ireq->n; i++)
+ {
- fprintf(stderr," %d",spac->ind_req[i]+1);
++ if (gmx_hash_get_minone(ga2la_specat,ireq->ind[i]) < 0)
+ {
- nrecv_local,spac->nind_req,specat_type,
++ fprintf(stderr," %d",ireq->ind[i]+1);
+ }
+ }
+ fprintf(stderr,"\n");
+ gmx_fatal(FARGS,"DD cell %d %d %d could only obtain %d of the %d atoms that are connected via %ss from the neighboring cells. This probably means your %s lengths are too long compared to the domain decomposition cell size. Decrease the number of domain decomposition grid cells%s%s.",
+ dd->ci[XX],dd->ci[YY],dd->ci[ZZ],
- t_ilist *il_local)
++ nrecv_local,ireq->n,specat_type,
+ specat_type,add_err,
+ dd->bGridJump ? " or use the -rcon option of mdrun" : "");
+ }
+
+ spac->at_start = at_start;
+ spac->at_end = nat_tot_specat;
+
+ if (debug)
+ {
+ fprintf(debug,"Done setup_specat_communication\n");
+ }
+
+ return nat_tot_specat;
+}
+
+static void walk_out(int con,int con_offset,int a,int offset,int nrec,
+ int ncon1,const t_iatom *ia1,const t_iatom *ia2,
+ const t_blocka *at2con,
+ const gmx_ga2la_t ga2la,gmx_bool bHomeConnect,
+ gmx_domdec_constraints_t *dc,
+ gmx_domdec_specat_comm_t *dcc,
- if (dc->ga2la[offset+a] == -1)
++ t_ilist *il_local,
++ ind_req_t *ireq)
+{
+ int a1_gl,a2_gl,a_loc,i,coni,b;
+ const t_iatom *iap;
+
+ if (dc->gc_req[con_offset+con] == 0)
+ {
+ /* Add this non-home constraint to the list */
+ if (dc->ncon+1 > dc->con_nalloc)
+ {
+ dc->con_nalloc = over_alloc_large(dc->ncon+1);
+ srenew(dc->con_gl,dc->con_nalloc);
+ srenew(dc->con_nlocat,dc->con_nalloc);
+ }
+ dc->con_gl[dc->ncon] = con_offset + con;
+ dc->con_nlocat[dc->ncon] = (bHomeConnect ? 1 : 0);
+ dc->gc_req[con_offset+con] = 1;
+ if (il_local->nr + 3 > il_local->nalloc)
+ {
+ il_local->nalloc = over_alloc_dd(il_local->nr+3);
+ srenew(il_local->iatoms,il_local->nalloc);
+ }
+ iap = constr_iatomptr(ncon1,ia1,ia2,con);
+ il_local->iatoms[il_local->nr++] = iap[0];
+ a1_gl = offset + iap[1];
+ a2_gl = offset + iap[2];
+ /* The following indexing code can probably be optimized */
+ if (ga2la_get_home(ga2la,a1_gl,&a_loc))
+ {
+ il_local->iatoms[il_local->nr++] = a_loc;
+ }
+ else
+ {
+ /* We set this index later */
+ il_local->iatoms[il_local->nr++] = -a1_gl - 1;
+ }
+ if (ga2la_get_home(ga2la,a2_gl,&a_loc))
+ {
+ il_local->iatoms[il_local->nr++] = a_loc;
+ }
+ else
+ {
+ /* We set this index later */
+ il_local->iatoms[il_local->nr++] = -a2_gl - 1;
+ }
+ dc->ncon++;
+ }
+ /* Check to not ask for the same atom more than once */
- if (dcc->nind_req+1 > dcc->ind_req_nalloc)
++ if (gmx_hash_get_minone(dc->ga2la,offset+a) == -1)
+ {
+ assert(dcc);
+ /* Add this non-home atom to the list */
- dcc->ind_req_nalloc = over_alloc_large(dcc->nind_req+1);
- srenew(dcc->ind_req,dcc->ind_req_nalloc);
++ if (ireq->n+1 > ireq->nalloc)
+ {
- dcc->ind_req[dcc->nind_req++] = offset + a;
++ ireq->nalloc = over_alloc_large(ireq->n+1);
++ srenew(ireq->ind,ireq->nalloc);
+ }
- dc->ga2la[offset+a] = -2;
++ ireq->ind[ireq->n++] = offset + a;
+ /* Temporarily mark with -2, we get the index later */
- ga2la,FALSE,dc,dcc,il_local);
++ gmx_hash_set(dc->ga2la,offset+a,-2);
+ }
+
+ if (nrec > 0)
+ {
+ for(i=at2con->index[a]; i<at2con->index[a+1]; i++)
+ {
+ coni = at2con->a[i];
+ if (coni != con)
+ {
+ /* Walk further */
+ iap = constr_iatomptr(ncon1,ia1,ia2,coni);
+ if (a == iap[1])
+ {
+ b = iap[2];
+ }
+ else
+ {
+ b = iap[1];
+ }
+ if (!ga2la_get_home(ga2la,offset+b,&a_loc))
+ {
+ walk_out(coni,con_offset,b,offset,nrec-1,
+ ncon1,ia1,ia2,at2con,
- int dd_make_local_constraints(gmx_domdec_t *dd,int at_start,
- gmx_mtop_t *mtop,
- gmx_constr_t constr,int nrec,
- t_ilist *il_local)
++ ga2la,FALSE,dc,dcc,il_local,ireq);
+ }
+ }
+ }
+ }
+}
+
- t_blocka *at2con_mt,*at2con;
++static void atoms_to_settles(gmx_domdec_t *dd,
++ const gmx_mtop_t *mtop,
++ const int *cginfo,
++ const int **at2settle_mt,
++ int cg_start,int cg_end,
++ t_ilist *ils_local,
++ ind_req_t *ireq)
+{
- int ncon1,ncon2;
+ gmx_ga2la_t ga2la;
- int nhome,a,a_gl,a_mol,a_loc,b_lo,offset,mb,molnr,b_mol,i,con,con_offset;
++ gmx_mtop_atomlookup_t alook;
++ int settle;
++ int nral,sa;
++ int cg,a,a_gl,a_glsa,a_gls[3],a_locs[3];
++ int mb,molnr,a_mol,offset;
++ const gmx_molblock_t *molb;
++ const t_iatom *ia1;
++ gmx_bool a_home[3];
++ int nlocal;
++ gmx_bool bAssign;
++
++ ga2la = dd->ga2la;
++
++ alook = gmx_mtop_atomlookup_settle_init(mtop);
++
++ nral = NRAL(F_SETTLE);
++
++ for(cg=cg_start; cg<cg_end; cg++)
++ {
++ if (GET_CGINFO_SETTLE(cginfo[cg]))
++ {
++ for(a=dd->cgindex[cg]; a<dd->cgindex[cg+1]; a++)
++ {
++ a_gl = dd->gatindex[a];
++
++ gmx_mtop_atomnr_to_molblock_ind(alook,a_gl,&mb,&molnr,&a_mol);
++ molb = &mtop->molblock[mb];
++
++ settle = at2settle_mt[molb->type][a_mol];
++
++ if (settle >= 0)
++ {
++ offset = a_gl - a_mol;
++
++ ia1 = mtop->moltype[molb->type].ilist[F_SETTLE].iatoms;
++
++ bAssign = FALSE;
++ nlocal = 0;
++ for(sa=0; sa<nral; sa++)
++ {
++ a_glsa = offset + ia1[settle*(1+nral)+1+sa];
++ a_gls[sa] = a_glsa;
++ a_home[sa] = ga2la_get_home(ga2la,a_glsa,&a_locs[sa]);
++ if (a_home[sa])
++ {
++ if (nlocal == 0 && a_gl == a_glsa)
++ {
++ bAssign = TRUE;
++ }
++ nlocal++;
++ }
++ }
++
++ if (bAssign)
++ {
++ if (ils_local->nr+1+nral > ils_local->nalloc)
++ {
++ ils_local->nalloc = over_alloc_dd(ils_local->nr+1+nral);
++ srenew(ils_local->iatoms,ils_local->nalloc);
++ }
++
++ ils_local->iatoms[ils_local->nr++] = ia1[settle*4];
++
++ for(sa=0; sa<nral; sa++)
++ {
++ if (ga2la_get_home(ga2la,a_gls[sa],&a_locs[sa]))
++ {
++ ils_local->iatoms[ils_local->nr++] = a_locs[sa];
++ }
++ else
++ {
++ ils_local->iatoms[ils_local->nr++] = -a_gls[sa] - 1;
++ /* Add this non-home atom to the list */
++ if (ireq->n+1 > ireq->nalloc)
++ {
++ ireq->nalloc = over_alloc_large(ireq->n+1);
++ srenew(ireq->ind,ireq->nalloc);
++ }
++ ireq->ind[ireq->n++] = a_gls[sa];
++ /* A check on double atom requests is
++ * not required for settle.
++ */
++ }
++ }
++ }
++ }
++ }
++ }
++ }
++
++ gmx_mtop_atomlookup_destroy(alook);
++}
++
++static void atoms_to_constraints(gmx_domdec_t *dd,
++ const gmx_mtop_t *mtop,
++ const int *cginfo,
++ const t_blocka *at2con_mt,int nrec,
++ t_ilist *ilc_local,
++ ind_req_t *ireq)
++{
++ const t_blocka *at2con;
++ gmx_ga2la_t ga2la;
++ gmx_mtop_atomlookup_t alook;
++ int ncon1;
+ gmx_molblock_t *molb;
+ t_iatom *ia1,*ia2,*iap;
- int at_end,*ga2la_specat,j;
++ int nhome,cg,a,a_gl,a_mol,a_loc,b_lo,offset,mb,molnr,b_mol,i,con,con_offset;
+ gmx_domdec_constraints_t *dc;
- dc = dd->constraints;
++ gmx_domdec_specat_comm_t *dcc;
+
- at2con_mt = atom2constraints_moltype(constr);
++ dc = dd->constraints;
++ dcc = dd->constraint_comm;
+
-
- dc->ncon = 0;
- il_local->nr = 0;
+ ga2la = dd->ga2la;
- if (dd->constraint_comm)
- {
- dd->constraint_comm->nind_req = 0;
- }
- for(a=0; a<dd->nat_home; a++)
++
++ alook = gmx_mtop_atomlookup_init(mtop);
++
+ nhome = 0;
- a_gl = dd->gatindex[a];
++ for(cg=0; cg<dd->ncg_home; cg++)
+ {
- gmx_mtop_atomnr_to_molblock_ind(mtop,a_gl,&mb,&molnr,&a_mol);
- molb = &mtop->molblock[mb];
++ if (GET_CGINFO_CONSTR(cginfo[cg]))
++ {
++ for(a=dd->cgindex[cg]; a<dd->cgindex[cg+1]; a++)
++ {
++ a_gl = dd->gatindex[a];
+
- ncon1 = mtop->moltype[molb->type].ilist[F_CONSTR].nr/3;
- ncon2 = mtop->moltype[molb->type].ilist[F_CONSTRNC].nr/3;
- if (ncon1 > 0 || ncon2 > 0)
- {
- ia1 = mtop->moltype[molb->type].ilist[F_CONSTR].iatoms;
- ia2 = mtop->moltype[molb->type].ilist[F_CONSTRNC].iatoms;
-
- /* Calculate the global constraint number offset for the molecule.
- * This is only required for the global index to make sure
- * that we use each constraint only once.
- */
- con_offset = dc->molb_con_offset[mb] + molnr*dc->molb_ncon_mol[mb];
++ gmx_mtop_atomnr_to_molblock_ind(alook,a_gl,&mb,&molnr,&a_mol);
++ molb = &mtop->molblock[mb];
+
- /* The global atom number offset for this molecule */
- offset = a_gl - a_mol;
- at2con = &at2con_mt[molb->type];
- for(i=at2con->index[a_mol]; i<at2con->index[a_mol+1]; i++)
- {
- con = at2con->a[i];
- iap = constr_iatomptr(ncon1,ia1,ia2,con);
- if (a_mol == iap[1])
- {
- b_mol = iap[2];
- }
- else
- {
- b_mol = iap[1];
- }
- if (ga2la_get_home(ga2la,offset+b_mol,&a_loc))
++ ncon1 = mtop->moltype[molb->type].ilist[F_CONSTR].nr/NRAL(F_SETTLE);
++
++ ia1 = mtop->moltype[molb->type].ilist[F_CONSTR].iatoms;
++ ia2 = mtop->moltype[molb->type].ilist[F_CONSTRNC].iatoms;
++
++ /* Calculate the global constraint number offset for the molecule.
++ * This is only required for the global index to make sure
++ * that we use each constraint only once.
++ */
++ con_offset =
++ dc->molb_con_offset[mb] + molnr*dc->molb_ncon_mol[mb];
+
- /* Add this fully home constraint at the first atom */
- if (a_mol < b_mol)
++ /* The global atom number offset for this molecule */
++ offset = a_gl - a_mol;
++ at2con = &at2con_mt[molb->type];
++ for(i=at2con->index[a_mol]; i<at2con->index[a_mol+1]; i++)
+ {
- if (dc->ncon+1 > dc->con_nalloc)
- {
- dc->con_nalloc = over_alloc_large(dc->ncon+1);
- srenew(dc->con_gl,dc->con_nalloc);
- srenew(dc->con_nlocat,dc->con_nalloc);
- }
- dc->con_gl[dc->ncon] = con_offset + con;
- dc->con_nlocat[dc->ncon] = 2;
- if (il_local->nr + 3 > il_local->nalloc)
++ con = at2con->a[i];
++ iap = constr_iatomptr(ncon1,ia1,ia2,con);
++ if (a_mol == iap[1])
+ {
- il_local->nalloc = over_alloc_dd(il_local->nr + 3);
- srenew(il_local->iatoms,il_local->nalloc);
++ b_mol = iap[2];
++ }
++ else
++ {
++ b_mol = iap[1];
++ }
++ if (ga2la_get_home(ga2la,offset+b_mol,&a_loc))
++ {
++ /* Add this fully home constraint at the first atom */
++ if (a_mol < b_mol)
+ {
- b_lo = a_loc;
- il_local->iatoms[il_local->nr++] = iap[0];
- il_local->iatoms[il_local->nr++] = (a_gl == iap[1] ? a : b_lo);
- il_local->iatoms[il_local->nr++] = (a_gl == iap[1] ? b_lo : a );
- dc->ncon++;
- nhome++;
++ if (dc->ncon+1 > dc->con_nalloc)
++ {
++ dc->con_nalloc = over_alloc_large(dc->ncon+1);
++ srenew(dc->con_gl,dc->con_nalloc);
++ srenew(dc->con_nlocat,dc->con_nalloc);
++ }
++ dc->con_gl[dc->ncon] = con_offset + con;
++ dc->con_nlocat[dc->ncon] = 2;
++ if (ilc_local->nr + 3 > ilc_local->nalloc)
++ {
++ ilc_local->nalloc = over_alloc_dd(ilc_local->nr + 3);
++ srenew(ilc_local->iatoms,ilc_local->nalloc);
++ }
++ b_lo = a_loc;
++ ilc_local->iatoms[ilc_local->nr++] = iap[0];
++ ilc_local->iatoms[ilc_local->nr++] = (a_gl == iap[1] ? a : b_lo);
++ ilc_local->iatoms[ilc_local->nr++] = (a_gl == iap[1] ? b_lo : a );
++ dc->ncon++;
++ nhome++;
+ }
- }
- else
- {
- /* We need the nrec constraints coupled to this constraint,
- * so we need to walk out of the home cell by nrec+1 atoms,
- * since already atom bg is not locally present.
- * Therefore we call walk_out with nrec recursions to go
- * after this first call.
- */
- walk_out(con,con_offset,b_mol,offset,nrec,
- ncon1,ia1,ia2,at2con,
- dd->ga2la,TRUE,dc,dd->constraint_comm,il_local);
+ }
-
++ else
++ {
++ /* We need the nrec constraints coupled to this constraint,
++ * so we need to walk out of the home cell by nrec+1 atoms,
++ * since already atom bg is not locally present.
++ * Therefore we call walk_out with nrec recursions to go
++ * after this first call.
++ */
++ walk_out(con,con_offset,b_mol,offset,nrec,
++ ncon1,ia1,ia2,at2con,
++ dd->ga2la,TRUE,dc,dcc,ilc_local,ireq);
++ }
+ }
+ }
+ }
+ }
- dd->constraint_comm ? dd->constraint_comm->nind_req : 0);
++
++ gmx_mtop_atomlookup_destroy(alook);
++
+ if (debug)
+ {
+ fprintf(debug,
+ "Constraints: home %3d border %3d atoms: %3d\n",
+ nhome,dc->ncon-nhome,
- setup_specat_communication(dd,dd->constraint_comm,
++ dd->constraint_comm ? ireq->n : 0);
++ }
++}
++
++int dd_make_local_constraints(gmx_domdec_t *dd,int at_start,
++ const gmx_mtop_t *mtop,
++ const int *cginfo,
++ gmx_constr_t constr,int nrec,
++ t_ilist *il_local)
++{
++ gmx_domdec_constraints_t *dc;
++ t_ilist *ilc_local,*ils_local;
++ ind_req_t *ireq;
++ const t_blocka *at2con_mt;
++ const int **at2settle_mt;
++ gmx_hash_t ga2la_specat;
++ int at_end,i,j;
++ t_iatom *iap;
++
++ dc = dd->constraints;
++
++ ilc_local = &il_local[F_CONSTR];
++ ils_local = &il_local[F_SETTLE];
++
++ dc->ncon = 0;
++ ilc_local->nr = 0;
++ if (dd->constraint_comm)
++ {
++ at2con_mt = atom2constraints_moltype(constr);
++ ireq = &dd->constraint_comm->ireq[0];
++ ireq->n = 0;
++ }
++ else
++ {
++ at2con_mt = NULL;
++ ireq = NULL;
++ }
++
++ if (dd->bInterCGsettles)
++ {
++ at2settle_mt = atom2settle_moltype(constr);
++ ils_local->nr = 0;
++ }
++ else
++ {
++ /* Settle works inside charge groups, we assigned them already */
++ at2settle_mt = NULL;
++ }
++
++ if (at2settle_mt == NULL)
++ {
++ atoms_to_constraints(dd,mtop,cginfo,at2con_mt,nrec,
++ ilc_local,ireq);
++ }
++ else
++ {
++ int t0_set;
++ int thread;
++
++ /* Do the constraints, if present, on the first thread.
++ * Do the settles on all other threads.
++ */
++ t0_set = ((at2con_mt != NULL && dc->nthread > 1) ? 1 : 0);
++
++#pragma omp parallel for num_threads(dc->nthread) schedule(static)
++ for(thread=0; thread<dc->nthread; thread++)
++ {
++ if (at2con_mt && thread == 0)
++ {
++ atoms_to_constraints(dd,mtop,cginfo,at2con_mt,nrec,
++ ilc_local,ireq);
++ }
++
++ if (thread >= t0_set)
++ {
++ int cg0,cg1;
++ t_ilist *ilst;
++ ind_req_t *ireqt;
++
++ /* Distribute the settle check+assignments over
++ * dc->nthread or dc->nthread-1 threads.
++ */
++ cg0 = (dd->ncg_home*(thread-t0_set ))/(dc->nthread-t0_set);
++ cg1 = (dd->ncg_home*(thread-t0_set+1))/(dc->nthread-t0_set);
++
++ if (thread == t0_set)
++ {
++ ilst = ils_local;
++ }
++ else
++ {
++ ilst = &dc->ils[thread];
++ }
++ ilst->nr = 0;
++
++ ireqt = &dd->constraint_comm->ireq[thread];
++ if (thread > 0)
++ {
++ ireqt->n = 0;
++ }
++
++ atoms_to_settles(dd,mtop,cginfo,at2settle_mt,
++ cg0,cg1,
++ ilst,ireqt);
++ }
++ }
++
++ /* Combine the generated settles and requested indices */
++ for(thread=1; thread<dc->nthread; thread++)
++ {
++ t_ilist *ilst;
++ ind_req_t *ireqt;
++ int ia;
++
++ if (thread > t0_set)
++ {
++ ilst = &dc->ils[thread];
++ if (ils_local->nr + ilst->nr > ils_local->nalloc)
++ {
++ ils_local->nalloc = over_alloc_large(ils_local->nr + ilst->nr);
++ srenew(ils_local->iatoms,ils_local->nalloc);
++ }
++ for(ia=0; ia<ilst->nr; ia++)
++ {
++ ils_local->iatoms[ils_local->nr+ia] = ilst->iatoms[ia];
++ }
++ ils_local->nr += ilst->nr;
++ }
++
++ ireqt = &dd->constraint_comm->ireq[thread];
++ if (ireq->n+ireqt->n > ireq->nalloc)
++ {
++ ireq->nalloc = over_alloc_large(ireq->n+ireqt->n);
++ srenew(ireq->ind,ireq->nalloc);
++ }
++ for(ia=0; ia<ireqt->n; ia++)
++ {
++ ireq->ind[ireq->n+ia] = ireqt->ind[ia];
++ }
++ ireq->n += ireqt->n;
++ }
++
++ if (debug)
++ {
++ fprintf(debug,"Settles: total %3d\n",ils_local->nr/4);
++ }
+ }
+
+ if (dd->constraint_comm) {
++ int nral1;
++
+ at_end =
- for(i=0; i<il_local->nr; i+=3)
++ setup_specat_communication(dd,ireq,dd->constraint_comm,
+ dd->constraints->ga2la,
+ at_start,2,
+ "constraint"," or lincs-order");
+
+ /* Fill in the missing indices */
+ ga2la_specat = dd->constraints->ga2la;
- iap = il_local->iatoms + i;
- for(j=1; j<3; j++)
++
++ nral1 = 1 + NRAL(F_CONSTR);
++ for(i=0; i<ilc_local->nr; i+=nral1)
++ {
++ iap = ilc_local->iatoms + i;
++ for(j=1; j<nral1; j++)
++ {
++ if (iap[j] < 0)
++ {
++ iap[j] = gmx_hash_get_minone(ga2la_specat,-iap[j]-1);
++ }
++ }
++ }
++
++ nral1 = 1 + NRAL(F_SETTLE);
++ for(i=0; i<ils_local->nr; i+=nral1)
+ {
- iap[j] = ga2la_specat[-iap[j]-1];
++ iap = ils_local->iatoms + i;
++ for(j=1; j<nral1; j++)
+ {
+ if (iap[j] < 0)
+ {
- int *ga2la_specat;
++ iap[j] = gmx_hash_get_minone(ga2la_specat,-iap[j]-1);
+ }
+ }
+ }
+ }
+ else
+ {
+ at_end = at_start;
+ }
+
+ return at_end;
+}
+
+int dd_make_local_vsites(gmx_domdec_t *dd,int at_start,t_ilist *lil)
+{
+ gmx_domdec_specat_comm_t *spac;
- spac->nind_req = 0;
++ ind_req_t *ireq;
++ gmx_hash_t ga2la_specat;
+ int ftype,nral,i,j,gat,a;
+ t_ilist *lilf;
+ t_iatom *iatoms;
+ int at_end;
+
+ spac = dd->vsite_comm;
++ ireq = &spac->ireq[0];
+ ga2la_specat = dd->ga2la_vsite;
+
- if (ga2la_specat[a] == -1)
++ ireq->n = 0;
+ /* Loop over all the home vsites */
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ if (interaction_function[ftype].flags & IF_VSITE)
+ {
+ nral = NRAL(ftype);
+ lilf = &lil[ftype];
+ for(i=0; i<lilf->nr; i+=1+nral)
+ {
+ iatoms = lilf->iatoms + i;
+ /* Check if we have the other atoms */
+ for(j=1; j<1+nral; j++)
+ {
+ if (iatoms[j] < 0) {
+ /* This is not a home atom,
+ * we need to ask our neighbors.
+ */
+ a = -iatoms[j] - 1;
+ /* Check to not ask for the same atom more than once */
- if (spac->nind_req+1 > spac->ind_req_nalloc)
++ if (gmx_hash_get_minone(dd->ga2la_vsite,a) == -1)
+ {
+ /* Add this non-home atom to the list */
- spac->ind_req_nalloc =
- over_alloc_small(spac->nind_req+1);
- srenew(spac->ind_req,spac->ind_req_nalloc);
++ if (ireq->n+1 > ireq->nalloc)
+ {
- spac->ind_req[spac->nind_req++] = a;
++ ireq->nalloc = over_alloc_large(ireq->n+1);
++ srenew(ireq->ind,ireq->nalloc);
+ }
- ga2la_specat[a] = -2;
++ ireq->ind[ireq->n++] = a;
+ /* Temporarily mark with -2,
+ * we get the index later.
+ */
- at_end = setup_specat_communication(dd,dd->vsite_comm,ga2la_specat,
++ gmx_hash_set(ga2la_specat,a,-2);
+ }
+ }
+ }
+ }
+ }
+ }
+
- iatoms[j] = ga2la_specat[-iatoms[j]-1];
++ at_end = setup_specat_communication(dd,ireq,dd->vsite_comm,ga2la_specat,
+ at_start,1,"vsite","");
+
+ /* Fill in the missing indices */
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ if (interaction_function[ftype].flags & IF_VSITE)
+ {
+ nral = NRAL(ftype);
+ lilf = &lil[ftype];
+ for(i=0; i<lilf->nr; i+=1+nral)
+ {
+ iatoms = lilf->iatoms + i;
+ for(j=1; j<1+nral; j++)
+ {
+ if (iatoms[j] < 0)
+ {
- int natoms,gmx_mtop_t *mtop,
++ iatoms[j] = gmx_hash_get_minone(ga2la_specat,-iatoms[j]-1);
+ }
+ }
+ }
+ }
+ }
+
+ return at_end;
+}
+
++static gmx_domdec_specat_comm_t *specat_comm_init(int nthread)
++{
++ gmx_domdec_specat_comm_t *spac;
++
++ snew(spac,1);
++ spac->nthread = nthread;
++ snew(spac->ireq,spac->nthread);
++
++ return spac;
++}
++
+void init_domdec_constraints(gmx_domdec_t *dd,
- snew(dc->gc_req,ncon);
- for(c=0; c<ncon; c++)
- {
- dc->gc_req[c] = 0;
- }
-
- snew(dc->ga2la,natoms);
- for(a=0; a<natoms; a++)
++ gmx_mtop_t *mtop,
+ gmx_constr_t constr)
+{
+ gmx_domdec_constraints_t *dc;
+ gmx_molblock_t *molb;
+ int mb,ncon,c,a;
+
+ if (debug)
+ {
+ fprintf(debug,"Begin init_domdec_constraints\n");
+ }
+
+ snew(dd->constraints,1);
+ dc = dd->constraints;
+
+ snew(dc->molb_con_offset,mtop->nmolblock);
+ snew(dc->molb_ncon_mol,mtop->nmolblock);
+
+ ncon = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ dc->molb_con_offset[mb] = ncon;
+ dc->molb_ncon_mol[mb] =
+ mtop->moltype[molb->type].ilist[F_CONSTR].nr/3 +
+ mtop->moltype[molb->type].ilist[F_CONSTRNC].nr/3;
+ ncon += molb->nmol*dc->molb_ncon_mol[mb];
+ }
+
- dc->ga2la[a] = -1;
++ if (ncon > 0)
+ {
-
- snew(dd->constraint_comm,1);
++ snew(dc->gc_req,ncon);
++ for(c=0; c<ncon; c++)
++ {
++ dc->gc_req[c] = 0;
++ }
+ }
- void init_domdec_vsites(gmx_domdec_t *dd,int natoms)
++
++ /* Use a hash table for the global to local index.
++ * The number of keys is a rough estimate, it will be optimized later.
++ */
++ dc->ga2la = gmx_hash_init(min(mtop->natoms/20,
++ mtop->natoms/(2*dd->nnodes)));
++
++ dc->nthread = gmx_omp_nthreads_get(emntDomdec);
++ snew(dc->ils,dc->nthread);
++
++ dd->constraint_comm = specat_comm_init(dc->nthread);
+}
+
- snew(dd->ga2la_vsite,natoms);
- for(i=0; i<natoms; i++)
- {
- dd->ga2la_vsite[i] = -1;
- }
++void init_domdec_vsites(gmx_domdec_t *dd,int n_intercg_vsite)
+{
+ int i;
+ gmx_domdec_constraints_t *dc;
+
+ if (debug)
+ {
+ fprintf(debug,"Begin init_domdec_vsites\n");
+ }
+
- snew(dd->vsite_comm,1);
++ /* Use a hash table for the global to local index.
++ * The number of keys is a rough estimate, it will be optimized later.
++ */
++ dd->ga2la_vsite = gmx_hash_init(min(n_intercg_vsite/20,
++ n_intercg_vsite/(2*dd->nnodes)));
+
++ dd->vsite_comm = specat_comm_init(1);
+}
--- /dev/null
- static gmx_bool dd_check_ftype(int ftype,gmx_bool bBCheck,gmx_bool bConstr)
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This file is part of Gromacs Copyright (c) 1991-2008
+ * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include "typedefs.h"
+#include "smalloc.h"
+#include "domdec.h"
+#include "domdec_network.h"
+#include "names.h"
+#include "network.h"
+#include "vec.h"
+#include "pbc.h"
+#include "chargegroup.h"
+#include "gmx_random.h"
+#include "topsort.h"
+#include "mtop_util.h"
+#include "mshift.h"
+#include "vsite.h"
+#include "gmx_ga2la.h"
++#include "force.h"
++#include "gmx_omp_nthreads.h"
+
+/* for dd_init_local_state */
+#define NITEM_DD_INIT_LOCAL_STATE 5
+
+typedef struct {
+ int *index; /* Index for each atom into il */
+ int *il; /* ftype|type|a0|...|an|ftype|... */
+} gmx_reverse_ilist_t;
+
+typedef struct {
+ int a_start;
+ int a_end;
+ int natoms_mol;
+ int type;
+} gmx_molblock_ind_t;
+
+typedef struct gmx_reverse_top {
+ gmx_bool bExclRequired; /* Do we require all exclusions to be assigned? */
+ gmx_bool bConstr; /* Are there constraints in this reverse top? */
++ gmx_bool bSettle; /* Are there settles in this reverse top? */
+ gmx_bool bBCheck; /* All bonded interactions have to be assigned? */
+ gmx_bool bMultiCGmols; /* Are there multi charge-group molecules? */
+ gmx_reverse_ilist_t *ril_mt; /* Reverse ilist for all moltypes */
+ int ril_mt_tot_size;
+ int ilsort; /* The sorting state of bondeds for free energy */
+ gmx_molblock_ind_t *mbi;
++ int nmolblock;
++
++ /* Work data structures for multi-threading */
++ int nthread;
++ t_idef *idef_thread;
++ int ***vsite_pbc;
++ int **vsite_pbc_nalloc;
++ int *nbonded_thread;
++ t_blocka *excl_thread;
++ int *excl_count_thread;
+
+ /* Pointers only used for an error message */
+ gmx_mtop_t *err_top_global;
+ gmx_localtop_t *err_top_local;
+} gmx_reverse_top_t;
+
+static int nral_rt(int ftype)
+{
+ /* Returns the number of atom entries for il in gmx_reverse_top_t */
+ int nral;
+
+ nral = NRAL(ftype);
+ if (interaction_function[ftype].flags & IF_VSITE)
+ {
+ /* With vsites the reverse topology contains
+ * two extra entries for PBC.
+ */
+ nral += 2;
+ }
+
+ return nral;
+}
+
- ftype == F_SETTLE ||
- (bConstr && (ftype == F_CONSTR || ftype == F_CONSTRNC)));
++/* This function tells which interactions need to be assigned exactly once */
++static gmx_bool dd_check_ftype(int ftype,gmx_bool bBCheck,
++ gmx_bool bConstr,gmx_bool bSettle)
+{
+ return (((interaction_function[ftype].flags & IF_BOND) &&
+ !(interaction_function[ftype].flags & IF_VSITE) &&
+ (bBCheck || !(interaction_function[ftype].flags & IF_LIMZERO))) ||
- if (dd_check_ftype(ftype,rt->bBCheck,rt->bConstr))
++ (bConstr && (ftype == F_CONSTR || ftype == F_CONSTRNC)) ||
++ (bSettle && ftype == F_SETTLE));
+}
+
+static void print_error_header(FILE *fplog,char *moltypename,int nprint)
+{
+ fprintf(fplog, "\nMolecule type '%s'\n",moltypename);
+ fprintf(stderr,"\nMolecule type '%s'\n",moltypename);
+ fprintf(fplog,
+ "the first %d missing interactions, except for exclusions:\n",
+ nprint);
+ fprintf(stderr,
+ "the first %d missing interactions, except for exclusions:\n",
+ nprint);
+}
+
+static void print_missing_interactions_mb(FILE *fplog,t_commrec *cr,
+ gmx_reverse_top_t *rt,
+ char *moltypename,
+ gmx_reverse_ilist_t *ril,
+ int a_start,int a_end,
+ int nat_mol,int nmol,
+ t_idef *idef)
+{
+ int nril_mol,*assigned,*gatindex;
+ int ftype,ftype_j,nral,i,j_mol,j,k,a0,a0_mol,mol,a,a_gl;
+ int nprint;
+ t_ilist *il;
+ t_iatom *ia;
+ gmx_bool bFound;
+
+ nril_mol = ril->index[nat_mol];
+ snew(assigned,nmol*nril_mol);
+
+ gatindex = cr->dd->gatindex;
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
- || ftype == F_SETTLE
- || (dd->reverse_top->bConstr && ftype == F_CONSTR))
++ if (dd_check_ftype(ftype,rt->bBCheck,rt->bConstr,rt->bSettle))
+ {
+ nral = NRAL(ftype);
+ il = &idef->il[ftype];
+ ia = il->iatoms;
+ for(i=0; i<il->nr; i+=1+nral)
+ {
+ a0 = gatindex[ia[1]];
+ /* Check if this interaction is in
+ * the currently checked molblock.
+ */
+ if (a0 >= a_start && a0 < a_end)
+ {
+ mol = (a0 - a_start)/nat_mol;
+ a0_mol = (a0 - a_start) - mol*nat_mol;
+ j_mol = ril->index[a0_mol];
+ bFound = FALSE;
+ while (j_mol < ril->index[a0_mol+1] && !bFound)
+ {
+ j = mol*nril_mol + j_mol;
+ ftype_j = ril->il[j_mol];
+ /* Here we need to check if this interaction has
+ * not already been assigned, since we could have
+ * multiply defined interactions.
+ */
+ if (ftype == ftype_j && ia[0] == ril->il[j_mol+1] &&
+ assigned[j] == 0)
+ {
+ /* Check the atoms */
+ bFound = TRUE;
+ for(a=0; a<nral; a++)
+ {
+ if (gatindex[ia[1+a]] !=
+ a_start + mol*nat_mol + ril->il[j_mol+2+a])
+ {
+ bFound = FALSE;
+ }
+ }
+ if (bFound)
+ {
+ assigned[j] = 1;
+ }
+ }
+ j_mol += 2 + nral_rt(ftype_j);
+ }
+ if (!bFound)
+ {
+ gmx_incons("Some interactions seem to be assigned multiple times");
+ }
+ }
+ ia += 1 + nral;
+ }
+ }
+ }
+
+ gmx_sumi(nmol*nril_mol,assigned,cr);
+
+ nprint = 10;
+ i = 0;
+ for(mol=0; mol<nmol; mol++)
+ {
+ j_mol = 0;
+ while (j_mol < nril_mol)
+ {
+ ftype = ril->il[j_mol];
+ nral = NRAL(ftype);
+ j = mol*nril_mol + j_mol;
+ if (assigned[j] == 0 &&
+ !(interaction_function[ftype].flags & IF_VSITE))
+ {
+ if (DDMASTER(cr->dd))
+ {
+ if (i == 0)
+ {
+ print_error_header(fplog,moltypename,nprint);
+ }
+ fprintf(fplog, "%20s atoms",
+ interaction_function[ftype].longname);
+ fprintf(stderr,"%20s atoms",
+ interaction_function[ftype].longname);
+ for(a=0; a<nral; a++) {
+ fprintf(fplog, "%5d",ril->il[j_mol+2+a]+1);
+ fprintf(stderr,"%5d",ril->il[j_mol+2+a]+1);
+ }
+ while (a < 4)
+ {
+ fprintf(fplog, " ");
+ fprintf(stderr," ");
+ a++;
+ }
+ fprintf(fplog, " global");
+ fprintf(stderr," global");
+ for(a=0; a<nral; a++)
+ {
+ fprintf(fplog, "%6d",
+ a_start+mol*nat_mol+ril->il[j_mol+2+a]+1);
+ fprintf(stderr,"%6d",
+ a_start+mol*nat_mol+ril->il[j_mol+2+a]+1);
+ }
+ fprintf(fplog, "\n");
+ fprintf(stderr,"\n");
+ }
+ i++;
+ if (i >= nprint)
+ {
+ break;
+ }
+ }
+ j_mol += 2 + nral_rt(ftype);
+ }
+ }
+
+ sfree(assigned);
+}
+
+static void print_missing_interactions_atoms(FILE *fplog,t_commrec *cr,
+ gmx_mtop_t *mtop,t_idef *idef)
+{
+ int mb,a_start,a_end;
+ gmx_molblock_t *molb;
+ gmx_reverse_top_t *rt;
+
+ rt = cr->dd->reverse_top;
+
+ /* Print the atoms in the missing interactions per molblock */
+ a_end = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ a_start = a_end;
+ a_end = a_start + molb->nmol*molb->natoms_mol;
+
+ print_missing_interactions_mb(fplog,cr,rt,
+ *(mtop->moltype[molb->type].name),
+ &rt->ril_mt[molb->type],
+ a_start,a_end,molb->natoms_mol,
+ molb->nmol,
+ idef);
+ }
+}
+
+void dd_print_missing_interactions(FILE *fplog,t_commrec *cr,int local_count, gmx_mtop_t *top_global, t_state *state_local)
+{
+ int ndiff_tot,cl[F_NRE],n,ndiff,rest_global,rest_local;
+ int ftype,nral;
+ char buf[STRLEN];
+ gmx_domdec_t *dd;
+ gmx_mtop_t *err_top_global;
+ gmx_localtop_t *err_top_local;
+
+ dd = cr->dd;
+
+ err_top_global = dd->reverse_top->err_top_global;
+ err_top_local = dd->reverse_top->err_top_local;
+
+ if (fplog)
+ {
+ fprintf(fplog,"\nNot all bonded interactions have been properly assigned to the domain decomposition cells\n");
+ fflush(fplog);
+ }
+
+ ndiff_tot = local_count - dd->nbonded_global;
+
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ nral = NRAL(ftype);
+ cl[ftype] = err_top_local->idef.il[ftype].nr/(1+nral);
+ }
+
+ gmx_sumi(F_NRE,cl,cr);
+
+ if (DDMASTER(dd))
+ {
+ fprintf(fplog,"\nA list of missing interactions:\n");
+ fprintf(stderr,"\nA list of missing interactions:\n");
+ rest_global = dd->nbonded_global;
+ rest_local = local_count;
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ /* In the reverse and local top all constraints are merged
+ * into F_CONSTR. So in the if statement we skip F_CONSTRNC
+ * and add these constraints when doing F_CONSTR.
+ */
+ if (((interaction_function[ftype].flags & IF_BOND) &&
+ (dd->reverse_top->bBCheck
+ || !(interaction_function[ftype].flags & IF_LIMZERO)))
- static void global_atomnr_to_moltype_ind(gmx_molblock_ind_t *mbi,int i_gl,
++ || (dd->reverse_top->bConstr && ftype == F_CONSTR)
++ || (dd->reverse_top->bSettle && ftype == F_SETTLE))
+ {
+ nral = NRAL(ftype);
+ n = gmx_mtop_ftype_count(err_top_global,ftype);
+ if (ftype == F_CONSTR)
+ {
+ n += gmx_mtop_ftype_count(err_top_global,F_CONSTRNC);
+ }
+ ndiff = cl[ftype] - n;
+ if (ndiff != 0)
+ {
+ sprintf(buf,"%20s of %6d missing %6d",
+ interaction_function[ftype].longname,n,-ndiff);
+ fprintf(fplog,"%s\n",buf);
+ fprintf(stderr,"%s\n",buf);
+ }
+ rest_global -= n;
+ rest_local -= cl[ftype];
+ }
+ }
+
+ ndiff = rest_local - rest_global;
+ if (ndiff != 0)
+ {
+ sprintf(buf,"%20s of %6d missing %6d","exclusions",
+ rest_global,-ndiff);
+ fprintf(fplog,"%s\n",buf);
+ fprintf(stderr,"%s\n",buf);
+ }
+ }
+
+ print_missing_interactions_atoms(fplog,cr,err_top_global,
+ &err_top_local->idef);
+ write_dd_pdb("dd_dump_err",0,"dump",top_global,cr,
+ -1,state_local->x,state_local->box);
+ if (DDMASTER(dd))
+ {
+ if (ndiff_tot > 0)
+ {
+ gmx_incons("One or more interactions were multiple assigned in the domain decompostion");
+ }
+ else
+ {
+ gmx_fatal(FARGS,"%d of the %d bonded interactions could not be calculated because some atoms involved moved further apart than the multi-body cut-off distance (%g nm) or the two-body cut-off distance (%g nm), see option -rdd, for pairs and tabulated bonds also see option -ddcheck",-ndiff_tot,cr->dd->nbonded_global,dd_cutoff_mbody(cr->dd),dd_cutoff_twobody(cr->dd));
+ }
+ }
+}
+
- *mb = 0;
- while (i_gl >= mbi->a_end) {
- (*mb)++;
- mbi++;
++static void global_atomnr_to_moltype_ind(gmx_reverse_top_t *rt,int i_gl,
+ int *mb,int *mt,int *mol,int *i_mol)
+{
+ int molb;
+
- gmx_bool bConstr,gmx_bool bBCheck,
++
++ gmx_molblock_ind_t *mbi = rt->mbi;
++ int start = 0;
++ int end = rt->nmolblock; /* exclusive */
++ int mid;
++
++ /* binary search for molblock_ind */
++ while (TRUE) {
++ mid = (start+end)/2;
++ if (i_gl >= mbi[mid].a_end)
++ {
++ start = mid+1;
++ }
++ else if (i_gl < mbi[mid].a_start)
++ {
++ end = mid;
++ }
++ else
++ {
++ break;
++ }
+ }
+
++ *mb = mid;
++ mbi += mid;
++
+ *mt = mbi->type;
+ *mol = (i_gl - mbi->a_start) / mbi->natoms_mol;
+ *i_mol = (i_gl - mbi->a_start) - (*mol)*mbi->natoms_mol;
+}
+
+static int count_excls(t_block *cgs,t_blocka *excls,int *n_intercg_excl)
+{
+ int n,n_inter,cg,at0,at1,at,excl,atj;
+
+ n = 0;
+ *n_intercg_excl = 0;
+ for(cg=0; cg<cgs->nr; cg++)
+ {
+ at0 = cgs->index[cg];
+ at1 = cgs->index[cg+1];
+ for(at=at0; at<at1; at++)
+ {
+ for(excl=excls->index[at]; excl<excls->index[at+1]; excl++)
+ {
+ atj = excls->a[excl];
+ if (atj > at)
+ {
+ n++;
+ if (atj < at0 || atj >= at1)
+ {
+ (*n_intercg_excl)++;
+ }
+ }
+ }
+ }
+ }
+
+ return n;
+}
+
+static int low_make_reverse_ilist(t_ilist *il_mt,t_atom *atom,
+ int **vsite_pbc,
+ int *count,
- ftype == F_SETTLE ||
- (bConstr && (ftype == F_CONSTR || ftype == F_CONSTRNC))) {
++ gmx_bool bConstr,gmx_bool bSettle,
++ gmx_bool bBCheck,
+ int *r_index,int *r_il,
+ gmx_bool bLinkToAllAtoms,
+ gmx_bool bAssign)
+{
+ int ftype,nral,i,j,nlink,link;
+ t_ilist *il;
+ t_iatom *ia;
+ atom_id a;
+ int nint;
+ gmx_bool bVSite;
+
+ nint = 0;
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ if ((interaction_function[ftype].flags & (IF_BOND | IF_VSITE)) ||
- gmx_bool bConstr,gmx_bool bBCheck,
++ (bConstr && (ftype == F_CONSTR || ftype == F_CONSTRNC)) ||
++ (bSettle && ftype == F_SETTLE))
++ {
+ bVSite = (interaction_function[ftype].flags & IF_VSITE);
+ nral = NRAL(ftype);
+ il = &il_mt[ftype];
+ ia = il->iatoms;
+ for(i=0; i<il->nr; i+=1+nral)
+ {
+ ia = il->iatoms + i;
+ if (bLinkToAllAtoms)
+ {
+ if (bVSite)
+ {
+ /* We don't need the virtual sites for the cg-links */
+ nlink = 0;
+ }
+ else
+ {
+ nlink = nral;
+ }
+ }
+ else
+ {
+ /* Couple to the first atom in the interaction */
+ nlink = 1;
+ }
+ for(link=0; link<nlink; link++)
+ {
+ a = ia[1+link];
+ if (bAssign)
+ {
+ r_il[r_index[a]+count[a]] =
+ (ftype == F_CONSTRNC ? F_CONSTR : ftype);
+ r_il[r_index[a]+count[a]+1] = ia[0];
+ for(j=1; j<1+nral; j++)
+ {
+ /* Store the molecular atom number */
+ r_il[r_index[a]+count[a]+1+j] = ia[j];
+ }
+ }
+ if (interaction_function[ftype].flags & IF_VSITE)
+ {
+ if (bAssign)
+ {
+ /* Add an entry to iatoms for storing
+ * which of the constructing atoms are
+ * vsites again.
+ */
+ r_il[r_index[a]+count[a]+2+nral] = 0;
+ for(j=2; j<1+nral; j++)
+ {
+ if (atom[ia[j]].ptype == eptVSite)
+ {
+ r_il[r_index[a]+count[a]+2+nral] |= (2<<j);
+ }
+ }
+ /* Store vsite pbc atom in a second extra entry */
+ r_il[r_index[a]+count[a]+2+nral+1] =
+ (vsite_pbc ? vsite_pbc[ftype-F_VSITE2][i/(1+nral)] : -2);
+ }
+ }
+ else
+ {
+ /* We do not count vsites since they are always
+ * uniquely assigned and can be assigned
+ * to multiple nodes with recursive vsites.
+ */
+ if (bBCheck ||
+ !(interaction_function[ftype].flags & IF_LIMZERO))
+ {
+ nint++;
+ }
+ }
+ count[a] += 2 + nral_rt(ftype);
+ }
+ }
+ }
+ }
+
+ return nint;
+}
+
+static int make_reverse_ilist(gmx_moltype_t *molt,
+ int **vsite_pbc,
- bConstr,bBCheck,NULL,NULL,
++ gmx_bool bConstr,gmx_bool bSettle,
++ gmx_bool bBCheck,
+ gmx_bool bLinkToAllAtoms,
+ gmx_reverse_ilist_t *ril_mt)
+{
+ int nat_mt,*count,i,nint_mt;
+
+ /* Count the interactions */
+ nat_mt = molt->atoms.nr;
+ snew(count,nat_mt);
+ low_make_reverse_ilist(molt->ilist,molt->atoms.atom,vsite_pbc,
+ count,
- bConstr,bBCheck,
++ bConstr,bSettle,bBCheck,NULL,NULL,
+ bLinkToAllAtoms,FALSE);
+
+ snew(ril_mt->index,nat_mt+1);
+ ril_mt->index[0] = 0;
+ for(i=0; i<nat_mt; i++)
+ {
+ ril_mt->index[i+1] = ril_mt->index[i] + count[i];
+ count[i] = 0;
+ }
+ snew(ril_mt->il,ril_mt->index[nat_mt]);
+
+ /* Store the interactions */
+ nint_mt =
+ low_make_reverse_ilist(molt->ilist,molt->atoms.atom,vsite_pbc,
+ count,
- gmx_bool bConstr,
++ bConstr,bSettle,bBCheck,
+ ril_mt->index,ril_mt->il,
+ bLinkToAllAtoms,TRUE);
+
+ sfree(count);
+
+ return nint_mt;
+}
+
+static void destroy_reverse_ilist(gmx_reverse_ilist_t *ril)
+{
+ sfree(ril->index);
+ sfree(ril->il);
+}
+
+static gmx_reverse_top_t *make_reverse_top(gmx_mtop_t *mtop,gmx_bool bFE,
+ int ***vsite_pbc_molt,
- rt->bConstr,rt->bBCheck,FALSE,
++ gmx_bool bConstr,gmx_bool bSettle,
+ gmx_bool bBCheck,int *nint)
+{
+ int mt,i,mb;
+ gmx_reverse_top_t *rt;
+ int *nint_mt;
+ gmx_moltype_t *molt;
++ int thread;
+
+ snew(rt,1);
+
+ /* Should we include constraints (for SHAKE) in rt? */
+ rt->bConstr = bConstr;
++ rt->bSettle = bSettle;
+ rt->bBCheck = bBCheck;
+
+ rt->bMultiCGmols = FALSE;
+ snew(nint_mt,mtop->nmoltype);
+ snew(rt->ril_mt,mtop->nmoltype);
+ rt->ril_mt_tot_size = 0;
+ for(mt=0; mt<mtop->nmoltype; mt++)
+ {
+ molt = &mtop->moltype[mt];
+ if (molt->cgs.nr > 1)
+ {
+ rt->bMultiCGmols = TRUE;
+ }
+
+ /* Make the atom to interaction list for this molecule type */
+ nint_mt[mt] =
+ make_reverse_ilist(molt,vsite_pbc_molt ? vsite_pbc_molt[mt] : NULL,
- int mb,natoms,n_recursive_vsite,nexcl,nexcl_icg,a;
++ rt->bConstr,rt->bSettle,rt->bBCheck,FALSE,
+ &rt->ril_mt[mt]);
+
+ rt->ril_mt_tot_size += rt->ril_mt[mt].index[molt->atoms.nr];
+ }
+ if (debug)
+ {
+ fprintf(debug,"The total size of the atom to interaction index is %d integers\n",rt->ril_mt_tot_size);
+ }
+
+ *nint = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ *nint += mtop->molblock[mb].nmol*nint_mt[mtop->molblock[mb].type];
+ }
+ sfree(nint_mt);
+
+ if (bFE && gmx_mtop_bondeds_free_energy(mtop))
+ {
+ rt->ilsort = ilsortFE_UNSORTED;
+ }
+ else {
+ rt->ilsort = ilsortNO_FE;
+ }
+
+ /* Make a molblock index for fast searching */
+ snew(rt->mbi,mtop->nmolblock);
++ rt->nmolblock = mtop->nmolblock;
+ i = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ rt->mbi[mb].a_start = i;
+ i += mtop->molblock[mb].nmol*mtop->molblock[mb].natoms_mol;
+ rt->mbi[mb].a_end = i;
+ rt->mbi[mb].natoms_mol = mtop->molblock[mb].natoms_mol;
+ rt->mbi[mb].type = mtop->molblock[mb].type;
+ }
++
++ rt->nthread = gmx_omp_nthreads_get(emntDomdec);
++ snew(rt->idef_thread,rt->nthread);
++ if (vsite_pbc_molt != NULL)
++ {
++ snew(rt->vsite_pbc,rt->nthread);
++ snew(rt->vsite_pbc_nalloc,rt->nthread);
++ for(thread=0; thread<rt->nthread; thread++)
++ {
++ snew(rt->vsite_pbc[thread],F_VSITEN-F_VSITE2+1);
++ snew(rt->vsite_pbc_nalloc[thread],F_VSITEN-F_VSITE2+1);
++ }
++ }
++ snew(rt->nbonded_thread,rt->nthread);
++ snew(rt->excl_thread,rt->nthread);
++ snew(rt->excl_count_thread,rt->nthread);
+
+ return rt;
+}
+
+void dd_make_reverse_top(FILE *fplog,
+ gmx_domdec_t *dd,gmx_mtop_t *mtop,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ t_inputrec *ir,gmx_bool bBCheck)
+{
- !dd->bInterCGcons,
++ int mb,n_recursive_vsite,nexcl,nexcl_icg,a;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+
+ if (fplog)
+ {
+ fprintf(fplog,"\nLinking all bonded interactions to atoms\n");
+ }
++
++ /* If normal and/or settle constraints act only within charge groups,
++ * we can store them in the reverse top and simply assign them to domains.
++ * Otherwise we need to assign them to multiple domains and set up
++ * the parallel version constraint algoirthm(s).
++ */
+
+ dd->reverse_top = make_reverse_top(mtop,ir->efep!=efepNO,
+ vsite ? vsite->vsite_pbc_molt : NULL,
-
- natoms = mtop->natoms;
++ !dd->bInterCGcons,!dd->bInterCGsettles,
+ bBCheck,&dd->nbonded_global);
+
+ if (dd->reverse_top->ril_mt_tot_size >= 200000 &&
+ mtop->mols.nr > 1 &&
+ mtop->nmolblock == 1 && mtop->molblock[0].nmol == 1)
+ {
+ /* mtop comes from a pre Gromacs 4 tpr file */
+ const char *note="NOTE: The tpr file used for this simulation is in an old format, for less memory usage and possibly more performance create a new tpr file with an up to date version of grompp";
+ if (fplog)
+ {
+ fprintf(fplog,"\n%s\n\n",note);
+ }
+ if (DDMASTER(dd))
+ {
+ fprintf(stderr,"\n%s\n\n",note);
+ }
+ }
+
+ dd->reverse_top->bExclRequired = IR_EXCL_FORCES(*ir);
+
+ nexcl = 0;
+ dd->n_intercg_excl = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+ nexcl += molb->nmol*count_excls(&molt->cgs,&molt->excls,&nexcl_icg);
+ dd->n_intercg_excl += molb->nmol*nexcl_icg;
+ }
+ if (dd->reverse_top->bExclRequired)
+ {
+ dd->nbonded_global += nexcl;
+ if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0 && fplog)
+ {
+ fprintf(fplog,"There are %d inter charge-group exclusions,\n"
+ "will use an extra communication step for exclusion forces for %s\n",
+ dd->n_intercg_excl,eel_names[ir->coulombtype]);
+ }
+ }
- init_domdec_vsites(dd,natoms);
+
+ if (vsite && vsite->n_intercg_vsite > 0)
+ {
+ if (fplog)
+ {
+ fprintf(fplog,"There are %d inter charge-group virtual sites,\n"
+ "will an extra communication step for selected coordinates and forces\n",
+ vsite->n_intercg_vsite);
+ }
- if (dd->bInterCGcons)
++ init_domdec_vsites(dd,vsite->n_intercg_vsite);
+ }
+
- init_domdec_constraints(dd,natoms,mtop,constr);
++ if (dd->bInterCGcons || dd->bInterCGsettles)
+ {
- il->nalloc += over_alloc_large(il->nr+1+nral);
++ init_domdec_constraints(dd,mtop,constr);
+ }
+ if (fplog)
+ {
+ fprintf(fplog,"\n");
+ }
+}
+
+static inline void add_ifunc(int nral,t_iatom *tiatoms,t_ilist *il)
+{
+ t_iatom *liatoms;
+ int k;
+
+ if (il->nr+1+nral > il->nalloc)
+ {
- static void add_posres(int mol,int a_mol,gmx_molblock_t *molb,
- t_iatom *iatoms,t_idef *idef)
++ il->nalloc = over_alloc_large(il->nr+1+nral);
+ srenew(il->iatoms,il->nalloc);
+ }
+ liatoms = il->iatoms + il->nr;
+ for(k=0; k<=nral; k++)
+ {
+ liatoms[k] = tiatoms[k];
+ }
+ il->nr += 1 + nral;
+}
+
- *ip = idef->iparams[iatoms[0]];
++static void add_posres(int mol,int a_mol,const gmx_molblock_t *molb,
++ t_iatom *iatoms,const t_iparams *ip_in,
++ t_idef *idef)
+{
+ int n,a_molb;
+ t_iparams *ip;
+
+ /* This position restraint has not been added yet,
+ * so it's index is the current number of position restraints.
+ */
+ n = idef->il[F_POSRES].nr/2;
+ if (n+1 > idef->iparams_posres_nalloc)
+ {
+ idef->iparams_posres_nalloc = over_alloc_dd(n+1);
+ srenew(idef->iparams_posres,idef->iparams_posres_nalloc);
+ }
+ ip = &idef->iparams_posres[n];
+ /* Copy the force constants */
- /* Get the position restriant coordinats from the molblock */
++ *ip = ip_in[iatoms[0]];
+
- static void add_fbposres(int mol,int a_mol,gmx_molblock_t *molb,
- t_iatom *iatoms,t_idef *idef)
++ /* Get the position restraint coordinates from the molblock */
+ a_molb = mol*molb->natoms_mol + a_mol;
+ if (a_molb >= molb->nposres_xA)
+ {
+ gmx_incons("Not enough position restraint coordinates");
+ }
+ ip->posres.pos0A[XX] = molb->posres_xA[a_molb][XX];
+ ip->posres.pos0A[YY] = molb->posres_xA[a_molb][YY];
+ ip->posres.pos0A[ZZ] = molb->posres_xA[a_molb][ZZ];
+ if (molb->nposres_xB > 0)
+ {
+ ip->posres.pos0B[XX] = molb->posres_xB[a_molb][XX];
+ ip->posres.pos0B[YY] = molb->posres_xB[a_molb][YY];
+ ip->posres.pos0B[ZZ] = molb->posres_xB[a_molb][ZZ];
+ }
+ else
+ {
+ ip->posres.pos0B[XX] = ip->posres.pos0A[XX];
+ ip->posres.pos0B[YY] = ip->posres.pos0A[YY];
+ ip->posres.pos0B[ZZ] = ip->posres.pos0A[ZZ];
+ }
+ /* Set the parameter index for idef->iparams_posre */
+ iatoms[0] = n;
+}
+
- *ip = idef->iparams[iatoms[0]];
++static void add_fbposres(int mol,int a_mol,const gmx_molblock_t *molb,
++ t_iatom *iatoms,const t_iparams *ip_in,
++ t_idef *idef)
+{
+ int n,a_molb;
+ t_iparams *ip;
+
+ /* This flat-bottom position restraint has not been added yet,
+ * so it's index is the current number of position restraints.
+ */
+ n = idef->il[F_FBPOSRES].nr/2;
+ if (n+1 > idef->iparams_fbposres_nalloc)
+ {
+ idef->iparams_fbposres_nalloc = over_alloc_dd(n+1);
+ srenew(idef->iparams_fbposres,idef->iparams_fbposres_nalloc);
+ }
+ ip = &idef->iparams_fbposres[n];
+ /* Copy the force constants */
- static int make_local_bondeds(gmx_domdec_t *dd,gmx_domdec_zones_t *zones,
- gmx_molblock_t *molb,
- gmx_bool bRCheckMB,ivec rcheck,gmx_bool bRCheck2B,
- real rc,
- int *la2lc,t_pbc *pbc_null,rvec *cg_cm,
- t_idef *idef,gmx_vsite_t *vsite)
++ *ip = ip_in[iatoms[0]];
+
+ /* Get the position restriant coordinats from the molblock */
+ a_molb = mol*molb->natoms_mol + a_mol;
+ if (a_molb >= molb->nposres_xA)
+ {
+ gmx_incons("Not enough position restraint coordinates");
+ }
+ /* Take reference positions from A position of normal posres */
+ ip->fbposres.pos0[XX] = molb->posres_xA[a_molb][XX];
+ ip->fbposres.pos0[YY] = molb->posres_xA[a_molb][YY];
+ ip->fbposres.pos0[ZZ] = molb->posres_xA[a_molb][ZZ];
+
+ /* Note: no B-type for flat-bottom posres */
+
+ /* Set the parameter index for idef->iparams_posre */
+ iatoms[0] = n;
+}
+
+static void add_vsite(gmx_ga2la_t ga2la,int *index,int *rtil,
+ int ftype,int nral,
+ gmx_bool bHomeA,int a,int a_gl,int a_mol,
+ t_iatom *iatoms,
+ t_idef *idef,int **vsite_pbc,int *vsite_pbc_nalloc)
+{
+ int k,ak_gl,vsi,pbc_a_mol;
+ t_iatom tiatoms[1+MAXATOMLIST],*iatoms_r;
+ int j,ftype_r,nral_r;
+
+ /* Copy the type */
+ tiatoms[0] = iatoms[0];
+
+ if (bHomeA)
+ {
+ /* We know the local index of the first atom */
+ tiatoms[1] = a;
+ }
+ else
+ {
+ /* Convert later in make_local_vsites */
+ tiatoms[1] = -a_gl - 1;
+ }
+
+ for(k=2; k<1+nral; k++)
+ {
+ ak_gl = a_gl + iatoms[k] - a_mol;
+ if (!ga2la_get_home(ga2la,ak_gl,&tiatoms[k]))
+ {
+ /* Copy the global index, convert later in make_local_vsites */
+ tiatoms[k] = -(ak_gl + 1);
+ }
+ }
+
+ /* Add this interaction to the local topology */
+ add_ifunc(nral,tiatoms,&idef->il[ftype]);
+ if (vsite_pbc)
+ {
+ vsi = idef->il[ftype].nr/(1+nral) - 1;
+ if (vsi >= vsite_pbc_nalloc[ftype-F_VSITE2])
+ {
+ vsite_pbc_nalloc[ftype-F_VSITE2] = over_alloc_large(vsi+1);
+ srenew(vsite_pbc[ftype-F_VSITE2],vsite_pbc_nalloc[ftype-F_VSITE2]);
+ }
+ if (bHomeA)
+ {
+ pbc_a_mol = iatoms[1+nral+1];
+ if (pbc_a_mol < 0)
+ {
+ /* The pbc flag is one of the following two options:
+ * -2: vsite and all constructing atoms are within the same cg, no pbc
+ * -1: vsite and its first constructing atom are in the same cg, do pbc
+ */
+ vsite_pbc[ftype-F_VSITE2][vsi] = pbc_a_mol;
+ }
+ else
+ {
+ /* Set the pbc atom for this vsite so we can make its pbc
+ * identical to the rest of the atoms in its charge group.
+ * Since the order of the atoms does not change within a charge
+ * group, we do not need the global to local atom index.
+ */
+ vsite_pbc[ftype-F_VSITE2][vsi] = a + pbc_a_mol - iatoms[1];
+ }
+ }
+ else
+ {
+ /* This vsite is non-home (required for recursion),
+ * and therefore there is no charge group to match pbc with.
+ * But we always turn on full_pbc to assure that higher order
+ * recursion works correctly.
+ */
+ vsite_pbc[ftype-F_VSITE2][vsi] = -1;
+ }
+ }
+
+ if (iatoms[1+nral])
+ {
+ /* Check for recursion */
+ for(k=2; k<1+nral; k++)
+ {
+ if ((iatoms[1+nral] & (2<<k)) && (tiatoms[k] < 0))
+ {
+ /* This construction atoms is a vsite and not a home atom */
+ if (gmx_debug_at)
+ {
+ fprintf(debug,"Constructing atom %d of vsite atom %d is a vsite and non-home\n",iatoms[k]+1,a_mol+1);
+ }
+ /* Find the vsite construction */
+
+ /* Check all interactions assigned to this atom */
+ j = index[iatoms[k]];
+ while (j < index[iatoms[k]+1])
+ {
+ ftype_r = rtil[j++];
+ nral_r = NRAL(ftype_r);
+ if (interaction_function[ftype_r].flags & IF_VSITE)
+ {
+ /* Add this vsite (recursion) */
+ add_vsite(ga2la,index,rtil,ftype_r,nral_r,
+ FALSE,-1,a_gl+iatoms[k]-iatoms[1],iatoms[k],
+ rtil+j,idef,vsite_pbc,vsite_pbc_nalloc);
+ j += 1 + nral_r + 2;
+ }
+ else
+ {
+ j += 1 + nral_r;
+ }
+ }
+ }
+ }
+ }
+}
+
+static void make_la2lc(gmx_domdec_t *dd)
+{
+ int *cgindex,*la2lc,cg,a;
+
+ cgindex = dd->cgindex;
+
+ if (dd->nat_tot > dd->la2lc_nalloc)
+ {
+ dd->la2lc_nalloc = over_alloc_dd(dd->nat_tot);
+ snew(dd->la2lc,dd->la2lc_nalloc);
+ }
+ la2lc = dd->la2lc;
+
+ /* Make the local atom to local cg index */
+ for(cg=0; cg<dd->ncg_tot; cg++)
+ {
+ for(a=cgindex[cg]; a<cgindex[cg+1]; a++)
+ {
+ la2lc[a] = cg;
+ }
+ }
+}
+
+static real dd_dist2(t_pbc *pbc_null,rvec *cg_cm,const int *la2lc,int i,int j)
+{
+ rvec dx;
+
+ if (pbc_null)
+ {
+ pbc_dx_aiuc(pbc_null,cg_cm[la2lc[i]],cg_cm[la2lc[j]],dx);
+ }
+ else
+ {
+ rvec_sub(cg_cm[la2lc[i]],cg_cm[la2lc[j]],dx);
+ }
+
+ return norm2(dx);
+}
+
- int nzone,nizone,ic,la0,la1,i,i_gl,mb,mt,mol,i_mol,j,ftype,nral,d,k;
- int *index,*rtil,**vsite_pbc,*vsite_pbc_nalloc;
++/* Append the nsrc t_blocka block structures in src to *dest */
++static void combine_blocka(t_blocka *dest,const t_blocka *src,int nsrc)
++{
++ int ni,na,s,i;
++
++ ni = src[nsrc-1].nr;
++ na = 0;
++ for(s=0; s<nsrc; s++)
++ {
++ na += src[s].nra;
++ }
++ if (ni + 1 > dest->nalloc_index)
++ {
++ dest->nalloc_index = over_alloc_large(ni+1);
++ srenew(dest->index,dest->nalloc_index);
++ }
++ if (dest->nra + na > dest->nalloc_a)
++ {
++ dest->nalloc_a = over_alloc_large(dest->nra+na);
++ srenew(dest->a,dest->nalloc_a);
++ }
++ for(s=0; s<nsrc; s++)
++ {
++ for(i=dest->nr+1; i<src[s].nr+1; i++)
++ {
++ dest->index[i] = dest->nra + src[s].index[i];
++ }
++ for(i=0; i<src[s].nra; i++)
++ {
++ dest->a[dest->nra+i] = src[s].a[i];
++ }
++ dest->nr = src[s].nr;
++ dest->nra += src[s].nra;
++ }
++}
++
++/* Append the nsrc t_idef structures in src to *dest,
++ * virtual sites need special attention, as pbc info differs per vsite.
++ */
++static void combine_idef(t_idef *dest,const t_idef *src,int nsrc,
++ gmx_vsite_t *vsite,int ***vsite_pbc_t)
+{
- real rc2;
++ int ftype,n,s,i;
++ t_ilist *ild;
++ const t_ilist *ils;
++ gmx_bool vpbc;
++ int nral1=0,ftv=0;
++
++ for(ftype=0; ftype<F_NRE; ftype++)
++ {
++ n = 0;
++ for(s=0; s<nsrc; s++)
++ {
++ n += src[s].il[ftype].nr;
++ }
++ if (n > 0)
++ {
++ ild = &dest->il[ftype];
++
++ if (ild->nr + n > ild->nalloc)
++ {
++ ild->nalloc = over_alloc_large(ild->nr+n);
++ srenew(ild->iatoms,ild->nalloc);
++ }
++
++ vpbc = ((interaction_function[ftype].flags & IF_VSITE) &&
++ vsite->vsite_pbc_loc != NULL);
++ if (vpbc)
++ {
++ nral1 = 1 + NRAL(ftype);
++ ftv = ftype - F_VSITE2;
++ if ((ild->nr + n)/nral1 > vsite->vsite_pbc_loc_nalloc[ftv])
++ {
++ vsite->vsite_pbc_loc_nalloc[ftv] =
++ over_alloc_large((ild->nr + n)/nral1);
++ srenew(vsite->vsite_pbc_loc[ftv],
++ vsite->vsite_pbc_loc_nalloc[ftv]);
++ }
++ }
++
++ for(s=0; s<nsrc; s++)
++ {
++ ils = &src[s].il[ftype];
++ for(i=0; i<ils->nr; i++)
++ {
++ ild->iatoms[ild->nr+i] = ils->iatoms[i];
++ }
++ if (vpbc)
++ {
++ for(i=0; i<ils->nr; i+=nral1)
++ {
++ vsite->vsite_pbc_loc[ftv][(ild->nr+i)/nral1] =
++ vsite_pbc_t[s][ftv][i/nral1];
++ }
++ }
++
++ ild->nr += ils->nr;
++ }
++ }
++ }
++
++ /* Position restraints need an additional treatment */
++ if (dest->il[F_POSRES].nr > 0)
++ {
++ n = dest->il[F_POSRES].nr/2;
++ if (n > dest->iparams_posres_nalloc)
++ {
++ dest->iparams_posres_nalloc = over_alloc_large(n);
++ srenew(dest->iparams_posres,dest->iparams_posres_nalloc);
++ }
++ /* Set n to the number of original position restraints in dest */
++ for(s=0; s<nsrc; s++)
++ {
++ n -= src[s].il[F_POSRES].nr/2;
++ }
++ for(s=0; s<nsrc; s++)
++ {
++ for(i=0; i<src[s].il[F_POSRES].nr/2; i++)
++ {
++ /* Correct the index into iparams_posres */
++ dest->il[F_POSRES].iatoms[n*2] = n;
++ /* Copy the position restraint force parameters */
++ dest->iparams_posres[n] = src[s].iparams_posres[i];
++ n++;
++ }
++ }
++ }
++}
++
++/* This function looks up and assigns bonded interactions for zone iz.
++ * With thread parallelizing each thread acts on a different atom range:
++ * at_start to at_end.
++ */
++static int make_bondeds_zone(gmx_domdec_t *dd,
++ const gmx_domdec_zones_t *zones,
++ const gmx_molblock_t *molb,
++ gmx_bool bRCheckMB,ivec rcheck,gmx_bool bRCheck2B,
++ real rc2,
++ int *la2lc,t_pbc *pbc_null,rvec *cg_cm,
++ const t_iparams *ip_in,
++ t_idef *idef,gmx_vsite_t *vsite,
++ int **vsite_pbc,
++ int *vsite_pbc_nalloc,
++ int iz,int nzone,
++ int at_start,int at_end)
++{
++ int i,i_gl,mb,mt,mol,i_mol,j,ftype,nral,d,k;
++ int *index,*rtil;
+ t_iatom *iatoms,tiatoms[1+MAXATOMLIST];
+ gmx_bool bBCheck,bUse,bLocal;
- int kc;
- gmx_domdec_ns_ranges_t *izone;
+ ivec k_zero,k_plus;
+ gmx_ga2la_t ga2la;
+ int a_loc;
- gmx_molblock_ind_t *mbi;
++ int kz;
++ int nizone;
++ const gmx_domdec_ns_ranges_t *izone;
+ gmx_reverse_top_t *rt;
-
- nzone = zones->n;
+ int nbonded_local;
- rc2 = rc*rc;
-
- if (vsite && vsite->n_intercg_vsite > 0)
- {
- vsite_pbc = vsite->vsite_pbc_loc;
- vsite_pbc_nalloc = vsite->vsite_pbc_loc_nalloc;
- }
- else
- {
- vsite_pbc = NULL;
- vsite_pbc_nalloc = NULL;
- }
-
++
+ nizone = zones->nizone;
+ izone = zones->izone;
+
- /* Clear the counts */
- for(ftype=0; ftype<F_NRE; ftype++)
- {
- idef->il[ftype].nr = 0;
- }
+ rt = dd->reverse_top;
+
+ bBCheck = rt->bBCheck;
+
- mbi = rt->mbi;
-
+ nbonded_local = 0;
+
-
- for(ic=0; ic<nzone; ic++)
+ ga2la = dd->ga2la;
- la0 = dd->cgindex[zones->cg_range[ic]];
- la1 = dd->cgindex[zones->cg_range[ic+1]];
- for(i=la0; i<la1; i++)
++
++ for(i=at_start; i<at_end; i++)
+ {
- /* Get the global atom number */
- i_gl = dd->gatindex[i];
- global_atomnr_to_moltype_ind(mbi,i_gl,&mb,&mt,&mol,&i_mol);
- /* Check all interactions assigned to this atom */
- index = rt->ril_mt[mt].index;
- rtil = rt->ril_mt[mt].il;
- j = index[i_mol];
- while (j < index[i_mol+1])
++ /* Get the global atom number */
++ i_gl = dd->gatindex[i];
++ global_atomnr_to_moltype_ind(rt,i_gl,&mb,&mt,&mol,&i_mol);
++ /* Check all interactions assigned to this atom */
++ index = rt->ril_mt[mt].index;
++ rtil = rt->ril_mt[mt].il;
++ j = index[i_mol];
++ while (j < index[i_mol+1])
+ {
- ftype = rtil[j++];
- iatoms = rtil + j;
- nral = NRAL(ftype);
- if (interaction_function[ftype].flags & IF_VSITE)
++ ftype = rtil[j++];
++ iatoms = rtil + j;
++ nral = NRAL(ftype);
++ if (ftype == F_SETTLE)
+ {
- /* The vsite construction goes where the vsite itself is */
- if (ic == 0)
- {
- add_vsite(dd->ga2la,index,rtil,ftype,nral,
- TRUE,i,i_gl,i_mol,
- iatoms,idef,vsite_pbc,vsite_pbc_nalloc);
- }
- j += 1 + nral + 2;
++ /* Settles are only in the reverse top when they
++ * operate within a charge group. So we can assign
++ * them without checks. We do this only for performance
++ * reasons; it could be handled by the code below.
++ */
++ if (iz == 0)
+ {
- else
++ /* Home zone: add this settle to the local topology */
++ tiatoms[0] = iatoms[0];
++ tiatoms[1] = i;
++ tiatoms[2] = i + iatoms[2] - iatoms[1];
++ tiatoms[3] = i + iatoms[3] - iatoms[1];
++ add_ifunc(nral,tiatoms,&idef->il[ftype]);
++ nbonded_local++;
+ }
- /* Copy the type */
- tiatoms[0] = iatoms[0];
-
- if (nral == 1)
++ j += 1 + nral;
++ }
++ else if (interaction_function[ftype].flags & IF_VSITE)
++ {
++ /* The vsite construction goes where the vsite itself is */
++ if (iz == 0)
+ {
- /* Assign single-body interactions to the home zone */
- if (ic == 0)
- {
- bUse = TRUE;
++ add_vsite(dd->ga2la,index,rtil,ftype,nral,
++ TRUE,i,i_gl,i_mol,
++ iatoms,idef,vsite_pbc,vsite_pbc_nalloc);
++ }
++ j += 1 + nral + 2;
++ }
++ else
++ {
++ /* Copy the type */
++ tiatoms[0] = iatoms[0];
++
++ if (nral == 1)
++ {
++ /* Assign single-body interactions to the home zone */
++ if (iz == 0)
+ {
- add_posres(mol,i_mol,&molb[mb],tiatoms,idef);
++ bUse = TRUE;
+ tiatoms[1] = i;
+ if (ftype == F_POSRES)
+ {
- add_fbposres(mol,i_mol,&molb[mb],tiatoms,idef);
++ add_posres(mol,i_mol,&molb[mb],tiatoms,ip_in,
++ idef);
+ }
+ else if (ftype == F_FBPOSRES)
+ {
- else
++ add_fbposres(mol,i_mol,&molb[mb],tiatoms,ip_in,
++ idef);
+ }
++ }
++ else
++ {
++ bUse = FALSE;
++ }
++ }
++ else if (nral == 2)
++ {
++ /* This is a two-body interaction, we can assign
++ * analogous to the non-bonded assignments.
++ */
++ if (!ga2la_get(ga2la,i_gl+iatoms[2]-i_mol,&a_loc,&kz))
++ {
++ bUse = FALSE;
++ }
++ else
++ {
++ if (kz >= nzone)
++ {
++ kz -= nzone;
+ }
- bUse = FALSE;
++ /* Check zone interaction assignments */
++ bUse = ((iz < nizone && iz <= kz &&
++ izone[iz].j0 <= kz && kz < izone[iz].j1) ||
++ (kz < nizone && iz > kz &&
++ izone[kz].j0 <= iz && iz < izone[kz].j1));
++ if (bUse)
+ {
- else if (nral == 2)
++ tiatoms[1] = i;
++ tiatoms[2] = a_loc;
++ /* If necessary check the cgcm distance */
++ if (bRCheck2B &&
++ dd_dist2(pbc_null,cg_cm,la2lc,
++ tiatoms[1],tiatoms[2]) >= rc2)
++ {
++ bUse = FALSE;
++ }
+ }
+ }
- /* This is a two-body interaction, we can assign
- * analogous to the non-bonded assignments.
- */
- if (!ga2la_get(ga2la,i_gl+iatoms[2]-i_mol,&a_loc,&kc))
++ }
++ else
++ {
++ /* Assign this multi-body bonded interaction to
++ * the local node if we have all the atoms involved
++ * (local or communicated) and the minimum zone shift
++ * in each dimension is zero, for dimensions
++ * with 2 DD cells an extra check may be necessary.
++ */
++ bUse = TRUE;
++ clear_ivec(k_zero);
++ clear_ivec(k_plus);
++ for(k=1; k<=nral && bUse; k++)
+ {
- if (kc >= nzone)
++ bLocal = ga2la_get(ga2la,i_gl+iatoms[k]-i_mol,
++ &a_loc,&kz);
++ if (!bLocal || kz >= zones->n)
+ {
++ /* We do not have this atom of this interaction
++ * locally, or it comes from more than one cell
++ * away.
++ */
+ bUse = FALSE;
+ }
+ else
+ {
- kc -= nzone;
- }
- /* Check zone interaction assignments */
- bUse = ((ic < nizone && ic <= kc &&
- izone[ic].j0 <= kc && kc < izone[ic].j1) ||
- (kc < nizone && ic > kc &&
- izone[kc].j0 <= ic && ic < izone[kc].j1));
- if (bUse)
- {
- tiatoms[1] = i;
- tiatoms[2] = a_loc;
- /* If necessary check the cgcm distance */
- if (bRCheck2B &&
- dd_dist2(pbc_null,cg_cm,la2lc,
- tiatoms[1],tiatoms[2]) >= rc2)
++ tiatoms[k] = a_loc;
++ for(d=0; d<DIM; d++)
+ {
- bUse = FALSE;
++ if (zones->shift[kz][d] == 0)
++ {
++ k_zero[d] = k;
++ }
++ else
+ {
- else
++ k_plus[d] = k;
+ }
+ }
+ }
+ }
- /* Assign this multi-body bonded interaction to
- * the local node if we have all the atoms involved
- * (local or communicated) and the minimum zone shift
- * in each dimension is zero, for dimensions
- * with 2 DD cells an extra check may be necessary.
- */
- bUse = TRUE;
- clear_ivec(k_zero);
- clear_ivec(k_plus);
- for(k=1; k<=nral && bUse; k++)
++ bUse = (bUse &&
++ k_zero[XX] && k_zero[YY] && k_zero[ZZ]);
++ if (bRCheckMB)
+ {
- bLocal = ga2la_get(ga2la,i_gl+iatoms[k]-i_mol,
- &a_loc,&kc);
- if (!bLocal || kc >= zones->n)
++ for(d=0; (d<DIM && bUse); d++)
+ {
- /* We do not have this atom of this interaction
- * locally, or it comes from more than one cell
- * away.
- */
++ /* Check if the cg_cm distance falls within
++ * the cut-off to avoid possible multiple
++ * assignments of bonded interactions.
++ */
++ if (rcheck[d] &&
++ k_plus[d] &&
++ dd_dist2(pbc_null,cg_cm,la2lc,
++ tiatoms[k_zero[d]],tiatoms[k_plus[d]]) >= rc2)
+ {
- else
- {
- tiatoms[k] = a_loc;
- for(d=0; d<DIM; d++)
- {
- if (zones->shift[kc][d] == 0)
- {
- k_zero[d] = k;
- }
- else
- {
- k_plus[d] = k;
- }
- }
- }
- }
- bUse = (bUse &&
- k_zero[XX] && k_zero[YY] && k_zero[ZZ]);
- if (bRCheckMB)
- {
- for(d=0; (d<DIM && bUse); d++)
- {
- /* Check if the cg_cm distance falls within
- * the cut-off to avoid possible multiple
- * assignments of bonded interactions.
- */
- if (rcheck[d] &&
- k_plus[d] &&
- dd_dist2(pbc_null,cg_cm,la2lc,
- tiatoms[k_zero[d]],tiatoms[k_plus[d]]) >= rc2)
- {
- bUse = FALSE;
- }
- }
+ bUse = FALSE;
+ }
- if (bUse)
+ }
+ }
- /* Add this interaction to the local topology */
- add_ifunc(nral,tiatoms,&idef->il[ftype]);
- /* Sum so we can check in global_stat
- * if we have everything.
- */
- if (bBCheck ||
- !(interaction_function[ftype].flags & IF_LIMZERO))
- {
- nbonded_local++;
- }
++ }
++ if (bUse)
++ {
++ /* Add this interaction to the local topology */
++ add_ifunc(nral,tiatoms,&idef->il[ftype]);
++ /* Sum so we can check in global_stat
++ * if we have everything.
++ */
++ if (bBCheck ||
++ !(interaction_function[ftype].flags & IF_LIMZERO))
+ {
- j += 1 + nral;
++ nbonded_local++;
+ }
-
+ }
++ j += 1 + nral;
+ }
+ }
+ }
- static int make_local_bondeds_intracg(gmx_domdec_t *dd,gmx_molblock_t *molb,
- t_idef *idef,gmx_vsite_t *vsite)
++
+ return nbonded_local;
+}
+
- int i,i_gl,mb,mt,mol,i_mol,j,ftype,nral,k;
- int *index,*rtil,**vsite_pbc,*vsite_pbc_nalloc;
- t_iatom *iatoms,tiatoms[1+MAXATOMLIST];
- gmx_reverse_top_t *rt;
- gmx_molblock_ind_t *mbi;
- int nbonded_local;
++static void set_no_exclusions_zone(gmx_domdec_t *dd,gmx_domdec_zones_t *zones,
++ int iz,t_blocka *lexcls)
+{
- if (vsite && vsite->n_intercg_vsite > 0)
- {
- vsite_pbc = vsite->vsite_pbc_loc;
- vsite_pbc_nalloc = vsite->vsite_pbc_loc_nalloc;
- }
- else
- {
- vsite_pbc = NULL;
- vsite_pbc_nalloc = NULL;
- }
-
- /* Clear the counts */
- for(ftype=0; ftype<F_NRE; ftype++)
- {
- idef->il[ftype].nr = 0;
- }
- nbonded_local = 0;
-
- rt = dd->reverse_top;
-
- if (rt->ril_mt_tot_size == 0)
- {
- /* There are no interactions to assign */
- return nbonded_local;
- }
-
- mbi = rt->mbi;
-
- for(i=0; i<dd->nat_home; i++)
++ int a0,a1,a;
+
- /* Get the global atom number */
- i_gl = dd->gatindex[i];
- global_atomnr_to_moltype_ind(mbi,i_gl,&mb,&mt,&mol,&i_mol);
- /* Check all interactions assigned to this atom */
- index = rt->ril_mt[mt].index;
- rtil = rt->ril_mt[mt].il;
- /* Check all interactions assigned to this atom */
- j = index[i_mol];
- while (j < index[i_mol+1])
- {
- ftype = rtil[j++];
- iatoms = rtil + j;
- nral = NRAL(ftype);
- if (interaction_function[ftype].flags & IF_VSITE)
- {
- /* The vsite construction goes where the vsite itself is */
- add_vsite(dd->ga2la,index,rtil,ftype,nral,
- TRUE,i,i_gl,i_mol,
- iatoms,idef,vsite_pbc,vsite_pbc_nalloc);
- j += 1 + nral + 2;
- }
- else
- {
- /* Copy the type */
- tiatoms[0] = iatoms[0];
- tiatoms[1] = i;
- for(k=2; k<=nral; k++)
- {
- tiatoms[k] = i + iatoms[k] - iatoms[1];
- }
- if (ftype == F_POSRES)
- {
- add_posres(mol,i_mol,&molb[mb],tiatoms,idef);
- }
- else if (ftype == F_FBPOSRES)
- {
- add_fbposres(mol,i_mol,&molb[mb],tiatoms,idef);
- }
- /* Add this interaction to the local topology */
- add_ifunc(nral,tiatoms,&idef->il[ftype]);
- /* Sum so we can check in global_stat if we have everything */
- nbonded_local++;
- j += 1 + nral;
- }
- }
++ a0 = dd->cgindex[zones->cg_range[iz]];
++ a1 = dd->cgindex[zones->cg_range[iz+1]];
++
++ for(a=a0+1; a<a1+1; a++)
+ {
-
- return nbonded_local;
++ lexcls->index[a] = lexcls->nra;
+ }
- static int make_local_exclusions(gmx_domdec_t *dd,gmx_domdec_zones_t *zones,
- gmx_mtop_t *mtop,
- gmx_bool bRCheck,real rc,
- int *la2lc,t_pbc *pbc_null,rvec *cg_cm,
- t_forcerec *fr,
- t_blocka *lexcls)
+}
+
- int nizone,n,count,ic,jla0,jla1,jla;
++static int make_exclusions_zone(gmx_domdec_t *dd,gmx_domdec_zones_t *zones,
++ const gmx_moltype_t *moltype,
++ gmx_bool bRCheck,real rc2,
++ int *la2lc,t_pbc *pbc_null,rvec *cg_cm,
++ const int *cginfo,
++ t_blocka *lexcls,
++ int iz,
++ int cg_start,int cg_end)
+{
- t_blocka *excls;
++ int nizone,n,count,jla0,jla1,jla;
+ int cg,la0,la1,la,a_gl,mb,mt,mol,a_mol,j,aj_mol;
- gmx_molblock_ind_t *mbi;
- real rc2;
-
- /* Since for RF and PME we need to loop over the exclusions
- * we should store each exclusion only once. This is done
- * using the same zone scheme as used for neighbor searching.
- * The exclusions involving non-home atoms are stored only
- * one way: atom j is in the excl list of i only for j > i,
- * where i and j are local atom numbers.
- */
-
- lexcls->nr = dd->cgindex[zones->izone[zones->nizone-1].cg1];
- if (lexcls->nr+1 > lexcls->nalloc_index)
- {
- lexcls->nalloc_index = over_alloc_dd(lexcls->nr)+1;
- srenew(lexcls->index,lexcls->nalloc_index);
- }
-
- mbi = dd->reverse_top->mbi;
-
++ const t_blocka *excls;
+ gmx_ga2la_t ga2la;
+ int a_loc;
+ int cell;
- rc2 = rc*rc;
-
- if (dd->n_intercg_excl)
- {
- nizone = zones->nizone;
- }
- else
- {
- nizone = 1;
- }
- n = 0;
++
+ ga2la = dd->ga2la;
+
- for(ic=0; ic<nizone; ic++)
++ jla0 = dd->cgindex[zones->izone[iz].jcg0];
++ jla1 = dd->cgindex[zones->izone[iz].jcg1];
++
++ /* We set the end index, but note that we might not start at zero here */
++ lexcls->nr = dd->cgindex[cg_end];
++
++ n = lexcls->nra;
+ count = 0;
- jla0 = dd->cgindex[zones->izone[ic].jcg0];
- jla1 = dd->cgindex[zones->izone[ic].jcg1];
- for(cg=zones->cg_range[ic]; cg<zones->cg_range[ic+1]; cg++)
++ for(cg=cg_start; cg<cg_end; cg++)
+ {
- /* Here we assume the number of exclusions in one charge group
- * is never larger than 1000.
- */
- if (n+1000 > lexcls->nalloc_a)
- {
- lexcls->nalloc_a = over_alloc_large(n+1000);
- srenew(lexcls->a,lexcls->nalloc_a);
- }
- la0 = dd->cgindex[cg];
- la1 = dd->cgindex[cg+1];
- if (GET_CGINFO_EXCL_INTER(fr->cginfo[cg]) ||
- !GET_CGINFO_EXCL_INTRA(fr->cginfo[cg]))
- {
- /* Copy the exclusions from the global top */
- for(la=la0; la<la1; la++) {
- lexcls->index[la] = n;
- a_gl = dd->gatindex[la];
- global_atomnr_to_moltype_ind(mbi,a_gl,&mb,&mt,&mol,&a_mol);
- excls = &mtop->moltype[mt].excls;
- for(j=excls->index[a_mol]; j<excls->index[a_mol+1]; j++)
++ /* Here we assume the number of exclusions in one charge group
++ * is never larger than 1000.
++ */
++ if (n+1000 > lexcls->nalloc_a)
+ {
- aj_mol = excls->a[j];
- /* This computation of jla is only correct intra-cg */
- jla = la + aj_mol - a_mol;
- if (jla >= la0 && jla < la1)
++ lexcls->nalloc_a = over_alloc_large(n+1000);
++ srenew(lexcls->a,lexcls->nalloc_a);
++ }
++ la0 = dd->cgindex[cg];
++ la1 = dd->cgindex[cg+1];
++ if (GET_CGINFO_EXCL_INTER(cginfo[cg]) ||
++ !GET_CGINFO_EXCL_INTRA(cginfo[cg]))
++ {
++ /* Copy the exclusions from the global top */
++ for(la=la0; la<la1; la++) {
++ lexcls->index[la] = n;
++ a_gl = dd->gatindex[la];
++ global_atomnr_to_moltype_ind(dd->reverse_top,a_gl,&mb,&mt,&mol,&a_mol);
++ excls = &moltype[mt].excls;
++ for(j=excls->index[a_mol]; j<excls->index[a_mol+1]; j++)
++ {
++ aj_mol = excls->a[j];
++ /* This computation of jla is only correct intra-cg */
++ jla = la + aj_mol - a_mol;
++ if (jla >= la0 && jla < la1)
+ {
- /* This is an intra-cg exclusion. We can skip
- * the global indexing and distance checking.
- */
- /* Intra-cg exclusions are only required
- * for the home zone.
- */
- if (ic == 0)
++ /* This is an intra-cg exclusion. We can skip
++ * the global indexing and distance checking.
++ */
++ /* Intra-cg exclusions are only required
++ * for the home zone.
++ */
++ if (iz == 0)
+ {
- }
- else
- {
- /* This is a inter-cg exclusion */
- /* Since exclusions are pair interactions,
- * just like non-bonded interactions,
- * they can be assigned properly up
- * to the DD cutoff (not cutoff_min as
- * for the other bonded interactions).
- */
- if (ga2la_get(ga2la,a_gl+aj_mol-a_mol,&jla,&cell))
++ lexcls->a[n++] = jla;
++ /* Check to avoid double counts */
++ if (jla > la)
++ {
++ count++;
++ }
++ }
++ }
++ else
++ {
++ /* This is a inter-cg exclusion */
++ /* Since exclusions are pair interactions,
++ * just like non-bonded interactions,
++ * they can be assigned properly up
++ * to the DD cutoff (not cutoff_min as
++ * for the other bonded interactions).
++ */
++ if (ga2la_get(ga2la,a_gl+aj_mol-a_mol,&jla,&cell))
++ {
++ if (iz == 0 && cell == 0)
+ {
+ lexcls->a[n++] = jla;
+ /* Check to avoid double counts */
+ if (jla > la)
+ {
+ count++;
+ }
+ }
- if (ic == 0 && cell == 0)
- {
- lexcls->a[n++] = jla;
- /* Check to avoid double counts */
- if (jla > la)
- {
- count++;
- }
- }
- else if (jla >= jla0 && jla < jla1 &&
- (!bRCheck ||
- dd_dist2(pbc_null,cg_cm,la2lc,la,jla) < rc2))
- {
- /* jla > la, since jla0 > la */
- lexcls->a[n++] = jla;
- count++;
- }
++ else if (jla >= jla0 && jla < jla1 &&
++ (!bRCheck ||
++ dd_dist2(pbc_null,cg_cm,la2lc,la,jla) < rc2))
+ {
- else
++ /* jla > la, since jla0 > la */
++ lexcls->a[n++] = jla;
++ count++;
+ }
+ }
+ }
+ }
+ }
- /* There are no inter-cg excls and this cg is self-excluded.
- * These exclusions are only required for zone 0,
- * since other zones do not see themselves.
- */
- if (ic == 0)
++ }
++ else
++ {
++ /* There are no inter-cg excls and this cg is self-excluded.
++ * These exclusions are only required for zone 0,
++ * since other zones do not see themselves.
++ */
++ if (iz == 0)
+ {
- for(la=la0; la<la1; la++)
++ for(la=la0; la<la1; la++)
+ {
- lexcls->index[la] = n;
- for(j=la0; j<la1; j++)
- {
- lexcls->a[n++] = j;
- }
++ lexcls->index[la] = n;
++ for(j=la0; j<la1; j++)
+ {
- count += ((la1 - la0)*(la1 - la0 - 1))/2;
++ lexcls->a[n++] = j;
+ }
- else
+ }
- /* We don't need exclusions for this cg */
- for(la=la0; la<la1; la++)
- {
- lexcls->index[la] = n;
- }
++ count += ((la1 - la0)*(la1 - la0 - 1))/2;
++ }
++ else
++ {
++ /* We don't need exclusions for this cg */
++ for(la=la0; la<la1; la++)
+ {
- lexcls->index[la] = n;
++ lexcls->index[la] = n;
+ }
+ }
+ }
+ }
++
++ lexcls->index[lexcls->nr] = n;
++ lexcls->nra = n;
++
++ return count;
++}
++
++static void check_alloc_index(t_blocka *ba,int nindex_max)
++{
++ if (nindex_max+1 > ba->nalloc_index)
++ {
++ ba->nalloc_index = over_alloc_dd(nindex_max+1);
++ srenew(ba->index,ba->nalloc_index);
++ }
++}
++
++static void check_exclusions_alloc(gmx_domdec_t *dd,gmx_domdec_zones_t *zones,
++ t_blocka *lexcls)
++{
++ int nr;
++ int thread;
++
++ nr = dd->cgindex[zones->izone[zones->nizone-1].cg1];
++
++ check_alloc_index(lexcls,nr);
++
++ for(thread=1; thread<dd->reverse_top->nthread; thread++)
++ {
++ check_alloc_index(&dd->reverse_top->excl_thread[thread],nr);
++ }
++}
++
++static void finish_local_exclusions(gmx_domdec_t *dd,gmx_domdec_zones_t *zones,
++ t_blocka *lexcls)
++{
++ int la0,la;
++
++ lexcls->nr = dd->cgindex[zones->izone[zones->nizone-1].cg1];
++
+ if (dd->n_intercg_excl == 0)
+ {
+ /* There are no exclusions involving non-home charge groups,
+ * but we need to set the indices for neighborsearching.
+ */
+ la0 = dd->cgindex[zones->izone[0].cg1];
+ for(la=la0; la<lexcls->nr; la++)
+ {
- }
- lexcls->index[lexcls->nr] = n;
- lexcls->nra = n;
- if (dd->n_intercg_excl == 0)
- {
++ lexcls->index[la] = lexcls->nra;
+ }
- lexcls->nra,count);
++
+ /* nr is only used to loop over the exclusions for Ewald and RF,
+ * so we can set it to the number of home atoms for efficiency.
+ */
+ lexcls->nr = dd->cgindex[zones->izone[0].cg1];
+ }
++}
++
++static void clear_idef(t_idef *idef)
++{
++ int ftype;
++
++ /* Clear the counts */
++ for(ftype=0; ftype<F_NRE; ftype++)
++ {
++ idef->il[ftype].nr = 0;
++ }
++}
++
++static int make_local_bondeds_excls(gmx_domdec_t *dd,
++ gmx_domdec_zones_t *zones,
++ const gmx_mtop_t *mtop,
++ const int *cginfo,
++ gmx_bool bRCheckMB,ivec rcheck,gmx_bool bRCheck2B,
++ real rc,
++ int *la2lc,t_pbc *pbc_null,rvec *cg_cm,
++ t_idef *idef,gmx_vsite_t *vsite,
++ t_blocka *lexcls,int *excl_count)
++{
++ int nzone_bondeds,nzone_excl;
++ int iz,cg0,cg1;
++ real rc2;
++ int nbonded_local;
++ int thread;
++ gmx_reverse_top_t *rt;
++
++ if (dd->reverse_top->bMultiCGmols)
++ {
++ nzone_bondeds = zones->n;
++ }
++ else
++ {
++ /* Only single charge group molecules, so interactions don't
++ * cross zone boundaries and we only need to assign in the home zone.
++ */
++ nzone_bondeds = 1;
++ }
++
++ if (dd->n_intercg_excl > 0)
++ {
++ /* We only use exclusions from i-zones to i- and j-zones */
++ nzone_excl = zones->nizone;
++ }
++ else
++ {
++ /* There are no inter-cg exclusions and only zone 0 sees itself */
++ nzone_excl = 1;
++ }
++
++ check_exclusions_alloc(dd,zones,lexcls);
++
++ rt = dd->reverse_top;
++
++ rc2 = rc*rc;
++
++ /* Clear the counts */
++ clear_idef(idef);
++ nbonded_local = 0;
++
++ lexcls->nr = 0;
++ lexcls->nra = 0;
++ *excl_count = 0;
++
++ for(iz=0; iz<nzone_bondeds; iz++)
++ {
++ cg0 = zones->cg_range[iz];
++ cg1 = zones->cg_range[iz+1];
++
++#pragma omp parallel for num_threads(rt->nthread) schedule(static)
++ for(thread=0; thread<rt->nthread; thread++)
++ {
++ int cg0t,cg1t;
++ t_idef *idef_t;
++ int ftype;
++ int **vsite_pbc;
++ int *vsite_pbc_nalloc;
++ t_blocka *excl_t;
++
++ cg0t = cg0 + ((cg1 - cg0)* thread )/rt->nthread;
++ cg1t = cg0 + ((cg1 - cg0)*(thread+1))/rt->nthread;
++
++ if (thread == 0)
++ {
++ idef_t = idef;
++ }
++ else
++ {
++ idef_t = &rt->idef_thread[thread];
++ clear_idef(idef_t);
++ }
++
++ if (vsite && vsite->n_intercg_vsite > 0)
++ {
++ if (thread == 0)
++ {
++ vsite_pbc = vsite->vsite_pbc_loc;
++ vsite_pbc_nalloc = vsite->vsite_pbc_loc_nalloc;
++ }
++ else
++ {
++ vsite_pbc = rt->vsite_pbc[thread];
++ vsite_pbc_nalloc = rt->vsite_pbc_nalloc[thread];
++ }
++ }
++ else
++ {
++ vsite_pbc = NULL;
++ vsite_pbc_nalloc = NULL;
++ }
++
++ rt->nbonded_thread[thread] =
++ make_bondeds_zone(dd,zones,
++ mtop->molblock,
++ bRCheckMB,rcheck,bRCheck2B,rc2,
++ la2lc,pbc_null,cg_cm,idef->iparams,
++ idef_t,
++ vsite,vsite_pbc,vsite_pbc_nalloc,
++ iz,zones->n,
++ dd->cgindex[cg0t],dd->cgindex[cg1t]);
++
++ if (iz < nzone_excl)
++ {
++ if (thread == 0)
++ {
++ excl_t = lexcls;
++ }
++ else
++ {
++ excl_t = &rt->excl_thread[thread];
++ excl_t->nr = 0;
++ excl_t->nra = 0;
++ }
++
++ rt->excl_count_thread[thread] =
++ make_exclusions_zone(dd,zones,
++ mtop->moltype,bRCheck2B,rc2,
++ la2lc,pbc_null,cg_cm,cginfo,
++ excl_t,
++ iz,
++ cg0t,cg1t);
++ }
++ }
++
++ if (rt->nthread > 1)
++ {
++ combine_idef(idef,rt->idef_thread+1,rt->nthread-1,
++ vsite,rt->vsite_pbc+1);
++ }
++
++ for(thread=0; thread<rt->nthread; thread++)
++ {
++ nbonded_local += rt->nbonded_thread[thread];
++ }
++
++ if (iz < nzone_excl)
++ {
++ if (rt->nthread > 1)
++ {
++ combine_blocka(lexcls,rt->excl_thread+1,rt->nthread-1);
++ }
++
++ for(thread=0; thread<rt->nthread; thread++)
++ {
++ *excl_count += rt->excl_count_thread[thread];
++ }
++ }
++ }
++
++ /* Some zones might not have exclusions, but some code still needs to
++ * loop over the index, so we set the indices here.
++ */
++ for(iz=nzone_excl; iz<zones->nizone; iz++)
++ {
++ set_no_exclusions_zone(dd,zones,iz,lexcls);
++ }
++
++ finish_local_exclusions(dd,zones,lexcls);
+ if (debug)
+ {
+ fprintf(debug,"We have %d exclusions, check count %d\n",
- return count;
++ lexcls->nra,*excl_count);
+ }
+
- t_forcerec *fr,gmx_vsite_t *vsite,
++ return nbonded_local;
+}
+
+void dd_make_local_cgs(gmx_domdec_t *dd,t_block *lcgs)
+{
+ lcgs->nr = dd->ncg_tot;
+ lcgs->index = dd->cgindex;
+}
+
+void dd_make_local_top(FILE *fplog,
+ gmx_domdec_t *dd,gmx_domdec_zones_t *zones,
+ int npbcdim,matrix box,
+ rvec cellsize_min,ivec npulse,
- if (!dd->reverse_top->bMultiCGmols)
- {
- /* We don't need checks, assign all interactions with local atoms */
-
- dd->nbonded_local = make_local_bondeds_intracg(dd,mtop->molblock,
- <op->idef,vsite);
- }
- else
++ t_forcerec *fr,
++ rvec *cgcm_or_x,
++ gmx_vsite_t *vsite,
+ gmx_mtop_t *mtop,gmx_localtop_t *ltop)
+{
+ gmx_bool bUniqueExcl,bRCheckMB,bRCheck2B,bRCheckExcl;
+ real rc=-1;
+ ivec rcheck;
+ int d,nexcl;
+ t_pbc pbc,*pbc_null=NULL;
+
+ if (debug)
+ {
+ fprintf(debug,"Making local topology\n");
+ }
+
+ dd_make_local_cgs(dd,<op->cgs);
+
+ bRCheckMB = FALSE;
+ bRCheck2B = FALSE;
+ bRCheckExcl = FALSE;
+
-
- dd->nbonded_local = make_local_bondeds(dd,zones,mtop->molblock,
- bRCheckMB,rcheck,bRCheck2B,rc,
- dd->la2lc,
- pbc_null,fr->cg_cm,
- <op->idef,vsite);
++ if (dd->reverse_top->bMultiCGmols)
+ {
+ /* We need to check to which cell bondeds should be assigned */
+ rc = dd_cutoff_twobody(dd);
+ if (debug)
+ {
+ fprintf(debug,"Two-body bonded cut-off distance is %g\n",rc);
+ }
+
+ /* Should we check cg_cm distances when assigning bonded interactions? */
+ for(d=0; d<DIM; d++)
+ {
+ rcheck[d] = FALSE;
+ /* Only need to check for dimensions where the part of the box
+ * that is not communicated is smaller than the cut-off.
+ */
+ if (d < npbcdim && dd->nc[d] > 1 &&
+ (dd->nc[d] - npulse[d])*cellsize_min[d] < 2*rc)
+ {
+ if (dd->nc[d] == 2)
+ {
+ rcheck[d] = TRUE;
+ bRCheckMB = TRUE;
+ }
+ /* Check for interactions between two atoms,
+ * where we can allow interactions up to the cut-off,
+ * instead of up to the smallest cell dimension.
+ */
+ bRCheck2B = TRUE;
+ }
+ if (debug)
+ {
+ fprintf(debug,
+ "dim %d cellmin %f bonded rcheck[%d] = %d, bRCheck2B = %d\n",
+ d,cellsize_min[d],d,rcheck[d],bRCheck2B);
+ }
+ }
+ if (dd->reverse_top->bExclRequired)
+ {
+ bRCheckExcl = bRCheck2B;
+ }
+ else
+ {
+ /* If we don't have forces on exclusions,
+ * we don't care about exclusions being assigned mulitple times.
+ */
+ bRCheckExcl = FALSE;
+ }
+ if (bRCheckMB || bRCheck2B)
+ {
+ make_la2lc(dd);
+ if (fr->bMolPBC)
+ {
+ set_pbc_dd(&pbc,fr->ePBC,dd,TRUE,box);
+ pbc_null = &pbc;
+ }
+ else
+ {
+ pbc_null = NULL;
+ }
+ }
- nexcl = make_local_exclusions(dd,zones,mtop,bRCheckExcl,
- rc,dd->la2lc,pbc_null,fr->cg_cm,
- fr,<op->excls);
-
+ }
++
++ dd->nbonded_local =
++ make_local_bondeds_excls(dd,zones,mtop,fr->cginfo,
++ bRCheckMB,rcheck,bRCheck2B,rc,
++ dd->la2lc,
++ pbc_null,cgcm_or_x,
++ <op->idef,vsite,
++ <op->excls,&nexcl);
+
+ /* The ilist is not sorted yet,
+ * we can only do this when we have the charge arrays.
+ */
+ ltop->idef.ilsort = ilsortUNKNOWN;
+
- make_reverse_ilist(molt,NULL,FALSE,FALSE,TRUE,&ril);
+ if (dd->reverse_top->bExclRequired)
+ {
+ dd->nbonded_local += nexcl;
++
++ forcerec_set_excl_load(fr,ltop,NULL);
+ }
+
+ ltop->atomtypes = mtop->atomtypes;
+
+ /* For an error message only */
+ dd->reverse_top->err_top_global = mtop;
+ dd->reverse_top->err_top_local = ltop;
+}
+
+void dd_sort_local_top(gmx_domdec_t *dd,t_mdatoms *mdatoms,
+ gmx_localtop_t *ltop)
+{
+ if (dd->reverse_top->ilsort == ilsortNO_FE)
+ {
+ ltop->idef.ilsort = ilsortNO_FE;
+ }
+ else
+ {
+ gmx_sort_ilist_fe(<op->idef,mdatoms->chargeA,mdatoms->chargeB);
+ }
+}
+
+gmx_localtop_t *dd_init_local_top(gmx_mtop_t *top_global)
+{
+ gmx_localtop_t *top;
+ int i;
+
+ snew(top,1);
+
+ top->idef.ntypes = top_global->ffparams.ntypes;
+ top->idef.atnr = top_global->ffparams.atnr;
+ top->idef.functype = top_global->ffparams.functype;
+ top->idef.iparams = top_global->ffparams.iparams;
+ top->idef.fudgeQQ = top_global->ffparams.fudgeQQ;
+ top->idef.cmap_grid= top_global->ffparams.cmap_grid;
+
+ for(i=0; i<F_NRE; i++)
+ {
+ top->idef.il[i].iatoms = NULL;
+ top->idef.il[i].nalloc = 0;
+ }
+ top->idef.ilsort = ilsortUNKNOWN;
+
+ return top;
+}
+
+void dd_init_local_state(gmx_domdec_t *dd,
+ t_state *state_global,t_state *state_local)
+{
+ int buf[NITEM_DD_INIT_LOCAL_STATE];
+
+ if (DDMASTER(dd))
+ {
+ buf[0] = state_global->flags;
+ buf[1] = state_global->ngtc;
+ buf[2] = state_global->nnhpres;
+ buf[3] = state_global->nhchainlength;
+ buf[4] = state_global->dfhist.nlambda;
+ }
+ dd_bcast(dd,NITEM_DD_INIT_LOCAL_STATE*sizeof(int),buf);
+
+ init_state(state_local,0,buf[1],buf[2],buf[3],buf[4]);
+ state_local->flags = buf[0];
+
+ /* With Langevin Dynamics we need to make proper storage space
+ * in the global and local state for the random numbers.
+ */
+ if (state_local->flags & (1<<estLD_RNG))
+ {
+ if (DDMASTER(dd) && state_global->nrngi > 1)
+ {
+ state_global->nrng = dd->nnodes*gmx_rng_n();
+ srenew(state_global->ld_rng,state_global->nrng);
+ }
+ state_local->nrng = gmx_rng_n();
+ snew(state_local->ld_rng,state_local->nrng);
+ }
+ if (state_local->flags & (1<<estLD_RNGI))
+ {
+ if (DDMASTER(dd) && state_global->nrngi > 1)
+ {
+ state_global->nrngi = dd->nnodes;
+ srenew(state_global->ld_rngi,state_global->nrngi);
+ }
+ snew(state_local->ld_rngi,1);
+ }
+}
+
+static void check_link(t_blocka *link,int cg_gl,int cg_gl_j)
+{
+ int k,aj;
+ gmx_bool bFound;
+
+ bFound = FALSE;
+ for(k=link->index[cg_gl]; k<link->index[cg_gl+1]; k++)
+ {
+ if (link->a[k] == cg_gl_j)
+ {
+ bFound = TRUE;
+ }
+ }
+ if (!bFound)
+ {
+ /* Add this charge group link */
+ if (link->index[cg_gl+1]+1 > link->nalloc_a)
+ {
+ link->nalloc_a = over_alloc_large(link->index[cg_gl+1]+1);
+ srenew(link->a,link->nalloc_a);
+ }
+ link->a[link->index[cg_gl+1]] = cg_gl_j;
+ link->index[cg_gl+1]++;
+ }
+}
+
+static int *make_at2cg(t_block *cgs)
+{
+ int *at2cg,cg,a;
+
+ snew(at2cg,cgs->index[cgs->nr]);
+ for(cg=0; cg<cgs->nr; cg++)
+ {
+ for(a=cgs->index[cg]; a<cgs->index[cg+1]; a++)
+ {
+ at2cg[a] = cg;
+ }
+ }
+
+ return at2cg;
+}
+
+t_blocka *make_charge_group_links(gmx_mtop_t *mtop,gmx_domdec_t *dd,
+ cginfo_mb_t *cginfo_mb)
+{
+ gmx_reverse_top_t *rt;
+ int mb,cg_offset,cg,cg_gl,a,aj,i,j,ftype,nral,nlink_mol,mol,ncgi;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+ t_block *cgs;
+ t_blocka *excls;
+ int *a2c;
+ gmx_reverse_ilist_t ril;
+ t_blocka *link;
+ cginfo_mb_t *cgi_mb;
+
+ /* For each charge group make a list of other charge groups
+ * in the system that a linked to it via bonded interactions
+ * which are also stored in reverse_top.
+ */
+
+ rt = dd->reverse_top;
+
+ snew(link,1);
+ snew(link->index,ncg_mtop(mtop)+1);
+ link->nalloc_a = 0;
+ link->a = NULL;
+
+ link->index[0] = 0;
+ cg_offset = 0;
+ ncgi = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ if (molb->nmol == 0)
+ {
+ continue;
+ }
+ molt = &mtop->moltype[molb->type];
+ cgs = &molt->cgs;
+ excls = &molt->excls;
+ a2c = make_at2cg(cgs);
+ /* Make a reverse ilist in which the interactions are linked
+ * to all atoms, not only the first atom as in gmx_reverse_top.
+ * The constraints are discarded here.
+ */
- if (dd_check_ftype(ftype,bBCheck,FALSE))
++ make_reverse_ilist(molt,NULL,FALSE,FALSE,FALSE,TRUE,&ril);
+
+ cgi_mb = &cginfo_mb[mb];
+
+ for(cg=0; cg<cgs->nr; cg++)
+ {
+ cg_gl = cg_offset + cg;
+ link->index[cg_gl+1] = link->index[cg_gl];
+ for(a=cgs->index[cg]; a<cgs->index[cg+1]; a++)
+ {
+ i = ril.index[a];
+ while (i < ril.index[a+1])
+ {
+ ftype = ril.il[i++];
+ nral = NRAL(ftype);
+ /* Skip the ifunc index */
+ i++;
+ for(j=0; j<nral; j++)
+ {
+ aj = ril.il[i+j];
+ if (a2c[aj] != cg)
+ {
+ check_link(link,cg_gl,cg_offset+a2c[aj]);
+ }
+ }
+ i += nral_rt(ftype);
+ }
+ if (rt->bExclRequired)
+ {
+ /* Exclusions always go both ways */
+ for(j=excls->index[a]; j<excls->index[a+1]; j++)
+ {
+ aj = excls->a[j];
+ if (a2c[aj] != cg)
+ {
+ check_link(link,cg_gl,cg_offset+a2c[aj]);
+ }
+ }
+ }
+ }
+ if (link->index[cg_gl+1] - link->index[cg_gl] > 0)
+ {
+ SET_CGINFO_BOND_INTER(cgi_mb->cginfo[cg]);
+ ncgi++;
+ }
+ }
+ nlink_mol = link->index[cg_offset+cgs->nr] - link->index[cg_offset];
+
+ cg_offset += cgs->nr;
+
+ destroy_reverse_ilist(&ril);
+ sfree(a2c);
+
+ if (debug)
+ {
+ fprintf(debug,"molecule type '%s' %d cgs has %d cg links through bonded interac.\n",*molt->name,cgs->nr,nlink_mol);
+ }
+
+ if (molb->nmol > 1)
+ {
+ /* Copy the data for the rest of the molecules in this block */
+ link->nalloc_a += (molb->nmol - 1)*nlink_mol;
+ srenew(link->a,link->nalloc_a);
+ for(mol=1; mol<molb->nmol; mol++)
+ {
+ for(cg=0; cg<cgs->nr; cg++)
+ {
+ cg_gl = cg_offset + cg;
+ link->index[cg_gl+1] =
+ link->index[cg_gl+1-cgs->nr] + nlink_mol;
+ for(j=link->index[cg_gl]; j<link->index[cg_gl+1]; j++)
+ {
+ link->a[j] = link->a[j-nlink_mol] + cgs->nr;
+ }
+ if (link->index[cg_gl+1] - link->index[cg_gl] > 0 &&
+ cg_gl - cgi_mb->cg_start < cgi_mb->cg_mod)
+ {
+ SET_CGINFO_BOND_INTER(cgi_mb->cginfo[cg_gl - cgi_mb->cg_start]);
+ ncgi++;
+ }
+ }
+ cg_offset += cgs->nr;
+ }
+ }
+ }
+
+ if (debug)
+ {
+ fprintf(debug,"Of the %d charge groups %d are linked via bonded interactions\n",ncg_mtop(mtop),ncgi);
+ }
+
+ return link;
+}
+
+static void bonded_cg_distance_mol(gmx_moltype_t *molt,int *at2cg,
+ gmx_bool bBCheck,gmx_bool bExcl,rvec *cg_cm,
+ real *r_2b,int *ft2b,int *a2_1,int *a2_2,
+ real *r_mb,int *ftmb,int *am_1,int *am_2)
+{
+ int ftype,nral,i,j,ai,aj,cgi,cgj;
+ t_ilist *il;
+ t_blocka *excls;
+ real r2_2b,r2_mb,rij2;
+
+ r2_2b = 0;
+ r2_mb = 0;
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
++ if (dd_check_ftype(ftype,bBCheck,FALSE,FALSE))
+ {
+ il = &molt->ilist[ftype];
+ nral = NRAL(ftype);
+ if (nral > 1)
+ {
+ for(i=0; i<il->nr; i+=1+nral)
+ {
+ for(ai=0; ai<nral; ai++)
+ {
+ cgi = at2cg[il->iatoms[i+1+ai]];
+ for(aj=0; aj<nral; aj++) {
+ cgj = at2cg[il->iatoms[i+1+aj]];
+ if (cgi != cgj)
+ {
+ rij2 = distance2(cg_cm[cgi],cg_cm[cgj]);
+ if (nral == 2 && rij2 > r2_2b)
+ {
+ r2_2b = rij2;
+ *ft2b = ftype;
+ *a2_1 = il->iatoms[i+1+ai];
+ *a2_2 = il->iatoms[i+1+aj];
+ }
+ if (nral > 2 && rij2 > r2_mb)
+ {
+ r2_mb = rij2;
+ *ftmb = ftype;
+ *am_1 = il->iatoms[i+1+ai];
+ *am_2 = il->iatoms[i+1+aj];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ if (bExcl)
+ {
+ excls = &molt->excls;
+ for(ai=0; ai<excls->nr; ai++)
+ {
+ cgi = at2cg[ai];
+ for(j=excls->index[ai]; j<excls->index[ai+1]; j++) {
+ cgj = at2cg[excls->a[j]];
+ if (cgi != cgj)
+ {
+ rij2 = distance2(cg_cm[cgi],cg_cm[cgj]);
+ if (rij2 > r2_2b)
+ {
+ r2_2b = rij2;
+ }
+ }
+ }
+ }
+ }
+
+ *r_2b = sqrt(r2_2b);
+ *r_mb = sqrt(r2_mb);
+}
+
+static void get_cgcm_mol(gmx_moltype_t *molt,gmx_ffparams_t *ffparams,
+ int ePBC,t_graph *graph,matrix box,
+ gmx_vsite_t *vsite,
+ rvec *x,rvec *xs,rvec *cg_cm)
+{
+ int n,i;
+
+ if (ePBC != epbcNONE)
+ {
+ mk_mshift(NULL,graph,ePBC,box,x);
+
+ shift_x(graph,box,x,xs);
+ /* By doing an extra mk_mshift the molecules that are broken
+ * because they were e.g. imported from another software
+ * will be made whole again. Such are the healing powers
+ * of GROMACS.
+ */
+ mk_mshift(NULL,graph,ePBC,box,xs);
+ }
+ else
+ {
+ /* We copy the coordinates so the original coordinates remain
+ * unchanged, just to be 100% sure that we do not affect
+ * binary reproducibility of simulations.
+ */
+ n = molt->cgs.index[molt->cgs.nr];
+ for(i=0; i<n; i++)
+ {
+ copy_rvec(x[i],xs[i]);
+ }
+ }
+
+ if (vsite)
+ {
+ construct_vsites(NULL,vsite,xs,NULL,0.0,NULL,
+ ffparams->iparams,molt->ilist,
+ epbcNONE,TRUE,NULL,NULL,NULL);
+ }
+
+ calc_cgcm(NULL,0,molt->cgs.nr,&molt->cgs,xs,cg_cm);
+}
+
+static int have_vsite_molt(gmx_moltype_t *molt)
+{
+ int i;
+ gmx_bool bVSite;
+
+ bVSite = FALSE;
+ for(i=0; i<F_NRE; i++)
+ {
+ if ((interaction_function[i].flags & IF_VSITE) &&
+ molt->ilist[i].nr > 0) {
+ bVSite = TRUE;
+ }
+ }
+
+ return bVSite;
+}
+
+void dd_bonded_cg_distance(FILE *fplog,
+ gmx_domdec_t *dd,gmx_mtop_t *mtop,
+ t_inputrec *ir,rvec *x,matrix box,
+ gmx_bool bBCheck,
+ real *r_2b,real *r_mb)
+{
+ gmx_bool bExclRequired;
+ int mb,cg_offset,at_offset,*at2cg,mol;
+ t_graph graph;
+ gmx_vsite_t *vsite;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+ rvec *xs,*cg_cm;
+ real rmol_2b,rmol_mb;
+ int ft2b=-1,a_2b_1=-1,a_2b_2=-1,ftmb=-1,a_mb_1=-1,a_mb_2=-1;
+ int ftm2b=-1,amol_2b_1=-1,amol_2b_2=-1,ftmmb=-1,amol_mb_1=-1,amol_mb_2=-1;
+
+ bExclRequired = IR_EXCL_FORCES(*ir);
+
+ /* For gmx_vsite_t everything 0 should work (without pbc) */
+ snew(vsite,1);
+
+ *r_2b = 0;
+ *r_mb = 0;
+ cg_offset = 0;
+ at_offset = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+ if (molt->cgs.nr == 1 || molb->nmol == 0)
+ {
+ cg_offset += molb->nmol*molt->cgs.nr;
+ at_offset += molb->nmol*molt->atoms.nr;
+ }
+ else
+ {
+ if (ir->ePBC != epbcNONE)
+ {
+ mk_graph_ilist(NULL,molt->ilist,0,molt->atoms.nr,FALSE,FALSE,
+ &graph);
+ }
+
+ at2cg = make_at2cg(&molt->cgs);
+ snew(xs,molt->atoms.nr);
+ snew(cg_cm,molt->cgs.nr);
+ for(mol=0; mol<molb->nmol; mol++)
+ {
+ get_cgcm_mol(molt,&mtop->ffparams,ir->ePBC,&graph,box,
+ have_vsite_molt(molt) ? vsite : NULL,
+ x+at_offset,xs,cg_cm);
+
+ bonded_cg_distance_mol(molt,at2cg,bBCheck,bExclRequired,cg_cm,
+ &rmol_2b,&ftm2b,&amol_2b_1,&amol_2b_2,
+ &rmol_mb,&ftmmb,&amol_mb_1,&amol_mb_2);
+ if (rmol_2b > *r_2b)
+ {
+ *r_2b = rmol_2b;
+ ft2b = ftm2b;
+ a_2b_1 = at_offset + amol_2b_1;
+ a_2b_2 = at_offset + amol_2b_2;
+ }
+ if (rmol_mb > *r_mb)
+ {
+ *r_mb = rmol_mb;
+ ftmb = ftmmb;
+ a_mb_1 = at_offset + amol_mb_1;
+ a_mb_2 = at_offset + amol_mb_2;
+ }
+
+ cg_offset += molt->cgs.nr;
+ at_offset += molt->atoms.nr;
+ }
+ sfree(cg_cm);
+ sfree(xs);
+ sfree(at2cg);
+ if (ir->ePBC != epbcNONE)
+ {
+ done_graph(&graph);
+ }
+ }
+ }
+
+ sfree(vsite);
+
+ if (fplog && (ft2b >= 0 || ftmb >= 0))
+ {
+ fprintf(fplog,
+ "Initial maximum inter charge-group distances:\n");
+ if (ft2b >= 0)
+ {
+ fprintf(fplog,
+ " two-body bonded interactions: %5.3f nm, %s, atoms %d %d\n",
+ *r_2b,interaction_function[ft2b].longname,
+ a_2b_1+1,a_2b_2+1);
+ }
+ if (ftmb >= 0)
+ {
+ fprintf(fplog,
+ " multi-body bonded interactions: %5.3f nm, %s, atoms %d %d\n",
+ *r_mb,interaction_function[ftmb].longname,
+ a_mb_1+1,a_mb_2+1);
+ }
+ }
+}
--- /dev/null
- gmx_mtop_atomnr_to_atom(mtop,edi->sref.anrs[i],&atom);
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <time.h>
+#include "typedefs.h"
+#include "string2.h"
+#include "smalloc.h"
+#include "names.h"
+#include "confio.h"
+#include "mvdata.h"
+#include "txtdump.h"
+#include "vec.h"
+#include <time.h>
+#include "nrnb.h"
+#include "mshift.h"
+#include "mdrun.h"
+#include "update.h"
+#include "physics.h"
+#include "nrjac.h"
+#include "mtop_util.h"
+#include "edsam.h"
+#include "gmxfio.h"
+#include "groupcoord.h"
+
+
+/* We use the same defines as in mvdata.c here */
+#define block_bc(cr, d) gmx_bcast( sizeof(d), &(d),(cr))
+#define nblock_bc(cr,nr,d) gmx_bcast((nr)*sizeof((d)[0]), (d),(cr))
+#define snew_bc(cr,d,nr) { if (!MASTER(cr)) snew((d),(nr)); }
+
+
+/* enum to identify the type of ED: none, normal ED, flooding */
+enum {eEDnone, eEDedsam, eEDflood, eEDnr};
+
+/* enum to identify operations on reference, average, origin, target structures */
+enum {eedREF, eedAV, eedORI, eedTAR, eedNR};
+
+
+typedef struct
+{
+ int neig; /* nr of eigenvectors */
+ int *ieig; /* index nrs of eigenvectors */
+ real *stpsz; /* stepsizes (per eigenvector) */
+ rvec **vec; /* eigenvector components */
+ real *xproj; /* instantaneous x projections */
+ real *fproj; /* instantaneous f projections */
+ real radius; /* instantaneous radius */
+ real *refproj; /* starting or target projecions */
+ /* When using flooding as harmonic restraint: The current reference projection
+ * is at each step calculated from the initial refproj0 and the slope. */
+ real *refproj0,*refprojslope;
+} t_eigvec;
+
+
+typedef struct
+{
+ t_eigvec mon; /* only monitored, no constraints */
+ t_eigvec linfix; /* fixed linear constraints */
+ t_eigvec linacc; /* acceptance linear constraints */
+ t_eigvec radfix; /* fixed radial constraints (exp) */
+ t_eigvec radacc; /* acceptance radial constraints (exp) */
+ t_eigvec radcon; /* acceptance rad. contraction constr. */
+} t_edvecs;
+
+
+typedef struct
+{
+ real deltaF0;
+ gmx_bool bHarmonic; /* Use flooding for harmonic restraint on
+ the eigenvector */
+ gmx_bool bConstForce; /* Do not calculate a flooding potential,
+ instead flood with a constant force */
+ real tau;
+ real deltaF;
+ real Efl;
+ real kT;
+ real Vfl;
+ real dt;
+ real constEfl;
+ real alpha2;
+ int flood_id;
+ rvec *forces_cartesian;
+ t_eigvec vecs; /* use flooding for these */
+} t_edflood;
+
+
+/* This type is for the average, reference, target, and origin structure */
+typedef struct gmx_edx
+{
+ int nr; /* number of atoms this structure contains */
+ int nr_loc; /* number of atoms on local node */
+ int *anrs; /* atom index numbers */
+ int *anrs_loc; /* local atom index numbers */
+ int nalloc_loc; /* allocation size of anrs_loc */
+ int *c_ind; /* at which position of the whole anrs
+ * array is a local atom?, i.e.
+ * c_ind[0...nr_loc-1] gives the atom index
+ * with respect to the collective
+ * anrs[0...nr-1] array */
+ rvec *x; /* positions for this structure */
+ rvec *x_old; /* used to keep track of the shift vectors
+ such that the ED molecule can always be
+ made whole in the parallel case */
+ real *m; /* masses */
+ real mtot; /* total mass (only used in sref) */
+ real *sqrtm; /* sqrt of the masses used for mass-
+ * weighting of analysis (only used in sav) */
+} t_gmx_edx;
+
+
+typedef struct edpar
+{
+ int nini; /* total Nr of atoms */
+ gmx_bool fitmas; /* true if trans fit with cm */
+ gmx_bool pcamas; /* true if mass-weighted PCA */
+ int presteps; /* number of steps to run without any
+ * perturbations ... just monitoring */
+ int outfrq; /* freq (in steps) of writing to edo */
+ int maxedsteps; /* max nr of steps per cycle */
+
+ /* all gmx_edx datasets are copied to all nodes in the parallel case */
+ struct gmx_edx sref; /* reference positions, to these fitting
+ * will be done */
+ gmx_bool bRefEqAv; /* If true, reference & average indices
+ * are the same. Used for optimization */
+ struct gmx_edx sav; /* average positions */
+ struct gmx_edx star; /* target positions */
+ struct gmx_edx sori; /* origin positions */
+
+ t_edvecs vecs; /* eigenvectors */
+ real slope; /* minimal slope in acceptance radexp */
+
+ gmx_bool bNeedDoEdsam; /* if any of the options mon, linfix, ...
+ * is used (i.e. apart from flooding) */
+ t_edflood flood; /* parameters especially for flooding */
+ struct t_ed_buffer *buf; /* handle to local buffers */
+ struct edpar *next_edi; /* Pointer to another ed dataset */
+} t_edpar;
+
+
+typedef struct gmx_edsam
+{
+ int eEDtype; /* Type of ED: see enums above */
+ const char *edinam; /* name of ED sampling input file */
+ const char *edonam; /* output */
+ FILE *edo; /* output file pointer */
+ t_edpar *edpar;
+ gmx_bool bFirst;
+ gmx_bool bStartFromCpt;
+} t_gmx_edsam;
+
+
+struct t_do_edsam
+{
+ matrix old_rotmat;
+ real oldrad;
+ rvec old_transvec,older_transvec,transvec_compact;
+ rvec *xcoll; /* Positions from all nodes, this is the
+ collective set we work on.
+ These are the positions of atoms with
+ average structure indices */
+ rvec *xc_ref; /* same but with reference structure indices */
+ ivec *shifts_xcoll; /* Shifts for xcoll */
+ ivec *extra_shifts_xcoll; /* xcoll shift changes since last NS step */
+ ivec *shifts_xc_ref; /* Shifts for xc_ref */
+ ivec *extra_shifts_xc_ref; /* xc_ref shift changes since last NS step */
+ gmx_bool bUpdateShifts; /* TRUE in NS steps to indicate that the
+ ED shifts for this ED dataset need to
+ be updated */
+};
+
+
+/* definition of ED buffer structure */
+struct t_ed_buffer
+{
+ struct t_fit_to_ref * fit_to_ref;
+ struct t_do_edfit * do_edfit;
+ struct t_do_edsam * do_edsam;
+ struct t_do_radcon * do_radcon;
+};
+
+
+/* Function declarations */
+static void fit_to_reference(rvec *xcoll,rvec transvec,matrix rotmat,t_edpar *edi);
+
+static void translate_and_rotate(rvec *x,int nat,rvec transvec,matrix rotmat);
+/* End function declarations */
+
+
+/* Does not subtract average positions, projection on single eigenvector is returned
+ * used by: do_linfix, do_linacc, do_radfix, do_radacc, do_radcon
+ * Average position is subtracted in ed_apply_constraints prior to calling projectx
+ */
+static real projectx(t_edpar *edi, rvec *xcoll, rvec *vec)
+{
+ int i;
+ real proj=0.0;
+
+
+ for (i=0; i<edi->sav.nr; i++)
+ proj += edi->sav.sqrtm[i]*iprod(vec[i], xcoll[i]);
+
+ return proj;
+}
+
+
+/* Specialized: projection is stored in vec->refproj
+ * -> used for radacc, radfix, radcon and center of flooding potential
+ * subtracts average positions, projects vector x */
+static void rad_project(t_edpar *edi, rvec *x, t_eigvec *vec, t_commrec *cr)
+{
+ int i;
+ real rad=0.0;
+
+ /* Subtract average positions */
+ for (i = 0; i < edi->sav.nr; i++)
+ rvec_dec(x[i], edi->sav.x[i]);
+
+ for (i = 0; i < vec->neig; i++)
+ {
+ vec->refproj[i] = projectx(edi,x,vec->vec[i]);
+ rad += pow((vec->refproj[i]-vec->xproj[i]),2);
+ }
+ vec->radius=sqrt(rad);
+
+ /* Add average positions */
+ for (i = 0; i < edi->sav.nr; i++)
+ rvec_inc(x[i], edi->sav.x[i]);
+}
+
+
+/* Project vector x, subtract average positions prior to projection and add
+ * them afterwards to retain the unchanged vector. Store in xproj. Mass-weighting
+ * is applied. */
+static void project_to_eigvectors(rvec *x, /* The positions to project to an eigenvector */
+ t_eigvec *vec, /* The eigenvectors */
+ t_edpar *edi)
+{
+ int i;
+
+
+ if (!vec->neig) return;
+
+ /* Subtract average positions */
+ for (i=0; i<edi->sav.nr; i++)
+ rvec_dec(x[i], edi->sav.x[i]);
+
+ for (i=0; i<vec->neig; i++)
+ vec->xproj[i] = projectx(edi, x, vec->vec[i]);
+
+ /* Add average positions */
+ for (i=0; i<edi->sav.nr; i++)
+ rvec_inc(x[i], edi->sav.x[i]);
+}
+
+
+/* Project vector x onto all edi->vecs (mon, linfix,...) */
+static void project(rvec *x, /* positions to project */
+ t_edpar *edi) /* edi data set */
+{
+ /* It is not more work to subtract the average position in every
+     * subroutine again, because these routines are rarely used simultaneously */
+ project_to_eigvectors(x, &edi->vecs.mon , edi);
+ project_to_eigvectors(x, &edi->vecs.linfix, edi);
+ project_to_eigvectors(x, &edi->vecs.linacc, edi);
+ project_to_eigvectors(x, &edi->vecs.radfix, edi);
+ project_to_eigvectors(x, &edi->vecs.radacc, edi);
+ project_to_eigvectors(x, &edi->vecs.radcon, edi);
+}
+
+
+static real calc_radius(t_eigvec *vec)
+{
+ int i;
+ real rad=0.0;
+
+
+ for (i=0; i<vec->neig; i++)
+ rad += pow((vec->refproj[i]-vec->xproj[i]),2);
+
+ return rad=sqrt(rad);
+}
+
+
+/* Debug helper */
+#ifdef DEBUGHELPERS
+static void dump_xcoll(t_edpar *edi, struct t_do_edsam *buf, t_commrec *cr,
+ int step)
+{
+ int i;
+ FILE *fp;
+ char fn[STRLEN];
+ rvec *xcoll;
+ ivec *shifts, *eshifts;
+
+
+ if (!MASTER(cr))
+ return;
+
+ xcoll = buf->xcoll;
+ shifts = buf->shifts_xcoll;
+ eshifts = buf->extra_shifts_xcoll;
+
+ sprintf(fn, "xcolldump_step%d.txt", step);
+ fp = fopen(fn, "w");
+
+ for (i=0; i<edi->sav.nr; i++)
+ fprintf(fp, "%d %9.5f %9.5f %9.5f %d %d %d %d %d %d\n",
+ edi->sav.anrs[i]+1,
+ xcoll[i][XX] , xcoll[i][YY] , xcoll[i][ZZ],
+ shifts[i][XX] , shifts[i][YY] , shifts[i][ZZ],
+ eshifts[i][XX], eshifts[i][YY], eshifts[i][ZZ]);
+
+ fclose(fp);
+}
+
+
+/* Debug helper */
+static void dump_edi_positions(FILE *out, struct gmx_edx *s, const char name[])
+{
+ int i;
+
+
+ fprintf(out, "#%s positions:\n%d\n", name, s->nr);
+ if (s->nr == 0)
+ return;
+
+ fprintf(out, "#index, x, y, z");
+ if (s->sqrtm)
+ fprintf(out, ", sqrt(m)");
+ for (i=0; i<s->nr; i++)
+ {
+ fprintf(out, "\n%6d %11.6f %11.6f %11.6f",s->anrs[i], s->x[i][XX], s->x[i][YY], s->x[i][ZZ]);
+ if (s->sqrtm)
+ fprintf(out,"%9.3f",s->sqrtm[i]);
+ }
+ fprintf(out, "\n");
+}
+
+
+/* Debug helper */
+static void dump_edi_eigenvecs(FILE *out, t_eigvec *ev,
+ const char name[], int length)
+{
+ int i,j;
+
+
+ fprintf(out, "#%s eigenvectors:\n%d\n", name, ev->neig);
+ /* Dump the data for every eigenvector: */
+ for (i=0; i<ev->neig; i++)
+ {
+ fprintf(out, "EV %4d\ncomponents %d\nstepsize %f\nxproj %f\nfproj %f\nrefproj %f\nradius %f\nComponents:\n",
+ ev->ieig[i], length, ev->stpsz[i], ev->xproj[i], ev->fproj[i], ev->refproj[i], ev->radius);
+ for (j=0; j<length; j++)
+ fprintf(out, "%11.6f %11.6f %11.6f\n", ev->vec[i][j][XX], ev->vec[i][j][YY], ev->vec[i][j][ZZ]);
+ }
+}
+
+
+/* Debug helper */
+static void dump_edi(t_edpar *edpars, t_commrec *cr, int nr_edi)
+{
+ FILE *out;
+ char fn[STRLEN];
+
+
+ sprintf(fn, "EDdump_node%d_edi%d", cr->nodeid, nr_edi);
+ out = ffopen(fn, "w");
+
+ fprintf(out,"#NINI\n %d\n#FITMAS\n %d\n#ANALYSIS_MAS\n %d\n",
+ edpars->nini,edpars->fitmas,edpars->pcamas);
+ fprintf(out,"#OUTFRQ\n %d\n#MAXLEN\n %d\n#SLOPECRIT\n %f\n",
+ edpars->outfrq,edpars->maxedsteps,edpars->slope);
+ fprintf(out,"#PRESTEPS\n %d\n#DELTA_F0\n %f\n#TAU\n %f\n#EFL_NULL\n %f\n#ALPHA2\n %f\n",
+ edpars->presteps,edpars->flood.deltaF0,edpars->flood.tau,
+ edpars->flood.constEfl,edpars->flood.alpha2);
+
+ /* Dump reference, average, target, origin positions */
+ dump_edi_positions(out, &edpars->sref, "REFERENCE");
+ dump_edi_positions(out, &edpars->sav , "AVERAGE" );
+ dump_edi_positions(out, &edpars->star, "TARGET" );
+ dump_edi_positions(out, &edpars->sori, "ORIGIN" );
+
+ /* Dump eigenvectors */
+ dump_edi_eigenvecs(out, &edpars->vecs.mon , "MONITORED", edpars->sav.nr);
+ dump_edi_eigenvecs(out, &edpars->vecs.linfix, "LINFIX" , edpars->sav.nr);
+ dump_edi_eigenvecs(out, &edpars->vecs.linacc, "LINACC" , edpars->sav.nr);
+ dump_edi_eigenvecs(out, &edpars->vecs.radfix, "RADFIX" , edpars->sav.nr);
+ dump_edi_eigenvecs(out, &edpars->vecs.radacc, "RADACC" , edpars->sav.nr);
+ dump_edi_eigenvecs(out, &edpars->vecs.radcon, "RADCON" , edpars->sav.nr);
+
+ /* Dump flooding eigenvectors */
+ dump_edi_eigenvecs(out, &edpars->flood.vecs, "FLOODING" , edpars->sav.nr);
+
+ /* Dump ed local buffer */
+ fprintf(out, "buf->do_edfit =%p\n", (void*)edpars->buf->do_edfit );
+ fprintf(out, "buf->do_edsam =%p\n", (void*)edpars->buf->do_edsam );
+ fprintf(out, "buf->do_radcon =%p\n", (void*)edpars->buf->do_radcon );
+
+ ffclose(out);
+}
+
+
+/* Debug helper */
+static void dump_rotmat(FILE* out,matrix rotmat)
+{
+ fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[XX][XX],rotmat[XX][YY],rotmat[XX][ZZ]);
+ fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[YY][XX],rotmat[YY][YY],rotmat[YY][ZZ]);
+ fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[ZZ][XX],rotmat[ZZ][YY],rotmat[ZZ][ZZ]);
+}
+
+
+/* Debug helper */
+static void dump_rvec(FILE *out, int dim, rvec *x)
+{
+ int i;
+
+
+ for (i=0; i<dim; i++)
+ fprintf(out,"%4d %f %f %f\n",i,x[i][XX],x[i][YY],x[i][ZZ]);
+}
+
+
+/* Debug helper */
+static void dump_mat(FILE* out, int dim, double** mat)
+{
+ int i,j;
+
+
+ fprintf(out,"MATRIX:\n");
+ for (i=0;i<dim;i++)
+ {
+ for (j=0;j<dim;j++)
+ fprintf(out,"%f ",mat[i][j]);
+ fprintf(out,"\n");
+ }
+}
+#endif
+
+
+struct t_do_edfit {
+ double **omega;
+ double **om;
+};
+
+static void do_edfit(int natoms,rvec *xp,rvec *x,matrix R,t_edpar *edi)
+{
+ /* this is a copy of do_fit with some modifications */
+ int c,r,n,j,i,irot;
+ double d[6],xnr,xpc;
+ matrix vh,vk,u;
+ int index;
+ real max_d;
+
+ struct t_do_edfit *loc;
+ gmx_bool bFirst;
+
+ if(edi->buf->do_edfit != NULL)
+ bFirst = FALSE;
+ else
+ {
+ bFirst = TRUE;
+ snew(edi->buf->do_edfit,1);
+ }
+ loc = edi->buf->do_edfit;
+
+ if (bFirst)
+ {
+ snew(loc->omega,2*DIM);
+ snew(loc->om,2*DIM);
+ for(i=0; i<2*DIM; i++)
+ {
+ snew(loc->omega[i],2*DIM);
+ snew(loc->om[i],2*DIM);
+ }
+ }
+
+ for(i=0;(i<6);i++)
+ {
+ d[i]=0;
+ for(j=0;(j<6);j++)
+ {
+ loc->omega[i][j]=0;
+ loc->om[i][j]=0;
+ }
+ }
+
+ /* calculate the matrix U */
+ clear_mat(u);
+ for(n=0;(n<natoms);n++)
+ {
+ for(c=0; (c<DIM); c++)
+ {
+ xpc=xp[n][c];
+ for(r=0; (r<DIM); r++)
+ {
+ xnr=x[n][r];
+ u[c][r]+=xnr*xpc;
+ }
+ }
+ }
+
+ /* construct loc->omega */
+ /* loc->omega is symmetric -> loc->omega==loc->omega' */
+ for(r=0;(r<6);r++)
+ for(c=0;(c<=r);c++)
+ if ((r>=3) && (c<3))
+ {
+ loc->omega[r][c]=u[r-3][c];
+ loc->omega[c][r]=u[r-3][c];
+ }
+ else
+ {
+ loc->omega[r][c]=0;
+ loc->omega[c][r]=0;
+ }
+
+ /* determine h and k */
+#ifdef DEBUG
+ {
+ int i;
+ dump_mat(stderr,2*DIM,loc->omega);
+ for (i=0; i<6; i++)
+ fprintf(stderr,"d[%d] = %f\n",i,d[i]);
+ }
+#endif
+ jacobi(loc->omega,6,d,loc->om,&irot);
+
+ if (irot==0)
+ fprintf(stderr,"IROT=0\n");
+
+ index=0; /* For the compiler only */
+
+ for(j=0;(j<3);j++)
+ {
+ max_d=-1000;
+ for(i=0;(i<6);i++)
+ if (d[i]>max_d)
+ {
+ max_d=d[i];
+ index=i;
+ }
+ d[index]=-10000;
+ for(i=0;(i<3);i++)
+ {
+ vh[j][i]=M_SQRT2*loc->om[i][index];
+ vk[j][i]=M_SQRT2*loc->om[i+DIM][index];
+ }
+ }
+
+ /* determine R */
+ for(c=0;(c<3);c++)
+ for(r=0;(r<3);r++)
+ R[c][r]=vk[0][r]*vh[0][c]+
+ vk[1][r]*vh[1][c]+
+ vk[2][r]*vh[2][c];
+ if (det(R) < 0)
+ for(c=0;(c<3);c++)
+ for(r=0;(r<3);r++)
+ R[c][r]=vk[0][r]*vh[0][c]+
+ vk[1][r]*vh[1][c]-
+ vk[2][r]*vh[2][c];
+}
+
+
+static void rmfit(int nat, rvec *xcoll, rvec transvec, matrix rotmat)
+{
+ rvec vec;
+ matrix tmat;
+
+
+ /* Remove rotation.
+ * The inverse rotation is described by the transposed rotation matrix */
+ transpose(rotmat,tmat);
+ rotate_x(xcoll, nat, tmat);
+
+ /* Remove translation */
+ vec[XX]=-transvec[XX];
+ vec[YY]=-transvec[YY];
+ vec[ZZ]=-transvec[ZZ];
+ translate_x(xcoll, nat, vec);
+}
+
+
+/**********************************************************************************
+ ******************** FLOODING ****************************************************
+ **********************************************************************************
+
+The flooding ability was added later to edsam. Much of the edsam functionality could be reused for that purpose.
+The flooding covariance matrix, i.e. the selected eigenvectors and their corresponding eigenvalues are
+read as 7th Component Group. The eigenvalues are coded into the stepsize parameter (as used by -linfix or -linacc).
+
+do_md calls the function init_edsam right at the beginning; init_edsam reads the edi file, saves all the necessary information in
+the edi structure and calls init_flood, to initialise some extra fields in the edi->flood structure.
+
+Since the flooding acts on forces, do_flood is called from the function force() (force.c), while the other
+edsam functionality is hooked into md via the update() (update.c) function acting as constraint on positions.
+
+do_flood makes a copy of the positions,
+fits them, projects them, computes flooding_energy and the flooding forces. The forces are computed in the
+space of the eigenvectors and are then blown up to the full cartesian space and rotated back to remove the
+fit. Then do_flood adds these forces to the forcefield-forces
+(given as parameter) and updates the adaptive flooding parameters Efl and deltaF.
+
+To center the flooding potential at a different location one can use the -ori option in make_edi. The ori
+structure is projected to the system of eigenvectors and then this position in the subspace is used as
+center of the flooding potential. If the option is not used, the center will be zero in the subspace,
+i.e. the average structure as given in the make_edi file.
+
+To use the flooding potential as restraint, make_edi has the option -restrain, which leads to inverted
+signs of alpha2 and Efl, such that the sign in the exponential of Vfl is not inverted but the sign of
+Vfl is inverted. Vfl = Efl * exp (- .../Efl/alpha2*x^2...) With tau>0 the negative Efl will grow slowly
+so that the restraint is switched off slowly. Once Efl==0 is reached with inverted flooding ON, no
+ further adaptation is applied; Efl will stay constant at zero.
+
+To use restraints with harmonic potentials switch -restrain and -harmonic. Then the eigenvalues are
+used as spring constants for the harmonic potential.
+Note that eq3 in the flooding paper (J. Comp. Chem. 2006, 27, 1693-1702) defines the parameter lambda \
+as the inverse of the spring constant, whereas the implementation uses lambda as the spring constant.
+
+To use more than one flooding matrix just concatenate several .edi files (cat flood1.edi flood2.edi > flood_all.edi)
+the routine read_edi_file reads all of these flooding files.
+The structure t_edi is now organized as a list of t_edis and the function do_flood cycles through the list
+calling the do_single_flood() routine for every single entry. Since all state variables are kept in their own
+edi entry there is no interdependence whatsoever. The forces are added together.
+
+ To write energies into the .edr file, call the function
+ get_flood_enx_names(char**, int *nnames) to get the Header (Vfl1 Vfl2... Vfln)
+and call
+ get_flood_energies(real Vfl[],int nnames);
+
+ TODO:
+- one could program the whole thing such that Efl, Vfl and deltaF are written to the .edr file. -- I don't know how to do that, yet.
+
+ Maybe one should give a range of atoms for which to remove motion, so that motion is removed with
+ two edsam files from two peptide chains
+*/
+
+static void write_edo_flood(t_edpar *edi, FILE *fp, gmx_large_int_t step)
+{
+ int i;
+ char buf[22];
+ gmx_bool bOutputRef=FALSE;
+
+
+ fprintf(fp,"%d.th FL: %s %12.5e %12.5e %12.5e\n",
+ edi->flood.flood_id, gmx_step_str(step,buf),
+ edi->flood.Efl, edi->flood.Vfl, edi->flood.deltaF);
+
+
+ /* Check whether any of the references changes with time (this can happen
+ * in case flooding is used as harmonic restraint). If so, output all the
+ * current reference projections. */
+ if (edi->flood.bHarmonic)
+ {
+ for (i = 0; i < edi->flood.vecs.neig; i++)
+ {
+ if (edi->flood.vecs.refprojslope[i] != 0.0)
+ bOutputRef=TRUE;
+ }
+ if (bOutputRef)
+ {
+ fprintf(fp, "Ref. projs.: ");
+ for (i = 0; i < edi->flood.vecs.neig; i++)
+ {
+ fprintf(fp, "%12.5e ", edi->flood.vecs.refproj[i]);
+ }
+ fprintf(fp, "\n");
+ }
+ }
+ fprintf(fp,"FL_FORCES: ");
+
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ fprintf(fp," %12.5e",edi->flood.vecs.fproj[i]);
+
+ fprintf(fp,"\n");
+}
+
+
+/* From flood.xproj compute the Vfl(x) at this point */
+static real flood_energy(t_edpar *edi, gmx_large_int_t step)
+{
+ /* compute flooding energy Vfl
+ Vfl = Efl * exp( - \frac {kT} {2Efl alpha^2} * sum_i { \lambda_i c_i^2 } )
+ \lambda_i is the reciprocal eigenvalue 1/\sigma_i
+ it is already computed by make_edi and stored in stpsz[i]
+ bHarmonic:
+ Vfl = - Efl * 1/2(sum _i {\frac 1{\lambda_i} c_i^2})
+ */
+ real sum;
+ real Vfl;
+ int i;
+
+
+ /* Each time this routine is called (i.e. each time step), we add a small
+ * value to the reference projection. This way a harmonic restraint towards
+ * a moving reference is realized. If no value for the additive constant
+ * is provided in the edi file, the reference will not change. */
+ if (edi->flood.bHarmonic)
+ {
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ {
+ edi->flood.vecs.refproj[i] = edi->flood.vecs.refproj0[i] + step * edi->flood.vecs.refprojslope[i];
+ }
+ }
+
+ sum=0.0;
+ /* Compute sum which will be the exponent of the exponential */
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ {
+ /* stpsz stores the reciprocal eigenvalue 1/sigma_i */
+ sum += edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i])*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]);
+ }
+
+ /* Compute the Gauss function*/
+ if (edi->flood.bHarmonic)
+ {
+ Vfl = -0.5*edi->flood.Efl*sum; /* minus sign because Efl is negative, if restrain is on. */
+ }
+ else
+ {
+ Vfl = edi->flood.Efl!=0 ? edi->flood.Efl*exp(-edi->flood.kT/2/edi->flood.Efl/edi->flood.alpha2*sum) :0;
+ }
+
+ return Vfl;
+}
+
+
+/* From the position and from Vfl compute forces in subspace -> store in edi->vec.flood.fproj */
+static void flood_forces(t_edpar *edi)
+{
+ /* compute the forces in the subspace of the flooding eigenvectors
+ * by the formula F_i= V_{fl}(c) * ( \frac {kT} {E_{fl}} \lambda_i c_i */
+
+ int i;
+ real energy=edi->flood.Vfl;
+
+
+ if (edi->flood.bHarmonic)
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ {
+ edi->flood.vecs.fproj[i] = edi->flood.Efl* edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]);
+ }
+ else
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ {
+ /* if Efl is zero the forces are zero if not use the formula */
+ edi->flood.vecs.fproj[i] = edi->flood.Efl!=0 ? edi->flood.kT/edi->flood.Efl/edi->flood.alpha2*energy*edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]) : 0;
+ }
+}
+
+
+/* Raise forces from subspace into cartesian space */
+static void flood_blowup(t_edpar *edi, rvec *forces_cart)
+{
+ /* this function lifts the forces from the subspace to the cartesian space
+ all the values not contained in the subspace are assumed to be zero and then
+ a coordinate transformation from eigenvector to cartesian vectors is performed
+ The nonexistent values don't have to be set to zero explicitly, they would occur
+ as zero valued summands, hence we just stop to compute this part of the sum.
+
+ for every atom we add all the contributions to this atom from all the different eigenvectors.
+
+ NOTE: one could add directly to the forcefield forces, would mean we wouldn't have to clear the
+ field forces_cart prior the computation, but we compute the forces separately
+ to have them accessible for diagnostics
+ */
+ int j,eig;
+ rvec dum;
+ real *forces_sub;
+
+
+ forces_sub = edi->flood.vecs.fproj;
+
+
+ /* Calculate the cartesian forces for the local atoms */
+
+ /* Clear forces first */
+ for (j=0; j<edi->sav.nr_loc; j++)
+ clear_rvec(forces_cart[j]);
+
+ /* Now compute atomwise */
+ for (j=0; j<edi->sav.nr_loc; j++)
+ {
+ /* Compute forces_cart[edi->sav.anrs[j]] */
+ for (eig=0; eig<edi->flood.vecs.neig; eig++)
+ {
+ /* Force vector is force * eigenvector (compute only atom j) */
+ svmul(forces_sub[eig],edi->flood.vecs.vec[eig][edi->sav.c_ind[j]],dum);
+ /* Add this vector to the cartesian forces */
+ rvec_inc(forces_cart[j],dum);
+ }
+ }
+}
+
+
+/* Update the values of Efl, deltaF depending on tau and Vfl */
+static void update_adaption(t_edpar *edi)
+{
+ /* this function updates the parameter Efl and deltaF according to the rules given in
+ * 'predicting unimolecular chemical reactions: chemical flooding' M Mueller et al,
+ * J. chem Phys. */
+
+ if ((edi->flood.tau < 0 ? -edi->flood.tau : edi->flood.tau ) > 0.00000001)
+ {
+ edi->flood.Efl = edi->flood.Efl+edi->flood.dt/edi->flood.tau*(edi->flood.deltaF0-edi->flood.deltaF);
+ /* check if restrain (inverted flooding) -> don't let EFL become positive */
+ if (edi->flood.alpha2<0 && edi->flood.Efl>-0.00000001)
+ edi->flood.Efl = 0;
+
+ edi->flood.deltaF = (1-edi->flood.dt/edi->flood.tau)*edi->flood.deltaF+edi->flood.dt/edi->flood.tau*edi->flood.Vfl;
+ }
+}
+
+
+static void do_single_flood(
+ FILE *edo,
+ rvec x[],
+ rvec force[],
+ t_edpar *edi,
+ gmx_large_int_t step,
+ matrix box,
+ t_commrec *cr,
+ gmx_bool bNS) /* Are we in a neighbor searching step? */
+{
+ int i;
+ matrix rotmat; /* rotation matrix */
+ matrix tmat; /* inverse rotation */
+ rvec transvec; /* translation vector */
+ struct t_do_edsam *buf;
+
+
+ buf=edi->buf->do_edsam;
+
+
+ /* Broadcast the positions of the AVERAGE structure such that they are known on
+ * every processor. Each node contributes its local positions x and stores them in
+ * the collective ED array buf->xcoll */
+ communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, bNS, x,
+ edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old, box);
+
+    /* Only assemble REFERENCE positions if their indices differ from the average ones */
+ if (!edi->bRefEqAv)
+ communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, bNS, x,
+ edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
+
+ /* If bUpdateShifts was TRUE, the shifts have just been updated in get_positions.
+ * We do not need to update the shifts until the next NS step */
+ buf->bUpdateShifts = FALSE;
+
+ /* Now all nodes have all of the ED/flooding positions in edi->sav->xcoll,
+ * as well as the indices in edi->sav.anrs */
+
+ /* Fit the reference indices to the reference structure */
+ if (edi->bRefEqAv)
+ fit_to_reference(buf->xcoll , transvec, rotmat, edi);
+ else
+ fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
+
+ /* Now apply the translation and rotation to the ED structure */
+ translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
+
+    /* Project fitted structure onto subspace -> store in edi->flood.vecs.xproj */
+ project_to_eigvectors(buf->xcoll,&edi->flood.vecs,edi);
+
+ if (FALSE == edi->flood.bConstForce)
+ {
+ /* Compute Vfl(x) from flood.xproj */
+ edi->flood.Vfl = flood_energy(edi, step);
+
+ update_adaption(edi);
+
+ /* Compute the flooding forces */
+ flood_forces(edi);
+ }
+
+ /* Translate them into cartesian positions */
+ flood_blowup(edi, edi->flood.forces_cartesian);
+
+ /* Rotate forces back so that they correspond to the given structure and not to the fitted one */
+ /* Each node rotates back its local forces */
+ transpose(rotmat,tmat);
+ rotate_x(edi->flood.forces_cartesian, edi->sav.nr_loc, tmat);
+
+ /* Finally add forces to the main force variable */
+ for (i=0; i<edi->sav.nr_loc; i++)
+ rvec_inc(force[edi->sav.anrs_loc[i]],edi->flood.forces_cartesian[i]);
+
+ /* Output is written by the master process */
+ if (do_per_step(step,edi->outfrq) && MASTER(cr))
+ write_edo_flood(edi,edo,step);
+}
+
+
+/* Main flooding routine, called from do_force */
+extern void do_flood(
+ FILE *log, /* md.log file */
+ t_commrec *cr, /* Communication record */
+ rvec x[], /* Positions on the local processor */
+ rvec force[], /* forcefield forces, to these the flooding forces are added */
+ gmx_edsam_t ed, /* ed data structure contains all ED and flooding datasets */
+ matrix box, /* the box */
+ gmx_large_int_t step, /* The relative time step since ir->init_step is already subtracted */
+ gmx_bool bNS) /* Are we in a neighbor searching step? */
+{
+ t_edpar *edi;
+
+
+ if (ed->eEDtype != eEDflood)
+ return;
+
+ edi = ed->edpar;
+ while (edi)
+ {
+ /* Call flooding for one matrix */
+ if (edi->flood.vecs.neig)
+ do_single_flood(ed->edo,x,force,edi,step,box,cr,bNS);
+ edi = edi->next_edi;
+ }
+}
+
+
+/* Called by init_edi, configure some flooding related variables and structures,
+ * print headers to output files */
+static void init_flood(t_edpar *edi, gmx_edsam_t ed, real dt, t_commrec *cr)
+{
+ int i;
+
+
+ edi->flood.Efl = edi->flood.constEfl;
+ edi->flood.Vfl = 0;
+ edi->flood.dt = dt;
+
+ if (edi->flood.vecs.neig)
+ {
+ /* If in any of the datasets we find a flooding vector, flooding is turned on */
+ ed->eEDtype = eEDflood;
+
+ fprintf(stderr,"ED: Flooding of matrix %d is switched on.\n", edi->flood.flood_id);
+
+ if (edi->flood.bConstForce)
+ {
+ /* We have used stpsz as a vehicle to carry the fproj values for constant
+ * force flooding. Now we copy that to flood.vecs.fproj. Note that
+ * in const force flooding, fproj is never changed. */
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ {
+ edi->flood.vecs.fproj[i] = edi->flood.vecs.stpsz[i];
+
+ fprintf(stderr, "ED: applying on eigenvector %d a constant force of %g\n",
+ edi->flood.vecs.ieig[i], edi->flood.vecs.fproj[i]);
+ }
+ }
+ fprintf(ed->edo,"FL_HEADER: Flooding of matrix %d is switched on! The flooding output will have the following format:\n",
+ edi->flood.flood_id);
+ fprintf(ed->edo,"FL_HEADER: Step Efl Vfl deltaF\n");
+ }
+}
+
+
+#ifdef DEBUGHELPERS
+/*********** Energy book keeping ******/
+static void get_flood_enx_names(t_edpar *edi, char** names, int *nnames) /* get header of energies */
+{
+ t_edpar *actual;
+ int count;
+ char buf[STRLEN];
+ actual=edi;
+ count = 1;
+ while (actual)
+ {
+ srenew(names,count);
+ sprintf(buf,"Vfl_%d",count);
+ names[count-1]=strdup(buf);
+ actual=actual->next_edi;
+ count++;
+ }
+ *nnames=count-1;
+}
+
+
+static void get_flood_energies(t_edpar *edi, real Vfl[],int nnames)
+{
+ /*fl has to be big enough to capture nnames-many entries*/
+ t_edpar *actual;
+ int count;
+
+
+ actual=edi;
+ count = 1;
+ while (actual)
+ {
+ Vfl[count-1]=actual->flood.Vfl;
+ actual=actual->next_edi;
+ count++;
+ }
+ if (nnames!=count-1)
+ gmx_fatal(FARGS,"Number of energies is not consistent with t_edi structure");
+}
+/************* END of FLOODING IMPLEMENTATION ****************************/
+#endif
+
+
+gmx_edsam_t ed_open(int nfile,const t_filenm fnm[],unsigned long Flags,t_commrec *cr)
+{
+ gmx_edsam_t ed;
+
+
+ /* Allocate space for the ED data structure */
+ snew(ed, 1);
+
+ /* We want to perform ED (this switch might later be upgraded to eEDflood) */
+ ed->eEDtype = eEDedsam;
+
+ if (MASTER(cr))
+ {
+ /* Open .edi input file: */
+ ed->edinam=ftp2fn(efEDI,nfile,fnm);
+ /* The master opens the .edo output file */
+ fprintf(stderr,"ED sampling will be performed!\n");
+ ed->edonam = ftp2fn(efEDO,nfile,fnm);
+ ed->edo = gmx_fio_fopen(ed->edonam,(Flags & MD_APPENDFILES)? "a+" : "w+");
+ ed->bStartFromCpt = Flags & MD_STARTFROMCPT;
+ }
+ return ed;
+}
+
+
+/* Broadcasts the structure data */
+static void bc_ed_positions(t_commrec *cr, struct gmx_edx *s, int stype)
+{
+ snew_bc(cr, s->anrs, s->nr ); /* Index numbers */
+ snew_bc(cr, s->x , s->nr ); /* Positions */
+ nblock_bc(cr, s->nr, s->anrs );
+ nblock_bc(cr, s->nr, s->x );
+
+ /* For the average & reference structures we need an array for the collective indices,
+ * and we need to broadcast the masses as well */
+ if (stype == eedAV || stype == eedREF)
+ {
+ /* We need these additional variables in the parallel case: */
+ snew(s->c_ind , s->nr ); /* Collective indices */
+ /* Local atom indices get assigned in dd_make_local_group_indices.
+ * There, also memory is allocated */
+ s->nalloc_loc = 0; /* allocation size of s->anrs_loc */
+ snew_bc(cr, s->x_old, s->nr); /* To be able to always make the ED molecule whole, ... */
+ nblock_bc(cr, s->nr, s->x_old); /* ... keep track of shift changes with the help of old coords */
+ }
+
+ /* broadcast masses for the reference structure (for mass-weighted fitting) */
+ if (stype == eedREF)
+ {
+ snew_bc(cr, s->m, s->nr);
+ nblock_bc(cr, s->nr, s->m);
+ }
+
+ /* For the average structure we might need the masses for mass-weighting */
+ if (stype == eedAV)
+ {
+ snew_bc(cr, s->sqrtm, s->nr);
+ nblock_bc(cr, s->nr, s->sqrtm);
+ snew_bc(cr, s->m, s->nr);
+ nblock_bc(cr, s->nr, s->m);
+ }
+}
+
+
+/* Broadcasts the eigenvector data */
+static void bc_ed_vecs(t_commrec *cr, t_eigvec *ev, int length, gmx_bool bHarmonic)
+{
+ int i;
+
+ snew_bc(cr, ev->ieig , ev->neig); /* index numbers of eigenvector */
+ snew_bc(cr, ev->stpsz , ev->neig); /* stepsizes per eigenvector */
+ snew_bc(cr, ev->xproj , ev->neig); /* instantaneous x projection */
+ snew_bc(cr, ev->fproj , ev->neig); /* instantaneous f projection */
+ snew_bc(cr, ev->refproj, ev->neig); /* starting or target projection */
+
+ nblock_bc(cr, ev->neig, ev->ieig );
+ nblock_bc(cr, ev->neig, ev->stpsz );
+ nblock_bc(cr, ev->neig, ev->xproj );
+ nblock_bc(cr, ev->neig, ev->fproj );
+ nblock_bc(cr, ev->neig, ev->refproj);
+
+ snew_bc(cr, ev->vec, ev->neig); /* Eigenvector components */
+ for (i=0; i<ev->neig; i++)
+ {
+ snew_bc(cr, ev->vec[i], length);
+ nblock_bc(cr, length, ev->vec[i]);
+ }
+
+ /* For harmonic restraints the reference projections can change with time */
+ if (bHarmonic)
+ {
+ snew_bc(cr, ev->refproj0 , ev->neig);
+ snew_bc(cr, ev->refprojslope, ev->neig);
+ nblock_bc(cr, ev->neig, ev->refproj0 );
+ nblock_bc(cr, ev->neig, ev->refprojslope);
+ }
+}
+
+
+/* Broadcasts the ED / flooding data to other nodes
+ * and allocates memory where needed */
+static void broadcast_ed_data(t_commrec *cr, gmx_edsam_t ed, int numedis)
+{
+ int nr;
+ t_edpar *edi;
+
+
+ /* Master lets the other nodes know if its ED only or also flooding */
+ gmx_bcast(sizeof(ed->eEDtype), &(ed->eEDtype), cr);
+
+ snew_bc(cr, ed->edpar,1);
+ /* Now transfer the ED data set(s) */
+ edi = ed->edpar;
+ for (nr=0; nr<numedis; nr++)
+ {
+ /* Broadcast a single ED data set */
+ block_bc(cr, *edi);
+
+ /* Broadcast positions */
+ bc_ed_positions(cr, &(edi->sref), eedREF); /* reference positions (don't broadcast masses) */
+ bc_ed_positions(cr, &(edi->sav ), eedAV ); /* average positions (do broadcast masses as well) */
+ bc_ed_positions(cr, &(edi->star), eedTAR); /* target positions */
+ bc_ed_positions(cr, &(edi->sori), eedORI); /* origin positions */
+
+ /* Broadcast eigenvectors */
+ bc_ed_vecs(cr, &edi->vecs.mon , edi->sav.nr, FALSE);
+ bc_ed_vecs(cr, &edi->vecs.linfix, edi->sav.nr, FALSE);
+ bc_ed_vecs(cr, &edi->vecs.linacc, edi->sav.nr, FALSE);
+ bc_ed_vecs(cr, &edi->vecs.radfix, edi->sav.nr, FALSE);
+ bc_ed_vecs(cr, &edi->vecs.radacc, edi->sav.nr, FALSE);
+ bc_ed_vecs(cr, &edi->vecs.radcon, edi->sav.nr, FALSE);
+ /* Broadcast flooding eigenvectors and, if needed, values for the moving reference */
+ bc_ed_vecs(cr, &edi->flood.vecs, edi->sav.nr, edi->flood.bHarmonic);
+
+ /* Set the pointer to the next ED dataset */
+ if (edi->next_edi)
+ {
+ snew_bc(cr, edi->next_edi, 1);
+ edi = edi->next_edi;
+ }
+ }
+}
+
+
+/* init-routine called for every *.edi-cycle, initialises t_edpar structure */
+static void init_edi(gmx_mtop_t *mtop,t_inputrec *ir,
+ t_commrec *cr,gmx_edsam_t ed,t_edpar *edi)
+{
+ int i;
+ real totalmass = 0.0;
+ rvec com;
++ gmx_mtop_atomlookup_t alook=NULL;
+ t_atom *atom;
+
+ /* NOTE Init_edi is executed on the master process only
+ * The initialized data sets are then transmitted to the
+ * other nodes in broadcast_ed_data */
+
+ edi->bNeedDoEdsam = edi->vecs.mon.neig
+ || edi->vecs.linfix.neig
+ || edi->vecs.linacc.neig
+ || edi->vecs.radfix.neig
+ || edi->vecs.radacc.neig
+ || edi->vecs.radcon.neig;
+
++ alook = gmx_mtop_atomlookup_init(mtop);
++
+ /* evaluate masses (reference structure) */
+ snew(edi->sref.m, edi->sref.nr);
+ for (i = 0; i < edi->sref.nr; i++)
+ {
+ if (edi->fitmas)
+ {
- gmx_mtop_atomnr_to_atom(mtop,edi->sav.anrs[i],&atom);
++ gmx_mtop_atomnr_to_atom(alook,edi->sref.anrs[i],&atom);
+ edi->sref.m[i] = atom->m;
+ }
+ else
+ {
+ edi->sref.m[i] = 1.0;
+ }
+
+ /* Check that every m > 0. Bad things will happen otherwise. */
+ if (edi->sref.m[i] <= 0.0)
+ {
+ gmx_fatal(FARGS, "Reference structure atom %d (sam.edi index %d) has a mass of %g.\n"
+ "For a mass-weighted fit, all reference structure atoms need to have a mass >0.\n"
+ "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
+ "atoms from the reference structure by creating a proper index group.\n",
+ i, edi->sref.anrs[i]+1, edi->sref.m[i]);
+ }
+
+ totalmass += edi->sref.m[i];
+ }
+ edi->sref.mtot = totalmass;
+
+ /* Masses m and sqrt(m) for the average structure. Note that m
+ * is needed if forces have to be evaluated in do_edsam */
+ snew(edi->sav.sqrtm, edi->sav.nr );
+ snew(edi->sav.m , edi->sav.nr );
+ for (i = 0; i < edi->sav.nr; i++)
+ {
++ gmx_mtop_atomnr_to_atom(alook,edi->sav.anrs[i],&atom);
+ edi->sav.m[i] = atom->m;
+ if (edi->pcamas)
+ {
+ edi->sav.sqrtm[i] = sqrt(atom->m);
+ }
+ else
+ {
+ edi->sav.sqrtm[i] = 1.0;
+ }
+
+ /* Check that every m > 0. Bad things will happen otherwise. */
+ if (edi->sav.sqrtm[i] <= 0.0)
+ {
+ gmx_fatal(FARGS, "Average structure atom %d (sam.edi index %d) has a mass of %g.\n"
+ "For ED with mass-weighting, all average structure atoms need to have a mass >0.\n"
+ "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
+ "atoms from the average structure by creating a proper index group.\n",
+ i, edi->sav.anrs[i]+1, atom->m);
+ }
+ }
+
++ gmx_mtop_atomlookup_destroy(alook);
++
+ /* put reference structure in origin */
+ get_center(edi->sref.x, edi->sref.m, edi->sref.nr, com);
+ com[XX] = -com[XX];
+ com[YY] = -com[YY];
+ com[ZZ] = -com[ZZ];
+ translate_x(edi->sref.x, edi->sref.nr, com);
+
+ /* Init ED buffer */
+ snew(edi->buf, 1);
+}
+
+
+static void check(const char *line, const char *label)
+{
+ if (!strstr(line,label))
+ gmx_fatal(FARGS,"Could not find input parameter %s at expected position in edsam input-file (.edi)\nline read instead is %s",label,line);
+}
+
+
+static int read_checked_edint(FILE *file,const char *label)
+{
+ char line[STRLEN+1];
+ int idum;
+
+
+ fgets2 (line,STRLEN,file);
+ check(line,label);
+ fgets2 (line,STRLEN,file);
+ sscanf (line,"%d",&idum);
+ return idum;
+}
+
+
+static int read_edint(FILE *file,gmx_bool *bEOF)
+{
+ char line[STRLEN+1];
+ int idum;
+ char *eof;
+
+
+ eof=fgets2 (line,STRLEN,file);
+ if (eof==NULL)
+ {
+ *bEOF = TRUE;
+ return -1;
+ }
+ eof=fgets2 (line,STRLEN,file);
+ if (eof==NULL)
+ {
+ *bEOF = TRUE;
+ return -1;
+ }
+ sscanf (line,"%d",&idum);
+ *bEOF = FALSE;
+ return idum;
+}
+
+
+static real read_checked_edreal(FILE *file,const char *label)
+{
+ char line[STRLEN+1];
+ double rdum;
+
+
+ fgets2 (line,STRLEN,file);
+ check(line,label);
+ fgets2 (line,STRLEN,file);
+ sscanf (line,"%lf",&rdum);
+ return (real) rdum; /* always read as double and convert to single */
+}
+
+
+static void read_edx(FILE *file,int number,int *anrs,rvec *x)
+{
+ int i,j;
+ char line[STRLEN+1];
+ double d[3];
+
+
+ for(i=0; i<number; i++)
+ {
+ fgets2 (line,STRLEN,file);
+ sscanf (line,"%d%lf%lf%lf",&anrs[i],&d[0],&d[1],&d[2]);
+ anrs[i]--; /* we are reading FORTRAN indices */
+ for(j=0; j<3; j++)
+ x[i][j]=d[j]; /* always read as double and convert to single */
+ }
+}
+
+
+static void scan_edvec(FILE *in,int nr,rvec *vec)
+{
+ char line[STRLEN+1];
+ int i;
+ double x,y,z;
+
+
+ for(i=0; (i < nr); i++)
+ {
+ fgets2 (line,STRLEN,in);
+ sscanf (line,"%le%le%le",&x,&y,&z);
+ vec[i][XX]=x;
+ vec[i][YY]=y;
+ vec[i][ZZ]=z;
+ }
+}
+
+
+static void read_edvec(FILE *in,int nr,t_eigvec *tvec,gmx_bool bReadRefproj, gmx_bool *bHaveReference)
+{
+ int i,idum,nscan;
+ double rdum,refproj_dum=0.0,refprojslope_dum=0.0;
+ char line[STRLEN+1];
+
+
+ tvec->neig=read_checked_edint(in,"NUMBER OF EIGENVECTORS");
+ if (tvec->neig >0)
+ {
+ snew(tvec->ieig ,tvec->neig);
+ snew(tvec->stpsz ,tvec->neig);
+ snew(tvec->vec ,tvec->neig);
+ snew(tvec->xproj ,tvec->neig);
+ snew(tvec->fproj ,tvec->neig);
+ snew(tvec->refproj,tvec->neig);
+ if (bReadRefproj)
+ {
+ snew(tvec->refproj0 ,tvec->neig);
+ snew(tvec->refprojslope,tvec->neig);
+ }
+
+ for(i=0; (i < tvec->neig); i++)
+ {
+ fgets2 (line,STRLEN,in);
+ if (bReadRefproj) /* ONLY when using flooding as harmonic restraint */
+ {
+ nscan = sscanf(line,"%d%lf%lf%lf",&idum,&rdum,&refproj_dum,&refprojslope_dum);
+ /* Zero out values which were not scanned */
+ switch(nscan)
+ {
+ case 4:
+ /* Every 4 values read, including reference position */
+ *bHaveReference = TRUE;
+ break;
+ case 3:
+ /* A reference position is provided */
+ *bHaveReference = TRUE;
+ /* No value for slope, set to 0 */
+ refprojslope_dum = 0.0;
+ break;
+ case 2:
+ /* No values for reference projection and slope, set to 0 */
+ refproj_dum = 0.0;
+ refprojslope_dum = 0.0;
+ break;
+ default:
+ gmx_fatal(FARGS,"Expected 2 - 4 (not %d) values for flooding vec: <nr> <spring const> <refproj> <refproj-slope>\n", nscan);
+ break;
+ }
+ tvec->refproj[i]=refproj_dum;
+ tvec->refproj0[i]=refproj_dum;
+ tvec->refprojslope[i]=refprojslope_dum;
+ }
+ else /* Normal flooding */
+ {
+ nscan = sscanf(line,"%d%lf",&idum,&rdum);
+ if (nscan != 2)
+ gmx_fatal(FARGS,"Expected 2 values for flooding vec: <nr> <stpsz>\n");
+ }
+ tvec->ieig[i]=idum;
+ tvec->stpsz[i]=rdum;
+ } /* end of loop over eigenvectors */
+
+ for(i=0; (i < tvec->neig); i++)
+ {
+ snew(tvec->vec[i],nr);
+ scan_edvec(in,nr,tvec->vec[i]);
+ }
+ }
+}
+
+
+/* calls read_edvec for the vector groups, only for flooding there is an extra call */
+static void read_edvecs(FILE *in,int nr,t_edvecs *vecs)
+{
+ gmx_bool bHaveReference = FALSE;
+
+
+ read_edvec(in, nr, &vecs->mon , FALSE, &bHaveReference);
+ read_edvec(in, nr, &vecs->linfix, FALSE, &bHaveReference);
+ read_edvec(in, nr, &vecs->linacc, FALSE, &bHaveReference);
+ read_edvec(in, nr, &vecs->radfix, FALSE, &bHaveReference);
+ read_edvec(in, nr, &vecs->radacc, FALSE, &bHaveReference);
+ read_edvec(in, nr, &vecs->radcon, FALSE, &bHaveReference);
+}
+
+
+/* Check if the same atom indices are used for reference and average positions */
+static gmx_bool check_if_same(struct gmx_edx sref, struct gmx_edx sav)
+{
+ int i;
+
+
+ /* If the number of atoms differs between the two structures,
+ * they cannot be identical */
+ if (sref.nr != sav.nr)
+ return FALSE;
+
+ /* Now that we know that both stuctures have the same number of atoms,
+ * check if also the indices are identical */
+ for (i=0; i < sav.nr; i++)
+ {
+ if (sref.anrs[i] != sav.anrs[i])
+ return FALSE;
+ }
+ fprintf(stderr, "ED: Note: Reference and average structure are composed of the same atom indices.\n");
+
+ return TRUE;
+}
+
+
+static int read_edi(FILE* in, gmx_edsam_t ed,t_edpar *edi,int nr_mdatoms, int edi_nr, t_commrec *cr)
+{
+ int readmagic;
+ const int magic=670;
+ gmx_bool bEOF;
+
+ /* Was a specific reference point for the flooding/umbrella potential provided in the edi file? */
+ gmx_bool bHaveReference = FALSE;
+
+
+ /* the edi file is not free format, so expect problems if the input is corrupt. */
+
+ /* check the magic number */
+ readmagic=read_edint(in,&bEOF);
+ /* Check whether we have reached the end of the input file */
+ if (bEOF)
+ return 0;
+
+ if (readmagic != magic)
+ {
+ if (readmagic==666 || readmagic==667 || readmagic==668)
+ gmx_fatal(FARGS,"Wrong magic number: Use newest version of make_edi to produce edi file");
+ else if (readmagic != 669)
+ gmx_fatal(FARGS,"Wrong magic number %d in %s",readmagic,ed->edinam);
+ }
+
+ /* check the number of atoms */
+ edi->nini=read_edint(in,&bEOF);
+ if (edi->nini != nr_mdatoms)
+ gmx_fatal(FARGS,"Nr of atoms in %s (%d) does not match nr of md atoms (%d)",
+ ed->edinam,edi->nini,nr_mdatoms);
+
+ /* Done checking. For the rest we blindly trust the input */
+ edi->fitmas = read_checked_edint(in,"FITMAS");
+ edi->pcamas = read_checked_edint(in,"ANALYSIS_MAS");
+ edi->outfrq = read_checked_edint(in,"OUTFRQ");
+ edi->maxedsteps = read_checked_edint(in,"MAXLEN");
+ edi->slope = read_checked_edreal(in,"SLOPECRIT");
+
+ edi->presteps = read_checked_edint(in,"PRESTEPS");
+ edi->flood.deltaF0 = read_checked_edreal(in,"DELTA_F0");
+ edi->flood.deltaF = read_checked_edreal(in,"INIT_DELTA_F");
+ edi->flood.tau = read_checked_edreal(in,"TAU");
+ edi->flood.constEfl = read_checked_edreal(in,"EFL_NULL");
+ edi->flood.alpha2 = read_checked_edreal(in,"ALPHA2");
+ edi->flood.kT = read_checked_edreal(in,"KT");
+ edi->flood.bHarmonic = read_checked_edint(in,"HARMONIC");
+ if (readmagic > 669)
+ edi->flood.bConstForce = read_checked_edint(in,"CONST_FORCE_FLOODING");
+ else
+ edi->flood.bConstForce = FALSE;
+ edi->flood.flood_id = edi_nr;
+ edi->sref.nr = read_checked_edint(in,"NREF");
+
+ /* allocate space for reference positions and read them */
+ snew(edi->sref.anrs,edi->sref.nr);
+ snew(edi->sref.x ,edi->sref.nr);
+ snew(edi->sref.x_old,edi->sref.nr);
+ edi->sref.sqrtm =NULL;
+ read_edx(in,edi->sref.nr,edi->sref.anrs,edi->sref.x);
+
+ /* average positions. they define which atoms will be used for ED sampling */
+ edi->sav.nr=read_checked_edint(in,"NAV");
+ snew(edi->sav.anrs,edi->sav.nr);
+ snew(edi->sav.x ,edi->sav.nr);
+ snew(edi->sav.x_old,edi->sav.nr);
+ read_edx(in,edi->sav.nr,edi->sav.anrs,edi->sav.x);
+
+ /* Check if the same atom indices are used for reference and average positions */
+ edi->bRefEqAv = check_if_same(edi->sref, edi->sav);
+
+ /* eigenvectors */
+ read_edvecs(in,edi->sav.nr,&edi->vecs);
+ read_edvec(in,edi->sav.nr,&edi->flood.vecs,edi->flood.bHarmonic, &bHaveReference);
+
+ /* target positions */
+ edi->star.nr=read_edint(in,&bEOF);
+ if (edi->star.nr > 0)
+ {
+ snew(edi->star.anrs,edi->star.nr);
+ snew(edi->star.x ,edi->star.nr);
+ edi->star.sqrtm =NULL;
+ read_edx(in,edi->star.nr,edi->star.anrs,edi->star.x);
+ }
+
+ /* positions defining origin of expansion circle */
+ edi->sori.nr=read_edint(in,&bEOF);
+ if (edi->sori.nr > 0)
+ {
+ if (bHaveReference)
+ {
+ /* Both an -ori structure and a at least one manual reference point have been
+ * specified. That's ambiguous and probably not intentional. */
+ gmx_fatal(FARGS, "ED: An origin structure has been provided and a at least one (moving) reference\n"
+ " point was manually specified in the edi file. That is ambiguous. Aborting.\n");
+ }
+ snew(edi->sori.anrs,edi->sori.nr);
+ snew(edi->sori.x ,edi->sori.nr);
+ edi->sori.sqrtm =NULL;
+ read_edx(in,edi->sori.nr,edi->sori.anrs,edi->sori.x);
+ }
+
+ /* all done */
+ return 1;
+}
+
+
+
+/* Read in the edi input file. Note that it may contain several ED data sets which were
+ * achieved by concatenating multiple edi files. The standard case would be a single ED
+ * data set, though. */
+static void read_edi_file(gmx_edsam_t ed, t_edpar *edi, int nr_mdatoms, t_commrec *cr)
+{
+ FILE *in;
+ t_edpar *curr_edi,*last_edi;
+ t_edpar *edi_read;
+ int edi_nr = 0;
+
+
+ /* This routine is executed on the master only */
+
+ /* Open the .edi parameter input file */
+ in = gmx_fio_fopen(ed->edinam,"r");
+ fprintf(stderr, "ED: Reading edi file %s\n", ed->edinam);
+
+ /* Now read a sequence of ED input parameter sets from the edi file */
+ curr_edi=edi;
+ last_edi=edi;
+ while( read_edi(in, ed, curr_edi, nr_mdatoms, edi_nr, cr) )
+ {
+ edi_nr++;
+ /* Make shure that the number of atoms in each dataset is the same as in the tpr file */
+ if (edi->nini != nr_mdatoms)
+ gmx_fatal(FARGS,"edi file %s (dataset #%d) was made for %d atoms, but the simulation contains %d atoms.",
+ ed->edinam, edi_nr, edi->nini, nr_mdatoms);
+ /* Since we arrived within this while loop we know that there is still another data set to be read in */
+ /* We need to allocate space for the data: */
+ snew(edi_read,1);
+ /* Point the 'next_edi' entry to the next edi: */
+ curr_edi->next_edi=edi_read;
+ /* Keep the curr_edi pointer for the case that the next dataset is empty: */
+ last_edi = curr_edi;
+ /* Let's prepare to read in the next edi data set: */
+ curr_edi = edi_read;
+ }
+ if (edi_nr == 0)
+ gmx_fatal(FARGS, "No complete ED data set found in edi file %s.", ed->edinam);
+
+ /* Terminate the edi dataset list with a NULL pointer: */
+ last_edi->next_edi = NULL;
+
+ fprintf(stderr, "ED: Found %d ED dataset%s.\n", edi_nr, edi_nr>1? "s" : "");
+
+ /* Close the .edi file again */
+ gmx_fio_fclose(in);
+}
+
+
+struct t_fit_to_ref {
+ rvec *xcopy; /* Working copy of the positions in fit_to_reference */
+};
+
+/* Fit the current positions to the reference positions
+ * Do not actually do the fit, just return rotation and translation.
+ * Note that the COM of the reference structure was already put into
+ * the origin by init_edi. */
+static void fit_to_reference(rvec *xcoll, /* The positions to be fitted */
+ rvec transvec, /* The translation vector */
+ matrix rotmat, /* The rotation matrix */
+ t_edpar *edi) /* Just needed for do_edfit */
+{
+ rvec com; /* center of mass */
+ int i;
+ struct t_fit_to_ref *loc;
+
+
+ /* Allocate memory the first time this routine is called for each edi dataset */
+ if (NULL == edi->buf->fit_to_ref)
+ {
+ snew(edi->buf->fit_to_ref, 1);
+ snew(edi->buf->fit_to_ref->xcopy, edi->sref.nr);
+ }
+ loc = edi->buf->fit_to_ref;
+
+ /* We do not touch the original positions but work on a copy. */
+ for (i=0; i<edi->sref.nr; i++)
+ copy_rvec(xcoll[i], loc->xcopy[i]);
+
+ /* Calculate the center of mass */
+ get_center(loc->xcopy, edi->sref.m, edi->sref.nr, com);
+
+ transvec[XX] = -com[XX];
+ transvec[YY] = -com[YY];
+ transvec[ZZ] = -com[ZZ];
+
+ /* Subtract the center of mass from the copy */
+ translate_x(loc->xcopy, edi->sref.nr, transvec);
+
+ /* Determine the rotation matrix */
+ do_edfit(edi->sref.nr, edi->sref.x, loc->xcopy, rotmat, edi);
+}
+
+
+static void translate_and_rotate(rvec *x, /* The positions to be translated and rotated */
+ int nat, /* How many positions are there? */
+ rvec transvec, /* The translation vector */
+ matrix rotmat) /* The rotation matrix */
+{
+ /* Translation */
+ translate_x(x, nat, transvec);
+
+ /* Rotation */
+ rotate_x(x, nat, rotmat);
+}
+
+
+/* Gets the rms deviation of the positions to the structure s */
+/* fit_to_structure has to be called before calling this routine! */
+static real rmsd_from_structure(rvec *x, /* The positions under consideration */
+ struct gmx_edx *s) /* The structure from which the rmsd shall be computed */
+{
+ real rmsd=0.0;
+ int i;
+
+
+ for (i=0; i < s->nr; i++)
+ rmsd += distance2(s->x[i], x[i]);
+
+ rmsd /= (real) s->nr;
+ rmsd = sqrt(rmsd);
+
+ return rmsd;
+}
+
+
+void dd_make_local_ed_indices(gmx_domdec_t *dd, struct gmx_edsam *ed)
+{
+ t_edpar *edi;
+
+
+ if (ed->eEDtype != eEDnone)
+ {
+ /* Loop over ED datasets (usually there is just one dataset, though) */
+ edi=ed->edpar;
+ while (edi)
+ {
+ /* Local atoms of the reference structure (for fitting), need only be assembled
+ * if their indices differ from the average ones */
+ if (!edi->bRefEqAv)
+ dd_make_local_group_indices(dd->ga2la, edi->sref.nr, edi->sref.anrs,
+ &edi->sref.nr_loc, &edi->sref.anrs_loc, &edi->sref.nalloc_loc, edi->sref.c_ind);
+
+ /* Local atoms of the average structure (on these ED will be performed) */
+ dd_make_local_group_indices(dd->ga2la, edi->sav.nr, edi->sav.anrs,
+ &edi->sav.nr_loc, &edi->sav.anrs_loc, &edi->sav.nalloc_loc, edi->sav.c_ind);
+
+ /* Indicate that the ED shift vectors for this structure need to be updated
+ * at the next call to communicate_group_positions, since obviously we are in a NS step */
+ edi->buf->do_edsam->bUpdateShifts = TRUE;
+
+ /* Set the pointer to the next ED dataset (if any) */
+ edi=edi->next_edi;
+ }
+ }
+}
+
+
+static inline void ed_unshift_single_coord(matrix box, const rvec x, const ivec is, rvec xu)
+{
+ int tx,ty,tz;
+
+
+ tx=is[XX];
+ ty=is[YY];
+ tz=is[ZZ];
+
+ if(TRICLINIC(box))
+ {
+ xu[XX] = x[XX]-tx*box[XX][XX]-ty*box[YY][XX]-tz*box[ZZ][XX];
+ xu[YY] = x[YY]-ty*box[YY][YY]-tz*box[ZZ][YY];
+ xu[ZZ] = x[ZZ]-tz*box[ZZ][ZZ];
+ } else
+ {
+ xu[XX] = x[XX]-tx*box[XX][XX];
+ xu[YY] = x[YY]-ty*box[YY][YY];
+ xu[ZZ] = x[ZZ]-tz*box[ZZ][ZZ];
+ }
+}
+
+
+static void do_linfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
+{
+ int i, j;
+ real proj, add;
+ rvec vec_dum;
+
+
+ /* loop over linfix vectors */
+ for (i=0; i<edi->vecs.linfix.neig; i++)
+ {
+ /* calculate the projection */
+ proj = projectx(edi, xcoll, edi->vecs.linfix.vec[i]);
+
+ /* calculate the correction */
+ add = edi->vecs.linfix.refproj[i] + step*edi->vecs.linfix.stpsz[i] - proj;
+
+ /* apply the correction */
+ add /= edi->sav.sqrtm[i];
+ for (j=0; j<edi->sav.nr; j++)
+ {
+ svmul(add, edi->vecs.linfix.vec[i][j], vec_dum);
+ rvec_inc(xcoll[j], vec_dum);
+ }
+ }
+}
+
+
+static void do_linacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
+{
+ int i, j;
+ real proj, add;
+ rvec vec_dum;
+
+
+ /* loop over linacc vectors */
+ for (i=0; i<edi->vecs.linacc.neig; i++)
+ {
+ /* calculate the projection */
+ proj=projectx(edi, xcoll, edi->vecs.linacc.vec[i]);
+
+ /* calculate the correction */
+ add = 0.0;
+ if (edi->vecs.linacc.stpsz[i] > 0.0)
+ {
+ if ((proj-edi->vecs.linacc.refproj[i]) < 0.0)
+ add = edi->vecs.linacc.refproj[i] - proj;
+ }
+ if (edi->vecs.linacc.stpsz[i] < 0.0)
+ {
+ if ((proj-edi->vecs.linacc.refproj[i]) > 0.0)
+ add = edi->vecs.linacc.refproj[i] - proj;
+ }
+
+ /* apply the correction */
+ add /= edi->sav.sqrtm[i];
+ for (j=0; j<edi->sav.nr; j++)
+ {
+ svmul(add, edi->vecs.linacc.vec[i][j], vec_dum);
+ rvec_inc(xcoll[j], vec_dum);
+ }
+
+ /* new positions will act as reference */
+ edi->vecs.linacc.refproj[i] = proj + add;
+ }
+}
+
+
+static void do_radfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
+{
+ int i,j;
+ real *proj, rad=0.0, ratio;
+ rvec vec_dum;
+
+
+ if (edi->vecs.radfix.neig == 0)
+ return;
+
+ snew(proj, edi->vecs.radfix.neig);
+
+ /* loop over radfix vectors */
+ for (i=0; i<edi->vecs.radfix.neig; i++)
+ {
+ /* calculate the projections, radius */
+ proj[i] = projectx(edi, xcoll, edi->vecs.radfix.vec[i]);
+ rad += pow(proj[i] - edi->vecs.radfix.refproj[i], 2);
+ }
+
+ rad = sqrt(rad);
+ ratio = (edi->vecs.radfix.stpsz[0]+edi->vecs.radfix.radius)/rad - 1.0;
+ edi->vecs.radfix.radius += edi->vecs.radfix.stpsz[0];
+
+ /* loop over radfix vectors */
+ for (i=0; i<edi->vecs.radfix.neig; i++)
+ {
+ proj[i] -= edi->vecs.radfix.refproj[i];
+
+ /* apply the correction */
+ proj[i] /= edi->sav.sqrtm[i];
+ proj[i] *= ratio;
+ for (j=0; j<edi->sav.nr; j++) {
+ svmul(proj[i], edi->vecs.radfix.vec[i][j], vec_dum);
+ rvec_inc(xcoll[j], vec_dum);
+ }
+ }
+
+ sfree(proj);
+}
+
+
+static void do_radacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
+{
+ int i,j;
+ real *proj, rad=0.0, ratio=0.0;
+ rvec vec_dum;
+
+
+ if (edi->vecs.radacc.neig == 0)
+ return;
+
+ snew(proj,edi->vecs.radacc.neig);
+
+ /* loop over radacc vectors */
+ for (i=0; i<edi->vecs.radacc.neig; i++)
+ {
+ /* calculate the projections, radius */
+ proj[i] = projectx(edi, xcoll, edi->vecs.radacc.vec[i]);
+ rad += pow(proj[i] - edi->vecs.radacc.refproj[i], 2);
+ }
+ rad = sqrt(rad);
+
+ /* only correct when radius decreased */
+ if (rad < edi->vecs.radacc.radius)
+ {
+ ratio = edi->vecs.radacc.radius/rad - 1.0;
+ rad = edi->vecs.radacc.radius;
+ }
+ else
+ edi->vecs.radacc.radius = rad;
+
+ /* loop over radacc vectors */
+ for (i=0; i<edi->vecs.radacc.neig; i++)
+ {
+ proj[i] -= edi->vecs.radacc.refproj[i];
+
+ /* apply the correction */
+ proj[i] /= edi->sav.sqrtm[i];
+ proj[i] *= ratio;
+ for (j=0; j<edi->sav.nr; j++)
+ {
+ svmul(proj[i], edi->vecs.radacc.vec[i][j], vec_dum);
+ rvec_inc(xcoll[j], vec_dum);
+ }
+ }
+ sfree(proj);
+}
+
+
+struct t_do_radcon {
+ real *proj;
+};
+
+static void do_radcon(rvec *xcoll, t_edpar *edi, t_commrec *cr)
+{
+ int i,j;
+ real rad=0.0, ratio=0.0;
+ struct t_do_radcon *loc;
+ gmx_bool bFirst;
+ rvec vec_dum;
+
+
+ if(edi->buf->do_radcon != NULL)
+ {
+ bFirst = FALSE;
+ loc = edi->buf->do_radcon;
+ }
+ else
+ {
+ bFirst = TRUE;
+ snew(edi->buf->do_radcon, 1);
+ }
+ loc = edi->buf->do_radcon;
+
+ if (edi->vecs.radcon.neig == 0)
+ return;
+
+ if (bFirst)
+ snew(loc->proj, edi->vecs.radcon.neig);
+
+ /* loop over radcon vectors */
+ for (i=0; i<edi->vecs.radcon.neig; i++)
+ {
+ /* calculate the projections, radius */
+ loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
+ rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
+ }
+ rad = sqrt(rad);
+ /* only correct when radius increased */
+ if (rad > edi->vecs.radcon.radius)
+ {
+ ratio = edi->vecs.radcon.radius/rad - 1.0;
+
+ /* loop over radcon vectors */
+ for (i=0; i<edi->vecs.radcon.neig; i++)
+ {
+ /* apply the correction */
+ loc->proj[i] -= edi->vecs.radcon.refproj[i];
+ loc->proj[i] /= edi->sav.sqrtm[i];
+ loc->proj[i] *= ratio;
+
+ for (j=0; j<edi->sav.nr; j++)
+ {
+ svmul(loc->proj[i], edi->vecs.radcon.vec[i][j], vec_dum);
+ rvec_inc(xcoll[j], vec_dum);
+ }
+ }
+ }
+ else
+ edi->vecs.radcon.radius = rad;
+
+ if (rad != edi->vecs.radcon.radius)
+ {
+ rad = 0.0;
+ for (i=0; i<edi->vecs.radcon.neig; i++)
+ {
+ /* calculate the projections, radius */
+ loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
+ rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
+ }
+ rad = sqrt(rad);
+ }
+}
+
+
+static void ed_apply_constraints(rvec *xcoll, t_edpar *edi, gmx_large_int_t step, t_commrec *cr)
+{
+ int i;
+
+
+ /* subtract the average positions */
+ for (i=0; i<edi->sav.nr; i++)
+ rvec_dec(xcoll[i], edi->sav.x[i]);
+
+ /* apply the constraints */
+ if (step >= 0)
+ do_linfix(xcoll, edi, step, cr);
+ do_linacc(xcoll, edi, cr);
+ if (step >= 0)
+ do_radfix(xcoll, edi, step, cr);
+ do_radacc(xcoll, edi, cr);
+ do_radcon(xcoll, edi, cr);
+
+ /* add back the average positions */
+ for (i=0; i<edi->sav.nr; i++)
+ rvec_inc(xcoll[i], edi->sav.x[i]);
+}
+
+
+/* Write out the projections onto the eigenvectors */
+static void write_edo(int nr_edi, t_edpar *edi, gmx_edsam_t ed, gmx_large_int_t step,real rmsd)
+{
+ int i;
+ char buf[22];
+
+
+ if (edi->bNeedDoEdsam)
+ {
+ if (step == -1)
+ fprintf(ed->edo, "Initial projections:\n");
+ else
+ {
+ fprintf(ed->edo,"Step %s, ED #%d ", gmx_step_str(step, buf), nr_edi);
+ fprintf(ed->edo," RMSD %f nm\n",rmsd);
+ }
+
+ if (edi->vecs.mon.neig)
+ {
+ fprintf(ed->edo," Monitor eigenvectors");
+ for (i=0; i<edi->vecs.mon.neig; i++)
+ fprintf(ed->edo," %d: %12.5e ",edi->vecs.mon.ieig[i],edi->vecs.mon.xproj[i]);
+ fprintf(ed->edo,"\n");
+ }
+ if (edi->vecs.linfix.neig)
+ {
+ fprintf(ed->edo," Linfix eigenvectors");
+ for (i=0; i<edi->vecs.linfix.neig; i++)
+ fprintf(ed->edo," %d: %12.5e ",edi->vecs.linfix.ieig[i],edi->vecs.linfix.xproj[i]);
+ fprintf(ed->edo,"\n");
+ }
+ if (edi->vecs.linacc.neig)
+ {
+ fprintf(ed->edo," Linacc eigenvectors");
+ for (i=0; i<edi->vecs.linacc.neig; i++)
+ fprintf(ed->edo," %d: %12.5e ",edi->vecs.linacc.ieig[i],edi->vecs.linacc.xproj[i]);
+ fprintf(ed->edo,"\n");
+ }
+ if (edi->vecs.radfix.neig)
+ {
+ fprintf(ed->edo," Radfix eigenvectors");
+ for (i=0; i<edi->vecs.radfix.neig; i++)
+ fprintf(ed->edo," %d: %12.5e ",edi->vecs.radfix.ieig[i],edi->vecs.radfix.xproj[i]);
+ fprintf(ed->edo,"\n");
+ fprintf(ed->edo," fixed increment radius = %f\n", calc_radius(&edi->vecs.radfix));
+ }
+ if (edi->vecs.radacc.neig)
+ {
+ fprintf(ed->edo," Radacc eigenvectors");
+ for (i=0; i<edi->vecs.radacc.neig; i++)
+ fprintf(ed->edo," %d: %12.5e ",edi->vecs.radacc.ieig[i],edi->vecs.radacc.xproj[i]);
+ fprintf(ed->edo,"\n");
+ fprintf(ed->edo," acceptance radius = %f\n", calc_radius(&edi->vecs.radacc));
+ }
+ if (edi->vecs.radcon.neig)
+ {
+ fprintf(ed->edo," Radcon eigenvectors");
+ for (i=0; i<edi->vecs.radcon.neig; i++)
+ fprintf(ed->edo," %d: %12.5e ",edi->vecs.radcon.ieig[i],edi->vecs.radcon.xproj[i]);
+ fprintf(ed->edo,"\n");
+ fprintf(ed->edo," contracting radius = %f\n", calc_radius(&edi->vecs.radcon));
+ }
+ }
+}
+
+/* Returns if any constraints are switched on */
+static int ed_constraints(gmx_bool edtype, t_edpar *edi)
+{
+ if (edtype == eEDedsam || edtype == eEDflood)
+ {
+ return (edi->vecs.linfix.neig || edi->vecs.linacc.neig ||
+ edi->vecs.radfix.neig || edi->vecs.radacc.neig ||
+ edi->vecs.radcon.neig);
+ }
+ return 0;
+}
+
+
+/* Copies reference projection 'refproj' to fixed 'refproj0' variable for flooding/
+ * umbrella sampling simulations. */
+static void copyEvecReference(t_eigvec* floodvecs)
+{
+ int i;
+
+
+ if (NULL==floodvecs->refproj0)
+ snew(floodvecs->refproj0, floodvecs->neig);
+
+ for (i=0; i<floodvecs->neig; i++)
+ {
+ floodvecs->refproj0[i] = floodvecs->refproj[i];
+ }
+}
+
+
+void init_edsam(gmx_mtop_t *mtop, /* global topology */
+ t_inputrec *ir, /* input record */
+ t_commrec *cr, /* communication record */
+ gmx_edsam_t ed, /* contains all ED data */
+ rvec x[], /* positions of the whole MD system */
+ matrix box) /* the box */
+{
+ t_edpar *edi = NULL; /* points to a single edi data set */
+ int numedis=0; /* keep track of the number of ED data sets in edi file */
+ int i,nr_edi,avindex;
+ rvec *x_pbc = NULL; /* positions of the whole MD system with pbc removed */
+ rvec *xfit = NULL; /* the positions which will be fitted to the reference structure */
+ rvec *xstart = NULL; /* the positions which are subject to ED sampling */
+ rvec fit_transvec; /* translation ... */
+ matrix fit_rotmat; /* ... and rotation from fit to reference structure */
+
+
+ if (!DOMAINDECOMP(cr) && PAR(cr) && MASTER(cr))
+ gmx_fatal(FARGS, "Please switch on domain decomposition to use essential dynamics in parallel.");
+
+ if (MASTER(cr))
+ fprintf(stderr, "ED: Initializing essential dynamics constraints.\n");
+
+ /* Needed for initializing radacc radius in do_edsam */
+ ed->bFirst = 1;
+
+ /* The input file is read by the master and the edi structures are
+ * initialized here. Input is stored in ed->edpar. Then the edi
+ * structures are transferred to the other nodes */
+ if (MASTER(cr))
+ {
+ snew(ed->edpar,1);
+ /* Read the whole edi file at once: */
+ read_edi_file(ed,ed->edpar,mtop->natoms,cr);
+
+ /* Initialization for every ED/flooding dataset. Flooding uses one edi dataset per
+ * flooding vector, Essential dynamics can be applied to more than one structure
+ * as well, but will be done in the order given in the edi file, so
+ * expect different results for different order of edi file concatenation! */
+ edi=ed->edpar;
+ while(edi != NULL)
+ {
+ init_edi(mtop,ir,cr,ed,edi);
+
+ /* Init flooding parameters if needed */
+ init_flood(edi,ed,ir->delta_t,cr);
+
+ edi=edi->next_edi;
+ numedis++;
+ }
+ }
+
+ /* The master does the work here. The other nodes get the positions
+ * not before dd_partition_system which is called after init_edsam */
+ if (MASTER(cr))
+ {
+ /* Remove pbc, make molecule whole.
+ * When ir->bContinuation=TRUE this has already been done, but ok.
+ */
+ snew(x_pbc,mtop->natoms);
+ m_rveccopy(mtop->natoms,x,x_pbc);
+ do_pbc_first_mtop(NULL,ir->ePBC,box,mtop,x_pbc);
+
+ /* Reset pointer to first ED data set which contains the actual ED data */
+ edi=ed->edpar;
+
+ /* Loop over all ED/flooding data sets (usually only one, though) */
+ for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
+ {
+ /* We use srenew to allocate memory since the size of the buffers
+ * is likely to change with every ED dataset */
+ srenew(xfit , edi->sref.nr );
+ srenew(xstart, edi->sav.nr );
+
+ /* Extract the positions of the atoms to which will be fitted */
+ for (i=0; i < edi->sref.nr; i++)
+ {
+ copy_rvec(x_pbc[edi->sref.anrs[i]], xfit[i]);
+
+ /* Save the sref positions such that in the next time step we can make the ED group whole
+ * in case any of the atoms do not have the correct PBC representation */
+ copy_rvec(xfit[i], edi->sref.x_old[i]);
+ }
+
+ /* Extract the positions of the atoms subject to ED sampling */
+ for (i=0; i < edi->sav.nr; i++)
+ {
+ copy_rvec(x_pbc[edi->sav.anrs[i]], xstart[i]);
+
+ /* Save the sav positions such that in the next time step we can make the ED group whole
+ * in case any of the atoms do not have the correct PBC representation */
+ copy_rvec(xstart[i], edi->sav.x_old[i]);
+ }
+
+ /* Make the fit to the REFERENCE structure, get translation and rotation */
+ fit_to_reference(xfit, fit_transvec, fit_rotmat, edi);
+
+ /* Output how well we fit to the reference at the start */
+ translate_and_rotate(xfit, edi->sref.nr, fit_transvec, fit_rotmat);
+ fprintf(stderr, "ED: Initial RMSD from reference after fit = %f nm (dataset #%d)\n",
+ rmsd_from_structure(xfit, &edi->sref), nr_edi);
+
+ /* Now apply the translation and rotation to the atoms on which ED sampling will be performed */
+ translate_and_rotate(xstart, edi->sav.nr, fit_transvec, fit_rotmat);
+
+ /* calculate initial projections */
+ project(xstart, edi);
+
+ /* For the target and origin structure both a reference (fit) and an
+ * average structure can be provided in make_edi. If both structures
+ * are the same, make_edi only stores one of them in the .edi file.
+ * If they differ, first the fit and then the average structure is stored
+ * in star (or sor), thus the number of entries in star/sor is
+ * (n_fit + n_av) with n_fit the size of the fitting group and n_av
+ * the size of the average group. */
+
+ /* process target structure, if required */
+ if (edi->star.nr > 0)
+ {
+ fprintf(stderr, "ED: Fitting target structure to reference structure\n");
+
+ /* get translation & rotation for fit of target structure to reference structure */
+ fit_to_reference(edi->star.x, fit_transvec, fit_rotmat, edi);
+ /* do the fit */
+ translate_and_rotate(edi->star.x, edi->star.nr, fit_transvec, fit_rotmat);
+ if (edi->star.nr == edi->sav.nr)
+ {
+ avindex = 0;
+ }
+ else /* edi->star.nr = edi->sref.nr + edi->sav.nr */
+ {
+ /* The last sav.nr indices of the target structure correspond to
+ * the average structure, which must be projected */
+ avindex = edi->star.nr - edi->sav.nr;
+ }
+ rad_project(edi, &edi->star.x[avindex], &edi->vecs.radcon, cr);
+ } else
+ rad_project(edi, xstart, &edi->vecs.radcon, cr);
+
+ /* process structure that will serve as origin of expansion circle */
+ if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
+ fprintf(stderr, "ED: Setting center of flooding potential (0 = average structure)\n");
+
+ if (edi->sori.nr > 0)
+ {
+ fprintf(stderr, "ED: Fitting origin structure to reference structure\n");
+
+ /* fit this structure to reference structure */
+ fit_to_reference(edi->sori.x, fit_transvec, fit_rotmat, edi);
+ /* do the fit */
+ translate_and_rotate(edi->sori.x, edi->sori.nr, fit_transvec, fit_rotmat);
+ if (edi->sori.nr == edi->sav.nr)
+ {
+ avindex = 0;
+ }
+ else /* edi->sori.nr = edi->sref.nr + edi->sav.nr */
+ {
+ /* For the projection, we need the last sav.nr indices of sori */
+ avindex = edi->sori.nr - edi->sav.nr;
+ }
+
+ rad_project(edi, &edi->sori.x[avindex], &edi->vecs.radacc, cr);
+ rad_project(edi, &edi->sori.x[avindex], &edi->vecs.radfix, cr);
+ if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
+ {
+ fprintf(stderr, "ED: The ORIGIN structure will define the flooding potential center.\n");
+ /* Set center of flooding potential to the ORIGIN structure */
+ rad_project(edi, &edi->sori.x[avindex], &edi->flood.vecs, cr);
+ /* We already know that no (moving) reference position was provided,
+ * therefore we can overwrite refproj[0]*/
+ copyEvecReference(&edi->flood.vecs);
+ }
+ }
+ else /* No origin structure given */
+ {
+ rad_project(edi, xstart, &edi->vecs.radacc, cr);
+ rad_project(edi, xstart, &edi->vecs.radfix, cr);
+ if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
+ {
+ if (edi->flood.bHarmonic)
+ {
+ fprintf(stderr, "ED: A (possibly changing) ref. projection will define the flooding potential center.\n");
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ edi->flood.vecs.refproj[i] = edi->flood.vecs.refproj0[i];
+ }
+ else
+ {
+ fprintf(stderr, "ED: The AVERAGE structure will define the flooding potential center.\n");
+ /* Set center of flooding potential to the center of the covariance matrix,
+ * i.e. the average structure, i.e. zero in the projected system */
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ edi->flood.vecs.refproj[i] = 0.0;
+ }
+ }
+ }
+ /* For convenience, output the center of the flooding potential for the eigenvectors */
+ if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
+ {
+ for (i=0; i<edi->flood.vecs.neig; i++)
+ {
+ fprintf(stdout, "ED: EV %d flooding potential center: %11.4e", i, edi->flood.vecs.refproj[i]);
+ if (edi->flood.bHarmonic)
+ fprintf(stdout, " (adding %11.4e/timestep)", edi->flood.vecs.refprojslope[i]);
+ fprintf(stdout, "\n");
+ }
+ }
+
+ /* set starting projections for linsam */
+ rad_project(edi, xstart, &edi->vecs.linacc, cr);
+ rad_project(edi, xstart, &edi->vecs.linfix, cr);
+
+ /* Output to file, set the step to -1 so that write_edo knows it was called from init_edsam */
+ if (ed->edo && !(ed->bStartFromCpt))
+ write_edo(nr_edi, edi, ed, -1, 0);
+
+ /* Prepare for the next edi data set: */
+ edi=edi->next_edi;
+ }
+ /* Cleaning up on the master node: */
+ sfree(x_pbc);
+ sfree(xfit);
+ sfree(xstart);
+
+ } /* end of MASTER only section */
+
+ if (PAR(cr))
+ {
+ /* First let everybody know how many ED data sets to expect */
+ gmx_bcast(sizeof(numedis), &numedis, cr);
+ /* Broadcast the essential dynamics / flooding data to all nodes */
+ broadcast_ed_data(cr, ed, numedis);
+ }
+ else
+ {
+ /* In the single-CPU case, point the local atom numbers pointers to the global
+ * one, so that we can use the same notation in serial and parallel case: */
+
+ /* Loop over all ED data sets (usually only one, though) */
+ edi=ed->edpar;
+ for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
+ {
+ edi->sref.anrs_loc = edi->sref.anrs;
+ edi->sav.anrs_loc = edi->sav.anrs;
+ edi->star.anrs_loc = edi->star.anrs;
+ edi->sori.anrs_loc = edi->sori.anrs;
+ /* For the same reason as above, make a dummy c_ind array: */
+ snew(edi->sav.c_ind, edi->sav.nr);
+ /* Initialize the array */
+ for (i=0; i<edi->sav.nr; i++)
+ edi->sav.c_ind[i] = i;
+ /* In the general case we will need a different-sized array for the reference indices: */
+ if (!edi->bRefEqAv)
+ {
+ snew(edi->sref.c_ind, edi->sref.nr);
+ for (i=0; i<edi->sref.nr; i++)
+ edi->sref.c_ind[i] = i;
+ }
+ /* Point to the very same array in case of other structures: */
+ edi->star.c_ind = edi->sav.c_ind;
+ edi->sori.c_ind = edi->sav.c_ind;
+ /* In the serial case, the local number of atoms is the global one: */
+ edi->sref.nr_loc = edi->sref.nr;
+ edi->sav.nr_loc = edi->sav.nr;
+ edi->star.nr_loc = edi->star.nr;
+ edi->sori.nr_loc = edi->sori.nr;
+
+            /* And on we go to the next edi dataset */
+ edi=edi->next_edi;
+ }
+ }
+
+ /* Allocate space for ED buffer variables */
+ /* Again, loop over ED data sets */
+ edi=ed->edpar;
+ for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
+ {
+ /* Allocate space for ED buffer */
+ snew(edi->buf, 1);
+ snew(edi->buf->do_edsam, 1);
+
+ /* Space for collective ED buffer variables */
+
+ /* Collective positions of atoms with the average indices */
+ snew(edi->buf->do_edsam->xcoll , edi->sav.nr);
+ snew(edi->buf->do_edsam->shifts_xcoll , edi->sav.nr); /* buffer for xcoll shifts */
+ snew(edi->buf->do_edsam->extra_shifts_xcoll , edi->sav.nr);
+ /* Collective positions of atoms with the reference indices */
+ if (!edi->bRefEqAv)
+ {
+ snew(edi->buf->do_edsam->xc_ref , edi->sref.nr);
+ snew(edi->buf->do_edsam->shifts_xc_ref , edi->sref.nr); /* To store the shifts in */
+ snew(edi->buf->do_edsam->extra_shifts_xc_ref, edi->sref.nr);
+ }
+
+ /* Get memory for flooding forces */
+ snew(edi->flood.forces_cartesian , edi->sav.nr);
+
+#ifdef DUMPEDI
+ /* Dump it all into one file per process */
+ dump_edi(edi, cr, nr_edi);
+#endif
+
+        /* And on we go to the next edi dataset */
+ edi=edi->next_edi;
+ }
+
+ /* Flush the edo file so that the user can check some things
+ * when the simulation has started */
+ if (ed->edo)
+ fflush(ed->edo);
+}
+
+
+void do_edsam(t_inputrec *ir,
+ gmx_large_int_t step,
+ t_mdatoms *md,
+ t_commrec *cr,
+ rvec xs[], /* The local current positions on this processor */
+ rvec v[], /* The velocities */
+ matrix box,
+ gmx_edsam_t ed)
+{
+ int i,edinr,iupdate=500;
+ matrix rotmat; /* rotation matrix */
+ rvec transvec; /* translation vector */
+ rvec dv,dx,x_unsh; /* tmp vectors for velocity, distance, unshifted x coordinate */
+ real dt_1; /* 1/dt */
+ struct t_do_edsam *buf;
+ t_edpar *edi;
+ real rmsdev=-1; /* RMSD from reference structure prior to applying the constraints */
+ gmx_bool bSuppress=FALSE; /* Write .edo file on master? */
+
+
+ /* Check if ED sampling has to be performed */
+ if ( ed->eEDtype==eEDnone )
+ return;
+
+ /* Suppress output on first call of do_edsam if
+ * two-step sd2 integrator is used */
+ if ( (ir->eI==eiSD2) && (v != NULL) )
+ bSuppress = TRUE;
+
+ dt_1 = 1.0/ir->delta_t;
+
+ /* Loop over all ED datasets (usually one) */
+ edi = ed->edpar;
+ edinr = 0;
+ while (edi != NULL)
+ {
+ edinr++;
+ if (edi->bNeedDoEdsam)
+ {
+
+ buf=edi->buf->do_edsam;
+
+ if (ed->bFirst)
+ /* initialise radacc radius for slope criterion */
+ buf->oldrad=calc_radius(&edi->vecs.radacc);
+
+ /* Copy the positions into buf->xc* arrays and after ED
+ * feed back corrections to the official positions */
+
+ /* Broadcast the ED positions such that every node has all of them
+ * Every node contributes its local positions xs and stores it in
+ * the collective buf->xcoll array. Note that for edinr > 1
+ * xs could already have been modified by an earlier ED */
+
+ communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, PAR(cr) ? buf->bUpdateShifts : TRUE, xs,
+ edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old, box);
+
+#ifdef DEBUG_ED
+ dump_xcoll(edi, buf, cr, step);
+#endif
+            /* Only assemble reference positions if their indices differ from the average ones */
+ if (!edi->bRefEqAv)
+ communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, PAR(cr) ? buf->bUpdateShifts : TRUE, xs,
+ edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
+
+ /* If bUpdateShifts was TRUE then the shifts have just been updated in communicate_group_positions.
+ * We do not need to update the shifts until the next NS step. Note that dd_make_local_ed_indices
+ * set bUpdateShifts=TRUE in the parallel case. */
+ buf->bUpdateShifts = FALSE;
+
+ /* Now all nodes have all of the ED positions in edi->sav->xcoll,
+ * as well as the indices in edi->sav.anrs */
+
+ /* Fit the reference indices to the reference structure */
+ if (edi->bRefEqAv)
+ fit_to_reference(buf->xcoll , transvec, rotmat, edi);
+ else
+ fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
+
+ /* Now apply the translation and rotation to the ED structure */
+ translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
+
+ /* Find out how well we fit to the reference (just for output steps) */
+ if (do_per_step(step,edi->outfrq) && MASTER(cr))
+ {
+ if (edi->bRefEqAv)
+ {
+ /* Indices of reference and average structures are identical,
+ * thus we can calculate the rmsd to SREF using xcoll */
+ rmsdev = rmsd_from_structure(buf->xcoll,&edi->sref);
+ }
+ else
+ {
+ /* We have to translate & rotate the reference atoms first */
+ translate_and_rotate(buf->xc_ref, edi->sref.nr, transvec, rotmat);
+ rmsdev = rmsd_from_structure(buf->xc_ref,&edi->sref);
+ }
+ }
+
+ /* update radsam references, when required */
+ if (do_per_step(step,edi->maxedsteps) && step >= edi->presteps)
+ {
+ project(buf->xcoll, edi);
+ rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
+ rad_project(edi, buf->xcoll, &edi->vecs.radfix, cr);
+ buf->oldrad=-1.e5;
+ }
+
+ /* update radacc references, when required */
+ if (do_per_step(step,iupdate) && step >= edi->presteps)
+ {
+ edi->vecs.radacc.radius = calc_radius(&edi->vecs.radacc);
+ if (edi->vecs.radacc.radius - buf->oldrad < edi->slope)
+ {
+ project(buf->xcoll, edi);
+ rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
+ buf->oldrad = 0.0;
+ } else
+ buf->oldrad = edi->vecs.radacc.radius;
+ }
+
+ /* apply the constraints */
+ if (step >= edi->presteps && ed_constraints(ed->eEDtype, edi))
+ {
+ /* ED constraints should be applied already in the first MD step
+ * (which is step 0), therefore we pass step+1 to the routine */
+ ed_apply_constraints(buf->xcoll, edi, step+1 - ir->init_step, cr);
+ }
+
+ /* write to edo, when required */
+ if (do_per_step(step,edi->outfrq))
+ {
+ project(buf->xcoll, edi);
+ if (MASTER(cr) && !bSuppress)
+ write_edo(edinr, edi, ed, step, rmsdev);
+ }
+
+ /* Copy back the positions unless monitoring only */
+ if (ed_constraints(ed->eEDtype, edi))
+ {
+ /* remove fitting */
+ rmfit(edi->sav.nr, buf->xcoll, transvec, rotmat);
+
+ /* Copy the ED corrected positions into the coordinate array */
+ /* Each node copies its local part. In the serial case, nat_loc is the
+ * total number of ED atoms */
+ for (i=0; i<edi->sav.nr_loc; i++)
+ {
+ /* Unshift local ED coordinate and store in x_unsh */
+ ed_unshift_single_coord(box, buf->xcoll[edi->sav.c_ind[i]],
+ buf->shifts_xcoll[edi->sav.c_ind[i]], x_unsh);
+
+ /* dx is the ED correction to the positions: */
+ rvec_sub(x_unsh, xs[edi->sav.anrs_loc[i]], dx);
+
+ if (v != NULL)
+ {
+ /* dv is the ED correction to the velocity: */
+ svmul(dt_1, dx, dv);
+ /* apply the velocity correction: */
+ rvec_inc(v[edi->sav.anrs_loc[i]], dv);
+ }
+ /* Finally apply the position correction due to ED: */
+ copy_rvec(x_unsh, xs[edi->sav.anrs_loc[i]]);
+ }
+ }
+ } /* END of if (edi->bNeedDoEdsam) */
+
+ /* Prepare for the next ED dataset */
+ edi = edi->next_edi;
+
+ } /* END of loop over ED datasets */
+
+ ed->bFirst = FALSE;
+}
--- /dev/null
- #endif
- #ifdef FFT5D_THREADS
- #include "gmx_omp.h"
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2012, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Groningen Machine for Chemical Simulation
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <algorithm>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef NOGMX
+#define GMX_PARALLEL_ENV_INITIALIZED 1
+#else
+#ifdef GMX_MPI
+#define GMX_PARALLEL_ENV_INITIALIZED 1
+#else
+#define GMX_PARALLEL_ENV_INITIALIZED 0
+#endif
+#endif
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#ifdef GMX_OPENMP
+/* TODO: Do we still need this? Are we still planning to use fftw + OpenMP? */
+#define FFT5D_THREADS
- snew_aligned(lout2, lsize, 32);
- snew_aligned(lout3, lsize, 32);
+/* requires fftw compiled with openmp */
+/* #define FFT5D_FFTW_THREADS (now set by cmake) */
+#endif
+
+#include "fft5d.h"
+#include <float.h>
+#include <math.h>
+#include <assert.h>
+#include "smalloc.h"
+
+#ifndef __FLT_EPSILON__
+#define __FLT_EPSILON__ FLT_EPSILON
+#define __DBL_EPSILON__ DBL_EPSILON
+#endif
+
+#ifdef NOGMX
+FILE* debug=0;
+#endif
+
+#include "gmx_fatal.h"
+
+
+#ifdef GMX_FFT_FFTW3
+#include "thread_mpi/mutex.h"
+#include "gromacs/utility/exceptions.h"
+/* none of the fftw3 calls, except execute(), are thread-safe, so
+ we need to serialize them with this mutex. */
+static tMPI::mutex big_fftw_mutex;
+#define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
+#define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
+#endif /* GMX_FFT_FFTW3 */
+
+/* largest factor smaller than sqrt */
+static int lfactor(int z) {
+ int i;
+ for (i=static_cast<int>(sqrt(static_cast<double>(z)));;i--)
+ if (z%i==0) return i;
+ return 1;
+}
+
+/* largest factor */
+static int l2factor(int z) {
+ int i;
+ if (z==1) return 1;
+ for (i=z/2;;i--)
+ if (z%i==0) return i;
+ return 1;
+}
+
+/* largest prime factor: WARNING: slow recursion, only use for small numbers */
+static int lpfactor(int z) {
+ int f = l2factor(z);
+ if (f==1) return z;
+ return std::max(lpfactor(f),lpfactor(z/f));
+}
+
+#ifndef GMX_MPI
+#ifdef HAVE_GETTIMEOFDAY
+#include <sys/time.h>
+double MPI_Wtime() {
+ struct timeval tv;
+ gettimeofday(&tv,0);
+ return tv.tv_sec+tv.tv_usec*1e-6;
+}
+#else
+double MPI_Wtime() {
+ return 0.0;
+}
+#endif
+#endif
+
+static int vmax(int* a, int s) {
+ int i,max=0;
+ for (i=0;i<s;i++)
+ {
+ if (a[i]>max) max=a[i];
+ }
+ return max;
+}
+
+
+/* NxMxK the size of the data
+ * comm communicator to use for fft5d
+ * P0 number of processor in 1st axes (can be null for automatic)
+ * lin is allocated by fft5d because size of array is only known after planning phase
+ * rlout2 is only used as intermediate buffer - only returned after allocation to reuse for back transform - should not be used by caller
+*/
+fft5d_plan fft5d_plan_3d(int NG, int MG, int KG, MPI_Comm comm[2], int flags, t_complex** rlin, t_complex** rlout, t_complex** rlout2, t_complex** rlout3, int nthreads)
+{
+
+ int P[2],bMaster,prank[2],i,t;
+ int rNG,rMG,rKG;
+ int *N0=0, *N1=0, *M0=0, *M1=0, *K0=0, *K1=0, *oN0=0, *oN1=0, *oM0=0, *oM1=0, *oK0=0, *oK1=0;
+ int N[3],M[3],K[3],pN[3],pM[3],pK[3],oM[3],oK[3],*iNin[3]={0},*oNin[3]={0},*iNout[3]={0},*oNout[3]={0};
+ int C[3],rC[3],nP[2];
+ int lsize;
+ t_complex *lin=0,*lout=0,*lout2=0,*lout3=0;
+ fft5d_plan plan;
+ int s;
+
+ /* comm, prank and P are in the order of the decomposition (plan->cart is in the order of transposes) */
+#ifdef GMX_MPI
+ if (GMX_PARALLEL_ENV_INITIALIZED && comm[0] != MPI_COMM_NULL)
+ {
+ MPI_Comm_size(comm[0],&P[0]);
+ MPI_Comm_rank(comm[0],&prank[0]);
+ }
+ else
+#endif
+ {
+ P[0] = 1;
+ prank[0] = 0;
+ }
+#ifdef GMX_MPI
+ if (GMX_PARALLEL_ENV_INITIALIZED && comm[1] != MPI_COMM_NULL)
+ {
+ MPI_Comm_size(comm[1],&P[1]);
+ MPI_Comm_rank(comm[1],&prank[1]);
+ }
+ else
+#endif
+ {
+ P[1] = 1;
+ prank[1] = 0;
+ }
+
+ bMaster=(prank[0]==0&&prank[1]==0);
+
+
+ if (debug)
+ {
+ fprintf(debug,"FFT5D: Using %dx%d processor grid, rank %d,%d\n",
+ P[0],P[1],prank[0],prank[1]);
+ }
+
+ if (bMaster) {
+ if (debug)
+ fprintf(debug,"FFT5D: N: %d, M: %d, K: %d, P: %dx%d, real2complex: %d, backward: %d, order yz: %d, debug %d\n",
+ NG,MG,KG,P[0],P[1],(flags&FFT5D_REALCOMPLEX)>0,(flags&FFT5D_BACKWARD)>0,(flags&FFT5D_ORDER_YZ)>0,(flags&FFT5D_DEBUG)>0);
+ /* The check below is not correct, one prime factor 11 or 13 is ok.
+ if (fft5d_fmax(fft5d_fmax(lpfactor(NG),lpfactor(MG)),lpfactor(KG))>7) {
+ printf("WARNING: FFT very slow with prime factors larger 7\n");
+ printf("Change FFT size or in case you cannot change it look at\n");
+ printf("http://www.fftw.org/fftw3_doc/Generating-your-own-code.html\n");
+ }
+ */
+ }
+
+ if (NG==0 || MG==0 || KG==0) {
+ if (bMaster) printf("FFT5D: FATAL: Datasize cannot be zero in any dimension\n");
+ return 0;
+ }
+
+ rNG=NG;rMG=MG;rKG=KG;
+
+ if (flags&FFT5D_REALCOMPLEX) {
+ if (!(flags&FFT5D_BACKWARD)) NG = NG/2+1;
+ else {
+ if (!(flags&FFT5D_ORDER_YZ)) MG=MG/2+1;
+ else KG=KG/2+1;
+ }
+ }
+
+
+ /*for transpose we need to know the size for each processor not only our own size*/
+
+ N0 = (int*)malloc(P[0]*sizeof(int)); N1 = (int*)malloc(P[1]*sizeof(int));
+ M0 = (int*)malloc(P[0]*sizeof(int)); M1 = (int*)malloc(P[1]*sizeof(int));
+ K0 = (int*)malloc(P[0]*sizeof(int)); K1 = (int*)malloc(P[1]*sizeof(int));
+ oN0 = (int*)malloc(P[0]*sizeof(int));oN1 = (int*)malloc(P[1]*sizeof(int));
+ oM0 = (int*)malloc(P[0]*sizeof(int));oM1 = (int*)malloc(P[1]*sizeof(int));
+ oK0 = (int*)malloc(P[0]*sizeof(int));oK1 = (int*)malloc(P[1]*sizeof(int));
+
+ for (i=0;i<P[0];i++)
+ {
+ #define EVENDIST
+ #ifndef EVENDIST
+ oN0[i]=i*ceil((double)NG/P[0]);
+ oM0[i]=i*ceil((double)MG/P[0]);
+ oK0[i]=i*ceil((double)KG/P[0]);
+ #else
+ oN0[i]=(NG*i)/P[0];
+ oM0[i]=(MG*i)/P[0];
+ oK0[i]=(KG*i)/P[0];
+ #endif
+ }
+ for (i=0;i<P[1];i++)
+ {
+ #ifndef EVENDIST
+ oN1[i]=i*ceil((double)NG/P[1]);
+ oM1[i]=i*ceil((double)MG/P[1]);
+ oK1[i]=i*ceil((double)KG/P[1]);
+ #else
+ oN1[i]=(NG*i)/P[1];
+ oM1[i]=(MG*i)/P[1];
+ oK1[i]=(KG*i)/P[1];
+ #endif
+ }
+ for (i=0;i<P[0]-1;i++)
+ {
+ N0[i]=oN0[i+1]-oN0[i];
+ M0[i]=oM0[i+1]-oM0[i];
+ K0[i]=oK0[i+1]-oK0[i];
+ }
+ N0[P[0]-1]=NG-oN0[P[0]-1];
+ M0[P[0]-1]=MG-oM0[P[0]-1];
+ K0[P[0]-1]=KG-oK0[P[0]-1];
+ for (i=0;i<P[1]-1;i++)
+ {
+ N1[i]=oN1[i+1]-oN1[i];
+ M1[i]=oM1[i+1]-oM1[i];
+ K1[i]=oK1[i+1]-oK1[i];
+ }
+ N1[P[1]-1]=NG-oN1[P[1]-1];
+ M1[P[1]-1]=MG-oM1[P[1]-1];
+ K1[P[1]-1]=KG-oK1[P[1]-1];
+
+ /* for step 1-3 the local N,M,K sizes of the transposed system
+ C: contiguous dimension, and nP: number of processor in subcommunicator
+ for that step */
+
+
+ pM[0] = M0[prank[0]];
+ oM[0] = oM0[prank[0]];
+ pK[0] = K1[prank[1]];
+ oK[0] = oK1[prank[1]];
+ C[0] = NG;
+ rC[0] = rNG;
+ if (!(flags&FFT5D_ORDER_YZ)) {
+ N[0] = vmax(N1,P[1]);
+ M[0] = M0[prank[0]];
+ K[0] = vmax(K1,P[1]);
+ pN[0] = N1[prank[1]];
+ iNout[0] = N1;
+ oNout[0] = oN1;
+ nP[0] = P[1];
+ C[1] = KG;
+ rC[1] =rKG;
+ N[1] = vmax(K0,P[0]);
+ pN[1] = K0[prank[0]];
+ iNin[1] = K1;
+ oNin[1] = oK1;
+ iNout[1] = K0;
+ oNout[1] = oK0;
+ M[1] = vmax(M0,P[0]);
+ pM[1] = M0[prank[0]];
+ oM[1] = oM0[prank[0]];
+ K[1] = N1[prank[1]];
+ pK[1] = N1[prank[1]];
+ oK[1] = oN1[prank[1]];
+ nP[1] = P[0];
+ C[2] = MG;
+ rC[2] = rMG;
+ iNin[2] = M0;
+ oNin[2] = oM0;
+ M[2] = vmax(K0,P[0]);
+ pM[2] = K0[prank[0]];
+ oM[2] = oK0[prank[0]];
+ K[2] = vmax(N1,P[1]);
+ pK[2] = N1[prank[1]];
+ oK[2] = oN1[prank[1]];
+ free(N0); free(oN0); /*these are not used for this order*/
+ free(M1); free(oM1); /*the rest is freed in destroy*/
+ } else {
+ N[0] = vmax(N0,P[0]);
+ M[0] = vmax(M0,P[0]);
+ K[0] = K1[prank[1]];
+ pN[0] = N0[prank[0]];
+ iNout[0] = N0;
+ oNout[0] = oN0;
+ nP[0] = P[0];
+ C[1] = MG;
+ rC[1] =rMG;
+ N[1] = vmax(M1,P[1]);
+ pN[1] = M1[prank[1]];
+ iNin[1] = M0;
+ oNin[1] = oM0;
+ iNout[1] = M1;
+ oNout[1] = oM1;
+ M[1] = N0[prank[0]];
+ pM[1] = N0[prank[0]];
+ oM[1] = oN0[prank[0]];
+ K[1] = vmax(K1,P[1]);
+ pK[1] = K1[prank[1]];
+ oK[1] = oK1[prank[1]];
+ nP[1] = P[1];
+ C[2] = KG;
+ rC[2] = rKG;
+ iNin[2] = K1;
+ oNin[2] = oK1;
+ M[2] = vmax(N0,P[0]);
+ pM[2] = N0[prank[0]];
+ oM[2] = oN0[prank[0]];
+ K[2] = vmax(M1,P[1]);
+ pK[2] = M1[prank[1]];
+ oK[2] = oM1[prank[1]];
+ free(N1); free(oN1); /*these are not used for this order*/
+ free(K0); free(oK0); /*the rest is freed in destroy*/
+ }
+ N[2]=pN[2]=-1; /*not used*/
+
+ /*
+ Difference between x-y-z regarding 2d decomposition is whether they are
+ distributed along axis 1, 2 or both
+ */
+
+ /* int lsize = fmax(N[0]*M[0]*K[0]*nP[0],N[1]*M[1]*K[1]*nP[1]); */
+ lsize = std::max(N[0]*M[0]*K[0]*nP[0],std::max(N[1]*M[1]*K[1]*nP[1],C[2]*M[2]*K[2]));
+ /* int lsize = fmax(C[0]*M[0]*K[0],fmax(C[1]*M[1]*K[1],C[2]*M[2]*K[2])); */
+ if (!(flags&FFT5D_NOMALLOC)) {
+ snew_aligned(lin, lsize, 32);
+ snew_aligned(lout, lsize, 32);
- lout2 = *rlout2;
- lout3 = *rlout3;
++ if (nthreads > 1)
++ {
++ /* We need extra transpose buffers to avoid OpenMP barriers */
++ snew_aligned(lout2, lsize, 32);
++ snew_aligned(lout3, lsize, 32);
++ }
++ else
++ {
++ /* We can reuse the buffers to avoid cache misses */
++ lout2 = lin;
++ lout3 = lout;
++ }
+ } else {
+ lin = *rlin;
+ lout = *rlout;
- if (bParallelDim) {
++ if (nthreads > 1)
++ {
++ lout2 = *rlout2;
++ lout3 = *rlout3;
++ }
++ else
++ {
++ lout2 = lin;
++ lout3 = lout;
++ }
+ }
+
+ plan = (fft5d_plan)calloc(1,sizeof(struct fft5d_plan_t));
+
+
+ if (debug)
+ {
+ fprintf(debug, "Running on %d threads\n",nthreads);
+ }
+
+#ifdef GMX_FFT_FFTW3 /*if not FFTW - then we don't do a 3d plan but instead use only 1D plans */
+ /* It is possible to use the 3d plan with OMP threads - but in that case it is not allowed to be called from
+ * within a parallel region. For now deactivated. If it should be supported it has to be made sure
+ * that the execute of the 3d plan is in a master/serial block (since it contains its own parallel region)
+ * and that the 3d plan is faster than the 1d plan.
+ */
+ if ((!(flags&FFT5D_INPLACE)) && (!(P[0]>1 || P[1]>1)) && nthreads==1) { /*don't do 3d plan in parallel or if in_place requested */
+ int fftwflags=FFTW_DESTROY_INPUT;
+ FFTW(iodim) dims[3];
+ int inNG=NG,outMG=MG,outKG=KG;
+
+ FFTW_LOCK;
+ if (!(flags&FFT5D_NOMEASURE)) fftwflags|=FFTW_MEASURE;
+ if (flags&FFT5D_REALCOMPLEX) {
+ if (!(flags&FFT5D_BACKWARD)) { /*input pointer is not complex*/
+ inNG*=2;
+ } else { /*output pointer is not complex*/
+ if (!(flags&FFT5D_ORDER_YZ)) outMG*=2;
+ else outKG*=2;
+ }
+ }
+
+ if (!(flags&FFT5D_BACKWARD)) {
+ dims[0].n = KG;
+ dims[1].n = MG;
+ dims[2].n = rNG;
+
+ dims[0].is = inNG*MG; /*N M K*/
+ dims[1].is = inNG;
+ dims[2].is = 1;
+ if (!(flags&FFT5D_ORDER_YZ)) {
+ dims[0].os = MG; /*M K N*/
+ dims[1].os = 1;
+ dims[2].os = MG*KG;
+ } else {
+ dims[0].os = 1; /*K N M*/
+ dims[1].os = KG*NG;
+ dims[2].os = KG;
+ }
+ } else {
+ if (!(flags&FFT5D_ORDER_YZ)) {
+ dims[0].n = NG;
+ dims[1].n = KG;
+ dims[2].n = rMG;
+
+ dims[0].is = 1;
+ dims[1].is = NG*MG;
+ dims[2].is = NG;
+
+ dims[0].os = outMG*KG;
+ dims[1].os = outMG;
+ dims[2].os = 1;
+ } else {
+ dims[0].n = MG;
+ dims[1].n = NG;
+ dims[2].n = rKG;
+
+ dims[0].is = NG;
+ dims[1].is = 1;
+ dims[2].is = NG*MG;
+
+ dims[0].os = outKG*NG;
+ dims[1].os = outKG;
+ dims[2].os = 1;
+ }
+ }
+#ifdef FFT5D_THREADS
+#ifdef FFT5D_FFTW_THREADS
+ FFTW(plan_with_nthreads)(nthreads);
+#endif
+#endif
+ if ((flags&FFT5D_REALCOMPLEX) && !(flags&FFT5D_BACKWARD)) {
+ plan->p3d = FFTW(plan_guru_dft_r2c)(/*rank*/ 3, dims,
+ /*howmany*/ 0, /*howmany_dims*/0 ,
+ (real*)lin, (FFTW(complex) *)lout,
+ /*flags*/ fftwflags);
+ } else if ((flags&FFT5D_REALCOMPLEX) && (flags&FFT5D_BACKWARD)) {
+ plan->p3d = FFTW(plan_guru_dft_c2r)(/*rank*/ 3, dims,
+ /*howmany*/ 0, /*howmany_dims*/0 ,
+ (FFTW(complex) *)lin, (real*)lout,
+ /*flags*/ fftwflags);
+ } else {
+ plan->p3d = FFTW(plan_guru_dft)(/*rank*/ 3, dims,
+ /*howmany*/ 0, /*howmany_dims*/0 ,
+ (FFTW(complex) *)lin, (FFTW(complex) *)lout,
+ /*sign*/ (flags&FFT5D_BACKWARD)?1:-1, /*flags*/ fftwflags);
+ }
+#ifdef FFT5D_THREADS
+#ifdef FFT5D_FFTW_THREADS
+ FFTW(plan_with_nthreads)(1);
+#endif
+#endif
+ FFTW_UNLOCK;
+ }
+ if (!plan->p3d) { /* for decomposition and if 3d plan did not work */
+#endif /* GMX_FFT_FFTW3 */
+ for (s=0;s<3;s++) {
+ if (debug)
+ {
+ fprintf(debug,"FFT5D: Plan s %d rC %d M %d pK %d C %d lsize %d\n",
+ s,rC[s],M[s],pK[s],C[s],lsize);
+ }
+ plan->p1d[s] = (gmx_fft_t*)malloc(sizeof(gmx_fft_t)*nthreads);
+
+ /* Make sure that the init routines are only called by one thread at a time and in order
+       (the latter is only important to not confuse valgrind)
+ */
+#pragma omp parallel for num_threads(nthreads) schedule(static) ordered
+ for(t=0; t<nthreads; t++)
++#pragma omp ordered
+ {
+ int tsize = ((t+1)*pM[s]*pK[s]/nthreads)-(t*pM[s]*pK[s]/nthreads);
+
+ if ((flags&FFT5D_REALCOMPLEX) && ((!(flags&FFT5D_BACKWARD) && s==0) || ((flags&FFT5D_BACKWARD) && s==2))) {
+ gmx_fft_init_many_1d_real( &plan->p1d[s][t], rC[s], tsize, (flags&FFT5D_NOMEASURE)?GMX_FFT_FLAG_CONSERVATIVE:0 );
+ } else {
+ gmx_fft_init_many_1d ( &plan->p1d[s][t], C[s], tsize, (flags&FFT5D_NOMEASURE)?GMX_FFT_FLAG_CONSERVATIVE:0 );
+ }
+ }
+ }
+
+#ifdef GMX_FFT_FFTW3
+ }
+#endif
+ if ((flags&FFT5D_ORDER_YZ)) { /*plan->cart is in the order of transposes */
+ plan->cart[0]=comm[0]; plan->cart[1]=comm[1];
+ } else {
+ plan->cart[1]=comm[0]; plan->cart[0]=comm[1];
+ }
+#ifdef FFT5D_MPI_TRANSPOSE
+ FFTW_LOCK;
+ for (s=0;s<2;s++) {
+ if ((s==0 && !(flags&FFT5D_ORDER_YZ)) || (s==1 && (flags&FFT5D_ORDER_YZ)))
+ plan->mpip[s] = FFTW(mpi_plan_many_transpose)(nP[s], nP[s], N[s]*K[s]*pM[s]*2, 1, 1, (real*)lout2, (real*)lout3, plan->cart[s], FFTW_PATIENT);
+ else
+ plan->mpip[s] = FFTW(mpi_plan_many_transpose)(nP[s], nP[s], N[s]*pK[s]*M[s]*2, 1, 1, (real*)lout2, (real*)lout3, plan->cart[s], FFTW_PATIENT);
+ }
+ FFTW_UNLOCK;
+#endif
+
+
+ plan->lin=lin;
+ plan->lout=lout;
+ plan->lout2=lout2;
+ plan->lout3=lout3;
+
+ plan->NG=NG;plan->MG=MG;plan->KG=KG;
+
+ for (s=0;s<3;s++) {
+ plan->N[s]=N[s];plan->M[s]=M[s];plan->K[s]=K[s];plan->pN[s]=pN[s];plan->pM[s]=pM[s];plan->pK[s]=pK[s];
+ plan->oM[s]=oM[s];plan->oK[s]=oK[s];
+ plan->C[s]=C[s];plan->rC[s]=rC[s];
+ plan->iNin[s]=iNin[s];plan->oNin[s]=oNin[s];plan->iNout[s]=iNout[s];plan->oNout[s]=oNout[s];
+ }
+ for (s=0;s<2;s++) {
+ plan->P[s]=nP[s];plan->coor[s]=prank[s];
+ }
+
+/* plan->fftorder=fftorder;
+ plan->direction=direction;
+ plan->realcomplex=realcomplex;
+*/
+ plan->flags=flags;
+ plan->nthreads=nthreads;
+ *rlin=lin;
+ *rlout=lout;
+ *rlout2=lout2;
+ *rlout3=lout3;
+ return plan;
+}
+
+
+enum order {
+ XYZ,
+ XZY,
+ YXZ,
+ YZX,
+ ZXY,
+ ZYX
+};
+
+
+
+/*here x,y,z and N,M,K is in rotated coordinate system!!
+ x (and N) is mayor (consecutive) dimension, y (M) middle and z (K) major
+ maxN,maxM,maxK is max size of local data
+ pN, pM, pK is local size specific to current processor (only different to max if not divisible)
+ NG, MG, KG is size of global data*/
+static void splitaxes(t_complex* lout,const t_complex* lin,
+ int maxN,int maxM,int maxK, int pN, int pM, int pK,
+ int P,int NG,int *N, int* oN,int starty,int startz,int endy, int endz)
+{
+ int x,y,z,i;
+ int in_i,out_i,in_z,out_z,in_y,out_y;
+ int s_y,e_y;
+
+ for (z=startz; z<endz+1; z++) /*3. z l*/
+ {
+ if (z==startz) {
+ s_y=starty;
+ } else {
+ s_y=0;
+ }
+ if (z==endz) {
+ e_y=endy;
+ } else {
+ e_y=pM;
+ }
+ out_z = z*maxN*maxM;
+ in_z = z*NG*pM;
+
+ for (i=0; i<P; i++) /*index cube along long axis*/
+ {
+ out_i = out_z + i*maxN*maxM*maxK;
+ in_i = in_z + oN[i];
+ for (y=s_y;y<e_y;y++) { /*2. y k*/
+ out_y = out_i + y*maxN;
+ in_y = in_i + y*NG;
+ for (x=0;x<N[i];x++) { /*1. x j*/
+ lout[out_y+x] = lin[in_y+x]; /*in=z*NG*pM+oN[i]+y*NG+x*/
+ /*after split important that each processor chunk i has size maxN*maxM*maxK and thus being the same size*/
+                    /*before split data contiguous - thus if different processors get different amounts oN is different*/
+ }
+ }
+ }
+ }
+}
+
+/*make axis contiguous again (after AllToAll) and also do local transpose*/
+/*transpose mayor and major dimension
+ variables see above
+ the major, middle, minor order is only correct for x,y,z (N,M,K) for the input
+ N,M,K local dimensions
+ KG global size*/
+static void joinAxesTrans13(t_complex* lout,const t_complex* lin,
+ int maxN,int maxM,int maxK,int pN, int pM, int pK,
+ int P,int KG, int* K, int* oK,int starty, int startx, int endy, int endx)
+{
+ int i,x,y,z;
+ int out_i,in_i,out_x,in_x,out_z,in_z;
+ int s_y,e_y;
+
+ for (x=startx;x<endx+1;x++) /*1.j*/
+ {
+ if (x==startx)
+ {
+ s_y=starty;
+ }
+ else
+ {
+ s_y=0;
+ }
+ if (x==endx)
+ {
+ e_y=endy;
+ }
+ else
+ {
+ e_y=pM;
+ }
+
+ out_x = x*KG*pM;
+ in_x = x;
+
+ for (i=0;i<P;i++) /*index cube along long axis*/
+ {
+ out_i = out_x + oK[i];
+ in_i = in_x + i*maxM*maxN*maxK;
+ for (z=0;z<K[i];z++) /*3.l*/
+ {
+ out_z = out_i + z;
+ in_z = in_i + z*maxM*maxN;
+ for (y=s_y;y<e_y;y++) /*2.k*/
+ {
+ lout[out_z+y*KG] = lin[in_z+y*maxN]; /*out=x*KG*pM+oK[i]+z+y*KG*/
+ }
+ }
+ }
+ }
+}
+
+/*make axis contiguous again (after AllToAll) and also do local transpose
+  transpose mayor and middle dimension
+ variables see above
+ the minor, middle, major order is only correct for x,y,z (N,M,K) for the input
+ N,M,K local size
+ MG, global size*/
+static void joinAxesTrans12(t_complex* lout,const t_complex* lin,int maxN,int maxM,int maxK,int pN, int pM, int pK,
+ int P,int MG, int* M, int* oM, int startx, int startz, int endx, int endz) {
+ int i,z,y,x;
+ int out_i,in_i,out_z,in_z,out_x,in_x;
+ int s_x,e_x;
+
+ for (z=startz; z<endz+1; z++)
+ {
+ if (z==startz)
+ {
+ s_x=startx;
+ }
+ else
+ {
+ s_x=0;
+ }
+ if (z==endz)
+ {
+ e_x=endx;
+ }
+ else
+ {
+ e_x=pN;
+ }
+ out_z = z*MG*pN;
+ in_z = z*maxM*maxN;
+
+ for (i=0; i<P; i++) /*index cube along long axis*/
+ {
+ out_i = out_z + oM[i];
+ in_i = in_z + i*maxM*maxN*maxK;
+ for (x=s_x;x<e_x;x++)
+ {
+ out_x = out_i + x*MG;
+ in_x = in_i + x;
+ for (y=0;y<M[i];y++)
+ {
+ lout[out_x+y] = lin[in_x+y*maxN]; /*out=z*MG*pN+oM[i]+x*MG+y*/
+ }
+ }
+ }
+ }
+}
+
+
+static void rotate_offsets(int x[]) {
+ int t=x[0];
+/* x[0]=x[2];
+ x[2]=x[1];
+ x[1]=t;*/
+ x[0]=x[1];
+ x[1]=x[2];
+ x[2]=t;
+}
+
+/*compute the offset to compare or print transposed local data in original input coordinates
+ xs matrix dimension size, xl dimension length, xc decomposition offset
+ s: step in computation = number of transposes*/
+static void compute_offsets(fft5d_plan plan, int xs[], int xl[], int xc[], int NG[], int s) {
+/* int direction = plan->direction;
+ int fftorder = plan->fftorder;*/
+
+ int o=0;
+ int pos[3],i;
+ int *pM=plan->pM, *pK=plan->pK, *oM=plan->oM, *oK=plan->oK,
+ *C=plan->C, *rC=plan->rC;
+
+ NG[0]=plan->NG;NG[1]=plan->MG;NG[2]=plan->KG;
+
+ if (!(plan->flags&FFT5D_ORDER_YZ)) {
+ switch (s) {
+ case 0: o=XYZ; break;
+ case 1: o=ZYX; break;
+ case 2: o=YZX; break;
+ default: assert(0);
+ }
+ } else {
+ switch (s) {
+ case 0: o=XYZ; break;
+ case 1: o=YXZ; break;
+ case 2: o=ZXY; break;
+ default: assert(0);
+ }
+ }
+
+ switch (o) {
+ case XYZ:pos[0]=1;pos[1]=2;pos[2]=3;break;
+ case XZY:pos[0]=1;pos[1]=3;pos[2]=2;break;
+ case YXZ:pos[0]=2;pos[1]=1;pos[2]=3;break;
+ case YZX:pos[0]=3;pos[1]=1;pos[2]=2;break;
+ case ZXY:pos[0]=2;pos[1]=3;pos[2]=1;break;
+ case ZYX:pos[0]=3;pos[1]=2;pos[2]=1;break;
+ }
+ /*if (debug) printf("pos: %d %d %d\n",pos[0],pos[1],pos[2]);*/
+
+ /*xs, xl give dimension size and data length in local transposed coordinate system
+ for 0(/1/2): x(/y/z) in original coordinate system*/
+ for (i=0;i<3;i++) {
+ switch (pos[i]) {
+ case 1: xs[i]=1; xc[i]=0; xl[i]=C[s];break;
+ case 2: xs[i]=C[s]; xc[i]=oM[s]; xl[i]=pM[s];break;
+ case 3: xs[i]=C[s]*pM[s];xc[i]=oK[s]; xl[i]=pK[s];break;
+ }
+ }
+ /*input order is different for test program to match FFTW order
+ (important for complex to real)*/
+ if (plan->flags&FFT5D_BACKWARD) {
+ rotate_offsets(xs);
+ rotate_offsets(xl);
+ rotate_offsets(xc);
+ rotate_offsets(NG);
+ if (plan->flags&FFT5D_ORDER_YZ) {
+ rotate_offsets(xs);
+ rotate_offsets(xl);
+ rotate_offsets(xc);
+ rotate_offsets(NG);
+ }
+ }
+ if ((plan->flags&FFT5D_REALCOMPLEX) && ((!(plan->flags&FFT5D_BACKWARD) && s==0) || ((plan->flags&FFT5D_BACKWARD) && s==2))) {
+ xl[0] = rC[s];
+ }
+}
+
+static void print_localdata(const t_complex* lin, const char* txt, int s, fft5d_plan plan) {
+ int x,y,z,l;
+ int *coor = plan->coor;
+ int xs[3],xl[3],xc[3],NG[3];
+ int ll=(plan->flags&FFT5D_REALCOMPLEX)?1:2;
+ compute_offsets(plan,xs,xl,xc,NG,s);
+ fprintf(debug,txt,coor[0],coor[1],s);
+ /*printf("xs: %d %d %d, xl: %d %d %d\n",xs[0],xs[1],xs[2],xl[0],xl[1],xl[2]);*/
+ for(z=0;z<xl[2];z++) {
+ for(y=0;y<xl[1];y++) {
+ fprintf(debug,"%d %d: ",coor[0],coor[1]);
+ for (x=0;x<xl[0];x++) {
+ for (l=0;l<ll;l++) {
+ fprintf(debug,"%f ",((real*)lin)[(z*xs[2]+y*xs[1])*2+(x*xs[0])*ll+l]);
+ }
+ fprintf(debug,",");
+ }
+ fprintf(debug,"\n");
+ }
+ }
+}
+
+void fft5d_execute(fft5d_plan plan,int thread,fft5d_time times) {
+ t_complex *lin = plan->lin;
+ t_complex *lout = plan->lout;
+ t_complex *lout2 = plan->lout2;
+ t_complex *lout3 = plan->lout3;
+ t_complex *fftout,*joinin;
+
+ gmx_fft_t **p1d=plan->p1d;
+#ifdef FFT5D_MPI_TRANSPOSE
+ FFTW(plan) *mpip=plan->mpip;
+#endif
+#ifdef GMX_MPI
+ MPI_Comm *cart=plan->cart;
+#endif
+#ifdef NOGMX
+ double time_fft=0,time_local=0,time_mpi[2]={0},time=0;
+#endif
+ int *N=plan->N,*M=plan->M,*K=plan->K,*pN=plan->pN,*pM=plan->pM,*pK=plan->pK,
+ *C=plan->C,*P=plan->P,**iNin=plan->iNin,**oNin=plan->oNin,**iNout=plan->iNout,**oNout=plan->oNout;
+ int s=0,tstart,tend,bParallelDim;
+
+
+#ifdef GMX_FFT_FFTW3
+ if (plan->p3d)
+ {
+ if (thread == 0)
+ {
+#ifdef NOGMX
+ if (times!=0)
+ {
+ time=MPI_Wtime();
+ }
+#endif
+ FFTW(execute)(plan->p3d);
+#ifdef NOGMX
+ if (times!=0)
+ {
+ times->fft+=MPI_Wtime()-time;
+ }
+#endif
+ }
+ return;
+ }
+#endif
+
+ s=0;
+
+ /*lin: x,y,z*/
+ if (plan->flags&FFT5D_DEBUG && thread == 0)
+ {
+ print_localdata(lin, "%d %d: copy in lin\n", s, plan);
+ }
+
+ for (s=0;s<2;s++) { /*loop over first two FFT steps (corner rotations)*/
+
+#ifdef GMX_MPI
+ if (GMX_PARALLEL_ENV_INITIALIZED && cart[s]!=MPI_COMM_NULL && P[s]>1)
+ {
+ bParallelDim = 1;
+ }
+ else
+#endif
+ {
+ bParallelDim = 0;
+ }
+
+ /* ---------- START FFT ------------ */
+#ifdef NOGMX
+ if (times!=0 && thread == 0)
+ {
+ time=MPI_Wtime();
+ }
+#endif
+
- sfree_aligned(plan->lout2);
- sfree_aligned(plan->lout3);
++ if (bParallelDim || plan->nthreads == 1) {
+ fftout = lout;
+ }
+ else
+ {
+ if (s==0)
+ {
+ fftout = lout3;
+ } else
+ {
+ fftout = lout2;
+ }
+ }
+
+ tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
+ if ((plan->flags&FFT5D_REALCOMPLEX) && !(plan->flags&FFT5D_BACKWARD) && s==0)
+ {
+ gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,fftout+tstart);
+ } else
+ {
+ gmx_fft_many_1d( p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD, lin+tstart,fftout+tstart);
+
+ }
+
+#ifdef NOGMX
+ if (times != NULL && thread == 0)
+ {
+ time_fft+=MPI_Wtime()-time;
+ }
+#endif
+ if (plan->flags&FFT5D_DEBUG && thread == 0)
+ {
+ print_localdata(lout, "%d %d: FFT %d\n", s, plan);
+ }
+ /* ---------- END FFT ------------ */
+
+ /* ---------- START SPLIT + TRANSPOSE------------ (if parallel in in this dimension)*/
+ if (bParallelDim) {
+#ifdef NOGMX
+ if (times != NULL && thread == 0)
+ {
+ time=MPI_Wtime();
+ }
+#endif
+ /*prepare for A
+llToAll
+ 1. (most outer) axes (x) is split into P[s] parts of size N[s]
+ for sending*/
+ if (pM[s]>0)
+ {
+ tend = ((thread+1)*pM[s]*pK[s]/plan->nthreads);
+ tstart/=C[s];
+ splitaxes(lout2,lout,N[s],M[s],K[s], pN[s],pM[s],pK[s],P[s],C[s],iNout[s],oNout[s],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
+ }
+#pragma omp barrier /*barrier required before AllToAll (all input has to be their) - before timing to make timing more acurate*/
+#ifdef NOGMX
+ if (times != NULL && thread == 0)
+ {
+ time_local+=MPI_Wtime()-time;
+ }
+#endif
+
+ /* ---------- END SPLIT , START TRANSPOSE------------ */
+
+ if (thread == 0)
+ {
+#ifdef NOGMX
+ if (times!=0)
+ {
+ time=MPI_Wtime();
+ }
+#else
+ wallcycle_start(times,ewcPME_FFTCOMM);
+#endif
+#ifdef FFT5D_MPI_TRANSPOSE
+ FFTW(execute)(mpip[s]);
+#else
+#ifdef GMX_MPI
+ if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ)))
+ MPI_Alltoall(lout2,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
+ else
+ MPI_Alltoall(lout2,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
+#else
+ gmx_incons("fft5d MPI call without MPI configuration");
+#endif /*GMX_MPI*/
+#endif /*FFT5D_MPI_TRANSPOSE*/
+#ifdef NOGMX
+ if (times!=0)
+ {
+ time_mpi[s]=MPI_Wtime()-time;
+ }
+#else
+ wallcycle_stop(times,ewcPME_FFTCOMM);
+#endif
+ } /*master*/
+ } /* bPrallelDim */
+#pragma omp barrier /*both needed for parallel and non-parallel dimension (either have to wait on data from AlltoAll or from last FFT*/
+
+ /* ---------- END SPLIT + TRANSPOSE------------ */
+
+ /* ---------- START JOIN ------------ */
+#ifdef NOGMX
+ if (times != NULL && thread == 0)
+ {
+ time=MPI_Wtime();
+ }
+#endif
+
+ if (bParallelDim) {
+ joinin = lout3;
+ } else {
+ joinin = fftout;
+ }
+ /*bring back in matrix form
+ thus make new 1. axes contiguos
+ also local transpose 1 and 2/3
+ runs on thread used for following FFT (thus needing a barrier before but not afterwards)
+ */
+ if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) {
+ if (pM[s]>0)
+ {
+ tstart = ( thread *pM[s]*pN[s]/plan->nthreads);
+ tend = ((thread+1)*pM[s]*pN[s]/plan->nthreads);
+ joinAxesTrans13(lin,joinin,N[s],pM[s],K[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
+ }
+ }
+ else {
+ if (pN[s]>0)
+ {
+ tstart = ( thread *pK[s]*pN[s]/plan->nthreads);
+ tend = ((thread+1)*pK[s]*pN[s]/plan->nthreads);
+ joinAxesTrans12(lin,joinin,N[s],M[s],pK[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pN[s],tstart/pN[s],tend%pN[s],tend/pN[s]);
+ }
+ }
+
+#ifdef NOGMX
+ if (times != NULL && thread == 0)
+ {
+ time_local+=MPI_Wtime()-time;
+ }
+#endif
+ if (plan->flags&FFT5D_DEBUG && thread == 0)
+ {
+ print_localdata(lin, "%d %d: tranposed %d\n", s+1, plan);
+ }
+ /* ---------- END JOIN ------------ */
+
+ /*if (debug) print_localdata(lin, "%d %d: transposed x-z\n", N1, M0, K, ZYX, coor);*/
+ } /* for(s=0;s<2;s++) */
+#ifdef NOGMX
+ if (times != NULL && thread == 0)
+ {
+ time=MPI_Wtime();
+ }
+#endif
+
+ if (plan->flags&FFT5D_INPLACE) lout=lin; /*in place currently not supported*/
+
+ /* ----------- FFT ----------- */
+ tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
+ if ((plan->flags&FFT5D_REALCOMPLEX) && (plan->flags&FFT5D_BACKWARD)) {
+ gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,lout+tstart);
+ } else {
+ gmx_fft_many_1d( p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD, lin+tstart,lout+tstart);
+ }
+ /* ------------ END FFT ---------*/
+
+#ifdef NOGMX
+ if (times != NULL && thread == 0)
+ {
+ time_fft+=MPI_Wtime()-time;
+
+ times->fft+=time_fft;
+ times->local+=time_local;
+ times->mpi2+=time_mpi[1];
+ times->mpi1+=time_mpi[0];
+ }
+#endif
+
+ if (plan->flags&FFT5D_DEBUG && thread == 0)
+ {
+ print_localdata(lout, "%d %d: FFT %d\n", s, plan);
+ }
+}
+
+void fft5d_destroy(fft5d_plan plan) {
+ int s,t;
++
+ for (s=0;s<3;s++)
+ {
+ if (plan->p1d[s])
+ {
+ for (t=0;t<plan->nthreads;t++)
+ {
+ gmx_many_fft_destroy(plan->p1d[s][t]);
+ }
+ free(plan->p1d[s]);
+ }
+ if (plan->iNin[s])
+ {
+ free(plan->iNin[s]);
+ plan->iNin[s]=0;
+ }
+ if (plan->oNin[s])
+ {
+ free(plan->oNin[s]);
+ plan->oNin[s]=0;
+ }
+ if (plan->iNout[s])
+ {
+ free(plan->iNout[s]);
+ plan->iNout[s]=0;
+ }
+ if (plan->oNout[s])
+ {
+ free(plan->oNout[s]);
+ plan->oNout[s]=0;
+ }
+ }
+#ifdef GMX_FFT_FFTW3
+ FFTW_LOCK;
+#ifdef FFT5D_MPI_TRANSPOS
+ for (s=0;s<2;s++)
+ {
+ FFTW(destroy_plan)(plan->mpip[s]);
+ }
+#endif /* FFT5D_MPI_TRANSPOS */
+ if (plan->p3d)
+ {
+ FFTW(destroy_plan)(plan->p3d);
+ }
+ FFTW_UNLOCK;
+#endif /* GMX_FFT_FFTW3 */
+
+ if (!(plan->flags&FFT5D_NOMALLOC))
+ {
+ sfree_aligned(plan->lin);
+ sfree_aligned(plan->lout);
++ if (plan->nthreads > 1)
++ {
++ sfree_aligned(plan->lout2);
++ sfree_aligned(plan->lout3);
++ }
+ }
+
+#ifdef FFT5D_THREADS
+#ifdef FFT5D_FFTW_THREADS
+ /*FFTW(cleanup_threads)();*/
+#endif
+#endif
+
+ free(plan);
+}
+
+/*Is this better than direct access of plan? enough data?
+ here 0,1 reference divided by which processor grid dimension (not FFT step!)*/
+void fft5d_local_size(fft5d_plan plan,int* N1,int* M0,int* K0,int* K1,int** coor) {
+ *N1=plan->N[0];
+ *M0=plan->M[0];
+ *K1=plan->K[0];
+ *K0=plan->N[1];
+
+ *coor=plan->coor;
+}
+
+
+/*same as fft5d_plan_3d but with cartesian coordinator and automatic splitting
+ of processor dimensions*/
+fft5d_plan fft5d_plan_3d_cart(int NG, int MG, int KG, MPI_Comm comm, int P0, int flags, t_complex** rlin, t_complex** rlout, t_complex** rlout2, t_complex** rlout3, int nthreads) {
+ MPI_Comm cart[2]={0};
+#ifdef GMX_MPI
+ int size=1,prank=0;
+ int P[2];
+ int coor[2];
+ int wrap[]={0,0};
+ MPI_Comm gcart;
+ int rdim1[] = {0,1}, rdim2[] = {1,0};
+
+ MPI_Comm_size(comm,&size);
+ MPI_Comm_rank(comm,&prank);
+
+ if (P0==0) P0 = lfactor(size);
+ if (size%P0!=0)
+ {
+ if (prank==0) printf("FFT5D: WARNING: Number of processors %d not evenly dividable by %d\n",size,P0);
+ P0 = lfactor(size);
+ }
+
+ P[0] = P0; P[1]=size/P0; /*number of processors in the two dimensions*/
+
+ /*Difference between x-y-z regarding 2d decomposition is whether they are
+ distributed along axis 1, 2 or both*/
+
+ MPI_Cart_create(comm,2,P,wrap,1,&gcart); /*parameter 4: value 1: reorder*/
+ MPI_Cart_get(gcart,2,P,wrap,coor);
+ MPI_Cart_sub(gcart, rdim1 , &cart[0]);
+ MPI_Cart_sub(gcart, rdim2 , &cart[1]);
+#endif
+ return fft5d_plan_3d(NG, MG, KG, cart, flags, rlin, rlout,rlout2,rlout3,nthreads);
+}
+
+
+
+/*prints in original coordinate system of data (as the input to FFT)*/
+void fft5d_compare_data(const t_complex* lin, const t_complex* in, fft5d_plan plan, int bothLocal, int normalize) {
+ int xs[3],xl[3],xc[3],NG[3];
+ int x,y,z,l;
+ int *coor = plan->coor;
+ int ll=2; /*compare ll values per element (has to be 2 for complex)*/
+ if ((plan->flags&FFT5D_REALCOMPLEX) && (plan->flags&FFT5D_BACKWARD))
+ {
+ ll=1;
+ }
+
+ compute_offsets(plan,xs,xl,xc,NG,2);
+ if (plan->flags&FFT5D_DEBUG) printf("Compare2\n");
+ for (z=0;z<xl[2];z++) {
+ for(y=0;y<xl[1];y++) {
+ if (plan->flags&FFT5D_DEBUG) printf("%d %d: ",coor[0],coor[1]);
+ for (x=0;x<xl[0];x++) {
+ for (l=0;l<ll;l++) { /*loop over real/complex parts*/
+ real a,b;
+ a=((real*)lin)[(z*xs[2]+y*xs[1])*2+x*xs[0]*ll+l];
+ if (normalize) a/=plan->rC[0]*plan->rC[1]*plan->rC[2];
+ if (!bothLocal)
+ b=((real*)in)[((z+xc[2])*NG[0]*NG[1]+(y+xc[1])*NG[0])*2+(x+xc[0])*ll+l];
+ else
+ b=((real*)in)[(z*xs[2]+y*xs[1])*2+x*xs[0]*ll+l];
+ if (plan->flags&FFT5D_DEBUG) {
+ printf("%f %f, ",a,b);
+ } else {
+ if (fabs(a-b)>2*NG[0]*NG[1]*NG[2]*GMX_REAL_EPS) {
+ printf("result incorrect on %d,%d at %d,%d,%d: FFT5D:%f reference:%f\n",coor[0],coor[1],x,y,z,a,b);
+ }
+/* assert(fabs(a-b)<2*NG[0]*NG[1]*NG[2]*GMX_REAL_EPS);*/
+ }
+ }
+ if (plan->flags&FFT5D_DEBUG) printf(",");
+ }
+ if (plan->flags&FFT5D_DEBUG) printf("\n");
+ }
+ }
+
+}
+
--- /dev/null
- real Vsr,Vlr,Vcorr=0,vdip,vcharge;
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include "sysstuff.h"
+#include "typedefs.h"
+#include "macros.h"
+#include "smalloc.h"
+#include "macros.h"
+#include "physics.h"
+#include "force.h"
+#include "nonbonded.h"
+#include "names.h"
+#include "network.h"
+#include "pbc.h"
+#include "ns.h"
+#include "nrnb.h"
+#include "bondf.h"
+#include "mshift.h"
+#include "txtdump.h"
+#include "coulomb.h"
+#include "pme.h"
+#include "mdrun.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "qmmm.h"
++#include "gmx_omp_nthreads.h"
+
+
+void ns(FILE *fp,
+ t_forcerec *fr,
+ rvec x[],
+ matrix box,
+ gmx_groups_t *groups,
+ t_grpopts *opts,
+ gmx_localtop_t *top,
+ t_mdatoms *md,
+ t_commrec *cr,
+ t_nrnb *nrnb,
+ real *lambda,
+ real *dvdlambda,
+ gmx_grppairener_t *grppener,
+ gmx_bool bFillGrid,
+ gmx_bool bDoLongRange,
+ gmx_bool bDoForces,
+ rvec *f)
+{
+ char *ptr;
+ int nsearch;
+
+
+ if (!fr->ns.nblist_initialized)
+ {
+ init_neighbor_list(fp, fr, md->homenr);
+ }
+
+ if (fr->bTwinRange)
+ fr->nlr=0;
+
+ nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
+ lambda,dvdlambda,grppener,
+ bFillGrid,bDoLongRange,
+ bDoForces,f);
+ if (debug)
+ fprintf(debug,"nsearch = %d\n",nsearch);
+
+ /* Check whether we have to do dynamic load balancing */
+ /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
+ count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
+ &(top->idef),opts->ngener);
+ */
+ if (fr->ns.dump_nl > 0)
+ dump_nblist(fp,cr,fr,fr->ns.dump_nl);
+}
+
++static void reduce_thread_forces(int n,rvec *f,
++ tensor vir,
++ real *Vcorr,
++ int efpt_ind,real *dvdl,
++ int nthreads,f_thread_t *f_t)
++{
++ int t,i;
++
++ /* This reduction can run over any number of threads */
++#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
++ for(i=0; i<n; i++)
++ {
++ for(t=1; t<nthreads; t++)
++ {
++ rvec_inc(f[i],f_t[t].f[i]);
++ }
++ }
++ for(t=1; t<nthreads; t++)
++ {
++ *Vcorr += f_t[t].Vcorr;
++ *dvdl += f_t[t].dvdl[efpt_ind];
++ m_add(vir,f_t[t].vir,vir);
++ }
++}
++
+void do_force_lowlevel(FILE *fplog, gmx_large_int_t step,
+ t_forcerec *fr, t_inputrec *ir,
+ t_idef *idef, t_commrec *cr,
+ t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ t_mdatoms *md,
+ t_grpopts *opts,
+ rvec x[], history_t *hist,
+ rvec f[],
+ gmx_enerdata_t *enerd,
+ t_fcdata *fcd,
+ gmx_mtop_t *mtop,
+ gmx_localtop_t *top,
+ gmx_genborn_t *born,
+ t_atomtypes *atype,
+ gmx_bool bBornRadii,
+ matrix box,
+ t_lambda *fepvals,
+ real *lambda,
+ t_graph *graph,
+ t_blocka *excl,
+ rvec mu_tot[],
+ int flags,
+ float *cycles_pme)
+{
+ int i,j,status;
+ int donb_flags;
+ gmx_bool bDoEpot,bSepDVDL,bSB;
+ int pme_flags;
+ matrix boxs;
+ rvec box_size;
- real dvdl_dum[efptNR], dvdlambda[efptNR], lam_i[efptNR];
- real dvdlsum,dvdl_walls;
++ real Vsr,Vlr,Vcorr=0;
+ t_pbc pbc;
+ real dvdgb;
+ char buf[22];
+ gmx_enerdata_t ed_lam;
+ double clam_i,vlam_i;
- dvdlambda[i] = 0;
++ real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
++ real dvdlsum;
+
+#ifdef GMX_MPI
+ double t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
+#endif
+
+#define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
+
+
+ set_pbc(&pbc,fr->ePBC,box);
+
+ /* reset free energy components */
+ for (i=0;i<efptNR;i++)
+ {
- dvdl_walls = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
- enerd->grpp.ener[egLJSR],nrnb);
- PRINT_SEPDVDL("Walls",0.0,dvdl_walls);
- dvdlambda[efptVDW] += dvdl_walls;
- enerd->dvdl_lin[efptVDW] += dvdl_walls;
++ dvdl_nb[i] = 0;
+ dvdl_dum[i] = 0;
+ }
+
+ /* Reset box */
+ for(i=0; (i<DIM); i++)
+ {
+ box_size[i]=box[i][i];
+ }
+
+ bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
+ debug_gmx();
+
+ /* do QMMM first if requested */
+ if(fr->bQMMM)
+ {
+ enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
+ }
+
+ if (bSepDVDL)
+ {
+ fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
+ gmx_step_str(step,buf),cr->nodeid);
+ }
+
+ /* Call the short range functions all in one go. */
+
+#ifdef GMX_MPI
+ /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
+#define TAKETIME FALSE
+ if (TAKETIME)
+ {
+ MPI_Barrier(cr->mpi_comm_mygroup);
+ t0=MPI_Wtime();
+ }
+#endif
+
+ if (ir->nwall)
+ {
+ /* foreign lambda component for walls */
- /* wallcycle_start(wcycle,ewcGB); */
++ dvdl = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
++ enerd->grpp.ener[egLJSR],nrnb);
++ PRINT_SEPDVDL("Walls",0.0,dvdl);
++ enerd->dvdl_lin[efptVDW] += dvdl;
+ }
+
+ /* If doing GB, reset dvda and calculate the Born radii */
+ if (ir->implicit_solvent)
+ {
- /* wallcycle_stop(wcycle, ewcGB); */
++ wallcycle_sub_start(wcycle, ewcsNONBONDED);
+
+ for(i=0;i<born->nr;i++)
+ {
+ fr->dvda[i]=0;
+ }
+
+ if(bBornRadii)
+ {
+ calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
+ }
+
- donb_flags = 0;
- if (flags & GMX_FORCE_FORCES)
++ wallcycle_sub_stop(wcycle, ewcsNONBONDED);
+ }
+
+ where();
- donb_flags |= GMX_DONB_FORCES;
++ if (flags & GMX_FORCE_NONBONDED)
+ {
- do_nonbonded(cr,fr,x,f,md,excl,
- fr->bBHAM ?
- enerd->grpp.ener[egBHAMSR] :
- enerd->grpp.ener[egLJSR],
- enerd->grpp.ener[egCOULSR],
- enerd->grpp.ener[egGB],box_size,nrnb,
- lambda,dvdlambda,-1,-1,donb_flags);
++ donb_flags = 0;
++ if (flags & GMX_FORCE_FORCES)
++ {
++ donb_flags |= GMX_DONB_FORCES;
++ }
++
++ wallcycle_sub_start(wcycle, ewcsNONBONDED);
++ do_nonbonded(cr,fr,x,f,md,excl,
++ fr->bBHAM ?
++ enerd->grpp.ener[egBHAMSR] :
++ enerd->grpp.ener[egLJSR],
++ enerd->grpp.ener[egCOULSR],
++ enerd->grpp.ener[egGB],box_size,nrnb,
++ lambda,dvdl_nb,-1,-1,donb_flags);
++ wallcycle_sub_stop(wcycle, ewcsNONBONDED);
+ }
+
- if (ir->implicit_solvent) {
+ /* If we do foreign lambda and we have soft-core interactions
+ * we have to recalculate the (non-linear) energies contributions.
+ */
+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
+ {
++ wallcycle_sub_start(wcycle, ewcsNONBONDED);
+ init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
+
+ for(i=0; i<enerd->n_lambda; i++)
+ {
+ for (j=0;j<efptNR;j++)
+ {
+ lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
+ }
+ reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
+ do_nonbonded(cr,fr,x,f,md,excl,
+ fr->bBHAM ?
+ ed_lam.grpp.ener[egBHAMSR] :
+ ed_lam.grpp.ener[egLJSR],
+ ed_lam.grpp.ener[egCOULSR],
+ enerd->grpp.ener[egGB], box_size,nrnb,
+ lam_i,dvdl_dum,-1,-1,
+ GMX_DONB_FOREIGNLAMBDA);
+ sum_epot(&ir->opts,&ed_lam);
+ enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
+ }
+ destroy_enerdata(&ed_lam);
++ wallcycle_sub_stop(wcycle, ewcsNONBONDED);
+ }
+ where();
+
+ /* If we are doing GB, calculate bonded forces and apply corrections
+ * to the solvation forces */
+ /* MRS: Eventually, many need to include free energy contribution here! */
- enerd->dvdl_nonlin[efptVDW] += dvdlambda[efptVDW];
++ if (ir->implicit_solvent)
++ {
+ calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
+ ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
++ wallcycle_sub_stop(wcycle, ewcsBONDED);
+ }
+
+#ifdef GMX_MPI
+ if (TAKETIME)
+ {
+ t1=MPI_Wtime();
+ fr->t_fnbf += t1-t0;
+ }
+#endif
+
+ if (fepvals->sc_alpha!=0)
+ {
- enerd->dvdl_lin[efptVDW] += dvdlambda[efptVDW];
++ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
+ }
+ else
+ {
- enerd->dvdl_nonlin[efptCOUL] += dvdlambda[efptCOUL];
++ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
+ }
+
+ if (fepvals->sc_alpha!=0)
+
+ /* even though coulomb part is linear, we already added it, beacuse we
+ need to go through the vdw calculation anyway */
+ {
- enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
++ enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
+ }
+ else
+ {
- dvdlsum = dvdlambda[efptVDW]+dvdlambda[efptCOUL];
++ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
+ }
+
+ Vsr = 0;
+ if (bSepDVDL)
+ {
+ for(i=0; i<enerd->grpp.nener; i++)
+ {
+ Vsr +=
+ (fr->bBHAM ?
+ enerd->grpp.ener[egBHAMSR][i] :
+ enerd->grpp.ener[egLJSR][i])
+ + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
+ }
- if (fr->n_tpi == 0)
++ dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
+ PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
+ }
+ debug_gmx();
+
+
+ if (debug)
+ {
+ pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
+ }
+
+ /* Shift the coordinates. Must be done before bonded forces and PPPM,
+ * but is also necessary for SHAKE and update, therefore it can NOT
+ * go when no bonded forces have to be evaluated.
+ */
+
+ /* Here sometimes we would not need to shift with NBFonly,
+ * but we do so anyhow for consistency of the returned coordinates.
+ */
+ if (graph)
+ {
+ shift_self(graph,box,x);
+ if (TRICLINIC(box))
+ {
+ inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
+ }
+ else
+ {
+ inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
+ }
+ }
+ /* Check whether we need to do bondeds or correct for exclusions */
+ if (fr->bMolPBC &&
+ ((flags & GMX_FORCE_BONDED)
+ || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
+ {
+ /* Since all atoms are in the rectangular or triclinic unit-cell,
+ * only single box vector shifts (2 in x) are required.
+ */
+ set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
+ }
+ debug_gmx();
+
+ if (flags & GMX_FORCE_BONDED)
+ {
++ wallcycle_sub_start(wcycle, ewcsBONDED);
+ calc_bonds(fplog,cr->ms,
+ idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
+ DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
++ flags,
+ fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
+
+ /* Check if we have to determine energy differences
+ * at foreign lambda's.
+ */
+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
+ idef->ilsort != ilsortNO_FE)
+ {
+ if (idef->ilsort != ilsortFE_SORTED)
+ {
+ gmx_incons("The bonded interactions are not sorted for free energy");
+ }
+ init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
+
+ for(i=0; i<enerd->n_lambda; i++)
+ {
+ reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
+ for (j=0;j<efptNR;j++)
+ {
+ lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
+ }
+ calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
+ fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
+ sum_epot(&ir->opts,&ed_lam);
+ enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
+ }
+ destroy_enerdata(&ed_lam);
+ }
+ debug_gmx();
++
++ wallcycle_sub_stop(wcycle, ewcsBONDED);
+ }
+
+ where();
+
+ *cycles_pme = 0;
+ if (EEL_FULL(fr->eeltype))
+ {
+ bSB = (ir->nwall == 2);
+ if (bSB)
+ {
+ copy_mat(box,boxs);
+ svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
+ box_size[ZZ] *= ir->wall_ewald_zfac;
+ }
+
+ clear_mat(fr->vir_el_recip);
+
+ if (fr->bEwald)
+ {
- dvdlambda[efptCOUL] = 0;
- Vcorr = ewald_LRcorrection(fplog,md->start,md->start+md->homenr,
- cr,fr,
++ Vcorr = 0;
++ dvdl = 0;
++
++ /* With the Verlet scheme exclusion forces are calculated
++ * in the non-bonded kernel.
++ */
++ /* The TPI molecule does not have exclusions with the rest
++ * of the system and no intra-molecular PME grid contributions
++ * will be calculated in gmx_pme_calc_energy.
++ */
++ if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
++ ir->ewald_geometry != eewg3D ||
++ ir->epsilon_surface != 0)
+ {
- lambda[efptCOUL],&dvdlambda[efptCOUL],&vdip,&vcharge);
- PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdlambda);
- enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
- }
- else
- {
- if (ir->ewald_geometry != eewg3D || ir->epsilon_surface != 0)
++ int nthreads,t;
++
++ wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
++
++ if (fr->n_tpi > 0)
++ {
++ gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
++ }
++
++ nthreads = gmx_omp_nthreads_get(emntBonded);
++#pragma omp parallel for num_threads(nthreads) schedule(static)
++ for(t=0; t<nthreads; t++)
++ {
++ int s,e,i;
++ rvec *fnv;
++ tensor *vir;
++ real *Vcorrt,*dvdlt;
++ if (t == 0)
++ {
++ fnv = fr->f_novirsum;
++ vir = &fr->vir_el_recip;
++ Vcorrt = &Vcorr;
++ dvdlt = &dvdl;
++ }
++ else
++ {
++ fnv = fr->f_t[t].f;
++ vir = &fr->f_t[t].vir;
++ Vcorrt = &fr->f_t[t].Vcorr;
++ dvdlt = &fr->f_t[t].dvdl[efptCOUL];
++ for(i=0; i<fr->natoms_force; i++)
++ {
++ clear_rvec(fnv[i]);
++ }
++ clear_mat(*vir);
++ }
++ *dvdlt = 0;
++ *Vcorrt =
++ ewald_LRcorrection(fplog,
++ fr->excl_load[t],fr->excl_load[t+1],
++ cr,t,fr,
+ md->chargeA,
+ md->nChargePerturbed ? md->chargeB : NULL,
++ ir->cutoff_scheme != ecutsVERLET,
+ excl,x,bSB ? boxs : box,mu_tot,
+ ir->ewald_geometry,
+ ir->epsilon_surface,
- gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
++ fnv,*vir,
++ lambda[efptCOUL],dvdlt);
++ }
++ if (nthreads > 1)
+ {
- /* The TPI molecule does not have exclusions with the rest
- * of the system and no intra-molecular PME grid contributions
- * will be calculated in gmx_pme_calc_energy.
- */
- Vcorr = 0;
++ reduce_thread_forces(fr->natoms_force,fr->f_novirsum,
++ fr->vir_el_recip,
++ &Vcorr,efptCOUL,&dvdl,
++ nthreads,fr->f_t);
+ }
- dvdlambda[efptCOUL] = 0;
++
++ wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
++ }
++
++ if (fr->n_tpi == 0)
++ {
++ Vcorr += ewald_charge_correction(cr,fr,lambda[efptCOUL],box,
++ &dvdl,fr->vir_el_recip);
+ }
++
++ PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdl);
++ enerd->dvdl_lin[efptCOUL] += dvdl;
+ }
+
- if (flags & GMX_FORCE_VIRIAL)
+ status = 0;
++ dvdl = 0;
+ switch (fr->eeltype)
+ {
+ case eelPME:
+ case eelPMESWITCH:
+ case eelPMEUSER:
+ case eelPMEUSERSWITCH:
+ case eelP3M_AD:
+ if (cr->duty & DUTY_PME)
+ {
+ assert(fr->n_tpi >= 0);
+ if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
+ {
+ pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
+ if (flags & GMX_FORCE_FORCES)
+ {
+ pme_flags |= GMX_PME_CALC_F;
+ }
- &Vlr,lambda[efptCOUL],&dvdlambda[efptCOUL],
++ if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
+ {
+ pme_flags |= GMX_PME_CALC_ENER_VIR;
+ }
+ if (fr->n_tpi > 0)
+ {
+ /* We don't calculate f, but we do want the potential */
+ pme_flags |= GMX_PME_CALC_POT;
+ }
+ wallcycle_start(wcycle,ewcPMEMESH);
+ status = gmx_pme_do(fr->pmedata,
+ md->start,md->homenr - fr->n_tpi,
+ x,fr->f_novirsum,
+ md->chargeA,md->chargeB,
+ bSB ? boxs : box,cr,
+ DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
+ DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
+ nrnb,wcycle,
+ fr->vir_el_recip,fr->ewaldcoeff,
- PRINT_SEPDVDL("PME mesh",Vlr,dvdlambda[efptCOUL]);
++ &Vlr,lambda[efptCOUL],&dvdl,
+ pme_flags);
+ *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
+
+ /* We should try to do as little computation after
+ * this as possible, because parallel PME synchronizes
+ * the nodes, so we want all load imbalance of the rest
+ * of the force calculation to be before the PME call.
+ * DD load balancing is done on the whole time of
+ * the force call (without PME).
+ */
+ }
+ if (fr->n_tpi > 0)
+ {
+ /* Determine the PME grid energy of the test molecule
+ * with the PME grid potential of the other charges.
+ */
+ gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
+ x + md->homenr - fr->n_tpi,
+ md->chargeA + md->homenr - fr->n_tpi,
+ &Vlr);
+ }
- lambda[efptCOUL],&dvdlambda[efptCOUL],fr->ewald_table);
- PRINT_SEPDVDL("Ewald long-range",Vlr,dvdlambda[efptCOUL]);
++ PRINT_SEPDVDL("PME mesh",Vlr,dvdl);
+ }
+ else
+ {
+ /* Energies and virial are obtained later from the PME nodes */
+ /* but values have to be zeroed out here */
+ Vlr=0.0;
+ }
+ break;
+ case eelEWALD:
+ Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
+ md->chargeA,md->chargeB,
+ box_size,cr,md->homenr,
+ fr->vir_el_recip,fr->ewaldcoeff,
- enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
++ lambda[efptCOUL],&dvdl,fr->ewald_table);
++ PRINT_SEPDVDL("Ewald long-range",Vlr,dvdl);
+ break;
+ default:
+ Vlr = 0;
+ gmx_fatal(FARGS,"No such electrostatics method implemented %s",
+ eel_names[fr->eeltype]);
+ }
+ if (status != 0)
+ {
+ gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
+ status,EELTYPE(fr->eeltype));
+ }
- dvdlambda[efptCOUL] = 0;
-
- if (fr->eeltype != eelRF_NEC)
++ enerd->dvdl_lin[efptCOUL] += dvdl;
+ enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
+ if (debug)
+ {
+ fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
+ Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
+ pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
+ pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
+ }
+ }
+ else
+ {
+ if (EEL_RF(fr->eeltype))
+ {
- fr->fshift,&pbc,lambda[efptCOUL],&dvdlambda[efptCOUL]);
++ /* With the Verlet scheme exclusion forces are calculated
++ * in the non-bonded kernel.
++ */
++ if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
+ {
++ dvdl = 0;
+ enerd->term[F_RF_EXCL] =
+ RF_excl_correction(fplog,fr,graph,md,excl,x,f,
- enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
++ fr->fshift,&pbc,lambda[efptCOUL],&dvdl);
+ }
+
- enerd->term[F_RF_EXCL],dvdlambda[efptCOUL]);
++ enerd->dvdl_lin[efptCOUL] += dvdl;
+ PRINT_SEPDVDL("RF exclusion correction",
++ enerd->term[F_RF_EXCL],dvdl);
+ }
+ }
+ where();
+ debug_gmx();
+
+ if (debug)
+ {
+ print_nrnb(debug,nrnb);
+ }
+ debug_gmx();
+
+#ifdef GMX_MPI
+ if (TAKETIME)
+ {
+ t2=MPI_Wtime();
+ MPI_Barrier(cr->mpi_comm_mygroup);
+ t3=MPI_Wtime();
+ fr->t_wait += t3-t2;
+ if (fr->timesteps == 11)
+ {
+ fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
+ cr->nodeid, gmx_step_str(fr->timesteps,buf),
+ 100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
+ (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
+ }
+ fr->timesteps++;
+ }
+#endif
+
+ if (debug)
+ {
+ pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
+ }
+
+}
+
+void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
+{
+ int i,n2;
+
+ for(i=0; i<F_NRE; i++)
+ {
+ enerd->term[i] = 0;
+ }
+
+
+ for(i=0; i<efptNR; i++) {
+ enerd->dvdl_lin[i] = 0;
+ enerd->dvdl_nonlin[i] = 0;
+ }
+
+ n2=ngener*ngener;
+ if (debug)
+ {
+ fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
+ }
+ enerd->grpp.nener = n2;
+ for(i=0; (i<egNR); i++)
+ {
+ snew(enerd->grpp.ener[i],n2);
+ }
+
+ if (n_lambda)
+ {
+ enerd->n_lambda = 1 + n_lambda;
+ snew(enerd->enerpart_lambda,enerd->n_lambda);
+ }
+ else
+ {
+ enerd->n_lambda = 0;
+ }
+}
+
+void destroy_enerdata(gmx_enerdata_t *enerd)
+{
+ int i;
+
+ for(i=0; (i<egNR); i++)
+ {
+ sfree(enerd->grpp.ener[i]);
+ }
+
+ if (enerd->n_lambda)
+ {
+ sfree(enerd->enerpart_lambda);
+ }
+}
+
+static real sum_v(int n,real v[])
+{
+ real t;
+ int i;
+
+ t = 0.0;
+ for(i=0; (i<n); i++)
+ t = t + v[i];
+
+ return t;
+}
+
+void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd)
+{
+ gmx_grppairener_t *grpp;
+ real *epot;
+ int i;
+
+ grpp = &enerd->grpp;
+ epot = enerd->term;
+
+ /* Accumulate energies */
+ epot[F_COUL_SR] = sum_v(grpp->nener,grpp->ener[egCOULSR]);
+ epot[F_LJ] = sum_v(grpp->nener,grpp->ener[egLJSR]);
+ epot[F_LJ14] = sum_v(grpp->nener,grpp->ener[egLJ14]);
+ epot[F_COUL14] = sum_v(grpp->nener,grpp->ener[egCOUL14]);
+ epot[F_COUL_LR] = sum_v(grpp->nener,grpp->ener[egCOULLR]);
+ epot[F_LJ_LR] = sum_v(grpp->nener,grpp->ener[egLJLR]);
+ /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
+ epot[F_GBPOL] += sum_v(grpp->nener,grpp->ener[egGB]);
+
+/* lattice part of LR doesnt belong to any group
+ * and has been added earlier
+ */
+ epot[F_BHAM] = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
+ epot[F_BHAM_LR] = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
+
+ epot[F_EPOT] = 0;
+ for(i=0; (i<F_EPOT); i++)
+ {
+ if (i != F_DISRESVIOL && i != F_ORIRESDEV)
+ {
+ epot[F_EPOT] += epot[i];
+ }
+ }
+}
+
+void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
+{
+ int i,j,index;
+ double dlam;
+
+ enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */
+ enerd->term[F_DVDL] = 0.0;
+ for (i=0;i<efptNR;i++)
+ {
+ if (fepvals->separate_dvdl[i])
+ {
+ /* could this be done more readably/compactly? */
+ switch (i) {
+ case (efptCOUL):
+ index = F_DVDL_COUL;
+ break;
+ case (efptVDW):
+ index = F_DVDL_VDW;
+ break;
+ case (efptBONDED):
+ index = F_DVDL_BONDED;
+ break;
+ case (efptRESTRAINT):
+ index = F_DVDL_RESTRAINT;
+ break;
+ case (efptMASS):
+ index = F_DKDL;
+ break;
+ default:
+ index = F_DVDL;
+ break;
+ }
+ enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
+ if (debug)
+ {
+ fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
+ efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
+ }
+ }
+ else
+ {
+ enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
+ if (debug)
+ {
+ fprintf(debug,"dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
+ efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
+ }
+ }
+ }
+
+ /* Notes on the foreign lambda free energy difference evaluation:
+ * Adding the potential and ekin terms that depend linearly on lambda
+ * as delta lam * dvdl to the energy differences is exact.
+ * For the constraints this is not exact, but we have no other option
+ * without literally changing the lengths and reevaluating the energies at each step.
+ * (try to remedy this post 4.6 - MRS)
+ * For the non-bonded LR term we assume that the soft-core (if present)
+ * no longer affects the energy beyond the short-range cut-off,
+ * which is a very good approximation (except for exotic settings).
+ * (investigate how to overcome this post 4.6 - MRS)
+ */
+
+ for(i=0; i<fepvals->n_lambda; i++)
+ { /* note we are iterating over fepvals here!
+ For the current lam, dlam = 0 automatically,
+ so we don't need to add anything to the
+ enerd->enerpart_lambda[0] */
+
+ /* we don't need to worry about dvdl contributions to the current lambda, because
+ it's automatically zero */
+
+ /* first kinetic energy term */
+ dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
+
+ enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
+
+ for (j=0;j<efptNR;j++)
+ {
+ if (j==efptMASS) {continue;} /* no other mass term to worry about */
+
+ dlam = (fepvals->all_lambda[j][i]-lambda[j]);
+ enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
+ if (debug)
+ {
+ fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
+ fepvals->all_lambda[j][i],efpt_names[j],
+ (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
+ dlam,enerd->dvdl_lin[j]);
+ }
+ }
+ }
+}
+
+void reset_enerdata(t_grpopts *opts,
+ t_forcerec *fr,gmx_bool bNS,
+ gmx_enerdata_t *enerd,
+ gmx_bool bMaster)
+{
+ gmx_bool bKeepLR;
+ int i,j;
+
+ /* First reset all energy components, except for the long range terms
+ * on the master at non neighbor search steps, since the long range
+ * terms have already been summed at the last neighbor search step.
+ */
+ bKeepLR = (fr->bTwinRange && !bNS);
+ for(i=0; (i<egNR); i++) {
+ if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
+ for(j=0; (j<enerd->grpp.nener); j++)
+ enerd->grpp.ener[i][j] = 0.0;
+ }
+ }
+ for (i=0;i<efptNR;i++)
+ {
+ enerd->dvdl_lin[i] = 0.0;
+ enerd->dvdl_nonlin[i] = 0.0;
+ }
+
+ /* Normal potential energy components */
+ for(i=0; (i<=F_EPOT); i++) {
+ enerd->term[i] = 0.0;
+ }
+ /* Initialize the dVdlambda term with the long range contribution */
+ /* Initialize the dvdl term with the long range contribution */
+ enerd->term[F_DVDL] = 0.0;
+ enerd->term[F_DVDL_COUL] = 0.0;
+ enerd->term[F_DVDL_VDW] = 0.0;
+ enerd->term[F_DVDL_BONDED] = 0.0;
+ enerd->term[F_DVDL_RESTRAINT] = 0.0;
+ enerd->term[F_DKDL] = 0.0;
+ if (enerd->n_lambda > 0)
+ {
+ for(i=0; i<enerd->n_lambda; i++)
+ {
+ enerd->enerpart_lambda[i] = 0.0;
+ }
+ }
+}
--- /dev/null
- #include "mdrun.h"
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include <string.h>
++#include <assert.h>
+#include "sysstuff.h"
+#include "typedefs.h"
++#include "vec.h"
++#include "maths.h"
+#include "macros.h"
+#include "smalloc.h"
+#include "macros.h"
++#include "gmx_fatal.h"
++#include "gmx_fatal_collective.h"
+#include "physics.h"
+#include "force.h"
++#include "tables.h"
+#include "nonbonded.h"
+#include "invblock.h"
+#include "names.h"
+#include "network.h"
+#include "pbc.h"
+#include "ns.h"
+#include "mshift.h"
+#include "txtdump.h"
+#include "coulomb.h"
- #include "gmx_detectcpu.h"
++#include "md_support.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "qmmm.h"
+#include "copyrite.h"
+#include "mtop_util.h"
-
++#include "nbnxn_search.h"
++#include "nbnxn_consts.h"
++#include "statutil.h"
++#include "gmx_omp_nthreads.h"
+
+#ifdef _MSC_VER
+/* MSVC definition for __cpuid() */
+#include <intrin.h>
+#endif
+
- t_forcerec *fr,gmx_bool bNoSolvOpt)
++#include "types/nbnxn_cuda_types_ext.h"
++#include "gpu_utils.h"
++#include "nbnxn_cuda_data_mgmt.h"
++#include "pmalloc_cuda.h"
+
+t_forcerec *mk_forcerec(void)
+{
+ t_forcerec *fr;
+
+ snew(fr,1);
+
+ return fr;
+}
+
+#ifdef DEBUG
+static void pr_nbfp(FILE *fp,real *nbfp,gmx_bool bBHAM,int atnr)
+{
+ int i,j;
+
+ for(i=0; (i<atnr); i++) {
+ for(j=0; (j<atnr); j++) {
+ fprintf(fp,"%2d - %2d",i,j);
+ if (bBHAM)
+ fprintf(fp," a=%10g, b=%10g, c=%10g\n",BHAMA(nbfp,atnr,i,j),
+ BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j));
+ else
+ fprintf(fp," c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j),
+ C12(nbfp,atnr,i,j));
+ }
+ }
+}
+#endif
+
+static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
+{
+ real *nbfp;
+ int i,j,k,atnr;
+
+ atnr=idef->atnr;
+ if (bBHAM) {
+ snew(nbfp,3*atnr*atnr);
+ for(i=k=0; (i<atnr); i++) {
+ for(j=0; (j<atnr); j++,k++) {
+ BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
+ BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
+ BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c;
+ }
+ }
+ }
+ else {
+ snew(nbfp,2*atnr*atnr);
+ for(i=k=0; (i<atnr); i++) {
+ for(j=0; (j<atnr); j++,k++) {
+ C6(nbfp,atnr,i,j) = idef->iparams[k].lj.c6;
+ C12(nbfp,atnr,i,j) = idef->iparams[k].lj.c12;
+ }
+ }
+ }
+ return nbfp;
+}
+
+/* This routine sets fr->solvent_opt to the most common solvent in the
+ * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
+ * the fr->solvent_type array with the correct type (or esolNO).
+ *
+ * Charge groups that fulfill the conditions but are not identical to the
+ * most common one will be marked as esolNO in the solvent_type array.
+ *
+ * TIP3p is identical to SPC for these purposes, so we call it
+ * SPC in the arrays (Apologies to Bill Jorgensen ;-)
+ *
+ * NOTE: QM particle should not
+ * become an optimized solvent. Not even if there is only one charge
+ * group in the QM
+ */
+
+typedef struct
+{
+ int model;
+ int count;
+ int vdwtype[4];
+ real charge[4];
+} solvent_parameters_t;
+
+static void
+check_solvent_cg(const gmx_moltype_t *molt,
+ int cg0,
+ int nmol,
+ const unsigned char *qm_grpnr,
+ const t_grps *qm_grps,
+ t_forcerec * fr,
+ int *n_solvent_parameters,
+ solvent_parameters_t **solvent_parameters_p,
+ int cginfo,
+ int *cg_sp)
+{
+ const t_blocka * excl;
+ t_atom *atom;
+ int j,k;
+ int j0,j1,nj;
+ gmx_bool perturbed;
+ gmx_bool has_vdw[4];
+ gmx_bool match;
+ real tmp_charge[4];
+ int tmp_vdwtype[4];
+ int tjA;
+ gmx_bool qm;
+ solvent_parameters_t *solvent_parameters;
+
+ /* We use a list with parameters for each solvent type.
+ * Every time we discover a new molecule that fulfills the basic
+ * conditions for a solvent we compare with the previous entries
+ * in these lists. If the parameters are the same we just increment
+ * the counter for that type, and otherwise we create a new type
+ * based on the current molecule.
+ *
+ * Once we've finished going through all molecules we check which
+ * solvent is most common, and mark all those molecules while we
+ * clear the flag on all others.
+ */
+
+ solvent_parameters = *solvent_parameters_p;
+
+ /* Mark the cg first as non optimized */
+ *cg_sp = -1;
+
+ /* Check if this cg has no exclusions with atoms in other charge groups
+ * and all atoms inside the charge group excluded.
+ * We only have 3 or 4 atom solvent loops.
+ */
+ if (GET_CGINFO_EXCL_INTER(cginfo) ||
+ !GET_CGINFO_EXCL_INTRA(cginfo))
+ {
+ return;
+ }
+
+ /* Get the indices of the first atom in this charge group */
+ j0 = molt->cgs.index[cg0];
+ j1 = molt->cgs.index[cg0+1];
+
+ /* Number of atoms in our molecule */
+ nj = j1 - j0;
+
+ if (debug) {
+ fprintf(debug,
+ "Moltype '%s': there are %d atoms in this charge group\n",
+ *molt->name,nj);
+ }
+
+ /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
+ * otherwise skip it.
+ */
+ if (nj<3 || nj>4)
+ {
+ return;
+ }
+
+ /* Check if we are doing QM on this group */
+ qm = FALSE;
+ if (qm_grpnr != NULL)
+ {
+ for(j=j0 ; j<j1 && !qm; j++)
+ {
+ qm = (qm_grpnr[j] < qm_grps->nr - 1);
+ }
+ }
+ /* Cannot use solvent optimization with QM */
+ if (qm)
+ {
+ return;
+ }
+
+ atom = molt->atoms.atom;
+
+ /* Still looks like a solvent, time to check parameters */
+
+ /* If it is perturbed (free energy) we can't use the solvent loops,
+ * so then we just skip to the next molecule.
+ */
+ perturbed = FALSE;
+
+ for(j=j0; j<j1 && !perturbed; j++)
+ {
+ perturbed = PERTURBED(atom[j]);
+ }
+
+ if (perturbed)
+ {
+ return;
+ }
+
+ /* Now it's only a question if the VdW and charge parameters
+ * are OK. Before doing the check we compare and see if they are
+ * identical to a possible previous solvent type.
+ * First we assign the current types and charges.
+ */
+ for(j=0; j<nj; j++)
+ {
+ tmp_vdwtype[j] = atom[j0+j].type;
+ tmp_charge[j] = atom[j0+j].q;
+ }
+
+ /* Does it match any previous solvent type? */
+ for(k=0 ; k<*n_solvent_parameters; k++)
+ {
+ match = TRUE;
+
+
+ /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
+ if( (solvent_parameters[k].model==esolSPC && nj!=3) ||
+ (solvent_parameters[k].model==esolTIP4P && nj!=4) )
+ match = FALSE;
+
+ /* Check that types & charges match for all atoms in molecule */
+ for(j=0 ; j<nj && match==TRUE; j++)
+ {
+ if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
+ {
+ match = FALSE;
+ }
+ if(tmp_charge[j] != solvent_parameters[k].charge[j])
+ {
+ match = FALSE;
+ }
+ }
+ if (match == TRUE)
+ {
+ /* Congratulations! We have a matched solvent.
+ * Flag it with this type for later processing.
+ */
+ *cg_sp = k;
+ solvent_parameters[k].count += nmol;
+
+ /* We are done with this charge group */
+ return;
+ }
+ }
+
+ /* If we get here, we have a tentative new solvent type.
+ * Before we add it we must check that it fulfills the requirements
+ * of the solvent optimized loops. First determine which atoms have
+ * VdW interactions.
+ */
+ for(j=0; j<nj; j++)
+ {
+ has_vdw[j] = FALSE;
+ tjA = tmp_vdwtype[j];
+
+ /* Go through all other types and see if any have non-zero
+ * VdW parameters when combined with this one.
+ */
+ for(k=0; k<fr->ntype && (has_vdw[j]==FALSE); k++)
+ {
+ /* We already checked that the atoms weren't perturbed,
+ * so we only need to check state A now.
+ */
+ if (fr->bBHAM)
+ {
+ has_vdw[j] = (has_vdw[j] ||
+ (BHAMA(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
+ (BHAMB(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
+ (BHAMC(fr->nbfp,fr->ntype,tjA,k) != 0.0));
+ }
+ else
+ {
+ /* Standard LJ */
+ has_vdw[j] = (has_vdw[j] ||
+ (C6(fr->nbfp,fr->ntype,tjA,k) != 0.0) ||
+ (C12(fr->nbfp,fr->ntype,tjA,k) != 0.0));
+ }
+ }
+ }
+
+ /* Now we know all we need to make the final check and assignment. */
+ if (nj == 3)
+ {
+ /* So, is it an SPC?
+ * For this we require that all atoms have charge,
+ * the charges on atom 2 & 3 should be the same, and only
+ * atom 1 should have VdW.
+ */
+ if (has_vdw[0] == TRUE &&
+ has_vdw[1] == FALSE &&
+ has_vdw[2] == FALSE &&
+ tmp_charge[0] != 0 &&
+ tmp_charge[1] != 0 &&
+ tmp_charge[2] == tmp_charge[1])
+ {
+ srenew(solvent_parameters,*n_solvent_parameters+1);
+ solvent_parameters[*n_solvent_parameters].model = esolSPC;
+ solvent_parameters[*n_solvent_parameters].count = nmol;
+ for(k=0;k<3;k++)
+ {
+ solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
+ solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
+ }
+
+ *cg_sp = *n_solvent_parameters;
+ (*n_solvent_parameters)++;
+ }
+ }
+ else if (nj==4)
+ {
+ /* Or could it be a TIP4P?
+ * For this we require that atoms 2,3,4 have charge, but not atom 1.
+ * Only atom 1 should have VdW.
+ */
+ if(has_vdw[0] == TRUE &&
+ has_vdw[1] == FALSE &&
+ has_vdw[2] == FALSE &&
+ has_vdw[3] == FALSE &&
+ tmp_charge[0] == 0 &&
+ tmp_charge[1] != 0 &&
+ tmp_charge[2] == tmp_charge[1] &&
+ tmp_charge[3] != 0)
+ {
+ srenew(solvent_parameters,*n_solvent_parameters+1);
+ solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
+ solvent_parameters[*n_solvent_parameters].count = nmol;
+ for(k=0;k<4;k++)
+ {
+ solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
+ solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
+ }
+
+ *cg_sp = *n_solvent_parameters;
+ (*n_solvent_parameters)++;
+ }
+ }
+
+ *solvent_parameters_p = solvent_parameters;
+}
+
+static void
+check_solvent(FILE * fp,
+ const gmx_mtop_t * mtop,
+ t_forcerec * fr,
+ cginfo_mb_t *cginfo_mb)
+{
+ const t_block * cgs;
+ const t_block * mols;
+ const gmx_moltype_t *molt;
+ int mb,mol,cg_mol,at_offset,cg_offset,am,cgm,i,nmol_ch,nmol;
+ int n_solvent_parameters;
+ solvent_parameters_t *solvent_parameters;
+ int **cg_sp;
+ int bestsp,bestsol;
+
+ if (debug)
+ {
+ fprintf(debug,"Going to determine what solvent types we have.\n");
+ }
+
+ mols = &mtop->mols;
+
+ n_solvent_parameters = 0;
+ solvent_parameters = NULL;
+ /* Allocate temporary array for solvent type */
+ snew(cg_sp,mtop->nmolblock);
+
+ cg_offset = 0;
+ at_offset = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molt = &mtop->moltype[mtop->molblock[mb].type];
+ cgs = &molt->cgs;
+ /* Here we have to loop over all individual molecules
+ * because we need to check for QMMM particles.
+ */
+ snew(cg_sp[mb],cginfo_mb[mb].cg_mod);
+ nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
+ nmol = mtop->molblock[mb].nmol/nmol_ch;
+ for(mol=0; mol<nmol_ch; mol++)
+ {
+ cgm = mol*cgs->nr;
+ am = mol*cgs->index[cgs->nr];
+ for(cg_mol=0; cg_mol<cgs->nr; cg_mol++)
+ {
+ check_solvent_cg(molt,cg_mol,nmol,
+ mtop->groups.grpnr[egcQMMM] ?
+ mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
+ &mtop->groups.grps[egcQMMM],
+ fr,
+ &n_solvent_parameters,&solvent_parameters,
+ cginfo_mb[mb].cginfo[cgm+cg_mol],
+ &cg_sp[mb][cgm+cg_mol]);
+ }
+ }
+ cg_offset += cgs->nr;
+ at_offset += cgs->index[cgs->nr];
+ }
+
+ /* Puh! We finished going through all charge groups.
+ * Now find the most common solvent model.
+ */
+
+ /* Most common solvent this far */
+ bestsp = -2;
+ for(i=0;i<n_solvent_parameters;i++)
+ {
+ if (bestsp == -2 ||
+ solvent_parameters[i].count > solvent_parameters[bestsp].count)
+ {
+ bestsp = i;
+ }
+ }
+
+ if (bestsp >= 0)
+ {
+ bestsol = solvent_parameters[bestsp].model;
+ }
+ else
+ {
+ bestsol = esolNO;
+ }
+
+#ifdef DISABLE_WATER_NLIST
+ bestsol = esolNO;
+#endif
+
+ fr->nWatMol = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
+ nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
+ for(i=0; i<cginfo_mb[mb].cg_mod; i++)
+ {
+ if (cg_sp[mb][i] == bestsp)
+ {
+ SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],bestsol);
+ fr->nWatMol += nmol;
+ }
+ else
+ {
+ SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i],esolNO);
+ }
+ }
+ sfree(cg_sp[mb]);
+ }
+ sfree(cg_sp);
+
+ if (bestsol != esolNO && fp!=NULL)
+ {
+ fprintf(fp,"\nEnabling %s-like water optimization for %d molecules.\n\n",
+ esol_names[bestsol],
+ solvent_parameters[bestsp].count);
+ }
+
+ sfree(solvent_parameters);
+ fr->solvent_opt = bestsol;
+}
+
++enum { acNONE=0, acCONSTRAINT, acSETTLE };
++
+static cginfo_mb_t *init_cginfo_mb(FILE *fplog,const gmx_mtop_t *mtop,
- gmx_bool bId,*bExcl,bExclIntraAll,bExclInter;
++ t_forcerec *fr,gmx_bool bNoSolvOpt,
++ gmx_bool *bExcl_IntraCGAll_InterCGNone)
+{
+ const t_block *cgs;
+ const t_blocka *excl;
+ const gmx_moltype_t *molt;
+ const gmx_molblock_t *molb;
+ cginfo_mb_t *cginfo_mb;
++ gmx_bool *type_VDW;
+ int *cginfo;
+ int cg_offset,a_offset,cgm,am;
+ int mb,m,ncg_tot,cg,a0,a1,gid,ai,j,aj,excl_nalloc;
- for(ai=a0; ai<a1; ai++) {
++ int *a_con;
++ int ftype;
++ int ia;
++ gmx_bool bId,*bExcl,bExclIntraAll,bExclInter,bHaveVDW,bHaveQ;
+
+ ncg_tot = ncg_mtop(mtop);
+ snew(cginfo_mb,mtop->nmolblock);
+
++ snew(type_VDW,fr->ntype);
++ for(ai=0; ai<fr->ntype; ai++)
++ {
++ type_VDW[ai] = FALSE;
++ for(j=0; j<fr->ntype; j++)
++ {
++ type_VDW[ai] = type_VDW[ai] ||
++ fr->bBHAM ||
++ C6(fr->nbfp,fr->ntype,ai,j) != 0 ||
++ C12(fr->nbfp,fr->ntype,ai,j) != 0;
++ }
++ }
++
++ *bExcl_IntraCGAll_InterCGNone = TRUE;
++
+ excl_nalloc = 10;
+ snew(bExcl,excl_nalloc);
+ cg_offset = 0;
+ a_offset = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+ cgs = &molt->cgs;
+ excl = &molt->excls;
+
+ /* Check if the cginfo is identical for all molecules in this block.
+ * If so, we only need an array of the size of one molecule.
+ * Otherwise we make an array of #mol times #cgs per molecule.
+ */
+ bId = TRUE;
+ am = 0;
+ for(m=0; m<molb->nmol; m++)
+ {
+ am = m*cgs->index[cgs->nr];
+ for(cg=0; cg<cgs->nr; cg++)
+ {
+ a0 = cgs->index[cg];
+ a1 = cgs->index[cg+1];
+ if (ggrpnr(&mtop->groups,egcENER,a_offset+am+a0) !=
+ ggrpnr(&mtop->groups,egcENER,a_offset +a0))
+ {
+ bId = FALSE;
+ }
+ if (mtop->groups.grpnr[egcQMMM] != NULL)
+ {
+ for(ai=a0; ai<a1; ai++)
+ {
+ if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
+ mtop->groups.grpnr[egcQMMM][a_offset +ai])
+ {
+ bId = FALSE;
+ }
+ }
+ }
+ }
+ }
+
+ cginfo_mb[mb].cg_start = cg_offset;
+ cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
+ cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
+ snew(cginfo_mb[mb].cginfo,cginfo_mb[mb].cg_mod);
+ cginfo = cginfo_mb[mb].cginfo;
+
++ /* Set constraints flags for constrained atoms */
++ snew(a_con,molt->atoms.nr);
++ for(ftype=0; ftype<F_NRE; ftype++)
++ {
++ if (interaction_function[ftype].flags & IF_CONSTRAINT)
++ {
++ int nral;
++
++ nral = NRAL(ftype);
++ for(ia=0; ia<molt->ilist[ftype].nr; ia+=1+nral)
++ {
++ int a;
++
++ for(a=0; a<nral; a++)
++ {
++ a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
++ (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
++ }
++ }
++ }
++ }
++
+ for(m=0; m<(bId ? 1 : molb->nmol); m++)
+ {
+ cgm = m*cgs->nr;
+ am = m*cgs->index[cgs->nr];
+ for(cg=0; cg<cgs->nr; cg++)
+ {
+ a0 = cgs->index[cg];
+ a1 = cgs->index[cg+1];
+
+ /* Store the energy group in cginfo */
+ gid = ggrpnr(&mtop->groups,egcENER,a_offset+am+a0);
+ SET_CGINFO_GID(cginfo[cgm+cg],gid);
+
+ /* Check the intra/inter charge group exclusions */
+ if (a1-a0 > excl_nalloc) {
+ excl_nalloc = a1 - a0;
+ srenew(bExcl,excl_nalloc);
+ }
+ /* bExclIntraAll: all intra cg interactions excluded
+ * bExclInter: any inter cg interactions excluded
+ */
+ bExclIntraAll = TRUE;
+ bExclInter = FALSE;
- for(aj=a0; aj<a1; aj++) {
++ bHaveVDW = FALSE;
++ bHaveQ = FALSE;
++ for(ai=a0; ai<a1; ai++)
++ {
++ /* Check VDW and electrostatic interactions */
++ bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
++ type_VDW[molt->atoms.atom[ai].typeB]);
++ bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
++ molt->atoms.atom[ai].qB != 0);
++
+ /* Clear the exclusion list for atom ai */
- double qsum;
++ for(aj=a0; aj<a1; aj++)
++ {
+ bExcl[aj-a0] = FALSE;
+ }
+ /* Loop over all the exclusions of atom ai */
+ for(j=excl->index[ai]; j<excl->index[ai+1]; j++)
+ {
+ aj = excl->a[j];
+ if (aj < a0 || aj >= a1)
+ {
+ bExclInter = TRUE;
+ }
+ else
+ {
+ bExcl[aj-a0] = TRUE;
+ }
+ }
+ /* Check if ai excludes a0 to a1 */
+ for(aj=a0; aj<a1; aj++)
+ {
+ if (!bExcl[aj-a0])
+ {
+ bExclIntraAll = FALSE;
+ }
+ }
++
++ switch (a_con[ai])
++ {
++ case acCONSTRAINT:
++ SET_CGINFO_CONSTR(cginfo[cgm+cg]);
++ break;
++ case acSETTLE:
++ SET_CGINFO_SETTLE(cginfo[cgm+cg]);
++ break;
++ default:
++ break;
++ }
+ }
+ if (bExclIntraAll)
+ {
+ SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
+ }
+ if (bExclInter)
+ {
+ SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
+ }
+ if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
+ {
+ /* The size in cginfo is currently only read with DD */
+ gmx_fatal(FARGS,"A charge group has size %d which is larger than the limit of %d atoms",a1-a0,MAX_CHARGEGROUP_SIZE);
+ }
++ if (bHaveVDW)
++ {
++ SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
++ }
++ if (bHaveQ)
++ {
++ SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
++ }
++ /* Store the charge group size */
+ SET_CGINFO_NATOMS(cginfo[cgm+cg],a1-a0);
++
++ if (!bExclIntraAll || bExclInter)
++ {
++ *bExcl_IntraCGAll_InterCGNone = FALSE;
++ }
+ }
+ }
++
++ sfree(a_con);
++
+ cg_offset += molb->nmol*cgs->nr;
+ a_offset += molb->nmol*cgs->index[cgs->nr];
+ }
+ sfree(bExcl);
+
+ /* the solvent optimizer is called after the QM is initialized,
+ * because we don't want to have the QM subsystem to become an
+ * optimized solvent
+ */
+
+ check_solvent(fplog,mtop,fr,cginfo_mb);
+
+ if (getenv("GMX_NO_SOLV_OPT"))
+ {
+ if (fplog)
+ {
+ fprintf(fplog,"Found environment variable GMX_NO_SOLV_OPT.\n"
+ "Disabling all solvent optimization\n");
+ }
+ fr->solvent_opt = esolNO;
+ }
+ if (bNoSolvOpt)
+ {
+ fr->solvent_opt = esolNO;
+ }
+ if (!fr->solvent_opt)
+ {
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ for(cg=0; cg<cginfo_mb[mb].cg_mod; cg++)
+ {
+ SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg],esolNO);
+ }
+ }
+ }
+
+ return cginfo_mb;
+}
+
+static int *cginfo_expand(int nmb,cginfo_mb_t *cgi_mb)
+{
+ int ncg,mb,cg;
+ int *cginfo;
+
+ ncg = cgi_mb[nmb-1].cg_end;
+ snew(cginfo,ncg);
+ mb = 0;
+ for(cg=0; cg<ncg; cg++)
+ {
+ while (cg >= cgi_mb[mb].cg_end)
+ {
+ mb++;
+ }
+ cginfo[cg] =
+ cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
+ }
+
+ return cginfo;
+}
+
+static void set_chargesum(FILE *log,t_forcerec *fr,const gmx_mtop_t *mtop)
+{
- qsum = 0;
++ double qsum,q2sum,q;
+ int mb,nmol,i;
+ const t_atoms *atoms;
+
- qsum += nmol*atoms->atom[i].q;
++ qsum = 0;
++ q2sum = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ nmol = mtop->molblock[mb].nmol;
+ atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+ for(i=0; i<atoms->nr; i++)
+ {
- fr->qsum[0] = qsum;
++ q = atoms->atom[i].q;
++ qsum += nmol*q;
++ q2sum += nmol*q*q;
+ }
+ }
- qsum = 0;
++ fr->qsum[0] = qsum;
++ fr->q2sum[0] = q2sum;
+ if (fr->efep != efepNO)
+ {
- qsum += nmol*atoms->atom[i].qB;
++ qsum = 0;
++ q2sum = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++)
+ {
+ nmol = mtop->molblock[mb].nmol;
+ atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+ for(i=0; i<atoms->nr; i++)
+ {
- fr->qsum[1] = qsum;
++ q = atoms->atom[i].qB;
++ qsum += nmol*q;
++ q2sum += nmol*q*q;
+ }
- fr->qsum[1] = fr->qsum[0];
++ fr->qsum[1] = qsum;
++ fr->q2sum[1] = q2sum;
+ }
+ }
+ else
+ {
- gmx_bool bNoSolvOpt,
++ fr->qsum[1] = fr->qsum[0];
++ fr->q2sum[1] = fr->q2sum[0];
+ }
+ if (log) {
+ if (fr->efep == efepNO)
+ fprintf(log,"System total charge: %.3f\n",fr->qsum[0]);
+ else
+ fprintf(log,"System total charge, top. A: %.3f top. B: %.3f\n",
+ fr->qsum[0],fr->qsum[1]);
+ }
+}
+
+void update_forcerec(FILE *log,t_forcerec *fr,matrix box)
+{
+ if (fr->eeltype == eelGRF)
+ {
+ calc_rffac(NULL,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
+ fr->rcoulomb,fr->temp,fr->zsquare,box,
+ &fr->kappa,&fr->k_rf,&fr->c_rf);
+ }
+}
+
+void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
+{
+ const t_atoms *atoms,*atoms_tpi;
+ const t_blocka *excl;
+ int mb,nmol,nmolc,i,j,tpi,tpj,j1,j2,k,n,nexcl,q;
+#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
+ long long int npair,npair_ij,tmpi,tmpj;
+#else
+ double npair, npair_ij,tmpi,tmpj;
+#endif
+ double csix,ctwelve;
+ int ntp,*typecount;
+ gmx_bool bBHAM;
+ real *nbfp;
+
+ ntp = fr->ntype;
+ bBHAM = fr->bBHAM;
+ nbfp = fr->nbfp;
+
+ for(q=0; q<(fr->efep==efepNO ? 1 : 2); q++) {
+ csix = 0;
+ ctwelve = 0;
+ npair = 0;
+ nexcl = 0;
+ if (!fr->n_tpi) {
+ /* Count the types so we avoid natoms^2 operations */
+ snew(typecount,ntp);
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ nmol = mtop->molblock[mb].nmol;
+ atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+ for(i=0; i<atoms->nr; i++) {
+ if (q == 0)
+ {
+ tpi = atoms->atom[i].type;
+ }
+ else
+ {
+ tpi = atoms->atom[i].typeB;
+ }
+ typecount[tpi] += nmol;
+ }
+ }
+ for(tpi=0; tpi<ntp; tpi++) {
+ for(tpj=tpi; tpj<ntp; tpj++) {
+ tmpi = typecount[tpi];
+ tmpj = typecount[tpj];
+ if (tpi != tpj)
+ {
+ npair_ij = tmpi*tmpj;
+ }
+ else
+ {
+ npair_ij = tmpi*(tmpi - 1)/2;
+ }
+ if (bBHAM) {
+ csix += npair_ij*BHAMC(nbfp,ntp,tpi,tpj);
+ } else {
+ csix += npair_ij* C6(nbfp,ntp,tpi,tpj);
+ ctwelve += npair_ij* C12(nbfp,ntp,tpi,tpj);
+ }
+ npair += npair_ij;
+ }
+ }
+ sfree(typecount);
+ /* Subtract the excluded pairs.
+ * The main reason for subtracting exclusions is that in some cases
+ * some combinations might never occur and the parameters could have
+ * any value. These unused values should not influence the dispersion
+ * correction.
+ */
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ nmol = mtop->molblock[mb].nmol;
+ atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+ excl = &mtop->moltype[mtop->molblock[mb].type].excls;
+ for(i=0; (i<atoms->nr); i++) {
+ if (q == 0)
+ {
+ tpi = atoms->atom[i].type;
+ }
+ else
+ {
+ tpi = atoms->atom[i].typeB;
+ }
+ j1 = excl->index[i];
+ j2 = excl->index[i+1];
+ for(j=j1; j<j2; j++) {
+ k = excl->a[j];
+ if (k > i)
+ {
+ if (q == 0)
+ {
+ tpj = atoms->atom[k].type;
+ }
+ else
+ {
+ tpj = atoms->atom[k].typeB;
+ }
+ if (bBHAM) {
+ csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj);
+ } else {
+ csix -= nmol*C6 (nbfp,ntp,tpi,tpj);
+ ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj);
+ }
+ nexcl += nmol;
+ }
+ }
+ }
+ }
+ } else {
+ /* Only correct for the interaction of the test particle
+ * with the rest of the system.
+ */
+ atoms_tpi =
+ &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
+
+ npair = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ nmol = mtop->molblock[mb].nmol;
+ atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
+ for(j=0; j<atoms->nr; j++) {
+ nmolc = nmol;
+ /* Remove the interaction of the test charge group
+ * with itself.
+ */
+ if (mb == mtop->nmolblock-1)
+ {
+ nmolc--;
+
+ if (mb == 0 && nmol == 1)
+ {
+ gmx_fatal(FARGS,"Old format tpr with TPI, please generate a new tpr file");
+ }
+ }
+ if (q == 0)
+ {
+ tpj = atoms->atom[j].type;
+ }
+ else
+ {
+ tpj = atoms->atom[j].typeB;
+ }
+ for(i=0; i<fr->n_tpi; i++)
+ {
+ if (q == 0)
+ {
+ tpi = atoms_tpi->atom[i].type;
+ }
+ else
+ {
+ tpi = atoms_tpi->atom[i].typeB;
+ }
+ if (bBHAM)
+ {
+ csix += nmolc*BHAMC(nbfp,ntp,tpi,tpj);
+ }
+ else
+ {
+ csix += nmolc*C6 (nbfp,ntp,tpi,tpj);
+ ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj);
+ }
+ npair += nmolc;
+ }
+ }
+ }
+ }
+ if (npair - nexcl <= 0 && fplog) {
+ fprintf(fplog,"\nWARNING: There are no atom pairs for dispersion correction\n\n");
+ csix = 0;
+ ctwelve = 0;
+ } else {
+ csix /= npair - nexcl;
+ ctwelve /= npair - nexcl;
+ }
+ if (debug) {
+ fprintf(debug,"Counted %d exclusions\n",nexcl);
+ fprintf(debug,"Average C6 parameter is: %10g\n",(double)csix);
+ fprintf(debug,"Average C12 parameter is: %10g\n",(double)ctwelve);
+ }
+ fr->avcsix[q] = csix;
+ fr->avctwelve[q] = ctwelve;
+ }
+ if (fplog != NULL)
+ {
+ if (fr->eDispCorr == edispcAllEner ||
+ fr->eDispCorr == edispcAllEnerPres)
+ {
+ fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
+ fr->avcsix[0],fr->avctwelve[0]);
+ }
+ else
+ {
+ fprintf(fplog,"Long Range LJ corr.: <C6> %10.4e\n",fr->avcsix[0]);
+ }
+ }
+}
+
+
+static void set_bham_b_max(FILE *fplog,t_forcerec *fr,
+ const gmx_mtop_t *mtop)
+{
+ const t_atoms *at1,*at2;
+ int mt1,mt2,i,j,tpi,tpj,ntypes;
+ real b,bmin;
+ real *nbfp;
+
+ if (fplog)
+ {
+ fprintf(fplog,"Determining largest Buckingham b parameter for table\n");
+ }
+ nbfp = fr->nbfp;
+ ntypes = fr->ntype;
+
+ bmin = -1;
+ fr->bham_b_max = 0;
+ for(mt1=0; mt1<mtop->nmoltype; mt1++)
+ {
+ at1 = &mtop->moltype[mt1].atoms;
+ for(i=0; (i<at1->nr); i++)
+ {
+ tpi = at1->atom[i].type;
+ if (tpi >= ntypes)
+ gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",i,tpi,ntypes);
+
+ for(mt2=mt1; mt2<mtop->nmoltype; mt2++)
+ {
+ at2 = &mtop->moltype[mt2].atoms;
+ for(j=0; (j<at2->nr); j++) {
+ tpj = at2->atom[j].type;
+ if (tpj >= ntypes)
+ {
+ gmx_fatal(FARGS,"Atomtype[%d] = %d, maximum = %d",j,tpj,ntypes);
+ }
+ b = BHAMB(nbfp,ntypes,tpi,tpj);
+ if (b > fr->bham_b_max)
+ {
+ fr->bham_b_max = b;
+ }
+ if ((b < bmin) || (bmin==-1))
+ {
+ bmin = b;
+ }
+ }
+ }
+ }
+ }
+ if (fplog)
+ {
+ fprintf(fplog,"Buckingham b parameters, min: %g, max: %g\n",
+ bmin,fr->bham_b_max);
+ }
+}
+
+static void make_nbf_tables(FILE *fp,const output_env_t oenv,
+ t_forcerec *fr,real rtab,
+ const t_commrec *cr,
+ const char *tabfn,char *eg1,char *eg2,
+ t_nblists *nbl)
+{
+ char buf[STRLEN];
+ int i,j;
+
+ if (tabfn == NULL) {
+ if (debug)
+ fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
+ return;
+ }
+
+ sprintf(buf,"%s",tabfn);
+ if (eg1 && eg2)
+ /* Append the two energy group names */
+ sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
+ eg1,eg2,ftp2ext(efXVG));
+ nbl->tab = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
+ /* Copy the contents of the table to separate coulomb and LJ tables too,
+ * to improve cache performance.
+ */
+
+ /* For performance reasons we want
+ * the table data to be aligned to 16-byte. The pointer could be freed
+ * but currently isn't.
+ */
+ snew_aligned(nbl->vdwtab,8*(nbl->tab.n+1),16);
+ snew_aligned(nbl->coultab,4*(nbl->tab.n+1),16);
+
+ for(i=0; i<=nbl->tab.n; i++) {
+ for(j=0; j<4; j++)
+ nbl->coultab[4*i+j] = nbl->tab.tab[12*i+j];
+ for(j=0; j<8; j++)
+ nbl->vdwtab [8*i+j] = nbl->tab.tab[12*i+4+j];
+ }
+}
+
+static void count_tables(int ftype1,int ftype2,const gmx_mtop_t *mtop,
+ int *ncount,int **count)
+{
+ const gmx_moltype_t *molt;
+ const t_ilist *il;
+ int mt,ftype,stride,i,j,tabnr;
+
+ for(mt=0; mt<mtop->nmoltype; mt++)
+ {
+ molt = &mtop->moltype[mt];
+ for(ftype=0; ftype<F_NRE; ftype++)
+ {
+ if (ftype == ftype1 || ftype == ftype2) {
+ il = &molt->ilist[ftype];
+ stride = 1 + NRAL(ftype);
+ for(i=0; i<il->nr; i+=stride) {
+ tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
+ if (tabnr < 0)
+ gmx_fatal(FARGS,"A bonded table number is smaller than 0: %d\n",tabnr);
+ if (tabnr >= *ncount) {
+ srenew(*count,tabnr+1);
+ for(j=*ncount; j<tabnr+1; j++)
+ (*count)[j] = 0;
+ *ncount = tabnr+1;
+ }
+ (*count)[tabnr]++;
+ }
+ }
+ }
+ }
+}
+
+static bondedtable_t *make_bonded_tables(FILE *fplog,
+ int ftype1,int ftype2,
+ const gmx_mtop_t *mtop,
+ const char *basefn,const char *tabext)
+{
+ int i,ncount,*count;
+ char tabfn[STRLEN];
+ bondedtable_t *tab;
+
+ tab = NULL;
+
+ ncount = 0;
+ count = NULL;
+ count_tables(ftype1,ftype2,mtop,&ncount,&count);
+
+ if (ncount > 0) {
+ snew(tab,ncount);
+ for(i=0; i<ncount; i++) {
+ if (count[i] > 0) {
+ sprintf(tabfn,"%s",basefn);
+ sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1,"_%s%d.%s",
+ tabext,i,ftp2ext(efXVG));
+ tab[i] = make_bonded_table(fplog,tabfn,NRAL(ftype1)-2);
+ }
+ }
+ sfree(count);
+ }
+
+ return tab;
+}
+
+void forcerec_set_ranges(t_forcerec *fr,
+ int ncg_home,int ncg_force,
+ int natoms_force,
+ int natoms_force_constr,int natoms_f_novirsum)
+{
+ fr->cg0 = 0;
+ fr->hcg = ncg_home;
+
+ /* fr->ncg_force is unused in the standard code,
+ * but it can be useful for modified code dealing with charge groups.
+ */
+ fr->ncg_force = ncg_force;
+ fr->natoms_force = natoms_force;
+ fr->natoms_force_constr = natoms_force_constr;
+
+ if (fr->natoms_force_constr > fr->nalloc_force)
+ {
+ fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
+
+ if (fr->bTwinRange)
+ {
+ srenew(fr->f_twin,fr->nalloc_force);
+ }
+ }
+
+ if (fr->bF_NoVirSum)
+ {
+ fr->f_novirsum_n = natoms_f_novirsum;
+ if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
+ {
+ fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
+ srenew(fr->f_novirsum_alloc,fr->f_novirsum_nalloc);
+ }
+ }
+ else
+ {
+ fr->f_novirsum_n = 0;
+ }
+}
+
+static real cutoff_inf(real cutoff)
+{
+ if (cutoff == 0)
+ {
+ cutoff = GMX_CUTOFF_INF;
+ }
+
+ return cutoff;
+}
+
+static void make_adress_tf_tables(FILE *fp,const output_env_t oenv,
+ t_forcerec *fr,const t_inputrec *ir,
+ const char *tabfn, const gmx_mtop_t *mtop,
+ matrix box)
+{
+ char buf[STRLEN];
+ int i,j;
+
+ if (tabfn == NULL) {
+ gmx_fatal(FARGS,"No thermoforce table file given. Use -tabletf to specify a file\n");
+ return;
+ }
+
+ snew(fr->atf_tabs, ir->adress->n_tf_grps);
+
+ for (i=0; i<ir->adress->n_tf_grps; i++){
+ j = ir->adress->tf_table_index[i]; /* get energy group index */
+ sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"tf_%s.%s",
+ *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]) ,ftp2ext(efXVG));
+ printf("loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[j], buf);
+ fr->atf_tabs[i] = make_atf_table(fp,oenv,fr,buf, box);
+ }
+
+}
+
+gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
+ gmx_bool bPrintNote,t_commrec *cr,FILE *fp)
+{
+ gmx_bool bAllvsAll;
+
+ bAllvsAll =
+ (
+ ir->rlist==0 &&
+ ir->rcoulomb==0 &&
+ ir->rvdw==0 &&
+ ir->ePBC==epbcNONE &&
+ ir->vdwtype==evdwCUT &&
+ ir->coulombtype==eelCUT &&
+ ir->efep==efepNO &&
+ (ir->implicit_solvent == eisNO ||
+ (ir->implicit_solvent==eisGBSA && (ir->gb_algorithm==egbSTILL ||
+ ir->gb_algorithm==egbHCT ||
+ ir->gb_algorithm==egbOBC))) &&
+ getenv("GMX_NO_ALLVSALL") == NULL
+ );
+
+ if (bAllvsAll && ir->opts.ngener > 1)
+ {
+ const char *note="NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
+
+ if (bPrintNote)
+ {
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"\n%s\n",note);
+ }
+ if (fp != NULL)
+ {
+ fprintf(fp,"\n%s\n",note);
+ }
+ }
+ bAllvsAll = FALSE;
+ }
+
+ if(bAllvsAll && fp && MASTER(cr))
+ {
+ fprintf(fp,"\nUsing accelerated all-vs-all kernels.\n\n");
+ }
+
+ return bAllvsAll;
+}
+
+
++static void init_forcerec_f_threads(t_forcerec *fr,int grpp_nener)
++{
++ int t,i;
++
++ fr->nthreads = gmx_omp_nthreads_get(emntBonded);
++
++ if (fr->nthreads > 1)
++ {
++ snew(fr->f_t,fr->nthreads);
++ /* Thread 0 uses the global force and energy arrays */
++ for(t=1; t<fr->nthreads; t++)
++ {
++ fr->f_t[t].f = NULL;
++ fr->f_t[t].f_nalloc = 0;
++ snew(fr->f_t[t].fshift,SHIFTS);
++ /* snew(fr->f_t[t].ener,F_NRE); */
++ fr->f_t[t].grpp.nener = grpp_nener;
++ for(i=0; i<egNR; i++)
++ {
++ snew(fr->f_t[t].grpp.ener[i],grpp_nener);
++ }
++ }
++ }
++}
++
++
/* Select the nbnxn CPU kernel flavour for this binary/CPU combination.
 * Defaults to the plain C 4x4 kernel; with SSE2 compiled in, an x86
 * SIMD kernel is chosen instead. AVX-capable non-AMD CPUs get the
 * widest compiled-in AVX kernel. The GMX_NBNXN_AVX128/GMX_NBNXN_AVX256
 * environment variables override the automatic choice.
 * fp and cr are currently unused in this function.
 */
static void pick_nbnxn_kernel_cpu(FILE *fp,
                                  const t_commrec *cr,
                                  const gmx_cpuid_t cpuid_info,
                                  int *kernel_type)
{
    *kernel_type = nbk4x4_PlainC;

#ifdef GMX_X86_SSE2
    {
        /* On Intel Sandy-Bridge AVX-256 kernels are always faster.
         * On AMD Bulldozer AVX-256 is much slower than AVX-128.
         */
        if(gmx_cpuid_feature(cpuid_info, GMX_CPUID_FEATURE_X86_AVX) == 1 &&
           gmx_cpuid_vendor(cpuid_info) != GMX_CPUID_VENDOR_AMD)
        {
#ifdef GMX_X86_AVX_256
            *kernel_type = nbk4xN_X86_SIMD256;
#else
            /* AVX CPU, but binary was built without 256-bit kernels */
            *kernel_type = nbk4xN_X86_SIMD128;
#endif
        }
        else
        {
            *kernel_type = nbk4xN_X86_SIMD128;
        }

        /* Environment variable overrides for testing/benchmarking */
        if (getenv("GMX_NBNXN_AVX128") != NULL)
        {
            *kernel_type = nbk4xN_X86_SIMD128;
        }
        if (getenv("GMX_NBNXN_AVX256") != NULL)
        {
#ifdef GMX_X86_AVX_256
            *kernel_type = nbk4xN_X86_SIMD256;
#else
            gmx_fatal(FARGS,"You requested AVX-256 nbnxn kernels, but GROMACS was built without AVX support");
#endif
        }
    }
#endif /* GMX_X86_SSE2 */
}
++
++
/* Note that _mm_... intrinsics can be converted to either SSE or AVX
 * depending on compiler flags.
 * For gcc we check for __AVX__
 * At least a check for icc should be added (if there is a macro)
 */
/* Human-readable kernel names, indexed with the nbnxn kernel type
 * (used as nbk_name[*kernel_type] in pick_nbnxn_kernel), so the entry
 * order must stay in sync with that enum. The SIMD entry text is
 * resolved at compile time to the instruction set the binary targets.
 */
static const char *nbk_name[] =
  { "not set", "plain C 4x4",
#if !(defined GMX_X86_AVX_256 || defined GMX_X86_AVX128_FMA || defined __AVX__)
#ifndef GMX_X86_SSE4_1
#ifndef GMX_DOUBLE
    "SSE2 4x4",
#else
    "SSE2 4x2",
#endif
#else
#ifndef GMX_DOUBLE
    "SSE4.1 4x4",
#else
    "SSE4.1 4x2",
#endif
#endif
#else
#ifndef GMX_DOUBLE
    "AVX-128 4x4",
#else
    "AVX-128 4x2",
#endif
#endif
#ifndef GMX_DOUBLE
    "AVX-256 4x8",
#else
    "AVX-256 4x4",
#endif
    "CUDA 8x8x8", "plain C 8x8x8" };
++
/* Select the non-bonded kernel type (CPU, GPU or GPU emulation) for a
 * run. When bUseGPU is non-NULL and a usable GPU is present, the GPU
 * is initialized here and *bUseGPU is set; passing bUseGPU == NULL
 * forces a CPU kernel choice (used for hybrid gpu_cpu runs).
 * GMX_EMULATE_GPU / GMX_NO_NONBONDED switch to the plain C 8x8x8
 * emulation kernel instead of using the GPU.
 */
static void pick_nbnxn_kernel(FILE *fp,
                              const t_commrec *cr,
                              const gmx_hw_info_t *hwinfo,
                              gmx_bool use_cpu_acceleration,
                              gmx_bool *bUseGPU,
                              int *kernel_type)
{
    gmx_bool bEmulateGPU, bGPU;
    char gpu_err_str[STRLEN];

    assert(kernel_type);

    *kernel_type = nbkNotSet;
    /* if bUseGPU == NULL we don't want a GPU (e.g. hybrid mode kernel selection) */
    bGPU = (bUseGPU != NULL) && hwinfo->bCanUseGPU;

    /* Run GPU emulation mode if GMX_EMULATE_GPU is defined or in case if nobonded
       calculations are turned off via GMX_NO_NONBONDED -- this is the simple way
       to turn off GPU/CUDA initializations as well.. */
    bEmulateGPU = ((getenv("GMX_EMULATE_GPU") != NULL) ||
                   (getenv("GMX_NO_NONBONDED") != NULL));

    if (bGPU)
    {
        if (bEmulateGPU)
        {
            bGPU = FALSE;
        }
        else
        {
            /* Each PP node will use the intra-node id-th device from the
             * list of detected/selected GPUs. */
            if (!init_gpu(cr->nodeid_group_intra, gpu_err_str, &hwinfo->gpu_info))
            {
                /* At this point the init should never fail as we made sure that
                 * we have all the GPUs we need. If it still does, we'll bail. */
                gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
                          cr->nodeid,
                          get_gpu_device_id(&hwinfo->gpu_info, cr->nodeid_group_intra),
                          gpu_err_str);
            }
        }
        *bUseGPU = bGPU;
    }

    if (bEmulateGPU)
    {
        /* Emulation runs the GPU-layout pair lists with plain C kernels */
        *kernel_type = nbk8x8x8_PlainC;

        md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
    }
    else if (bGPU)
    {
        *kernel_type = nbk8x8x8_CUDA;
    }

    if (*kernel_type == nbkNotSet)
    {
        if (use_cpu_acceleration)
        {
            pick_nbnxn_kernel_cpu(fp,cr,hwinfo->cpuid_info,kernel_type);
        }
        else
        {
            *kernel_type = nbk4x4_PlainC;
        }
    }

    if (fp != NULL)
    {
        /* NOTE(review): the stderr line is also suppressed when fp == NULL;
         * confirm that is intended and not a side effect of this guard. */
        if (MASTER(cr))
        {
            fprintf(stderr,"Using %s non-bonded kernels\n",
                    nbk_name[*kernel_type]);
        }
        fprintf(fp,"\nUsing %s non-bonded kernels\n\n",
                nbk_name[*kernel_type]);
    }
}
++
++
/* Set up the tabulated Ewald correction force (and, depending on the
 * table format, potential) for the Verlet-scheme kernels. Simple
 * (pair-list) CPU kernels get a table sized from the required force
 * accuracy; GPU/emulation kernels use a fixed-size table.
 */
static void init_verlet_ewald_f_table(interaction_const_t *ic,
                                      int verlet_kernel_type)
{
    if (nbnxn_kernel_pairlist_simple(verlet_kernel_type))
    {
        /* With a spacing of 0.0005 we are at the force summation accuracy
         * for the SSE kernels for "normal" atomistic simulations.
         */
        ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
                                                   ic->rcoulomb);
        /* Two extra points as margin (cf. the rounding note below) */
        ic->tabq_size = (int)(ic->rcoulomb*ic->tabq_scale) + 2;
#ifndef GMX_DOUBLE
        ic->tabq_format = tableformatFDV0;
#else
        ic->tabq_format = tableformatF;
#endif
    }
    else
    {
        ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
        /* Subtract 2 iso 1 to avoid access out of range due to rounding */
        ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
        if (verlet_kernel_type == nbk8x8x8_CUDA)
        {
            /* This case is handled in the nbnxn CUDA module */
            ic->tabq_format = tableformatNONE;
        }
        else
        {
            ic->tabq_format = tableformatF;
        }
    }

    switch (ic->tabq_format)
    {
    case tableformatNONE:
        break;
    case tableformatF:
        /* Free any previous tables before (re)allocating */
        sfree_aligned(ic->tabq_coul_F);
        sfree_aligned(ic->tabq_coul_V);
        snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
        snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
        table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,
                                    ic->tabq_size,ic->tabq_format,
                                    1/ic->tabq_scale,ic->ewaldcoeff);
        break;
    case tableformatFDV0:
        /* NOTE(review): this frees tabq_coul_F but never a previous
         * tabq_coul_FDV0 before allocating it -- confirm that repeated
         * initialization cannot leak here. */
        sfree_aligned(ic->tabq_coul_F);
        snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
        table_spline3_fill_ewald_lr(ic->tabq_coul_FDV0,NULL,
                                    ic->tabq_size,ic->tabq_format,
                                    1/ic->tabq_scale,ic->ewaldcoeff);
        break;
    default:
        gmx_incons("Unknown table format");
    }
}
++
++void init_interaction_const_tables(FILE *fp,
++ interaction_const_t *ic,
++ int verlet_kernel_type)
++{
++ real spacing;
++
++ if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
++ {
++ init_verlet_ewald_f_table(ic,verlet_kernel_type);
++
++ if (fp != NULL)
++ {
++ fprintf(fp,"Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
++ 1/ic->tabq_scale,ic->tabq_size);
++ }
++ }
++}
++
/* Allocate and fill an interaction_const_t from the force record:
 * cut-offs, potential-shift constants, Ewald and reaction-field
 * parameters. Also pushes the constants to the CUDA module when a GPU
 * is in use, and initializes the Ewald correction tables for
 * Verlet-scheme runs.
 */
void init_interaction_const(FILE *fp,
                            interaction_const_t **interaction_const,
                            const t_forcerec *fr)
{
    interaction_const_t *ic;

    snew(ic, 1);

    ic->rlist = fr->rlist;

    /* Lennard-Jones */
    ic->rvdw = fr->rvdw;
    if (fr->vdw_pot_shift)
    {
        /* Constant that shifts the LJ potential to zero at the cut-off */
        ic->sh_invrc6 = pow(ic->rvdw,-6.0);
    }
    else
    {
        ic->sh_invrc6 = 0;
    }

    /* Electrostatics */
    ic->eeltype   = fr->eeltype;
    ic->rcoulomb  = fr->rcoulomb;
    ic->epsilon_r = fr->epsilon_r;
    ic->epsfac    = fr->epsfac;

    /* Ewald */
    ic->ewaldcoeff = fr->ewaldcoeff;
    if (fr->coul_pot_shift)
    {
        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
    }
    else
    {
        ic->sh_ewald = 0;
    }

    /* Reaction-field */
    if (EEL_RF(ic->eeltype))
    {
        ic->epsilon_rf = fr->epsilon_rf;
        ic->k_rf       = fr->k_rf;
        ic->c_rf       = fr->c_rf;
    }
    else
    {
        /* For plain cut-off we might use the reaction-field kernels */
        ic->epsilon_rf = ic->epsilon_r;
        ic->k_rf       = 0;
        if (fr->coul_pot_shift)
        {
            ic->c_rf = 1/ic->rcoulomb;
        }
        else
        {
            ic->c_rf = 0;
        }
    }

    if (fp != NULL)
    {
        fprintf(fp,"Potential shift: LJ r^-12: %.3f r^-6 %.3f",
                sqr(ic->sh_invrc6),ic->sh_invrc6);
        if (ic->eeltype == eelCUT)
        {
            fprintf(fp,", Coulomb %.3f",ic->c_rf);
        }
        else if (EEL_PME(ic->eeltype))
        {
            fprintf(fp,", Ewald %.3e",ic->sh_ewald);
        }
        fprintf(fp,"\n");
    }

    *interaction_const = ic;

    /* Push the constants to the GPU module, if one is in use */
    if (fr->nbv != NULL && fr->nbv->bUseGPU)
    {
        nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
    }

    if (fr->cutoff_scheme == ecutsVERLET)
    {
        assert(fr->nbv != NULL && fr->nbv->grp != NULL);
        init_interaction_const_tables(fp,ic,fr->nbv->grp[fr->nbv->ngrp-1].kernel_type);
    }
}
++
/* Allocate and initialize the Verlet-scheme non-bonded data: kernel
 * selection per interaction group (local and, with domain
 * decomposition, non-local), optional GPU initialization, pair search
 * structures, pair lists and atom data.
 */
static void init_nb_verlet(FILE *fp,
                           nonbonded_verlet_t **nb_verlet,
                           const t_inputrec *ir,
                           const t_forcerec *fr,
                           const t_commrec *cr,
                           const char *nbpu_opt)
{
    nonbonded_verlet_t *nbv;
    int i;
    char *env;
    gmx_bool bHybridGPURun = FALSE;

    gmx_nbat_alloc_t *nb_alloc;
    gmx_nbat_free_t *nb_free;

    snew(nbv, 1);

    nbv->nbs = NULL;

    /* With domain decomposition there is a separate non-local group */
    nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
    for(i=0; i<nbv->ngrp; i++)
    {
        nbv->grp[i].nbl_lists.nnbl = 0;
        nbv->grp[i].nbat = NULL;
        nbv->grp[i].kernel_type = nbkNotSet;

        if (i == 0) /* local */
        {
            pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
                              &nbv->bUseGPU,
                              &nbv->grp[i].kernel_type);
        }
        else /* non-local */
        {
            if (nbpu_opt != NULL && strcmp(nbpu_opt,"gpu_cpu") == 0)
            {
                /* Use GPU for local, select a CPU kernel for non-local */
                pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
                                  NULL,
                                  &nbv->grp[i].kernel_type);

                bHybridGPURun = TRUE;
            }
            else
            {
                /* Use the same kernel for local and non-local interactions */
                nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
            }
        }
    }

    if (nbv->bUseGPU)
    {
        /* init the NxN GPU data; the last argument tells whether we'll have
         * both local and non-local NB calculation on GPU */
        nbnxn_cuda_init(fp, &nbv->cu_nbv,
                        &fr->hwinfo->gpu_info, cr->nodeid_group_intra,
                        (nbv->ngrp > 1) && !bHybridGPURun);

        /* GMX_NB_MIN_CI overrides the pair-list balancing parameter */
        if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
        {
            char *end;

            nbv->min_ci_balanced = strtol(env, &end, 10);
            if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
            {
                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
            }

            if (debug)
            {
                fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
                        nbv->min_ci_balanced);
            }
        }
        else
        {
            nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
            if (debug)
            {
                fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
                        nbv->min_ci_balanced);
            }
        }
    }
    else
    {
        nbv->min_ci_balanced = 0;
    }

    *nb_verlet = nbv;

    nbnxn_init_search(&nbv->nbs,
                      DOMAINDECOMP(cr) ? & cr->dd->nc : NULL,
                      DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
                      gmx_omp_nthreads_get(emntNonbonded));

    for(i=0; i<nbv->ngrp; i++)
    {
        /* NOTE(review): the allocator choice is keyed off grp[0] (the
         * local group) for every group -- presumably so pinned memory
         * is used everywhere when the local group runs on CUDA;
         * confirm this is intentional. */
        if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
        {
            nb_alloc = &pmalloc;
            nb_free  = &pfree;
        }
        else
        {
            nb_alloc = NULL;
            nb_free  = NULL;
        }

        nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
                                /* 8x8x8 "non-simple" lists are ATM always combined */
                                !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
                                nb_alloc, nb_free);

        if (i == 0 ||
            nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
        {
            snew(nbv->grp[i].nbat,1);
            nbnxn_atomdata_init(fp,
                                nbv->grp[i].nbat,
                                nbv->grp[i].kernel_type,
                                fr->ntype,fr->nbfp,
                                ir->opts.ngener,
                                nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
                                nb_alloc, nb_free);
        }
        else
        {
            /* Groups with identical kernels share the atom data */
            nbv->grp[i].nbat = nbv->grp[0].nbat;
        }
    }
}
+
+void init_forcerec(FILE *fp,
+ const output_env_t oenv,
+ t_forcerec *fr,
+ t_fcdata *fcd,
+ const t_inputrec *ir,
+ const gmx_mtop_t *mtop,
+ const t_commrec *cr,
+ matrix box,
+ gmx_bool bMolEpot,
+ const char *tabfn,
+ const char *tabafn,
+ const char *tabpfn,
+ const char *tabbfn,
- gmx_detectcpu(&fr->cpu_information);
- if(MASTER(cr))
- {
- /* Only print warnings from master */
- gmx_detectcpu_check_acceleration(fr->cpu_information,fp);
- }
-
++ const char *nbpu_opt,
++ gmx_bool bNoSolvOpt,
+ real print_force)
+{
+ int i,j,m,natoms,ngrp,negp_pp,negptable,egi,egj;
+ real rtab;
+ char *env;
+ double dbl;
+ rvec box_size;
+ const t_block *cgs;
+ gmx_bool bGenericKernelOnly;
+ gmx_bool bTab,bSep14tab,bNormalnblists;
+ t_nblists *nbl;
+ int *nm_ind,egp_flags;
+
- fr->use_acceleration = TRUE;
+ /* By default we turn acceleration on, but it might be turned off further down... */
-
- if (getenv("GMX_DISABLE_ACCELERATION") != NULL)
++ fr->use_cpu_acceleration = TRUE;
+
+ fr->bDomDec = DOMAINDECOMP(cr);
+
+ natoms = mtop->natoms;
+
+ if (check_box(ir->ePBC,box))
+ {
+ gmx_fatal(FARGS,check_box(ir->ePBC,box));
+ }
+
+ /* Test particle insertion ? */
+ if (EI_TPI(ir->eI)) {
+ /* Set to the size of the molecule to be inserted (the last one) */
+ /* Because of old style topologies, we have to use the last cg
+ * instead of the last molecule type.
+ */
+ cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
+ fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
+ if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1]) {
+ gmx_fatal(FARGS,"The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
+ }
+ } else {
+ fr->n_tpi = 0;
+ }
+
+ /* Copy AdResS parameters */
+ if (ir->bAdress) {
+ fr->adress_type = ir->adress->type;
+ fr->adress_const_wf = ir->adress->const_wf;
+ fr->adress_ex_width = ir->adress->ex_width;
+ fr->adress_hy_width = ir->adress->hy_width;
+ fr->adress_icor = ir->adress->icor;
+ fr->adress_site = ir->adress->site;
+ fr->adress_ex_forcecap = ir->adress->ex_forcecap;
+ fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
+
+
+ snew(fr->adress_group_explicit , ir->adress->n_energy_grps);
+ for (i=0; i< ir->adress->n_energy_grps; i++){
+ fr->adress_group_explicit[i]= ir->adress->group_explicit[i];
+ }
+
+ fr->n_adress_tf_grps = ir->adress->n_tf_grps;
+ snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
+ for (i=0; i< fr->n_adress_tf_grps; i++){
+ fr->adress_tf_table_index[i]= ir->adress->tf_table_index[i];
+ }
+ copy_rvec(ir->adress->refs,fr->adress_refs);
+ } else {
+ fr->adress_type = eAdressOff;
+ fr->adress_do_hybridpairs = FALSE;
+ }
+
+ /* Copy the user determined parameters */
+ fr->userint1 = ir->userint1;
+ fr->userint2 = ir->userint2;
+ fr->userint3 = ir->userint3;
+ fr->userint4 = ir->userint4;
+ fr->userreal1 = ir->userreal1;
+ fr->userreal2 = ir->userreal2;
+ fr->userreal3 = ir->userreal3;
+ fr->userreal4 = ir->userreal4;
+
+ /* Shell stuff */
+ fr->fc_stepsize = ir->fc_stepsize;
+
+ /* Free energy */
+ fr->efep = ir->efep;
+ fr->sc_alphavdw = ir->fepvals->sc_alpha;
+ if (ir->fepvals->bScCoul)
+ {
+ fr->sc_alphacoul = ir->fepvals->sc_alpha;
+ fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min,6);
+ }
+ else
+ {
+ fr->sc_alphacoul = 0;
+ fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
+ }
+ fr->sc_power = ir->fepvals->sc_power;
+ fr->sc_r_power = ir->fepvals->sc_r_power;
+ fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma,6);
+
+ env = getenv("GMX_SCSIGMA_MIN");
+ if (env != NULL)
+ {
+ dbl = 0;
+ sscanf(env,"%lf",&dbl);
+ fr->sc_sigma6_min = pow(dbl,6);
+ if (fp)
+ {
+ fprintf(fp,"Setting the minimum soft core sigma to %g nm\n",dbl);
+ }
+ }
+
++ fr->bNonbonded = TRUE;
++ if (getenv("GMX_NO_NONBONDED") != NULL)
++ {
++ /* turn off non-bonded calculations */
++ fr->bNonbonded = FALSE;
++ md_print_warn(cr,fp,
++ "Found environment variable GMX_NO_NONBONDED.\n"
++ "Disabling nonbonded calculations.\n");
++ }
++
+ bGenericKernelOnly = FALSE;
+ if (getenv("GMX_NB_GENERIC") != NULL)
+ {
+ if (fp != NULL)
+ {
+ fprintf(fp,
+ "Found environment variable GMX_NB_GENERIC.\n"
+ "Disabling interaction-specific nonbonded kernels.\n\n");
+ }
+ bGenericKernelOnly = TRUE;
+ bNoSolvOpt = TRUE;
+ }
- fr->use_acceleration = FALSE;
++
++ if( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
+ {
- "\nFound environment variable GMX_DISABLE_ACCELERATION.\n"
- "Disabling all architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
++ fr->use_cpu_acceleration = FALSE;
+ if (fp != NULL)
+ {
+ fprintf(fp,
-
-
++ "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
++ "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
+ }
+ }
+
+ /* Check if we can/should do all-vs-all kernels */
+ fr->bAllvsAll = can_use_allvsall(ir,mtop,FALSE,NULL,NULL);
+ fr->AllvsAll_work = NULL;
+ fr->AllvsAll_workgb = NULL;
+
- fr->bGrid = (ir->ns_type == ensGRID);
- fr->ePBC = ir->ePBC;
- fr->bMolPBC = ir->bPeriodicMols;
++
+ /* Neighbour searching stuff */
- fr->bvdwtab = (fr->vdwtype != evdwCUT ||
- !gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS));
- fr->bcoultab = (!(fr->eeltype == eelCUT || EEL_RF(fr->eeltype)) ||
- fr->eeltype == eelRF_ZERO);
-
- if (getenv("GMX_REQUIRE_TABLES"))
++ fr->cutoff_scheme = ir->cutoff_scheme;
++ fr->bGrid = (ir->ns_type == ensGRID);
++ fr->ePBC = ir->ePBC;
++
++ /* Determine if we will do PBC for distances in bonded interactions */
++ if (fr->ePBC == epbcNONE)
++ {
++ fr->bMolPBC = FALSE;
++ }
++ else
++ {
++ if (!DOMAINDECOMP(cr))
++ {
++ /* The group cut-off scheme and SHAKE assume charge groups
++ * are whole, but not using molpbc is faster in most cases.
++ */
++ if (fr->cutoff_scheme == ecutsGROUP ||
++ (ir->eConstrAlg == econtSHAKE &&
++ (gmx_mtop_ftype_count(mtop,F_CONSTR) > 0 ||
++ gmx_mtop_ftype_count(mtop,F_CONSTRNC) > 0)))
++ {
++ fr->bMolPBC = ir->bPeriodicMols;
++ }
++ else
++ {
++ fr->bMolPBC = TRUE;
++ if (getenv("GMX_USE_GRAPH") != NULL)
++ {
++ fr->bMolPBC = FALSE;
++ if (fp)
++ {
++ fprintf(fp,"\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
++ }
++ }
++ }
++ }
++ else
++ {
++ fr->bMolPBC = dd_bonded_molpbc(cr->dd,fr->ePBC);
++ }
++ }
+ fr->rc_scaling = ir->refcoord_scaling;
+ copy_rvec(ir->posres_com,fr->posres_com);
+ copy_rvec(ir->posres_comB,fr->posres_comB);
+ fr->rlist = cutoff_inf(ir->rlist);
+ fr->rlistlong = cutoff_inf(ir->rlistlong);
+ fr->eeltype = ir->coulombtype;
+ fr->vdwtype = ir->vdwtype;
++
++ fr->coul_pot_shift = (ir->coulomb_modifier == eintmodPOTSHIFT);
++ fr->vdw_pot_shift = (ir->vdw_modifier == eintmodPOTSHIFT);
+
+ fr->bTwinRange = fr->rlistlong > fr->rlist;
+ fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype==eelEWALD);
+
+ fr->reppow = mtop->ffparams.reppow;
- fr->bvdwtab = TRUE;
- fr->bcoultab = TRUE;
++
++ if (ir->cutoff_scheme == ecutsGROUP)
+ {
-
- if (fp) {
- fprintf(fp,"Table routines are used for coulomb: %s\n",bool_names[fr->bcoultab]);
- fprintf(fp,"Table routines are used for vdw: %s\n",bool_names[fr->bvdwtab ]);
++ fr->bvdwtab = (fr->vdwtype != evdwCUT ||
++ !gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS));
++ fr->bcoultab = (!(fr->eeltype == eelCUT || EEL_RF(fr->eeltype)) ||
++ fr->eeltype == eelRF_ZERO);
++
++ if (getenv("GMX_REQUIRE_TABLES"))
++ {
++ fr->bvdwtab = TRUE;
++ fr->bcoultab = TRUE;
++ }
++
++ if (fp)
++ {
++ fprintf(fp,"Table routines are used for coulomb: %s\n",bool_names[fr->bcoultab]);
++ fprintf(fp,"Table routines are used for vdw: %s\n",bool_names[fr->bvdwtab ]);
++ }
+ }
- /* Mask that says whether or not this NBF list should be computed */
- /* if (fr->bMask == NULL) {
- ngrp = ir->opts.ngener*ir->opts.ngener;
- snew(fr->bMask,ngrp);*/
- /* Defaults to always */
- /* for(i=0; (i<ngrp); i++)
- fr->bMask[i] = TRUE;
- }*/
-
- if (ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) {
++
++ if (ir->cutoff_scheme == ecutsVERLET)
++ {
++ if (!gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS))
++ {
++ gmx_fatal(FARGS,"Cut-off scheme %S only supports LJ repulsion power 12",ecutscheme_names[ir->cutoff_scheme]);
++ }
++ fr->bvdwtab = FALSE;
++ fr->bcoultab = FALSE;
+ }
+
+ /* Tables are used for direct ewald sum */
+ if(fr->bEwald)
+ {
+ if (EEL_PME(ir->coulombtype))
+ {
+ if (fp)
+ fprintf(fp,"Will do PME sum in reciprocal space.\n");
+ if (ir->coulombtype == eelP3M_AD)
+ {
+ please_cite(fp,"Hockney1988");
+ please_cite(fp,"Ballenegger2012");
+ }
+ else
+ {
+ please_cite(fp,"Essmann95a");
+ }
+
+ if (ir->ewald_geometry == eewg3DC)
+ {
+ if (fp)
+ {
+ fprintf(fp,"Using the Ewald3DC correction for systems with a slab geometry.\n");
+ }
+ please_cite(fp,"In-Chul99a");
+ }
+ }
+ fr->ewaldcoeff=calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
+ init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
+ if (fp)
+ {
+ fprintf(fp,"Using a Gaussian width (1/beta) of %g nm for Ewald\n",
+ 1/fr->ewaldcoeff);
+ }
+ }
+
+ /* Electrostatics */
+ fr->epsilon_r = ir->epsilon_r;
+ fr->epsilon_rf = ir->epsilon_rf;
+ fr->fudgeQQ = mtop->ffparams.fudgeQQ;
+ fr->rcoulomb_switch = ir->rcoulomb_switch;
+ fr->rcoulomb = cutoff_inf(ir->rcoulomb);
+
+ /* Parameters for generalized RF */
+ fr->zsquare = 0.0;
+ fr->temp = 0.0;
+
+ if (fr->eeltype == eelGRF)
+ {
+ init_generalized_rf(fp,mtop,ir,fr);
+ }
+ else if (fr->eeltype == eelSHIFT)
+ {
+ for(m=0; (m<DIM); m++)
+ box_size[m]=box[m][m];
+
+ if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
+ set_shift_consts(fp,fr->rcoulomb_switch,fr->rcoulomb,box_size,fr);
+ }
+
+ fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
+ gmx_mtop_ftype_count(mtop,F_POSRES) > 0 ||
+ gmx_mtop_ftype_count(mtop,F_FBPOSRES) > 0 ||
+ IR_ELEC_FIELD(*ir) ||
+ (fr->adress_icor != eAdressICOff)
+ );
+
- if (ir->eDispCorr != edispcNO)
- {
- calc_enervirdiff(fp,ir->eDispCorr,fr);
- }
-
++ if (fr->cutoff_scheme == ecutsGROUP &&
++ ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) {
+ /* Count the total number of charge groups */
+ fr->cg_nalloc = ncg_mtop(mtop);
+ srenew(fr->cg_cm,fr->cg_nalloc);
+ }
+ if (fr->shift_vec == NULL)
+ snew(fr->shift_vec,SHIFTS);
+
+ if (fr->fshift == NULL)
+ snew(fr->fshift,SHIFTS);
+
+ if (fr->nbfp == NULL) {
+ fr->ntype = mtop->ffparams.atnr;
+ fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
+ fr->nbfp = mk_nbfp(&mtop->ffparams,fr->bBHAM);
+ }
+
+ /* Copy the energy group exclusions */
+ fr->egp_flags = ir->opts.egp_flags;
+
+ /* Van der Waals stuff */
+ fr->rvdw = cutoff_inf(ir->rvdw);
+ fr->rvdw_switch = ir->rvdw_switch;
+ if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM) {
+ if (fr->rvdw_switch >= fr->rvdw)
+ gmx_fatal(FARGS,"rvdw_switch (%f) must be < rvdw (%f)",
+ fr->rvdw_switch,fr->rvdw);
+ if (fp)
+ fprintf(fp,"Using %s Lennard-Jones, switch between %g and %g nm\n",
+ (fr->eeltype==eelSWITCH) ? "switched":"shifted",
+ fr->rvdw_switch,fr->rvdw);
+ }
+
+ if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
+ gmx_fatal(FARGS,"Switch/shift interaction not supported with Buckingham");
+
+ if (fp)
+ fprintf(fp,"Cut-off's: NS: %g Coulomb: %g %s: %g\n",
+ fr->rlist,fr->rcoulomb,fr->bBHAM ? "BHAM":"LJ",fr->rvdw);
+
+ fr->eDispCorr = ir->eDispCorr;
+ if (ir->eDispCorr != edispcNO)
+ {
+ set_avcsixtwelve(fp,fr,mtop);
+ }
+
+ if (fr->bBHAM)
+ {
+ set_bham_b_max(fp,fr,mtop);
+ }
+
+ fr->bGB = (ir->implicit_solvent == eisGBSA);
+ fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
+
+ /* Copy the GBSA data (radius, volume and surftens for each
+ * atomtype) from the topology atomtype section to forcerec.
+ */
+ snew(fr->atype_radius,fr->ntype);
+ snew(fr->atype_vol,fr->ntype);
+ snew(fr->atype_surftens,fr->ntype);
+ snew(fr->atype_gb_radius,fr->ntype);
+ snew(fr->atype_S_hct,fr->ntype);
+
+ if (mtop->atomtypes.nr > 0)
+ {
+ for(i=0;i<fr->ntype;i++)
+ fr->atype_radius[i] =mtop->atomtypes.radius[i];
+ for(i=0;i<fr->ntype;i++)
+ fr->atype_vol[i] = mtop->atomtypes.vol[i];
+ for(i=0;i<fr->ntype;i++)
+ fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
+ for(i=0;i<fr->ntype;i++)
+ fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
+ for(i=0;i<fr->ntype;i++)
+ fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
+ }
+
+ /* Generate the GB table if needed */
+ if(fr->bGB)
+ {
+#ifdef GMX_DOUBLE
+ fr->gbtabscale=2000;
+#else
+ fr->gbtabscale=500;
+#endif
+
+ fr->gbtabr=100;
+ fr->gbtab=make_gb_table(fp,oenv,fr,tabpfn,fr->gbtabscale);
+
+ init_gb(&fr->born,cr,fr,ir,mtop,ir->rgbradii,ir->gb_algorithm);
+
+ /* Copy local gb data (for dd, this is done in dd_partition_system) */
+ if (!DOMAINDECOMP(cr))
+ {
+ make_local_gb(cr,fr->born,ir->gb_algorithm);
+ }
+ }
+
+ /* Set the charge scaling */
+ if (fr->epsilon_r != 0)
+ fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
+ else
+ /* eps = 0 is infinite dieletric: no coulomb interactions */
+ fr->epsfac = 0;
+
+ /* Reaction field constants */
+ if (EEL_RF(fr->eeltype))
+ calc_rffac(fp,fr->eeltype,fr->epsilon_r,fr->epsilon_rf,
+ fr->rcoulomb,fr->temp,fr->zsquare,box,
+ &fr->kappa,&fr->k_rf,&fr->c_rf);
+
+ set_chargesum(fp,fr,mtop);
+
+ /* if we are using LR electrostatics, and they are tabulated,
+ * the tables will contain modified coulomb interactions.
+ * Since we want to use the non-shifted ones for 1-4
+ * coulombic interactions, we must have an extra set of tables.
+ */
+
+ /* Construct tables.
+ * A little unnecessary to make both vdw and coul tables sometimes,
+ * but what the heck... */
+
+ bTab = fr->bcoultab || fr->bvdwtab;
+
+ bSep14tab = ((!bTab || fr->eeltype!=eelCUT || fr->vdwtype!=evdwCUT ||
+ fr->bBHAM) &&
+ (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
+ gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0 ||
+ gmx_mtop_ftype_count(mtop,F_LJC_PAIRS_NB) > 0));
+
+ negp_pp = ir->opts.ngener - ir->nwall;
+ negptable = 0;
+ if (!bTab) {
+ bNormalnblists = TRUE;
+ fr->nnblists = 1;
+ } else {
+ bNormalnblists = (ir->eDispCorr != edispcNO);
+ for(egi=0; egi<negp_pp; egi++) {
+ for(egj=egi; egj<negp_pp; egj++) {
+ egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
+ if (!(egp_flags & EGP_EXCL)) {
+ if (egp_flags & EGP_TABLE) {
+ negptable++;
+ } else {
+ bNormalnblists = TRUE;
+ }
+ }
+ }
+ }
+ if (bNormalnblists) {
+ fr->nnblists = negptable + 1;
+ } else {
+ fr->nnblists = negptable;
+ }
+ if (fr->nnblists > 1)
+ snew(fr->gid2nblists,ir->opts.ngener*ir->opts.ngener);
+ }
+ snew(fr->nblists,fr->nnblists);
+
+ /* This code automatically gives table length tabext without cut-off's,
+ * in that case grompp should already have checked that we do not need
+ * normal tables and we only generate tables for 1-4 interactions.
+ */
+ rtab = ir->rlistlong + ir->tabext;
+
+ if (bTab) {
+ /* make tables for ordinary interactions */
+ if (bNormalnblists) {
+ make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,NULL,NULL,&fr->nblists[0]);
+ if (!bSep14tab)
+ fr->tab14 = fr->nblists[0].tab;
+ m = 1;
+ } else {
+ m = 0;
+ }
+ if (negptable > 0) {
+ /* Read the special tables for certain energy group pairs */
+ nm_ind = mtop->groups.grps[egcENER].nm_ind;
+ for(egi=0; egi<negp_pp; egi++) {
+ for(egj=egi; egj<negp_pp; egj++) {
+ egp_flags = ir->opts.egp_flags[GID(egi,egj,ir->opts.ngener)];
+ if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL)) {
+ nbl = &(fr->nblists[m]);
+ if (fr->nnblists > 1) {
+ fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = m;
+ }
+ /* Read the table file with the two energy groups names appended */
+ make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,
+ *mtop->groups.grpname[nm_ind[egi]],
+ *mtop->groups.grpname[nm_ind[egj]],
+ &fr->nblists[m]);
+ m++;
+ } else if (fr->nnblists > 1) {
+ fr->gid2nblists[GID(egi,egj,ir->opts.ngener)] = 0;
+ }
+ }
+ }
+ }
+ }
+ if (bSep14tab)
+ {
+ /* generate extra tables with plain Coulomb for 1-4 interactions only */
+ fr->tab14 = make_tables(fp,oenv,fr,MASTER(cr),tabpfn,rtab,
+ GMX_MAKETABLES_14ONLY);
+ }
+
+ /* Read AdResS Thermo Force table if needed */
+ if(fr->adress_icor == eAdressICThermoForce)
+ {
+ /* old todo replace */
+
+ if (ir->adress->n_tf_grps > 0){
+ make_adress_tf_tables(fp,oenv,fr,ir,tabfn, mtop, box);
+
+ }else{
+ /* load the default table */
+ snew(fr->atf_tabs, 1);
+ fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp,oenv,fr,tabafn, box);
+ }
+ }
+
+ /* Wall stuff */
+ fr->nwall = ir->nwall;
+ if (ir->nwall && ir->wall_type==ewtTABLE)
+ {
+ make_wall_tables(fp,oenv,ir,tabfn,&mtop->groups,fr);
+ }
+
+ if (fcd && tabbfn) {
+ fcd->bondtab = make_bonded_tables(fp,
+ F_TABBONDS,F_TABBONDSNC,
+ mtop,tabbfn,"b");
+ fcd->angletab = make_bonded_tables(fp,
+ F_TABANGLES,-1,
+ mtop,tabbfn,"a");
+ fcd->dihtab = make_bonded_tables(fp,
+ F_TABDIHS,-1,
+ mtop,tabbfn,"d");
+ } else {
+ if (debug)
+ fprintf(debug,"No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
+ }
+
- fr->cginfo_mb = init_cginfo_mb(fp,mtop,fr,bNoSolvOpt);
-
+ /* QM/MM initialization if requested
+ */
+ if (ir->bQMMM)
+ {
+ fprintf(stderr,"QM/MM calculation requested.\n");
+ }
+
+ fr->bQMMM = ir->bQMMM;
+ fr->qr = mk_QMMMrec();
+
+ /* Set all the static charge group info */
- if (cr->duty & DUTY_PP){
++ fr->cginfo_mb = init_cginfo_mb(fp,mtop,fr,bNoSolvOpt,
++ &fr->bExcl_IntraCGAll_InterCGNone);
+ if (DOMAINDECOMP(cr)) {
+ fr->cginfo = NULL;
+ } else {
+ fr->cginfo = cginfo_expand(mtop->nmolblock,fr->cginfo_mb);
+ }
+
+ if (!DOMAINDECOMP(cr))
+ {
+ /* When using particle decomposition, the effect of the second argument,
+ * which sets fr->hcg, is corrected later in do_md and init_em.
+ */
+ forcerec_set_ranges(fr,ncg_mtop(mtop),ncg_mtop(mtop),
+ mtop->natoms,mtop->natoms,mtop->natoms);
+ }
+
+ fr->print_force = print_force;
+
+
+ /* coarse load balancing vars */
+ fr->t_fnbf=0.;
+ fr->t_wait=0.;
+ fr->timesteps=0;
+
+ /* Initialize neighbor search */
+ init_ns(fp,cr,&fr->ns,fr,mtop,box);
+
++ if (cr->duty & DUTY_PP)
++ {
+ gmx_setup_kernels(fp,fr,bGenericKernelOnly);
+ if (ir->bAdress)
++ {
+ gmx_setup_adress_kernels(fp,bGenericKernelOnly);
++ }
++ }
++
++ /* Initialize the thread working data for bonded interactions */
++ init_forcerec_f_threads(fr,mtop->groups.grps[egcENER].nr);
++
++ snew(fr->excl_load,fr->nthreads+1);
++
++ if (fr->cutoff_scheme == ecutsVERLET)
++ {
++ if (ir->rcoulomb != ir->rvdw)
++ {
++ gmx_fatal(FARGS,"With Verlet lists rcoulomb and rvdw should be identical");
++ }
++
++ init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
++
++ /* initialize interaction constants
++ * TODO should be moved out during modularization.
++ */
++ init_interaction_const(fp, &fr->ic, fr);
++ }
++
++ if (ir->eDispCorr != edispcNO)
++ {
++ calc_enervirdiff(fp,ir->eDispCorr,fr);
+ }
+}
+
+#define pr_real(fp,r) fprintf(fp,"%s: %e\n",#r,r)
+#define pr_int(fp,i) fprintf((fp),"%s: %d\n",#i,i)
+#define pr_bool(fp,b) fprintf((fp),"%s: %s\n",#b,bool_names[b])
+
+void pr_forcerec(FILE *fp,t_forcerec *fr,t_commrec *cr)
+{
+ int i;
+
+ pr_real(fp,fr->rlist);
+ pr_real(fp,fr->rcoulomb);
+ pr_real(fp,fr->fudgeQQ);
+ pr_bool(fp,fr->bGrid);
+ pr_bool(fp,fr->bTwinRange);
+ /*pr_int(fp,fr->cg0);
+ pr_int(fp,fr->hcg);*/
+ for(i=0; i<fr->nnblists; i++)
+ pr_int(fp,fr->nblists[i].tab.n);
+ pr_real(fp,fr->rcoulomb_switch);
+ pr_real(fp,fr->rcoulomb);
+
+ fflush(fp);
+}
++
++void forcerec_set_excl_load(t_forcerec *fr,
++ const gmx_localtop_t *top,const t_commrec *cr)
++{
++ const int *ind,*a;
++ int t,i,j,ntot,n,ntarget;
++
++ if (cr != NULL && PARTDECOMP(cr))
++ {
++ /* No OpenMP with particle decomposition */
++ pd_at_range(cr,
++ &fr->excl_load[0],
++ &fr->excl_load[1]);
++
++ return;
++ }
++
++ ind = top->excls.index;
++ a = top->excls.a;
++
++ ntot = 0;
++ for(i=0; i<top->excls.nr; i++)
++ {
++ for(j=ind[i]; j<ind[i+1]; j++)
++ {
++ if (a[j] > i)
++ {
++ ntot++;
++ }
++ }
++ }
++
++ fr->excl_load[0] = 0;
++ n = 0;
++ i = 0;
++ for(t=1; t<=fr->nthreads; t++)
++ {
++ ntarget = (ntot*t)/fr->nthreads;
++ while(i < top->excls.nr && n < ntarget)
++ {
++ for(j=ind[i]; j<ind[i+1]; j++)
++ {
++ if (a[j] > i)
++ {
++ n++;
++ }
++ }
++ i++;
++ }
++ fr->excl_load[t] = i;
++ }
++}
++
--- /dev/null
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2008, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+
+/*! \file groupcoord.h
+ *
+ * @brief Assemble atom positions for comparison with a reference set.
+ *
+ * This file contains functions to assemble the positions of a subset of the
+ * atoms and to do operations on it like determining the center of mass, or
+ * doing translations and rotations. These functions are useful when
+ * a subset of the positions needs to be compared to some set of reference
+ * positions, as e.g. done for essential dynamics.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include "typedefs.h"
++#include "types/commrec.h"
+
+
+/*! \brief Select local atoms of a group.
+*
+* Selects the indices of local atoms of a group and stores them in anrs_loc[0..nr_loc].
+* If you need the positions of the group's atoms on all nodes, provide a coll_ind[0..nr]
+* array and pass it on to communicate_group_positions. Thus the collective array
+* will always have the same atom order (ascending indices).
+*
+* \param[in] ga2la Global to local atom index conversion data.
+* \param[in] nr The total number of atoms that the group contains.
+* \param[in] anrs The global atom number of the group's atoms.
+* \param[out] nr_loc The number of group atoms present on the local node.
+* \param[out] anrs_loc The local atom numbers of the group.
+* \param[in,out] nalloc_loc Local allocation size of anrs_loc array.
+* \param[out] coll_ind If not NULL this array must be of size nr. It stores
+* for each local atom where it belongs in the global
+* (collective) array such that it can be gmx_summed
+* in the communicate_group_positions routine.
+*/
+extern void dd_make_local_group_indices(gmx_ga2la_t ga2la,
+ const int nr, int anrs[], int *nr_loc,
+ int *anrs_loc[], int *nalloc_loc,
+ int coll_ind[]);
+
+
+/*! \brief Assemble local positions into a collective array present on all nodes.
+ *
+ * Communicate the positions of the group's atoms such that every node has all of
+ * them. Unless running on huge number of cores, this is not a big performance impact
+ * as long as the collective subset [0..nr] is kept small. The atom indices are
+ * retrieved from anrs_loc[0..nr_loc]. If you call the routine for the serial case,
+ * provide an array coll_ind[i] = i for i in 1..nr.
+ *
+ * \param[in] cr Pointer to MPI communication data.
+ * \param[out] xcoll Collective array of positions, idential on all nodes
+ * after this routine has been called.
+ * \param[in,out] shifts Collective array of shifts for xcoll, needed to make
+ * the group whole. This array remembers the shifts
+ * since the start of the simulation (where the group
+ * is whole) and must therefore not be changed outside
+ * of this routine!
+ * \param[out] extra_shifts Extra shifts since last time step, only needed as
+ * buffer variable [0..nr].
+ * \param[in] bNS Neighborsearching/domain redecomposition has been
+ * performed at the begin of this time step such that
+ * the shifts have changed and need to be updated.
+ * \param[in] x_loc Pointer to the local atom positions this node has.
+ * \param[in] nr Total number of atoms in the group.
+ * \param[in] nr_loc Number of group atoms on the local node.
+ * \param[in] anrs_loc Array of the local atom indices.
+ * \param[in] coll_ind This array of size nr stores for each local atom where
+ * it belongs in the collective array so that the local
+ * contributions can be gmx_summed. It is provided by
+ * dd_make_local_group_indices.
+ * \param[in,out] xcoll_old Positions from the last time step, used to make the
+ * group whole.
+ * \param[in] box Simulation box matrix, needed to shift xcoll such that
+ * the group becomes whole.
+ */
+extern void communicate_group_positions(t_commrec *cr, rvec *xcoll, ivec *shifts,
+ ivec *extra_shifts, const gmx_bool bNS,
+ rvec *x_loc, const int nr, const int nr_loc,
+ int *anrs_loc, int *coll_ind, rvec *xcoll_old,
+ matrix box);
+
+
+/*! \brief Calculates the center of the positions x locally.
+ *
+ * Calculates the center of mass (if masses are given in the weight array) or
+ * the geometrical center (if NULL is passed as weight).
+ *
+ * \param[in] x Positions.
+ * \param[in] weight Can be NULL or an array of weights. If masses are
+ * given as weights, the COM is calculated.
+ * \param[in] nr Number of positions and weights if present.
+ * \param[out] center The (weighted) center of the positions.
+ *
+ */
+extern void get_center(rvec x[], real weight[], const int nr, rvec center);
+
+
+/*! \brief Calculates the sum of the positions x locally.
+ *
+ * Calculates the (weighted) sum of position vectors and returns the sum of
+ * weights, which is needed when local contributions shall be summed to a
+ * global weighted center.
+ *
+ * \param[in] x Array of positions.
+ * \param[in] weight Can be NULL or an array of weights.
+ * \param[in] nr Number of positions and weights if present.
+ * \param[out] dsumvec The (weighted) sum of the positions.
+ * \return Sum of weights.
+ *
+ */
+extern double get_sum_of_positions(rvec x[], real weight[], const int nr, dvec dsumvec);
+
+
+/*! \brief Calculates the global center of all local arrays x.
+ *
+ * Get the center from local positions [0..nr_loc], this involves communication.
+ * Not that the positions must already have the correct PBC representation. Use
+ * this routine if no collective coordinates are assembled from which the center
+ * could be calculated without communication.
+ *
+ * \param[in] cr Pointer to MPI communication data.
+ * \param[in] x_loc Array of local positions [0..nr_loc].
+ * \param[in] weight_loc Array of local weights, these are the masses if the
+ * center of mass is to be calculated.
+ * \param[in] nr_loc The number of positions on the local node.
+ * \param[in] nr_group The number of positions in the whole group. Since
+ * this is known anyway, we do not need to communicate
+ * and sum nr_loc if we pass it over.
+ * \param[out] center The (weighted) center of all x_loc from all the
+ * nodes.
+ */
+extern void get_center_comm(t_commrec *cr, rvec x_loc[], real weight_loc[],
+ int nr_loc, int nr_group, rvec center);
+
+
+/*! \brief Translate positions.
+ *
+ * Add a translation vector to the positions x.
+ *
+ * \param[in,out] x Array of positions.
+ * \param[in] nr Number of entries in the position array.
+ * \param[in] transvec Translation vector to be added to all positions.
+ *
+ */
+extern void translate_x(rvec x[], const int nr, const rvec transvec);
+
+
+/*! \brief Rotate positions.
+ *
+ * Rotate the positions with the rotation matrix.
+ *
+ * \param[in,out] x Array of positions.
+ * \param[in] nr Number of entries in the position array.
+ * \param[in] rmat Rotation matrix to operate on all positions.
+ *
+ */
+extern void rotate_x(rvec x[], const int nr, matrix rmat);
+
--- /dev/null
- char buf[STRLEN];
-
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "typedefs.h"
+#include "string2.h"
+#include "smalloc.h"
+#include "mdrun.h"
+#include "domdec.h"
+#include "mtop_util.h"
+#include "gmx_wallcycle.h"
+#include "vcm.h"
+#include "nrnb.h"
+#include "macros.h"
++#include "md_logging.h"
++#include "md_support.h"
+
+/* Is the signal in one simulation independent of other simulations? */
+gmx_bool gs_simlocal[eglsNR] = { TRUE, FALSE, FALSE, TRUE };
+
+/* check which of the multisim simulations has the shortest number of
+ steps and return that number of nsteps */
+gmx_large_int_t get_multisim_nsteps(const t_commrec *cr,
+ gmx_large_int_t nsteps)
+{
+ gmx_large_int_t steps_out;
+
+ if MASTER(cr)
+ {
+ gmx_large_int_t *buf;
+ int s;
+
+ snew(buf,cr->ms->nsim);
+
+ buf[cr->ms->sim] = nsteps;
+ gmx_sumli_sim(cr->ms->nsim, buf, cr->ms);
+
+ steps_out=-1;
+ for(s=0; s<cr->ms->nsim; s++)
+ {
+ /* find the smallest positive number */
+ if (buf[s]>= 0 && ((steps_out < 0) || (buf[s]<steps_out)) )
+ {
+ steps_out=buf[s];
+ }
+ }
+ sfree(buf);
+
+ /* if we're the limiting simulation, don't do anything */
+ if (steps_out>=0 && steps_out<nsteps)
+ {
+ char strbuf[255];
+ snprintf(strbuf, 255, "Will stop simulation %%d after %s steps (another simulation will end then).\n", gmx_large_int_pfmt);
+ fprintf(stderr, strbuf, cr->ms->sim, steps_out);
+ }
+ }
+ /* broadcast to non-masters */
+ gmx_bcast(sizeof(gmx_large_int_t), &steps_out, cr);
+ return steps_out;
+}
+
+int multisim_min(const gmx_multisim_t *ms,int nmin,int n)
+{
+ int *buf;
+ gmx_bool bPos,bEqual;
+ int s,d;
+
+ snew(buf,ms->nsim);
+ buf[ms->sim] = n;
+ gmx_sumi_sim(ms->nsim,buf,ms);
+ bPos = TRUE;
+ bEqual = TRUE;
+ for(s=0; s<ms->nsim; s++)
+ {
+ bPos = bPos && (buf[s] > 0);
+ bEqual = bEqual && (buf[s] == buf[0]);
+ }
+ if (bPos)
+ {
+ if (bEqual)
+ {
+ nmin = min(nmin,buf[0]);
+ }
+ else
+ {
+ /* Find the least common multiple */
+ for(d=2; d<nmin; d++)
+ {
+ s = 0;
+ while (s < ms->nsim && d % buf[s] == 0)
+ {
+ s++;
+ }
+ if (s == ms->nsim)
+ {
+ /* We found the LCM and it is less than nmin */
+ nmin = d;
+ break;
+ }
+ }
+ }
+ }
+ sfree(buf);
+
+ return nmin;
+}
+
+int multisim_nstsimsync(const t_commrec *cr,
+ const t_inputrec *ir,int repl_ex_nst)
+{
+ int nmin;
+
+ if (MASTER(cr))
+ {
+ nmin = INT_MAX;
+ nmin = multisim_min(cr->ms,nmin,ir->nstlist);
+ nmin = multisim_min(cr->ms,nmin,ir->nstcalcenergy);
+ nmin = multisim_min(cr->ms,nmin,repl_ex_nst);
+ if (nmin == INT_MAX)
+ {
+ gmx_fatal(FARGS,"Can not find an appropriate interval for inter-simulation communication, since nstlist, nstcalcenergy and -replex are all <= 0");
+ }
+ /* Avoid inter-simulation communication at every (second) step */
+ if (nmin <= 2)
+ {
+ nmin = 10;
+ }
+ }
+
+ gmx_bcast(sizeof(int),&nmin,cr);
+
+ return nmin;
+}
+
+void init_global_signals(globsig_t *gs,const t_commrec *cr,
+ const t_inputrec *ir,int repl_ex_nst)
+{
+ int i;
+
+ if (MULTISIM(cr))
+ {
+ gs->nstms = multisim_nstsimsync(cr,ir,repl_ex_nst);
+ if (debug)
+ {
+ fprintf(debug,"Syncing simulations for checkpointing and termination every %d steps\n",gs->nstms);
+ }
+ }
+ else
+ {
+ gs->nstms = 1;
+ }
+
+ for(i=0; i<eglsNR; i++)
+ {
+ gs->sig[i] = 0;
+ gs->set[i] = 0;
+ }
+}
+
+void copy_coupling_state(t_state *statea,t_state *stateb,
+ gmx_ekindata_t *ekinda,gmx_ekindata_t *ekindb, t_grpopts* opts)
+{
+
+ /* MRS note -- might be able to get rid of some of the arguments. Look over it when it's all debugged */
+
+ int i,j,nc;
+
+ /* Make sure we have enough space for x and v */
+ if (statea->nalloc > stateb->nalloc)
+ {
+ stateb->nalloc = statea->nalloc;
+ srenew(stateb->x,stateb->nalloc);
+ srenew(stateb->v,stateb->nalloc);
+ }
+
+ stateb->natoms = statea->natoms;
+ stateb->ngtc = statea->ngtc;
+ stateb->nnhpres = statea->nnhpres;
+ stateb->veta = statea->veta;
+ if (ekinda)
+ {
+ copy_mat(ekinda->ekin,ekindb->ekin);
+ for (i=0; i<stateb->ngtc; i++)
+ {
+ ekindb->tcstat[i].T = ekinda->tcstat[i].T;
+ ekindb->tcstat[i].Th = ekinda->tcstat[i].Th;
+ copy_mat(ekinda->tcstat[i].ekinh,ekindb->tcstat[i].ekinh);
+ copy_mat(ekinda->tcstat[i].ekinf,ekindb->tcstat[i].ekinf);
+ ekindb->tcstat[i].ekinscalef_nhc = ekinda->tcstat[i].ekinscalef_nhc;
+ ekindb->tcstat[i].ekinscaleh_nhc = ekinda->tcstat[i].ekinscaleh_nhc;
+ ekindb->tcstat[i].vscale_nhc = ekinda->tcstat[i].vscale_nhc;
+ }
+ }
+ copy_rvecn(statea->x,stateb->x,0,stateb->natoms);
+ copy_rvecn(statea->v,stateb->v,0,stateb->natoms);
+ copy_mat(statea->box,stateb->box);
+ copy_mat(statea->box_rel,stateb->box_rel);
+ copy_mat(statea->boxv,stateb->boxv);
+
+ for (i = 0; i<stateb->ngtc; i++)
+ {
+ nc = i*opts->nhchainlength;
+ for (j=0; j<opts->nhchainlength; j++)
+ {
+ stateb->nosehoover_xi[nc+j] = statea->nosehoover_xi[nc+j];
+ stateb->nosehoover_vxi[nc+j] = statea->nosehoover_vxi[nc+j];
+ }
+ }
+ if (stateb->nhpres_xi != NULL)
+ {
+ for (i = 0; i<stateb->nnhpres; i++)
+ {
+ nc = i*opts->nhchainlength;
+ for (j=0; j<opts->nhchainlength; j++)
+ {
+ stateb->nhpres_xi[nc+j] = statea->nhpres_xi[nc+j];
+ stateb->nhpres_vxi[nc+j] = statea->nhpres_vxi[nc+j];
+ }
+ }
+ }
+}
+
+real compute_conserved_from_auxiliary(t_inputrec *ir, t_state *state, t_extmass *MassQ)
+{
+ real quantity = 0;
+ switch (ir->etc)
+ {
+ case etcNO:
+ break;
+ case etcBERENDSEN:
+ break;
+ case etcNOSEHOOVER:
+ quantity = NPT_energy(ir,state,MassQ);
+ break;
+ case etcVRESCALE:
+ quantity = vrescale_energy(&(ir->opts),state->therm_integral);
+ break;
+ default:
+ break;
+ }
+ return quantity;
+}
+
+void compute_globals(FILE *fplog, gmx_global_stat_t gstat, t_commrec *cr, t_inputrec *ir,
+ t_forcerec *fr, gmx_ekindata_t *ekind,
+ t_state *state, t_state *state_global, t_mdatoms *mdatoms,
+ t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle,
+ gmx_enerdata_t *enerd,tensor force_vir, tensor shake_vir, tensor total_vir,
+ tensor pres, rvec mu_tot, gmx_constr_t constr,
+ globsig_t *gs,gmx_bool bInterSimGS,
+ matrix box, gmx_mtop_t *top_global, real *pcurr,
+ int natoms, gmx_bool *bSumEkinhOld, int flags)
+{
+ int i,gsi;
+ real gs_buf[eglsNR];
+ tensor corr_vir,corr_pres,shakeall_vir;
+ gmx_bool bEner,bPres,bTemp, bVV;
+ gmx_bool bRerunMD, bStopCM, bGStat, bIterate,
+ bFirstIterate,bReadEkin,bEkinAveVel,bScaleEkin, bConstrain;
+ real ekin,temp,prescorr,enercorr,dvdlcorr;
+
+ /* translate CGLO flags to gmx_booleans */
+ bRerunMD = flags & CGLO_RERUNMD;
+ bStopCM = flags & CGLO_STOPCM;
+ bGStat = flags & CGLO_GSTAT;
+
+ bReadEkin = (flags & CGLO_READEKIN);
+ bScaleEkin = (flags & CGLO_SCALEEKIN);
+ bEner = flags & CGLO_ENERGY;
+ bTemp = flags & CGLO_TEMPERATURE;
+ bPres = (flags & CGLO_PRESSURE);
+ bConstrain = (flags & CGLO_CONSTRAINT);
+ bIterate = (flags & CGLO_ITERATE);
+ bFirstIterate = (flags & CGLO_FIRSTITERATE);
+
+ /* we calculate a full state kinetic energy either with full-step velocity verlet
+ or half step where we need the pressure */
+
+ bEkinAveVel = (ir->eI==eiVV || (ir->eI==eiVVAK && bPres) || bReadEkin);
+
+ /* in initalization, it sums the shake virial in vv, and to
+ sums ekinh_old in leapfrog (or if we are calculating ekinh_old) for other reasons */
+
+ /* ########## Kinetic energy ############## */
+
+ if (bTemp)
+ {
+ /* Non-equilibrium MD: this is parallellized, but only does communication
+ * when there really is NEMD.
+ */
+
+ if (PAR(cr) && (ekind->bNEMD))
+ {
+ accumulate_u(cr,&(ir->opts),ekind);
+ }
+ debug_gmx();
+ if (bReadEkin)
+ {
+ restore_ekinstate_from_state(cr,ekind,&state_global->ekinstate);
+ }
+ else
+ {
+
+ calc_ke_part(state,&(ir->opts),mdatoms,ekind,nrnb,bEkinAveVel,bIterate);
+ }
+
+ debug_gmx();
+ }
+
+ /* Calculate center of mass velocity if necessary, also parallellized */
+ if (bStopCM)
+ {
+ calc_vcm_grp(fplog,mdatoms->start,mdatoms->homenr,mdatoms,
+ state->x,state->v,vcm);
+ }
+
+ if (bTemp || bStopCM || bPres || bEner || bConstrain)
+ {
+ if (!bGStat)
+ {
+ /* We will not sum ekinh_old,
+ * so signal that we still have to do it.
+ */
+ *bSumEkinhOld = TRUE;
+
+ }
+ else
+ {
+ if (gs != NULL)
+ {
+ for(i=0; i<eglsNR; i++)
+ {
+ gs_buf[i] = gs->sig[i];
+ }
+ }
+ if (PAR(cr))
+ {
+ wallcycle_start(wcycle,ewcMoveE);
+ global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
+ ir,ekind,constr,bStopCM ? vcm : NULL,
+ gs != NULL ? eglsNR : 0,gs_buf,
+ top_global,state,
+ *bSumEkinhOld,flags);
+ wallcycle_stop(wcycle,ewcMoveE);
+ }
+ if (gs != NULL)
+ {
+ if (MULTISIM(cr) && bInterSimGS)
+ {
+ if (MASTER(cr))
+ {
+ /* Communicate the signals between the simulations */
+ gmx_sum_sim(eglsNR,gs_buf,cr->ms);
+ }
+ /* Communicate the signals form the master to the others */
+ gmx_bcast(eglsNR*sizeof(gs_buf[0]),gs_buf,cr);
+ }
+ for(i=0; i<eglsNR; i++)
+ {
+ if (bInterSimGS || gs_simlocal[i])
+ {
+ /* Set the communicated signal only when it is non-zero,
+ * since signals might not be processed at each MD step.
+ */
+ gsi = (gs_buf[i] >= 0 ?
+ (int)(gs_buf[i] + 0.5) :
+ (int)(gs_buf[i] - 0.5));
+ if (gsi != 0)
+ {
+ gs->set[i] = gsi;
+ }
+ /* Turn off the local signal */
+ gs->sig[i] = 0;
+ }
+ }
+ }
+ *bSumEkinhOld = FALSE;
+ }
+ }
+
+ if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0))
+ {
+ correct_ekin(debug,
+ mdatoms->start,mdatoms->start+mdatoms->homenr,
+ state->v,vcm->group_p[0],
+ mdatoms->massT,mdatoms->tmass,ekind->ekin);
+ }
+
+ /* Do center of mass motion removal */
+ if (bStopCM)
+ {
+ check_cm_grp(fplog,vcm,ir,1);
+ do_stopcm_grp(fplog,mdatoms->start,mdatoms->homenr,mdatoms->cVCM,
+ state->x,state->v,vcm);
+ inc_nrnb(nrnb,eNR_STOPCM,mdatoms->homenr);
+ }
+
+ if (bEner)
+ {
+ /* Calculate the amplitude of the cosine velocity profile */
+ ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass;
+ }
+
+ if (bTemp)
+ {
+ /* Sum the kinetic energies of the groups & calc temp */
+ /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with IR_NPT_TROTTER */
+ /* three maincase: VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md).
+ Leap with AveVel is not supported; it's not clear that it will actually work.
+ bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy.
+ If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy.
+ bSaveEkinOld: If TRUE (in the case of iteration = bIterate is TRUE), we don't reset the ekinscale_nhc.
+ If FALSE, we go ahead and erase over it.
+ */
+ enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,&(enerd->term[F_DKDL]),
+ bEkinAveVel,bIterate,bScaleEkin);
+
+ enerd->term[F_EKIN] = trace(ekind->ekin);
+ }
+
+ /* ########## Long range energy information ###### */
+
+ if (bEner || bPres || bConstrain)
+ {
+ calc_dispcorr(fplog,ir,fr,0,top_global->natoms,box,state->lambda[efptVDW],
+ corr_pres,corr_vir,&prescorr,&enercorr,&dvdlcorr);
+ }
+
+ if (bEner && bFirstIterate)
+ {
+ enerd->term[F_DISPCORR] = enercorr;
+ enerd->term[F_EPOT] += enercorr;
+ enerd->term[F_DVDL_VDW] += dvdlcorr;
+ }
+
+ /* ########## Now pressure ############## */
+ if (bPres || bConstrain)
+ {
+
+ m_add(force_vir,shake_vir,total_vir);
+
+ /* Calculate pressure and apply LR correction if PPPM is used.
+ * Use the box from last timestep since we already called update().
+ */
+
+ enerd->term[F_PRES] = calc_pres(fr->ePBC,ir->nwall,box,ekind->ekin,total_vir,pres);
+
+ /* Calculate long range corrections to pressure and energy */
+ /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT],
+ and computes enerd->term[F_DISPCORR]. Also modifies the
+ total_vir and pres tesors */
+
+ m_add(total_vir,corr_vir,total_vir);
+ m_add(pres,corr_pres,pres);
+ enerd->term[F_PDISPCORR] = prescorr;
+ enerd->term[F_PRES] += prescorr;
+ *pcurr = enerd->term[F_PRES];
+ /* calculate temperature using virial */
+ enerd->term[F_VTEMP] = calc_temp(trace(total_vir),ir->opts.nrdf[0]);
+
+ }
+}
+
+void check_nst_param(FILE *fplog,t_commrec *cr,
+ const char *desc_nst,int nst,
+ const char *desc_p,int *p)
+{
- sprintf(buf,"NOTE: %s changes %s to %d\n",desc_nst,desc_p,*p);
- md_print_warning(cr,fplog,buf);
+ if (*p > 0 && *p % nst != 0)
+ {
+ /* Round up to the next multiple of nst */
+ *p = ((*p)/nst + 1)*nst;
- void reset_all_counters(FILE *fplog,t_commrec *cr,
- gmx_large_int_t step,
- gmx_large_int_t *step_rel,t_inputrec *ir,
- gmx_wallcycle_t wcycle,t_nrnb *nrnb,
- gmx_runtime_t *runtime)
- {
- char buf[STRLEN],sbuf[STEPSTRSIZE];
-
- /* Reset all the counters related to performance over the run */
- sprintf(buf,"Step %s: resetting all time and cycle counters\n",
- gmx_step_str(step,sbuf));
- md_print_warning(cr,fplog,buf);
-
- wallcycle_stop(wcycle,ewcRUN);
- wallcycle_reset_all(wcycle);
- if (DOMAINDECOMP(cr))
- {
- reset_dd_statistics_counters(cr->dd);
- }
- init_nrnb(nrnb);
- ir->init_step += *step_rel;
- ir->nsteps -= *step_rel;
- *step_rel = 0;
- wallcycle_start(wcycle,ewcRUN);
- runtime_start(runtime);
- print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
- }
-
- void min_zero(int *n,int i)
++ md_print_warn(cr,fplog,
++ "NOTE: %s changes %s to %d\n",desc_nst,desc_p,*p);
+ }
+}
+
+void set_current_lambdas(gmx_large_int_t step, t_lambda *fepvals, gmx_bool bRerunMD,
+ t_trxframe *rerun_fr,t_state *state_global, t_state *state, double lam0[])
+/* find the current lambdas. If rerunning, we either read in a state, or a lambda value,
+ requiring different logic. */
+{
+ real frac;
+ int i,fep_state=0;
+ if (bRerunMD)
+ {
+ if (rerun_fr->bLambda)
+ {
+ if (fepvals->delta_lambda!=0)
+ {
+ state_global->lambda[efptFEP] = rerun_fr->lambda;
+ for (i=0;i<efptNR;i++)
+ {
+ if (i!= efptFEP)
+ {
+ state->lambda[i] = state_global->lambda[i];
+ }
+ }
+ }
+ else
+ {
+ /* find out between which two value of lambda we should be */
+ frac = (step*fepvals->delta_lambda);
+ fep_state = floor(frac*fepvals->n_lambda);
+ /* interpolate between this state and the next */
+ /* this assumes that the initial lambda corresponds to lambda==0, which is verified in grompp */
+ frac = (frac*fepvals->n_lambda)-fep_state;
+ for (i=0;i<efptNR;i++)
+ {
+ state_global->lambda[i] = lam0[i] + (fepvals->all_lambda[i][fep_state]) +
+ frac*(fepvals->all_lambda[i][fep_state+1]-fepvals->all_lambda[i][fep_state]);
+ }
+ }
+ }
+ else if (rerun_fr->bFepState)
+ {
+ state_global->fep_state = rerun_fr->fep_state;
+ for (i=0;i<efptNR;i++)
+ {
+ state_global->lambda[i] = fepvals->all_lambda[i][fep_state];
+ }
+ }
+ }
+ else
+ {
+ if (fepvals->delta_lambda!=0)
+ {
+ /* find out between which two value of lambda we should be */
+ frac = (step*fepvals->delta_lambda);
+ if (fepvals->n_lambda > 0)
+ {
+ fep_state = floor(frac*fepvals->n_lambda);
+ /* interpolate between this state and the next */
+ /* this assumes that the initial lambda corresponds to lambda==0, which is verified in grompp */
+ frac = (frac*fepvals->n_lambda)-fep_state;
+ for (i=0;i<efptNR;i++)
+ {
+ state_global->lambda[i] = lam0[i] + (fepvals->all_lambda[i][fep_state]) +
+ frac*(fepvals->all_lambda[i][fep_state+1]-fepvals->all_lambda[i][fep_state]);
+ }
+ }
+ else
+ {
+ for (i=0;i<efptNR;i++)
+ {
+ state_global->lambda[i] = lam0[i] + frac;
+ }
+ }
+ }
+ }
+ for (i=0;i<efptNR;i++)
+ {
+ state->lambda[i] = state_global->lambda[i];
+ }
+}
+
- int lcd4(int i1,int i2,int i3,int i4)
++static void min_zero(int *n,int i)
+{
+ if (i > 0 && (*n == 0 || i < *n))
+ {
+ *n = i;
+ }
+}
+
- char buf[STRLEN];
-
++static int lcd4(int i1,int i2,int i3,int i4)
+{
+ int nst;
+
+ nst = 0;
+ min_zero(&nst,i1);
+ min_zero(&nst,i2);
+ min_zero(&nst,i3);
+ min_zero(&nst,i4);
+ if (nst == 0)
+ {
+ gmx_incons("All 4 inputs for determininig nstglobalcomm are <= 0");
+ }
+
+ while (nst > 1 && ((i1 > 0 && i1 % nst != 0) ||
+ (i2 > 0 && i2 % nst != 0) ||
+ (i3 > 0 && i3 % nst != 0) ||
+ (i4 > 0 && i4 % nst != 0)))
+ {
+ nst--;
+ }
+
+ return nst;
+}
+
+int check_nstglobalcomm(FILE *fplog,t_commrec *cr,
+ int nstglobalcomm,t_inputrec *ir)
+{
- sprintf(buf,"WARNING: nstglobalcomm is larger than nstlist, but not a multiple, setting it to %d\n",nstglobalcomm);
- md_print_warning(cr,fplog,buf);
+ if (!EI_DYNAMICS(ir->eI))
+ {
+ nstglobalcomm = 1;
+ }
+
+ if (nstglobalcomm == -1)
+ {
+ if (!(ir->nstcalcenergy > 0 ||
+ ir->nstlist > 0 ||
+ ir->etc != etcNO ||
+ ir->epc != epcNO))
+ {
+ nstglobalcomm = 10;
+ if (ir->nstenergy > 0 && ir->nstenergy < nstglobalcomm)
+ {
+ nstglobalcomm = ir->nstenergy;
+ }
+ }
+ else
+ {
+ /* Ensure that we do timely global communication for
+ * (possibly) each of the four following options.
+ */
+ nstglobalcomm = lcd4(ir->nstcalcenergy,
+ ir->nstlist,
+ ir->etc != etcNO ? ir->nsttcouple : 0,
+ ir->epc != epcNO ? ir->nstpcouple : 0);
+ }
+ }
+ else
+ {
+ if (ir->nstlist > 0 &&
+ nstglobalcomm > ir->nstlist && nstglobalcomm % ir->nstlist != 0)
+ {
+ nstglobalcomm = (nstglobalcomm / ir->nstlist)*ir->nstlist;
- sprintf(buf,"WARNING: Changing nstcomm from %d to %d\n",
- ir->nstcomm,nstglobalcomm);
- md_print_warning(cr,fplog,buf);
++ md_print_warn(cr,fplog,"WARNING: nstglobalcomm is larger than nstlist, but not a multiple, setting it to %d\n",nstglobalcomm);
+ }
+ if (ir->nstcalcenergy > 0)
+ {
+ check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
+ "nstcalcenergy",&ir->nstcalcenergy);
+ }
+ if (ir->etc != etcNO && ir->nsttcouple > 0)
+ {
+ check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
+ "nsttcouple",&ir->nsttcouple);
+ }
+ if (ir->epc != epcNO && ir->nstpcouple > 0)
+ {
+ check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
+ "nstpcouple",&ir->nstpcouple);
+ }
+
+ check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
+ "nstenergy",&ir->nstenergy);
+
+ check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
+ "nstlog",&ir->nstlog);
+ }
+
+ if (ir->comm_mode != ecmNO && ir->nstcomm < nstglobalcomm)
+ {
- md_print_warning(cr,fplog,"Old tpr file with twin-range settings: modifying energy calculation and/or T/P-coupling frequencies");
++ md_print_warn(cr,fplog,"WARNING: Changing nstcomm from %d to %d\n",
++ ir->nstcomm,nstglobalcomm);
+ ir->nstcomm = nstglobalcomm;
+ }
+
+ return nstglobalcomm;
+}
+
+void check_ir_old_tpx_versions(t_commrec *cr,FILE *fplog,
+ t_inputrec *ir,gmx_mtop_t *mtop)
+{
+ /* Check required for old tpx files */
+ if (IR_TWINRANGE(*ir) && ir->nstlist > 1 &&
+ ir->nstcalcenergy % ir->nstlist != 0)
+ {
- md_print_warning(cr,fplog,"With twin-range cut-off's and SHAKE the virial and pressure are incorrect");
++ md_print_warn(cr,fplog,"Old tpr file with twin-range settings: modifying energy calculation and/or T/P-coupling frequencies\n");
+
+ if (gmx_mtop_ftype_count(mtop,F_CONSTR) +
+ gmx_mtop_ftype_count(mtop,F_CONSTRNC) > 0 &&
+ ir->eConstrAlg == econtSHAKE)
+ {
-
- void md_print_warning(const t_commrec *cr,FILE *fplog,const char *buf)
- {
- if (MASTER(cr))
- {
- fprintf(stderr,"\n%s\n",buf);
- }
- if (fplog)
- {
- fprintf(fplog,"\n%s\n",buf);
- }
- }
++ md_print_warn(cr,fplog,"With twin-range cut-off's and SHAKE the virial and pressure are incorrect\n");
+ if (ir->epc != epcNO)
+ {
+ gmx_fatal(FARGS,"Can not do pressure coupling with twin-range cut-off's and SHAKE");
+ }
+ }
+ check_nst_param(fplog,cr,"nstlist",ir->nstlist,
+ "nstcalcenergy",&ir->nstcalcenergy);
+ if (ir->epc != epcNO)
+ {
+ check_nst_param(fplog,cr,"nstlist",ir->nstlist,
+ "nstpcouple",&ir->nstpcouple);
+ }
+ check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
+ "nstenergy",&ir->nstenergy);
+ check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
+ "nstlog",&ir->nstlog);
+ if (ir->efep != efepNO)
+ {
+ check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
+ "nstdhdl",&ir->fepvals->nstdhdl);
+ }
+ }
+}
+
+void rerun_parallel_comm(t_commrec *cr,t_trxframe *fr,
+ gmx_bool *bNotLastFrame)
+{
+ gmx_bool bAlloc;
+ rvec *xp,*vp;
+
+ bAlloc = (fr->natoms == 0);
+
+ if (MASTER(cr) && !*bNotLastFrame)
+ {
+ fr->natoms = -1;
+ }
+ xp = fr->x;
+ vp = fr->v;
+ gmx_bcast(sizeof(*fr),fr,cr);
+ fr->x = xp;
+ fr->v = vp;
+
+ *bNotLastFrame = (fr->natoms >= 0);
+
+ if (*bNotLastFrame && PARTDECOMP(cr))
+ {
+ /* x and v are the only variable size quantities stored in trr
+ * that are required for rerun (f is not needed).
+ */
+ if (bAlloc)
+ {
+ snew(fr->x,fr->natoms);
+ snew(fr->v,fr->natoms);
+ }
+ if (fr->bX)
+ {
+ gmx_bcast(fr->natoms*sizeof(fr->x[0]),fr->x[0],cr);
+ }
+ if (fr->bV)
+ {
+ gmx_bcast(fr->natoms*sizeof(fr->v[0]),fr->v[0],cr);
+ }
+ }
+}
--- /dev/null
- md->bEner[i] = (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC);
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include <float.h>
+#include "typedefs.h"
+#include "string2.h"
+#include "mdebin.h"
+#include "smalloc.h"
+#include "physics.h"
+#include "enxio.h"
+#include "vec.h"
+#include "disre.h"
+#include "main.h"
+#include "network.h"
+#include "names.h"
+#include "orires.h"
+#include "constr.h"
+#include "mtop_util.h"
+#include "xvgr.h"
+#include "gmxfio.h"
+#include "macros.h"
+#include "mdrun.h"
+#include "mdebin_bar.h"
+
+
+static const char *conrmsd_nm[] = { "Constr. rmsd", "Constr.2 rmsd" };
+
+static const char *boxs_nm[] = { "Box-X", "Box-Y", "Box-Z" };
+
+static const char *tricl_boxs_nm[] = {
+ "Box-XX", "Box-YY", "Box-ZZ",
+ "Box-YX", "Box-ZX", "Box-ZY"
+};
+
+static const char *vol_nm[] = { "Volume" };
+
+static const char *dens_nm[] = {"Density" };
+
+static const char *pv_nm[] = {"pV" };
+
+static const char *enthalpy_nm[] = {"Enthalpy" };
+
+static const char *boxvel_nm[] = {
+ "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
+ "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
+};
+
+#define NBOXS asize(boxs_nm)
+#define NTRICLBOXS asize(tricl_boxs_nm)
+
+t_mdebin *init_mdebin(ener_file_t fp_ene,
+ const gmx_mtop_t *mtop,
+ const t_inputrec *ir,
+ FILE *fp_dhdl)
+{
+ const char *ener_nm[F_NRE];
+ static const char *vir_nm[] = {
+ "Vir-XX", "Vir-XY", "Vir-XZ",
+ "Vir-YX", "Vir-YY", "Vir-YZ",
+ "Vir-ZX", "Vir-ZY", "Vir-ZZ"
+ };
+ static const char *sv_nm[] = {
+ "ShakeVir-XX", "ShakeVir-XY", "ShakeVir-XZ",
+ "ShakeVir-YX", "ShakeVir-YY", "ShakeVir-YZ",
+ "ShakeVir-ZX", "ShakeVir-ZY", "ShakeVir-ZZ"
+ };
+ static const char *fv_nm[] = {
+ "ForceVir-XX", "ForceVir-XY", "ForceVir-XZ",
+ "ForceVir-YX", "ForceVir-YY", "ForceVir-YZ",
+ "ForceVir-ZX", "ForceVir-ZY", "ForceVir-ZZ"
+ };
+ static const char *pres_nm[] = {
+ "Pres-XX","Pres-XY","Pres-XZ",
+ "Pres-YX","Pres-YY","Pres-YZ",
+ "Pres-ZX","Pres-ZY","Pres-ZZ"
+ };
+ static const char *surft_nm[] = {
+ "#Surf*SurfTen"
+ };
+ static const char *mu_nm[] = {
+ "Mu-X", "Mu-Y", "Mu-Z"
+ };
+ static const char *vcos_nm[] = {
+ "2CosZ*Vel-X"
+ };
+ static const char *visc_nm[] = {
+ "1/Viscosity"
+ };
+ static const char *baro_nm[] = {
+ "Barostat"
+ };
+
+ char **grpnms;
+ const gmx_groups_t *groups;
+ char **gnm;
+ char buf[256];
+ const char *bufi;
+ t_mdebin *md;
+ int i,j,ni,nj,n,nh,k,kk,ncon,nset;
+ gmx_bool bBHAM,bNoseHoover,b14;
+
+ snew(md,1);
+
+ md->bVir=TRUE;
+ md->bPress=TRUE;
+ md->bSurft=TRUE;
+ md->bMu=TRUE;
+
+ if (EI_DYNAMICS(ir->eI))
+ {
+ md->delta_t = ir->delta_t;
+ }
+ else
+ {
+ md->delta_t = 0;
+ }
+
+ groups = &mtop->groups;
+
+ bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
+ b14 = (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
+ gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0);
+
+ ncon = gmx_mtop_ftype_count(mtop,F_CONSTR);
+ nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
+ md->bConstr = (ncon > 0 || nset > 0);
+ md->bConstrVir = FALSE;
+ if (md->bConstr) {
+ if (ncon > 0 && ir->eConstrAlg == econtLINCS) {
+ if (ir->eI == eiSD2)
+ md->nCrmsd = 2;
+ else
+ md->nCrmsd = 1;
+ }
+ md->bConstrVir = (getenv("GMX_CONSTRAINTVIR") != NULL);
+ } else {
+ md->nCrmsd = 0;
+ }
+
+ /* Energy monitoring */
+ for(i=0;i<egNR;i++)
+ {
+ md->bEInd[i]=FALSE;
+ }
+
+#ifndef GMX_OPENMM
+ for(i=0; i<F_NRE; i++)
+ {
+ md->bEner[i] = FALSE;
+ if (i == F_LJ)
+ md->bEner[i] = !bBHAM;
+ else if (i == F_BHAM)
+ md->bEner[i] = bBHAM;
+ else if (i == F_EQM)
+ md->bEner[i] = ir->bQMMM;
+ else if (i == F_COUL_LR)
+ md->bEner[i] = (ir->rcoulomb > ir->rlist);
+ else if (i == F_LJ_LR)
+ md->bEner[i] = (!bBHAM && ir->rvdw > ir->rlist);
+ else if (i == F_BHAM_LR)
+ md->bEner[i] = (bBHAM && ir->rvdw > ir->rlist);
+ else if (i == F_RF_EXCL)
- /* FIXME: The constness should not be cast away */
- /*ener_nm[f_nre]=(char *)interaction_function[i].longname;*/
++ md->bEner[i] = (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC && ir->cutoff_scheme == ecutsGROUP);
+ else if (i == F_COUL_RECIP)
+ md->bEner[i] = EEL_FULL(ir->coulombtype);
+ else if (i == F_LJ14)
+ md->bEner[i] = b14;
+ else if (i == F_COUL14)
+ md->bEner[i] = b14;
+ else if (i == F_LJC14_Q || i == F_LJC_PAIRS_NB)
+ md->bEner[i] = FALSE;
+ else if ((i == F_DVDL_COUL && ir->fepvals->separate_dvdl[efptCOUL]) ||
+ (i == F_DVDL_VDW && ir->fepvals->separate_dvdl[efptVDW]) ||
+ (i == F_DVDL_BONDED && ir->fepvals->separate_dvdl[efptBONDED]) ||
+ (i == F_DVDL_RESTRAINT && ir->fepvals->separate_dvdl[efptRESTRAINT]) ||
+ (i == F_DKDL && ir->fepvals->separate_dvdl[efptMASS]) ||
+ (i == F_DVDL && ir->fepvals->separate_dvdl[efptFEP]))
+ md->bEner[i] = (ir->efep != efepNO);
+ else if ((interaction_function[i].flags & IF_VSITE) ||
+ (i == F_CONSTR) || (i == F_CONSTRNC) || (i == F_SETTLE))
+ md->bEner[i] = FALSE;
+ else if ((i == F_COUL_SR) || (i == F_EPOT) || (i == F_PRES) || (i==F_EQM))
+ md->bEner[i] = TRUE;
+ else if ((i == F_GBPOL) && ir->implicit_solvent==eisGBSA)
+ md->bEner[i] = TRUE;
+ else if ((i == F_NPSOLVATION) && ir->implicit_solvent==eisGBSA && (ir->sa_algorithm != esaNO))
+ md->bEner[i] = TRUE;
+ else if ((i == F_GB12) || (i == F_GB13) || (i == F_GB14))
+ md->bEner[i] = FALSE;
+ else if ((i == F_ETOT) || (i == F_EKIN) || (i == F_TEMP))
+ md->bEner[i] = EI_DYNAMICS(ir->eI);
+ else if (i==F_VTEMP)
+ md->bEner[i] = (EI_DYNAMICS(ir->eI) && getenv("GMX_VIRIAL_TEMPERATURE"));
+ else if (i == F_DISPCORR || i == F_PDISPCORR)
+ md->bEner[i] = (ir->eDispCorr != edispcNO);
+ else if (i == F_DISRESVIOL)
+ md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_DISRES) > 0);
+ else if (i == F_ORIRESDEV)
+ md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0);
+ else if (i == F_CONNBONDS)
+ md->bEner[i] = FALSE;
+ else if (i == F_COM_PULL)
+ md->bEner[i] = (ir->ePull == epullUMBRELLA || ir->ePull == epullCONST_F || ir->bRot);
+ else if (i == F_ECONSERVED)
+ md->bEner[i] = ((ir->etc == etcNOSEHOOVER || ir->etc == etcVRESCALE) &&
+ (ir->epc == epcNO || ir->epc==epcMTTK));
+ else
+ md->bEner[i] = (gmx_mtop_ftype_count(mtop,i) > 0);
+ }
+#else
+ /* OpenMM always produces only the following 4 energy terms */
+ md->bEner[F_EPOT] = TRUE;
+ md->bEner[F_EKIN] = TRUE;
+ md->bEner[F_ETOT] = TRUE;
+ md->bEner[F_TEMP] = TRUE;
+#endif
+
+ /* for AdResS simulations, most energy terms are not meaningful, and thus disabled */
+ if (ir->bAdress && !debug) {
+ for (i = 0; i < F_NRE; i++) {
+ md->bEner[i] = FALSE;
+ if(i == F_EKIN){ md->bEner[i] = TRUE;}
+ if(i == F_TEMP){ md->bEner[i] = TRUE;}
+ }
+ md->bVir=FALSE;
+ md->bPress=FALSE;
+ md->bSurft=FALSE;
+ md->bMu=FALSE;
+ }
+
+ md->f_nre=0;
+ for(i=0; i<F_NRE; i++)
+ {
+ if (md->bEner[i])
+ {
- if(md->bMu)
+ ener_nm[md->f_nre]=interaction_function[i].longname;
+ md->f_nre++;
+ }
+ }
+
+ md->epc = ir->epc;
+ md->bDiagPres = !TRICLINIC(ir->ref_p);
+ md->ref_p = (ir->ref_p[XX][XX]+ir->ref_p[YY][YY]+ir->ref_p[ZZ][ZZ])/DIM;
+ md->bTricl = TRICLINIC(ir->compress) || TRICLINIC(ir->deform);
+ md->bDynBox = DYNAMIC_BOX(*ir);
+ md->etc = ir->etc;
+ md->bNHC_trotter = IR_NVT_TROTTER(ir);
+ md->bPrintNHChains = ir-> bPrintNHChains;
+ md->bMTTK = (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir));
++ md->bMu = NEED_MUTOT(*ir);
+
+ md->ebin = mk_ebin();
+ /* Pass NULL for unit to let get_ebin_space determine the units
+ * for interaction_function[i].longname
+ */
+ md->ie = get_ebin_space(md->ebin,md->f_nre,ener_nm,NULL);
+ if (md->nCrmsd)
+ {
+ /* This should be called directly after the call for md->ie,
+ * such that md->iconrmsd follows directly in the list.
+ */
+ md->iconrmsd = get_ebin_space(md->ebin,md->nCrmsd,conrmsd_nm,"");
+ }
+ if (md->bDynBox)
+ {
+ md->ib = get_ebin_space(md->ebin,
+ md->bTricl ? NTRICLBOXS : NBOXS,
+ md->bTricl ? tricl_boxs_nm : boxs_nm,
+ unit_length);
+ md->ivol = get_ebin_space(md->ebin, 1, vol_nm, unit_volume);
+ md->idens = get_ebin_space(md->ebin, 1, dens_nm, unit_density_SI);
+ if (md->bDiagPres)
+ {
+ md->ipv = get_ebin_space(md->ebin, 1, pv_nm, unit_energy);
+ md->ienthalpy = get_ebin_space(md->ebin, 1, enthalpy_nm, unit_energy);
+ }
+ }
+ if (md->bConstrVir)
+ {
+ md->isvir = get_ebin_space(md->ebin,asize(sv_nm),sv_nm,unit_energy);
+ md->ifvir = get_ebin_space(md->ebin,asize(fv_nm),fv_nm,unit_energy);
+ }
+ if (md->bVir)
+ md->ivir = get_ebin_space(md->ebin,asize(vir_nm),vir_nm,unit_energy);
+ if (md->bPress)
+ md->ipres = get_ebin_space(md->ebin,asize(pres_nm),pres_nm,unit_pres_bar);
+ if (md->bSurft)
+ md->isurft = get_ebin_space(md->ebin,asize(surft_nm),surft_nm,
+ unit_surft_bar);
+ if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
+ {
+ md->ipc = get_ebin_space(md->ebin,md->bTricl ? 6 : 3,
+ boxvel_nm,unit_vel);
+ }
+ if (md->bMu)
++ {
+ md->imu = get_ebin_space(md->ebin,asize(mu_nm),mu_nm,unit_dipole_D);
++ }
+ if (ir->cos_accel != 0)
+ {
+ md->ivcos = get_ebin_space(md->ebin,asize(vcos_nm),vcos_nm,unit_vel);
+ md->ivisc = get_ebin_space(md->ebin,asize(visc_nm),visc_nm,
+ unit_invvisc_SI);
+ }
+
+ /* Energy monitoring */
+ for(i=0;i<egNR;i++)
+ {
+ md->bEInd[i] = FALSE;
+ }
+ md->bEInd[egCOULSR] = TRUE;
+ md->bEInd[egLJSR ] = TRUE;
+
+ if (ir->rcoulomb > ir->rlist)
+ {
+ md->bEInd[egCOULLR] = TRUE;
+ }
+ if (!bBHAM)
+ {
+ if (ir->rvdw > ir->rlist)
+ {
+ md->bEInd[egLJLR] = TRUE;
+ }
+ }
+ else
+ {
+ md->bEInd[egLJSR] = FALSE;
+ md->bEInd[egBHAMSR] = TRUE;
+ if (ir->rvdw > ir->rlist)
+ {
+ md->bEInd[egBHAMLR] = TRUE;
+ }
+ }
+ if (b14)
+ {
+ md->bEInd[egLJ14] = TRUE;
+ md->bEInd[egCOUL14] = TRUE;
+ }
+ md->nEc=0;
+ for(i=0; (i<egNR); i++)
+ {
+ if (md->bEInd[i])
+ {
+ md->nEc++;
+ }
+ }
+
+ n=groups->grps[egcENER].nr;
+ /* for AdResS simulations, most energy terms are not meaningful, and thus disabled */
+ if (!ir->bAdress){
+ /*standard simulation*/
+ md->nEg=n;
+ md->nE=(n*(n+1))/2;
+ }
+ else if (!debug) {
+ /*AdResS simulation*/
+ md->nU=0;
+ md->nEg=0;
+ md->nE=0;
+ md->nEc=0;
+ md->isvir=FALSE;
+ }
+ snew(md->igrp,md->nE);
+ if (md->nE > 1)
+ {
+ n=0;
+ snew(gnm,md->nEc);
+ for(k=0; (k<md->nEc); k++)
+ {
+ snew(gnm[k],STRLEN);
+ }
+ for(i=0; (i<groups->grps[egcENER].nr); i++)
+ {
+ ni=groups->grps[egcENER].nm_ind[i];
+ for(j=i; (j<groups->grps[egcENER].nr); j++)
+ {
+ nj=groups->grps[egcENER].nm_ind[j];
+ for(k=kk=0; (k<egNR); k++)
+ {
+ if (md->bEInd[k])
+ {
+ sprintf(gnm[kk],"%s:%s-%s",egrp_nm[k],
+ *(groups->grpname[ni]),*(groups->grpname[nj]));
+ kk++;
+ }
+ }
+ md->igrp[n]=get_ebin_space(md->ebin,md->nEc,
+ (const char **)gnm,unit_energy);
+ n++;
+ }
+ }
+ for(k=0; (k<md->nEc); k++)
+ {
+ sfree(gnm[k]);
+ }
+ sfree(gnm);
+
+ if (n != md->nE)
+ {
+ gmx_incons("Number of energy terms wrong");
+ }
+ }
+
+ md->nTC=groups->grps[egcTC].nr;
+ md->nNHC = ir->opts.nhchainlength; /* shorthand for number of NH chains */
+ if (md->bMTTK)
+ {
+ md->nTCP = 1; /* assume only one possible coupling system for barostat
+ for now */
+ }
+ else
+ {
+ md->nTCP = 0;
+ }
+ if (md->etc == etcNOSEHOOVER)
+ {
+ if (md->bNHC_trotter)
+ {
+ md->mde_n = 2*md->nNHC*md->nTC;
+ }
+ else
+ {
+ md->mde_n = 2*md->nTC;
+ }
+ if (md->epc == epcMTTK)
+ {
+ md->mdeb_n = 2*md->nNHC*md->nTCP;
+ }
+ } else {
+ md->mde_n = md->nTC;
+ md->mdeb_n = 0;
+ }
+
+ snew(md->tmp_r,md->mde_n);
+ snew(md->tmp_v,md->mde_n);
+ snew(md->grpnms,md->mde_n);
+ grpnms = md->grpnms;
+
+ for(i=0; (i<md->nTC); i++)
+ {
+ ni=groups->grps[egcTC].nm_ind[i];
+ sprintf(buf,"T-%s",*(groups->grpname[ni]));
+ grpnms[i]=strdup(buf);
+ }
+ md->itemp=get_ebin_space(md->ebin,md->nTC,(const char **)grpnms,
+ unit_temp_K);
+
+ if (md->etc == etcNOSEHOOVER)
+ {
+ if (md->bPrintNHChains)
+ {
+ if (md->bNHC_trotter)
+ {
+ for(i=0; (i<md->nTC); i++)
+ {
+ ni=groups->grps[egcTC].nm_ind[i];
+ bufi = *(groups->grpname[ni]);
+ for(j=0; (j<md->nNHC); j++)
+ {
+ sprintf(buf,"Xi-%d-%s",j,bufi);
+ grpnms[2*(i*md->nNHC+j)]=strdup(buf);
+ sprintf(buf,"vXi-%d-%s",j,bufi);
+ grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
+ }
+ }
+ md->itc=get_ebin_space(md->ebin,md->mde_n,
+ (const char **)grpnms,unit_invtime);
+ if (md->bMTTK)
+ {
+ for(i=0; (i<md->nTCP); i++)
+ {
+ bufi = baro_nm[0]; /* All barostat DOF's together for now. */
+ for(j=0; (j<md->nNHC); j++)
+ {
+ sprintf(buf,"Xi-%d-%s",j,bufi);
+ grpnms[2*(i*md->nNHC+j)]=strdup(buf);
+ sprintf(buf,"vXi-%d-%s",j,bufi);
+ grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
+ }
+ }
+ md->itcb=get_ebin_space(md->ebin,md->mdeb_n,
+ (const char **)grpnms,unit_invtime);
+ }
+ }
+ else
+ {
+ for(i=0; (i<md->nTC); i++)
+ {
+ ni=groups->grps[egcTC].nm_ind[i];
+ bufi = *(groups->grpname[ni]);
+ sprintf(buf,"Xi-%s",bufi);
+ grpnms[2*i]=strdup(buf);
+ sprintf(buf,"vXi-%s",bufi);
+ grpnms[2*i+1]=strdup(buf);
+ }
+ md->itc=get_ebin_space(md->ebin,md->mde_n,
+ (const char **)grpnms,unit_invtime);
+ }
+ }
+ }
+ else if (md->etc == etcBERENDSEN || md->etc == etcYES ||
+ md->etc == etcVRESCALE)
+ {
+ for(i=0; (i<md->nTC); i++)
+ {
+ ni=groups->grps[egcTC].nm_ind[i];
+ sprintf(buf,"Lamb-%s",*(groups->grpname[ni]));
+ grpnms[i]=strdup(buf);
+ }
+ md->itc=get_ebin_space(md->ebin,md->mde_n,(const char **)grpnms,"");
+ }
+
+ sfree(grpnms);
+
+
+ md->nU=groups->grps[egcACC].nr;
+ if (md->nU > 1)
+ {
+ snew(grpnms,3*md->nU);
+ for(i=0; (i<md->nU); i++)
+ {
+ ni=groups->grps[egcACC].nm_ind[i];
+ sprintf(buf,"Ux-%s",*(groups->grpname[ni]));
+ grpnms[3*i+XX]=strdup(buf);
+ sprintf(buf,"Uy-%s",*(groups->grpname[ni]));
+ grpnms[3*i+YY]=strdup(buf);
+ sprintf(buf,"Uz-%s",*(groups->grpname[ni]));
+ grpnms[3*i+ZZ]=strdup(buf);
+ }
+ md->iu=get_ebin_space(md->ebin,3*md->nU,(const char **)grpnms,unit_vel);
+ sfree(grpnms);
+ }
+
+ if ( fp_ene )
+ {
+ do_enxnms(fp_ene,&md->ebin->nener,&md->ebin->enm);
+ }
+
+ md->print_grpnms=NULL;
+
+ /* check whether we're going to write dh histograms */
+ md->dhc=NULL;
+ if (ir->fepvals->separate_dhdl_file == esepdhdlfileNO )
+ {
+ /* Currently dh histograms are only written with dynamics */
+ if (EI_DYNAMICS(ir->eI))
+ {
+ snew(md->dhc, 1);
+
+ mde_delta_h_coll_init(md->dhc, ir);
+ }
+ md->fp_dhdl = NULL;
+ }
+ else
+ {
+ md->fp_dhdl = fp_dhdl;
+ }
+ if (ir->bSimTemp) {
+ int i;
+ snew(md->temperatures,ir->fepvals->n_lambda);
+ for (i=0;i<ir->fepvals->n_lambda;i++)
+ {
+ md->temperatures[i] = ir->simtempvals->temperatures[i];
+ }
+ }
+ return md;
+}
+
+extern FILE *open_dhdl(const char *filename,const t_inputrec *ir,
+ const output_env_t oenv)
+{
+ FILE *fp;
+ const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda",
+ *lambdastate="\\lambda state",*remain="remaining";
+ char title[STRLEN],label_x[STRLEN],label_y[STRLEN];
+ int i,np,nps,nsets,nsets_de,nsetsbegin;
+ t_lambda *fep;
+ char **setname;
+ char buf[STRLEN];
+ int bufplace=0;
+
+ int nsets_dhdl = 0;
+ int s = 0;
+ int nsetsextend;
+
+ /* for simplicity */
+ fep = ir->fepvals;
+
+ if (fep->n_lambda == 0)
+ {
+ sprintf(title,"%s",dhdl);
+ sprintf(label_x,"Time (ps)");
+ sprintf(label_y,"%s (%s %s)",
+ dhdl,unit_energy,"[\\lambda]\\S-1\\N");
+ }
+ else
+ {
+ sprintf(title,"%s and %s",dhdl,deltag);
+ sprintf(label_x,"Time (ps)");
+ sprintf(label_y,"%s and %s (%s %s)",
+ dhdl,deltag,unit_energy,"[\\8l\\4]\\S-1\\N");
+ }
+ fp = gmx_fio_fopen(filename,"w+");
+ xvgr_header(fp,title,label_x,label_y,exvggtXNY,oenv);
+
+ if (!(ir->bSimTemp))
+ {
+ bufplace = sprintf(buf,"T = %g (K) ",
+ ir->opts.ref_t[0]);
+ }
+ if (ir->efep != efepSLOWGROWTH)
+ {
+ if (fep->n_lambda == 0)
+ {
+ sprintf(&(buf[bufplace]),"%s = %g",
+ lambda,fep->init_lambda);
+ }
+ else
+ {
+ sprintf(&(buf[bufplace]),"%s = %d",
+ lambdastate,fep->init_fep_state);
+ }
+ }
+ xvgr_subtitle(fp,buf,oenv);
+
+ for (i=0;i<efptNR;i++)
+ {
+ if (fep->separate_dvdl[i]) {nsets_dhdl++;}
+ }
+
+ /* count the number of delta_g states */
+ nsets_de = fep->n_lambda;
+
+ nsets = nsets_dhdl + nsets_de; /* dhdl + fep differences */
+
+ if (fep->n_lambda>0 && ir->bExpanded)
+ {
+ nsets += 1; /*add fep state for expanded ensemble */
+ }
+
+ if (fep->bPrintEnergy)
+ {
+ nsets += 1; /* add energy to the dhdl as well */
+ }
+
+ nsetsextend = nsets;
+ if ((ir->epc!=epcNO) && (fep->n_lambda>0))
+ {
+ nsetsextend += 1; /* for PV term, other terms possible if required for the reduced potential (only needed with foreign lambda) */
+ }
+ snew(setname,nsetsextend);
+
+ if (ir->bExpanded)
+ {
+ /* state for the fep_vals, if we have alchemical sampling */
+ sprintf(buf,"%s","Thermodynamic state");
+ setname[s] = strdup(buf);
+ s+=1;
+ }
+
+ if (fep->bPrintEnergy)
+ {
+ sprintf(buf,"%s (%s)","Energy",unit_energy);
+ setname[s] = strdup(buf);
+ s+=1;
+ }
+
+ for (i=0;i<efptNR;i++)
+ {
+ if (fep->separate_dvdl[i]) {
+ sprintf(buf,"%s (%s)",dhdl,efpt_names[i]);
+ setname[s] = strdup(buf);
+ s+=1;
+ }
+ }
+
+ if (fep->n_lambda > 0)
+ {
+ /* g_bar has to determine the lambda values used in this simulation
+ * from this xvg legend.
+ */
+
+ if (ir->bExpanded) {
+ nsetsbegin = 1; /* for including the expanded ensemble */
+ } else {
+ nsetsbegin = 0;
+ }
+
+ if (fep->bPrintEnergy)
+ {
+ nsetsbegin += 1;
+ }
+ nsetsbegin += nsets_dhdl;
+
+ for(s=nsetsbegin; s<nsets; s++)
+ {
+ nps = sprintf(buf,"%s %s (",deltag,lambda);
+ for (i=0;i<efptNR;i++)
+ {
+ if (fep->separate_dvdl[i])
+ {
+ np = sprintf(&buf[nps],"%g,",fep->all_lambda[i][s-(nsetsbegin)]);
+ nps += np;
+ }
+ }
+ if (ir->bSimTemp)
+ {
+ /* print the temperature for this state if doing simulated tempering */
+ sprintf(&buf[nps],"T = %g (%s))",ir->simtempvals->temperatures[s-(nsetsbegin)],unit_temp_K);
+ }
+ else
+ {
+ sprintf(&buf[nps-1],")"); /* -1 to overwrite the last comma */
+ }
+ setname[s] = strdup(buf);
+ }
+ if (ir->epc!=epcNO) {
+ np = sprintf(buf,"pV (%s)",unit_energy);
+ setname[nsetsextend-1] = strdup(buf); /* the first entry after nsets */
+ }
+
+ xvgr_legend(fp,nsetsextend,(const char **)setname,oenv);
+
+ for(s=0; s<nsetsextend; s++)
+ {
+ sfree(setname[s]);
+ }
+ sfree(setname);
+ }
+
+ return fp;
+}
+
+static void copy_energy(t_mdebin *md, real e[],real ecpy[])
+{
+ int i,j;
+
+ for(i=j=0; (i<F_NRE); i++)
+ if (md->bEner[i])
+ ecpy[j++] = e[i];
+ if (j != md->f_nre)
+ gmx_incons("Number of energy terms wrong");
+}
+
+void upd_mdebin(t_mdebin *md,
+ gmx_bool bDoDHDL,
+ gmx_bool bSum,
+ double time,
+ real tmass,
+ gmx_enerdata_t *enerd,
+ t_state *state,
+ t_lambda *fep,
+ t_expanded *expand,
+ matrix box,
+ tensor svir,
+ tensor fvir,
+ tensor vir,
+ tensor pres,
+ gmx_ekindata_t *ekind,
+ rvec mu_tot,
+ gmx_constr_t constr)
+{
+ int i,j,k,kk,m,n,gid;
+ real crmsd[2],tmp6[6];
+ real bs[NTRICLBOXS],vol,dens,pv,enthalpy;
+ real eee[egNR];
+ real ecopy[F_NRE];
+ double store_dhdl[efptNR];
+ double *dE=NULL;
+ real store_energy=0;
+ real tmp;
+
+ /* Do NOT use the box in the state variable, but the separate box provided
+ * as an argument. This is because we sometimes need to write the box from
+ * the last timestep to match the trajectory frames.
+ */
+ copy_energy(md, enerd->term,ecopy);
+ add_ebin(md->ebin,md->ie,md->f_nre,ecopy,bSum);
+ if (md->nCrmsd)
+ {
+ crmsd[0] = constr_rmsd(constr,FALSE);
+ if (md->nCrmsd > 1)
+ {
+ crmsd[1] = constr_rmsd(constr,TRUE);
+ }
+ add_ebin(md->ebin,md->iconrmsd,md->nCrmsd,crmsd,FALSE);
+ }
+ if (md->bDynBox)
+ {
+ int nboxs;
+ if(md->bTricl)
+ {
+ bs[0] = box[XX][XX];
+ bs[1] = box[YY][YY];
+ bs[2] = box[ZZ][ZZ];
+ bs[3] = box[YY][XX];
+ bs[4] = box[ZZ][XX];
+ bs[5] = box[ZZ][YY];
+ nboxs=NTRICLBOXS;
+ }
+ else
+ {
+ bs[0] = box[XX][XX];
+ bs[1] = box[YY][YY];
+ bs[2] = box[ZZ][ZZ];
+ nboxs=NBOXS;
+ }
+ vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
+ dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
+ add_ebin(md->ebin,md->ib ,nboxs,bs ,bSum);
+ add_ebin(md->ebin,md->ivol ,1 ,&vol ,bSum);
+ add_ebin(md->ebin,md->idens,1 ,&dens,bSum);
+
+ if (md->bDiagPres)
+ {
+ /* This is pV (in kJ/mol). The pressure is the reference pressure,
+ not the instantaneous pressure */
+ pv = vol*md->ref_p/PRESFAC;
+
+ add_ebin(md->ebin,md->ipv ,1 ,&pv ,bSum);
+ enthalpy = pv + enerd->term[F_ETOT];
+ add_ebin(md->ebin,md->ienthalpy ,1 ,&enthalpy ,bSum);
+ }
+ }
+ if (md->bConstrVir)
+ {
+ add_ebin(md->ebin,md->isvir,9,svir[0],bSum);
+ add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum);
+ }
+ if (md->bVir)
+ add_ebin(md->ebin,md->ivir,9,vir[0],bSum);
+ if (md->bPress)
+ add_ebin(md->ebin,md->ipres,9,pres[0],bSum);
+ if (md->bSurft){
+ tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
+ add_ebin(md->ebin,md->isurft,1,&tmp,bSum);
+ }
+ if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
+ {
+ tmp6[0] = state->boxv[XX][XX];
+ tmp6[1] = state->boxv[YY][YY];
+ tmp6[2] = state->boxv[ZZ][ZZ];
+ tmp6[3] = state->boxv[YY][XX];
+ tmp6[4] = state->boxv[ZZ][XX];
+ tmp6[5] = state->boxv[ZZ][YY];
+ add_ebin(md->ebin,md->ipc,md->bTricl ? 6 : 3,tmp6,bSum);
+ }
- fprintf(log," Total Dipole (%s)\n",unit_dipole_D);
- pr_ebin(log,md->ebin,md->imu,3,3,mode,FALSE);
- fprintf(log,"\n");
++ if (md->bMu)
++ {
+ add_ebin(md->ebin,md->imu,3,mu_tot,bSum);
++ }
+ if (ekind && ekind->cosacc.cos_accel != 0)
+ {
+ vol = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
+ dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
+ add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum);
+ /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
+ tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
+ *dens*vol*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
+ add_ebin(md->ebin,md->ivisc,1,&tmp,bSum);
+ }
+ if (md->nE > 1)
+ {
+ n=0;
+ for(i=0; (i<md->nEg); i++)
+ {
+ for(j=i; (j<md->nEg); j++)
+ {
+ gid=GID(i,j,md->nEg);
+ for(k=kk=0; (k<egNR); k++)
+ {
+ if (md->bEInd[k])
+ {
+ eee[kk++] = enerd->grpp.ener[k][gid];
+ }
+ }
+ add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum);
+ n++;
+ }
+ }
+ }
+
+ if (ekind)
+ {
+ for(i=0; (i<md->nTC); i++)
+ {
+ md->tmp_r[i] = ekind->tcstat[i].T;
+ }
+ add_ebin(md->ebin,md->itemp,md->nTC,md->tmp_r,bSum);
+
+ if (md->etc == etcNOSEHOOVER)
+ {
+ /* whether to print Nose-Hoover chains: */
+ if (md->bPrintNHChains)
+ {
+ if (md->bNHC_trotter)
+ {
+ for(i=0; (i<md->nTC); i++)
+ {
+ for (j=0;j<md->nNHC;j++)
+ {
+ k = i*md->nNHC+j;
+ md->tmp_r[2*k] = state->nosehoover_xi[k];
+ md->tmp_r[2*k+1] = state->nosehoover_vxi[k];
+ }
+ }
+ add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
+
+ if (md->bMTTK) {
+ for(i=0; (i<md->nTCP); i++)
+ {
+ for (j=0;j<md->nNHC;j++)
+ {
+ k = i*md->nNHC+j;
+ md->tmp_r[2*k] = state->nhpres_xi[k];
+ md->tmp_r[2*k+1] = state->nhpres_vxi[k];
+ }
+ }
+ add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum);
+ }
+ }
+ else
+ {
+ for(i=0; (i<md->nTC); i++)
+ {
+ md->tmp_r[2*i] = state->nosehoover_xi[i];
+ md->tmp_r[2*i+1] = state->nosehoover_vxi[i];
+ }
+ add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
+ }
+ }
+ }
+ else if (md->etc == etcBERENDSEN || md->etc == etcYES ||
+ md->etc == etcVRESCALE)
+ {
+ for(i=0; (i<md->nTC); i++)
+ {
+ md->tmp_r[i] = ekind->tcstat[i].lambda;
+ }
+ add_ebin(md->ebin,md->itc,md->nTC,md->tmp_r,bSum);
+ }
+ }
+
+ if (ekind && md->nU > 1)
+ {
+ for(i=0; (i<md->nU); i++)
+ {
+ copy_rvec(ekind->grpstat[i].u,md->tmp_v[i]);
+ }
+ add_ebin(md->ebin,md->iu,3*md->nU,md->tmp_v[0],bSum);
+ }
+
+ ebin_increase_count(md->ebin,bSum);
+
+ /* BAR + thermodynamic integration values */
+ if ((md->fp_dhdl || md->dhc) && bDoDHDL && (enerd->n_lambda > 0))
+ {
+ snew(dE,enerd->n_lambda-1);
+ for(i=0; i<enerd->n_lambda-1; i++) {
+ dE[i] = enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0]; /* zero for simulated tempering */
+ if (md->temperatures!=NULL)
+ {
+ /* MRS: is this right, given the way we have defined the exchange probabilities? */
+ /* is this even useful to have at all? */
+ dE[i] += (md->temperatures[i]/md->temperatures[state->fep_state]-1.0)*enerd->term[F_EKIN];
+ }
+ }
+ }
+
+ if (md->fp_dhdl && bDoDHDL)
+ {
+ fprintf(md->fp_dhdl,"%.4f",time);
+ /* the current free energy state */
+
+ /* print the current state if we are doing expanded ensemble */
+ if (expand->elmcmove > elmcmoveNO) {
+ fprintf(md->fp_dhdl," %4d",state->fep_state);
+ }
+ /* total energy (in case the temperature changes) */
+ if (fep->bPrintEnergy)
+ {
+ store_energy = enerd->term[F_ETOT];
+ fprintf(md->fp_dhdl," %#.8g",store_energy);
+ }
+
+ for (i=0;i<efptNR;i++)
+ {
+ if (fep->separate_dvdl[i])
+ {
+ fprintf(md->fp_dhdl," %#.8g",enerd->term[F_DVDL+i]); /* assumes F_DVDL is first */
+ }
+ }
+ for(i=1; i<enerd->n_lambda; i++)
+ {
+ fprintf(md->fp_dhdl," %#.8g",dE[i-1]);
+
+ }
+ if ((md->epc!=epcNO) && (enerd->n_lambda > 0))
+ {
+ fprintf(md->fp_dhdl," %#.8g",pv); /* PV term only needed when there are alternate state lambda */
+ }
+ fprintf(md->fp_dhdl,"\n");
+ /* and the binary free energy output */
+ }
+ if (md->dhc && bDoDHDL)
+ {
+ int idhdl = 0;
+ for (i=0;i<efptNR;i++)
+ {
+ if (fep->separate_dvdl[i])
+ {
+ store_dhdl[idhdl] = enerd->term[F_DVDL+i]; /* assumes F_DVDL is first */
+ idhdl+=1;
+ }
+ }
+ /* store_dh is dE */
+ mde_delta_h_coll_add_dh(md->dhc,
+ (double)state->fep_state,
+ store_energy,
+ pv,
+ (expand->elamstats>elamstatsNO),
+ (fep->bPrintEnergy),
+ (md->epc!=epcNO),
+ idhdl,
+ fep->n_lambda,
+ store_dhdl,
+ dE,
+ time);
+ }
+ if ((md->fp_dhdl || md->dhc) && bDoDHDL && (enerd->n_lambda >0))
+ {
+ sfree(dE);
+ }
+}
+
+
+void upd_mdebin_step(t_mdebin *md)
+{
+ ebin_increase_count(md->ebin,FALSE);
+}
+
+static void npr(FILE *log,int n,char c)
+{
+ for(; (n>0); n--) fprintf(log,"%c",c);
+}
+
+static void pprint(FILE *log,const char *s,t_mdebin *md)
+{
+ char CHAR='#';
+ int slen;
+ char buf1[22],buf2[22];
+
+ slen = strlen(s);
+ fprintf(log,"\t<====== ");
+ npr(log,slen,CHAR);
+ fprintf(log," ==>\n");
+ fprintf(log,"\t<==== %s ====>\n",s);
+ fprintf(log,"\t<== ");
+ npr(log,slen,CHAR);
+ fprintf(log," ======>\n\n");
+
+ fprintf(log,"\tStatistics over %s steps using %s frames\n",
+ gmx_step_str(md->ebin->nsteps_sim,buf1),
+ gmx_step_str(md->ebin->nsum_sim,buf2));
+ fprintf(log,"\n");
+}
+
+void print_ebin_header(FILE *log,gmx_large_int_t steps,double time,real lambda)
+{
+ char buf[22];
+
+ fprintf(log," %12s %12s %12s\n"
+ " %12s %12.5f %12.5f\n\n",
+ "Step","Time","Lambda",gmx_step_str(steps,buf),time,lambda);
+}
+
+void print_ebin(ener_file_t fp_ene,gmx_bool bEne,gmx_bool bDR,gmx_bool bOR,
+ FILE *log,
+ gmx_large_int_t step,double time,
+ int mode,gmx_bool bCompact,
+ t_mdebin *md,t_fcdata *fcd,
+ gmx_groups_t *groups,t_grpopts *opts)
+{
+ /*static char **grpnms=NULL;*/
+ char buf[246];
+ int i,j,n,ni,nj,ndr,nor,b;
+ int ndisre=0;
+ real *disre_rm3tav, *disre_rt;
+
+ /* these are for the old-style blocks (1 subblock, only reals), because
+ there can be only one per ID for these */
+ int nr[enxNR];
+ int id[enxNR];
+ real *block[enxNR];
+
+ /* temporary arrays for the lambda values to write out */
+ double enxlambda_data[2];
+
+ t_enxframe fr;
+
+ switch (mode)
+ {
+ case eprNORMAL:
+ init_enxframe(&fr);
+ fr.t = time;
+ fr.step = step;
+ fr.nsteps = md->ebin->nsteps;
+ fr.dt = md->delta_t;
+ fr.nsum = md->ebin->nsum;
+ fr.nre = (bEne) ? md->ebin->nener : 0;
+ fr.ener = md->ebin->e;
+ ndisre = bDR ? fcd->disres.npair : 0;
+ disre_rm3tav = fcd->disres.rm3tav;
+ disre_rt = fcd->disres.rt;
+ /* Optional additional old-style (real-only) blocks. */
+ for(i=0; i<enxNR; i++)
+ {
+ nr[i] = 0;
+ }
+ if (fcd->orires.nr > 0 && bOR)
+ {
+ diagonalize_orires_tensors(&(fcd->orires));
+ nr[enxOR] = fcd->orires.nr;
+ block[enxOR] = fcd->orires.otav;
+ id[enxOR] = enxOR;
+ nr[enxORI] = (fcd->orires.oinsl != fcd->orires.otav) ?
+ fcd->orires.nr : 0;
+ block[enxORI] = fcd->orires.oinsl;
+ id[enxORI] = enxORI;
+ nr[enxORT] = fcd->orires.nex*12;
+ block[enxORT] = fcd->orires.eig;
+ id[enxORT] = enxORT;
+ }
+
+ /* whether we are going to write anything out: */
+ if (fr.nre || ndisre || nr[enxOR] || nr[enxORI])
+ {
+
+ /* the old-style blocks go first */
+ fr.nblock = 0;
+ for(i=0; i<enxNR; i++)
+ {
+ if (nr[i] > 0)
+ {
+ fr.nblock = i + 1;
+ }
+ }
+ add_blocks_enxframe(&fr, fr.nblock);
+ for(b=0;b<fr.nblock;b++)
+ {
+ add_subblocks_enxblock(&(fr.block[b]), 1);
+ fr.block[b].id=id[b];
+ fr.block[b].sub[0].nr = nr[b];
+#ifndef GMX_DOUBLE
+ fr.block[b].sub[0].type = xdr_datatype_float;
+ fr.block[b].sub[0].fval = block[b];
+#else
+ fr.block[b].sub[0].type = xdr_datatype_double;
+ fr.block[b].sub[0].dval = block[b];
+#endif
+ }
+
+ /* check for disre block & fill it. */
+ if (ndisre>0)
+ {
+ int db = fr.nblock;
+ fr.nblock+=1;
+ add_blocks_enxframe(&fr, fr.nblock);
+
+ add_subblocks_enxblock(&(fr.block[db]), 2);
+ fr.block[db].id=enxDISRE;
+ fr.block[db].sub[0].nr=ndisre;
+ fr.block[db].sub[1].nr=ndisre;
+#ifndef GMX_DOUBLE
+ fr.block[db].sub[0].type=xdr_datatype_float;
+ fr.block[db].sub[1].type=xdr_datatype_float;
+ fr.block[db].sub[0].fval=disre_rt;
+ fr.block[db].sub[1].fval=disre_rm3tav;
+#else
+ fr.block[db].sub[0].type=xdr_datatype_double;
+ fr.block[db].sub[1].type=xdr_datatype_double;
+ fr.block[db].sub[0].dval=disre_rt;
+ fr.block[db].sub[1].dval=disre_rm3tav;
+#endif
+ }
+ /* here we can put new-style blocks */
+
+ /* Free energy perturbation blocks */
+ if (md->dhc)
+ {
+ mde_delta_h_coll_handle_block(md->dhc, &fr, fr.nblock);
+ }
+
+ /* we can now free & reset the data in the blocks */
+ if (md->dhc)
+ {
+ mde_delta_h_coll_reset(md->dhc);
+ }
+
+ /* do the actual I/O */
+ do_enx(fp_ene,&fr);
+ gmx_fio_check_file_position(enx_file_pointer(fp_ene));
+ if (fr.nre)
+ {
+ /* We have stored the sums, so reset the sum history */
+ reset_ebin_sums(md->ebin);
+ }
+ }
+ free_enxframe(&fr);
+ break;
+ case eprAVER:
+ if (log)
+ {
+ pprint(log,"A V E R A G E S",md);
+ }
+ break;
+ case eprRMS:
+ if (log)
+ {
+ pprint(log,"R M S - F L U C T U A T I O N S",md);
+ }
+ break;
+ default:
+ gmx_fatal(FARGS,"Invalid print mode (%d)",mode);
+ }
+
+ if (log)
+ {
+ for(i=0;i<opts->ngtc;i++)
+ {
+ if(opts->annealing[i]!=eannNO)
+ {
+ fprintf(log,"Current ref_t for group %s: %8.1f\n",
+ *(groups->grpname[groups->grps[egcTC].nm_ind[i]]),
+ opts->ref_t[i]);
+ }
+ }
+ if (mode==eprNORMAL && fcd->orires.nr>0)
+ {
+ print_orires_log(log,&(fcd->orires));
+ }
+ fprintf(log," Energies (%s)\n",unit_energy);
+ pr_ebin(log,md->ebin,md->ie,md->f_nre+md->nCrmsd,5,mode,TRUE);
+ fprintf(log,"\n");
+
+ if (!bCompact)
+ {
+ if (md->bDynBox)
+ {
+ pr_ebin(log,md->ebin,md->ib, md->bTricl ? NTRICLBOXS : NBOXS,5,
+ mode,TRUE);
+ fprintf(log,"\n");
+ }
+ if (md->bConstrVir)
+ {
+ fprintf(log," Constraint Virial (%s)\n",unit_energy);
+ pr_ebin(log,md->ebin,md->isvir,9,3,mode,FALSE);
+ fprintf(log,"\n");
+ fprintf(log," Force Virial (%s)\n",unit_energy);
+ pr_ebin(log,md->ebin,md->ifvir,9,3,mode,FALSE);
+ fprintf(log,"\n");
+ }
+ if (md->bVir)
+ {
+ fprintf(log," Total Virial (%s)\n",unit_energy);
+ pr_ebin(log,md->ebin,md->ivir,9,3,mode,FALSE);
+ fprintf(log,"\n");
+ }
+ if (md->bPress)
+ {
+ fprintf(log," Pressure (%s)\n",unit_pres_bar);
+ pr_ebin(log,md->ebin,md->ipres,9,3,mode,FALSE);
+ fprintf(log,"\n");
+ }
++ if (md->bMu)
++ {
++ fprintf(log," Total Dipole (%s)\n",unit_dipole_D);
++ pr_ebin(log,md->ebin,md->imu,3,3,mode,FALSE);
++ fprintf(log,"\n");
++ }
+
+ if (md->nE > 1)
+ {
+ if (md->print_grpnms==NULL)
+ {
+ snew(md->print_grpnms,md->nE);
+ n=0;
+ for(i=0; (i<md->nEg); i++)
+ {
+ ni=groups->grps[egcENER].nm_ind[i];
+ for(j=i; (j<md->nEg); j++)
+ {
+ nj=groups->grps[egcENER].nm_ind[j];
+ sprintf(buf,"%s-%s",*(groups->grpname[ni]),
+ *(groups->grpname[nj]));
+ md->print_grpnms[n++]=strdup(buf);
+ }
+ }
+ }
+ sprintf(buf,"Epot (%s)",unit_energy);
+ fprintf(log,"%15s ",buf);
+ for(i=0; (i<egNR); i++)
+ {
+ if (md->bEInd[i])
+ {
+ fprintf(log,"%12s ",egrp_nm[i]);
+ }
+ }
+ fprintf(log,"\n");
+ for(i=0; (i<md->nE); i++)
+ {
+ fprintf(log,"%15s",md->print_grpnms[i]);
+ pr_ebin(log,md->ebin,md->igrp[i],md->nEc,md->nEc,mode,
+ FALSE);
+ }
+ fprintf(log,"\n");
+ }
+ if (md->nTC > 1)
+ {
+ pr_ebin(log,md->ebin,md->itemp,md->nTC,4,mode,TRUE);
+ fprintf(log,"\n");
+ }
+ if (md->nU > 1)
+ {
+ fprintf(log,"%15s %12s %12s %12s\n",
+ "Group","Ux","Uy","Uz");
+ for(i=0; (i<md->nU); i++)
+ {
+ ni=groups->grps[egcACC].nm_ind[i];
+ fprintf(log,"%15s",*groups->grpname[ni]);
+ pr_ebin(log,md->ebin,md->iu+3*i,3,3,mode,FALSE);
+ }
+ fprintf(log,"\n");
+ }
+ }
+ }
+
+}
+
+void update_energyhistory(energyhistory_t * enerhist,t_mdebin * mdebin)
+{
+ int i;
+
+ enerhist->nsteps = mdebin->ebin->nsteps;
+ enerhist->nsum = mdebin->ebin->nsum;
+ enerhist->nsteps_sim = mdebin->ebin->nsteps_sim;
+ enerhist->nsum_sim = mdebin->ebin->nsum_sim;
+ enerhist->nener = mdebin->ebin->nener;
+
+ if (mdebin->ebin->nsum > 0)
+ {
+ /* Check if we need to allocate first */
+ if(enerhist->ener_ave == NULL)
+ {
+ snew(enerhist->ener_ave,enerhist->nener);
+ snew(enerhist->ener_sum,enerhist->nener);
+ }
+
+ for(i=0;i<enerhist->nener;i++)
+ {
+ enerhist->ener_ave[i] = mdebin->ebin->e[i].eav;
+ enerhist->ener_sum[i] = mdebin->ebin->e[i].esum;
+ }
+ }
+
+ if (mdebin->ebin->nsum_sim > 0)
+ {
+ /* Check if we need to allocate first */
+ if(enerhist->ener_sum_sim == NULL)
+ {
+ snew(enerhist->ener_sum_sim,enerhist->nener);
+ }
+
+ for(i=0;i<enerhist->nener;i++)
+ {
+ enerhist->ener_sum_sim[i] = mdebin->ebin->e_sim[i].esum;
+ }
+ }
+ if (mdebin->dhc)
+ {
+ mde_delta_h_coll_update_energyhistory(mdebin->dhc, enerhist);
+ }
+}
+
+void restore_energyhistory_from_state(t_mdebin * mdebin,
+ energyhistory_t * enerhist)
+{
+ int i;
+
+ if ((enerhist->nsum > 0 || enerhist->nsum_sim > 0) &&
+ mdebin->ebin->nener != enerhist->nener)
+ {
+ gmx_fatal(FARGS,"Mismatch between number of energies in run input (%d) and checkpoint file (%d).",
+ mdebin->ebin->nener,enerhist->nener);
+ }
+
+ mdebin->ebin->nsteps = enerhist->nsteps;
+ mdebin->ebin->nsum = enerhist->nsum;
+ mdebin->ebin->nsteps_sim = enerhist->nsteps_sim;
+ mdebin->ebin->nsum_sim = enerhist->nsum_sim;
+
+ for(i=0; i<mdebin->ebin->nener; i++)
+ {
+ mdebin->ebin->e[i].eav =
+ (enerhist->nsum > 0 ? enerhist->ener_ave[i] : 0);
+ mdebin->ebin->e[i].esum =
+ (enerhist->nsum > 0 ? enerhist->ener_sum[i] : 0);
+ mdebin->ebin->e_sim[i].esum =
+ (enerhist->nsum_sim > 0 ? enerhist->ener_sum_sim[i] : 0);
+ }
+ if (mdebin->dhc)
+ {
+ mde_delta_h_coll_restore_energyhistory(mdebin->dhc, enerhist);
+ }
+}
--- /dev/null
- if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include "sysstuff.h"
+#include "string2.h"
+#include "network.h"
+#include "confio.h"
+#include "copyrite.h"
+#include "smalloc.h"
+#include "nrnb.h"
+#include "main.h"
+#include "force.h"
+#include "macros.h"
+#include "random.h"
+#include "names.h"
+#include "gmx_fatal.h"
+#include "txtdump.h"
+#include "typedefs.h"
+#include "update.h"
+#include "constr.h"
+#include "vec.h"
+#include "statutil.h"
+#include "tgroup.h"
+#include "mdebin.h"
+#include "vsite.h"
+#include "force.h"
+#include "mdrun.h"
++#include "md_support.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "trnio.h"
+#include "mdatoms.h"
+#include "ns.h"
+#include "gmx_wallcycle.h"
+#include "mtop_util.h"
+#include "gmxfio.h"
+#include "pme.h"
++#include "bondf.h"
++#include "gmx_omp_nthreads.h"
++
+
+#include "gromacs/linearalgebra/mtxio.h"
+#include "gromacs/linearalgebra/sparsematrix.h"
+
+typedef struct {
+ t_state s;
+ rvec *f;
+ real epot;
+ real fnorm;
+ real fmax;
+ int a_fmax;
+} em_state_t;
+
+static em_state_t *init_em_state()
+{
+ em_state_t *ems;
+
+ snew(ems,1);
+
+ /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
+ snew(ems->s.lambda,efptNR);
+
+ return ems;
+}
+
+static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
+ gmx_wallcycle_t wcycle,
+ const char *name)
+{
+ char buf[STRLEN];
+
+ runtime_start(runtime);
+
+ sprintf(buf,"Started %s",name);
+ print_date_and_time(fplog,cr->nodeid,buf,NULL);
+
+ wallcycle_start(wcycle,ewcRUN);
+}
+static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
+ gmx_wallcycle_t wcycle)
+{
+ wallcycle_stop(wcycle,ewcRUN);
+
+ runtime_end(runtime);
+}
+
+static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
+{
+ fprintf(out,"\n");
+ fprintf(out,"%s:\n",minimizer);
+ fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
+ fprintf(out," Number of steps = %12d\n",nsteps);
+}
+
+static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
+{
+ char buffer[2048];
+ if (bLastStep)
+ {
+ sprintf(buffer,
+ "\nEnergy minimization reached the maximum number "
+ "of steps before the forces reached the requested "
+ "precision Fmax < %g.\n",ftol);
+ }
+ else
+ {
+ sprintf(buffer,
+ "\nEnergy minimization has stopped, but the forces have "
+ "not converged to the requested precision Fmax < %g (which "
+ "may not be possible for your system). It stopped "
+ "because the algorithm tried to make a new step whose size "
+ "was too small, or there was no change in the energy since "
+ "last step. Either way, we regard the minimization as "
+ "converged to within the available machine precision, "
+ "given your starting configuration and EM parameters.\n%s%s",
+ ftol,
+ sizeof(real)<sizeof(double) ?
+ "\nDouble precision normally gives you higher accuracy, but "
+ "this is often not needed for preparing to run molecular "
+ "dynamics.\n" :
+ "",
+ bConstrain ?
+ "You might need to increase your constraint accuracy, or turn\n"
+ "off constraints altogether (set constraints = none in mdp file)\n" :
+ "");
+ }
+ fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
+}
+
+
+
+static void print_converged(FILE *fp,const char *alg,real ftol,
+ gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
+ real epot,real fmax, int nfmax, real fnorm)
+{
+ char buf[STEPSTRSIZE];
+
+ if (bDone)
+ fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
+ alg,ftol,gmx_step_str(count,buf));
+ else if(count<nsteps)
+ fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
+ "but did not reach the requested Fmax < %g.\n",
+ alg,gmx_step_str(count,buf),ftol);
+ else
+ fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
+ alg,ftol,gmx_step_str(count,buf));
+
+#ifdef GMX_DOUBLE
+ fprintf(fp,"Potential Energy = %21.14e\n",epot);
+ fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
+ fprintf(fp,"Norm of force = %21.14e\n",fnorm);
+#else
+ fprintf(fp,"Potential Energy = %14.7e\n",epot);
+ fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
+ fprintf(fp,"Norm of force = %14.7e\n",fnorm);
+#endif
+}
+
+static void get_f_norm_max(t_commrec *cr,
+ t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
+ real *fnorm,real *fmax,int *a_fmax)
+{
+ double fnorm2,*sum;
+ real fmax2,fmax2_0,fam;
+ int la_max,a_max,start,end,i,m,gf;
+
+ /* This routine finds the largest force and returns it.
+ * On parallel machines the global max is taken.
+ */
+ fnorm2 = 0;
+ fmax2 = 0;
+ la_max = -1;
+ gf = 0;
+ start = mdatoms->start;
+ end = mdatoms->homenr + start;
+ if (mdatoms->cFREEZE) {
+ for(i=start; i<end; i++) {
+ gf = mdatoms->cFREEZE[i];
+ fam = 0;
+ for(m=0; m<DIM; m++)
+ if (!opts->nFreeze[gf][m])
+ fam += sqr(f[i][m]);
+ fnorm2 += fam;
+ if (fam > fmax2) {
+ fmax2 = fam;
+ la_max = i;
+ }
+ }
+ } else {
+ for(i=start; i<end; i++) {
+ fam = norm2(f[i]);
+ fnorm2 += fam;
+ if (fam > fmax2) {
+ fmax2 = fam;
+ la_max = i;
+ }
+ }
+ }
+
+ if (la_max >= 0 && DOMAINDECOMP(cr)) {
+ a_max = cr->dd->gatindex[la_max];
+ } else {
+ a_max = la_max;
+ }
+ if (PAR(cr)) {
+ snew(sum,2*cr->nnodes+1);
+ sum[2*cr->nodeid] = fmax2;
+ sum[2*cr->nodeid+1] = a_max;
+ sum[2*cr->nnodes] = fnorm2;
+ gmx_sumd(2*cr->nnodes+1,sum,cr);
+ fnorm2 = sum[2*cr->nnodes];
+ /* Determine the global maximum */
+ for(i=0; i<cr->nnodes; i++) {
+ if (sum[2*i] > fmax2) {
+ fmax2 = sum[2*i];
+ a_max = (int)(sum[2*i+1] + 0.5);
+ }
+ }
+ sfree(sum);
+ }
+
+ if (fnorm)
+ *fnorm = sqrt(fnorm2);
+ if (fmax)
+ *fmax = sqrt(fmax2);
+ if (a_fmax)
+ *a_fmax = a_max;
+}
+
+static void get_state_f_norm_max(t_commrec *cr,
+ t_grpopts *opts,t_mdatoms *mdatoms,
+ em_state_t *ems)
+{
+ get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
+}
+
+void init_em(FILE *fplog,const char *title,
+ t_commrec *cr,t_inputrec *ir,
+ t_state *state_global,gmx_mtop_t *top_global,
+ em_state_t *ems,gmx_localtop_t **top,
+ rvec **f,rvec **f_global,
+ t_nrnb *nrnb,rvec mu_tot,
+ t_forcerec *fr,gmx_enerdata_t **enerd,
+ t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int nfile,const t_filenm fnm[],
+ gmx_mdoutf_t **outf,t_mdebin **mdebin)
+{
+ int start,homenr,i;
+ real dvdlambda;
+
+ if (fplog)
+ {
+ fprintf(fplog,"Initiating %s\n",title);
+ }
+
+ state_global->ngtc = 0;
+
+ /* Initialize lambda variables */
+ initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
+
+ init_nrnb(nrnb);
+
+ if (DOMAINDECOMP(cr))
+ {
+ *top = dd_init_local_top(top_global);
+
+ dd_init_local_state(cr->dd,state_global,&ems->s);
+
+ *f = NULL;
+
+ /* Distribute the charge groups over the nodes from the master node */
+ dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
+ state_global,top_global,ir,
+ &ems->s,&ems->f,mdatoms,*top,
+ fr,vsite,NULL,constr,
+ nrnb,NULL,FALSE);
+ dd_store_state(cr->dd,&ems->s);
+
+ if (ir->nstfout)
+ {
+ snew(*f_global,top_global->natoms);
+ }
+ else
+ {
+ *f_global = NULL;
+ }
+ *graph = NULL;
+ }
+ else
+ {
+ snew(*f,top_global->natoms);
+
+ /* Just copy the state */
+ ems->s = *state_global;
+ snew(ems->s.x,ems->s.nalloc);
+ snew(ems->f,ems->s.nalloc);
+ for(i=0; i<state_global->natoms; i++)
+ {
+ copy_rvec(state_global->x[i],ems->s.x[i]);
+ }
+ copy_mat(state_global->box,ems->s.box);
+
+ if (PAR(cr) && ir->eI != eiNM)
+ {
+ /* Initialize the particle decomposition and split the topology */
+ *top = split_system(fplog,top_global,ir,cr);
+
+ pd_cg_range(cr,&fr->cg0,&fr->hcg);
+ }
+ else
+ {
+ *top = gmx_mtop_generate_local_top(top_global,ir);
+ }
+ *f_global = *f;
+
- ems->s.x,ems->s.x,NULL,ems->s.box,
++ forcerec_set_excl_load(fr,*top,cr);
++
++ init_bonded_thread_force_reduction(fr,&(*top)->idef);
++
++ if (ir->ePBC != epbcNONE && !fr->bMolPBC)
+ {
+ *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
+ }
+ else
+ {
+ *graph = NULL;
+ }
+
+ if (PARTDECOMP(cr))
+ {
+ pd_at_range(cr,&start,&homenr);
+ homenr -= start;
+ }
+ else
+ {
+ start = 0;
+ homenr = top_global->natoms;
+ }
+ atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
+ update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
+
+ if (vsite)
+ {
+ set_vsite_top(vsite,*top,mdatoms,cr);
+ }
+ }
+
+ if (constr)
+ {
+ if (ir->eConstrAlg == econtSHAKE &&
+ gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
+ {
+ gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
+ econstr_names[econtSHAKE],econstr_names[econtLINCS]);
+ }
+
+ if (!DOMAINDECOMP(cr))
+ {
+ set_constraints(constr,*top,ir,mdatoms,cr);
+ }
+
+ if (!ir->bContinuation)
+ {
+ /* Constrain the starting coordinates */
+ dvdlambda=0;
+ constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
+ ir,NULL,cr,-1,0,mdatoms,
- gmx_pme_finish(cr);
++ ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
+ ems->s.lambda[efptFEP],&dvdlambda,
+ NULL,NULL,nrnb,econqCoord,FALSE,0,0);
+ }
+ }
+
+ if (PAR(cr))
+ {
+ *gstat = global_stat_init(ir);
+ }
+
+ *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
+
+ snew(*enerd,1);
+ init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
+ *enerd);
+
+ if (mdebin != NULL)
+ {
+ /* Init bin for energy stuff */
+ *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
+ }
+
+ clear_rvec(mu_tot);
+ calc_shifts(ems->s.box,fr->shift_vec);
+}
+
+static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
+ gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
+{
+ if (!(cr->duty & DUTY_PME)) {
+ /* Tell the PME only node to finish */
- em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
- gmx_constr_t constr,gmx_localtop_t *top,
- t_nrnb *nrnb,gmx_wallcycle_t wcycle,
- gmx_large_int_t count)
++ gmx_pme_send_finish(cr);
+ }
+
+ done_mdoutf(outf);
+
+ em_time_end(fplog,cr,runtime,wcycle);
+}
+
+static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
+{
+ em_state_t tmp;
+
+ tmp = *ems1;
+ *ems1 = *ems2;
+ *ems2 = tmp;
+}
+
+static void copy_em_coords(em_state_t *ems,t_state *state)
+{
+ int i;
+
+ for(i=0; (i<state->natoms); i++)
+ {
+ copy_rvec(ems->s.x[i],state->x[i]);
+ }
+}
+
+static void write_em_traj(FILE *fplog,t_commrec *cr,
+ gmx_mdoutf_t *outf,
+ gmx_bool bX,gmx_bool bF,const char *confout,
+ gmx_mtop_t *top_global,
+ t_inputrec *ir,gmx_large_int_t step,
+ em_state_t *state,
+ t_state *state_global,rvec *f_global)
+{
+ int mdof_flags;
+
+ if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
+ {
+ copy_em_coords(state,state_global);
+ f_global = state->f;
+ }
+
+ mdof_flags = 0;
+ if (bX) { mdof_flags |= MDOF_X; }
+ if (bF) { mdof_flags |= MDOF_F; }
+ write_traj(fplog,cr,outf,mdof_flags,
+ top_global,step,(double)step,
+ &state->s,state_global,state->f,f_global,NULL,NULL);
+
+ if (confout != NULL && MASTER(cr))
+ {
+ if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
+ {
+ /* Make molecules whole only for confout writing */
+ do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
+ state_global->x);
+ }
+
+ write_sto_conf_mtop(confout,
+ *top_global->name,top_global,
+ state_global->x,NULL,ir->ePBC,state_global->box);
+ }
+}
+
+static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
- t_state *s1,*s2;
- int start,end,gf,i,m;
- rvec *x1,*x2;
- real dvdlambda;
++ gmx_bool bMolPBC,
++ em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
++ gmx_constr_t constr,gmx_localtop_t *top,
++ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
++ gmx_large_int_t count)
+
+{
- s1 = &ems1->s;
- s2 = &ems2->s;
++ t_state *s1,*s2;
++ int i;
++ int start,end;
++ rvec *x1,*x2;
++ real dvdlambda;
+
- if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
- gmx_incons("state mismatch in do_em_step");
++ s1 = &ems1->s;
++ s2 = &ems2->s;
+
- s2->flags = s1->flags;
++ if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
++ {
++ gmx_incons("state mismatch in do_em_step");
++ }
+
- if (s2->nalloc != s1->nalloc) {
- s2->nalloc = s1->nalloc;
- srenew(s2->x,s1->nalloc);
- srenew(ems2->f, s1->nalloc);
- if (s2->flags & (1<<estCGP))
- srenew(s2->cg_p, s1->nalloc);
- }
++ s2->flags = s1->flags;
+
- s2->natoms = s1->natoms;
- /* Copy free energy state -> is this necessary? */
- for (i=0;i<efptNR;i++)
- {
- s2->lambda[i] = s1->lambda[i];
- }
- copy_mat(s1->box,s2->box);
++ if (s2->nalloc != s1->nalloc)
++ {
++ s2->nalloc = s1->nalloc;
++ srenew(s2->x,s1->nalloc);
++ srenew(ems2->f, s1->nalloc);
++ if (s2->flags & (1<<estCGP))
++ {
++ srenew(s2->cg_p, s1->nalloc);
++ }
++ }
++
++ s2->natoms = s1->natoms;
++ copy_mat(s1->box,s2->box);
++ /* Copy free energy state */
++ for (i=0;i<efptNR;i++)
++ {
++ s2->lambda[i] = s1->lambda[i];
++ }
++ copy_mat(s1->box,s2->box);
+
- start = md->start;
- end = md->start + md->homenr;
++ start = md->start;
++ end = md->start + md->homenr;
+
- x1 = s1->x;
- x2 = s2->x;
- gf = 0;
- for(i=start; i<end; i++) {
- if (md->cFREEZE)
- gf = md->cFREEZE[i];
- for(m=0; m<DIM; m++) {
- if (ir->opts.nFreeze[gf][m])
- x2[i][m] = x1[i][m];
- else
- x2[i][m] = x1[i][m] + a*f[i][m];
- }
- }
++ x1 = s1->x;
++ x2 = s2->x;
+
- if (s2->flags & (1<<estCGP)) {
- /* Copy the CG p vector */
- x1 = s1->cg_p;
- x2 = s2->cg_p;
- for(i=start; i<end; i++)
- copy_rvec(x1[i],x2[i]);
- }
++#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
++ {
++ int gf,i,m;
+
- if (DOMAINDECOMP(cr)) {
- s2->ddp_count = s1->ddp_count;
- if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
- s2->cg_gl_nalloc = s1->cg_gl_nalloc;
- srenew(s2->cg_gl,s2->cg_gl_nalloc);
++ gf = 0;
++#pragma omp for schedule(static) nowait
++ for(i=start; i<end; i++)
++ {
++ if (md->cFREEZE)
++ {
++ gf = md->cFREEZE[i];
++ }
++ for(m=0; m<DIM; m++)
++ {
++ if (ir->opts.nFreeze[gf][m])
++ {
++ x2[i][m] = x1[i][m];
++ }
++ else
++ {
++ x2[i][m] = x1[i][m] + a*f[i][m];
++ }
++ }
++ }
+
- s2->ncg_gl = s1->ncg_gl;
- for(i=0; i<s2->ncg_gl; i++)
- s2->cg_gl[i] = s1->cg_gl[i];
- s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
- }
-
- if (constr) {
- wallcycle_start(wcycle,ewcCONSTR);
- dvdlambda = 0;
- constrain(NULL,TRUE,TRUE,constr,&top->idef,
- ir,NULL,cr,count,0,md,
- s1->x,s2->x,NULL,s2->box,s2->lambda[efptBONDED],
- &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
- wallcycle_stop(wcycle,ewcCONSTR);
- }
++ if (s2->flags & (1<<estCGP))
++ {
++ /* Copy the CG p vector */
++ x1 = s1->cg_p;
++ x2 = s2->cg_p;
++#pragma omp for schedule(static) nowait
++ for(i=start; i<end; i++)
++ {
++ copy_rvec(x1[i],x2[i]);
++ }
++ }
++
++ if (DOMAINDECOMP(cr))
++ {
++ s2->ddp_count = s1->ddp_count;
++ if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
++ {
++#pragma omp barrier
++ s2->cg_gl_nalloc = s1->cg_gl_nalloc;
++ srenew(s2->cg_gl,s2->cg_gl_nalloc);
++#pragma omp barrier
++ }
++ s2->ncg_gl = s1->ncg_gl;
++#pragma omp for schedule(static) nowait
++ for(i=0; i<s2->ncg_gl; i++)
++ {
++ s2->cg_gl[i] = s1->cg_gl[i];
++ }
++ s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
++ }
++ }
++
++ if (constr)
++ {
++ wallcycle_start(wcycle,ewcCONSTR);
++ dvdlambda = 0;
++ constrain(NULL,TRUE,TRUE,constr,&top->idef,
++ ir,NULL,cr,count,0,md,
++ s1->x,s2->x,NULL,bMolPBC,s2->box,
++ s2->lambda[efptBONDED],&dvdlambda,
++ NULL,NULL,nrnb,econqCoord,FALSE,0,0);
++ wallcycle_stop(wcycle,ewcCONSTR);
+ }
- GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
+}
+
+static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
+ gmx_mtop_t *top_global,t_inputrec *ir,
+ em_state_t *ems,gmx_localtop_t *top,
+ t_mdatoms *mdatoms,t_forcerec *fr,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle)
+{
+ /* Repartition the domain decomposition */
+ wallcycle_start(wcycle,ewcDOMDEC);
+ dd_partition_system(fplog,step,cr,FALSE,1,
+ NULL,top_global,ir,
+ &ems->s,&ems->f,
+ mdatoms,top,fr,vsite,NULL,constr,
+ nrnb,wcycle,FALSE);
+ dd_store_state(cr->dd,&ems->s);
+ wallcycle_stop(wcycle,ewcDOMDEC);
+}
+
+static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
+ t_state *state_global,gmx_mtop_t *top_global,
+ em_state_t *ems,gmx_localtop_t *top,
+ t_inputrec *inputrec,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_global_stat_t gstat,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ t_fcdata *fcd,
+ t_graph *graph,t_mdatoms *mdatoms,
+ t_forcerec *fr,rvec mu_tot,
+ gmx_enerdata_t *enerd,tensor vir,tensor pres,
+ gmx_large_int_t count,gmx_bool bFirst)
+{
+ real t;
+ gmx_bool bNS;
+ int nabnsb;
+ tensor force_vir,shake_vir,ekin;
+ real dvdlambda,prescorr,enercorr,dvdlcorr;
+ real terminate=0;
+
+ /* Set the time to the initial time, the time does not change during EM */
+ t = inputrec->init_t;
+
+ if (bFirst ||
+ (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
+ /* This the first state or an old state used before the last ns */
+ bNS = TRUE;
+ } else {
+ bNS = FALSE;
+ if (inputrec->nstlist > 0) {
+ bNS = TRUE;
+ } else if (inputrec->nstlist == -1) {
+ nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
+ if (PAR(cr))
+ gmx_sumi(1,&nabnsb,cr);
+ bNS = (nabnsb > 0);
+ }
+ }
+
+ if (vsite)
+ construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
+ top->idef.iparams,top->idef.il,
+ fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
+
+ if (DOMAINDECOMP(cr)) {
+ if (bNS) {
+ /* Repartition the domain decomposition */
+ em_dd_partition_system(fplog,count,cr,top_global,inputrec,
+ ems,top,mdatoms,fr,vsite,constr,
+ nrnb,wcycle);
+ }
+ }
+
+ /* Calc force & energy on new trial position */
+ /* do_force always puts the charge groups in the box and shifts again
+ * We do not unshift, so molecules are always whole in congrad.c
+ */
+ do_force(fplog,cr,inputrec,
+ count,nrnb,wcycle,top,top_global,&top_global->groups,
+ ems->s.box,ems->s.x,&ems->s.hist,
+ ems->f,force_vir,mdatoms,enerd,fcd,
+ ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
- ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda[efptBONDED],&dvdlambda,
++ GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
++ GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
+ (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
+
+ /* Clear the unused shake virial and pressure */
+ clear_mat(shake_vir);
+ clear_mat(pres);
+
+ /* Communicate stuff when parallel */
+ if (PAR(cr) && inputrec->eI != eiNM)
+ {
+ wallcycle_start(wcycle,ewcMoveE);
+
+ global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
+ inputrec,NULL,NULL,NULL,1,&terminate,
+ top_global,&ems->s,FALSE,
+ CGLO_ENERGY |
+ CGLO_PRESSURE |
+ CGLO_CONSTRAINT |
+ CGLO_FIRSTITERATE);
+
+ wallcycle_stop(wcycle,ewcMoveE);
+ }
+
+ /* Calculate long range corrections to pressure and energy */
+ calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
+ pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
+ enerd->term[F_DISPCORR] = enercorr;
+ enerd->term[F_EPOT] += enercorr;
+ enerd->term[F_PRES] += prescorr;
+ enerd->term[F_DVDL] += dvdlcorr;
+
+ ems->epot = enerd->term[F_EPOT];
+
+ if (constr) {
+ /* Project out the constraint components of the force */
+ wallcycle_start(wcycle,ewcCONSTR);
+ dvdlambda = 0;
+ constrain(NULL,FALSE,FALSE,constr,&top->idef,
+ inputrec,NULL,cr,count,0,mdatoms,
- do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
- constr,top,nrnb,wcycle,-1);
++ ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
++ ems->s.lambda[efptBONDED],&dvdlambda,
+ NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
+ if (fr->bSepDVDL && fplog)
+ fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
+ enerd->term[F_DVDL_BONDED] += dvdlambda;
+ m_add(force_vir,shake_vir,vir);
+ wallcycle_stop(wcycle,ewcCONSTR);
+ } else {
+ copy_mat(force_vir,vir);
+ }
+
+ clear_mat(ekin);
+ enerd->term[F_PRES] =
+ calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
+
+ sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
+
+ if (EI_ENERGY_MINIMIZATION(inputrec->eI))
+ {
+ get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
+ }
+}
+
+static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
+ gmx_mtop_t *mtop,
+ em_state_t *s_min,em_state_t *s_b)
+{
+ rvec *fm,*fb,*fmg;
+ t_block *cgs_gl;
+ int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
+ double partsum;
+ unsigned char *grpnrFREEZE;
+
+ if (debug)
+ fprintf(debug,"Doing reorder_partsum\n");
+
+ fm = s_min->f;
+ fb = s_b->f;
+
+ cgs_gl = dd_charge_groups_global(cr->dd);
+ index = cgs_gl->index;
+
+ /* Collect fm in a global vector fmg.
+ * This conflicts with the spirit of domain decomposition,
+ * but to fully optimize this a much more complicated algorithm is required.
+ */
+ snew(fmg,mtop->natoms);
+
+ ncg = s_min->s.ncg_gl;
+ cg_gl = s_min->s.cg_gl;
+ i = 0;
+ for(c=0; c<ncg; c++) {
+ cg = cg_gl[c];
+ a0 = index[cg];
+ a1 = index[cg+1];
+ for(a=a0; a<a1; a++) {
+ copy_rvec(fm[i],fmg[a]);
+ i++;
+ }
+ }
+ gmx_sum(mtop->natoms*3,fmg[0],cr);
+
+ /* Now we will determine the part of the sum for the cgs in state s_b */
+ ncg = s_b->s.ncg_gl;
+ cg_gl = s_b->s.cg_gl;
+ partsum = 0;
+ i = 0;
+ gf = 0;
+ grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
+ for(c=0; c<ncg; c++) {
+ cg = cg_gl[c];
+ a0 = index[cg];
+ a1 = index[cg+1];
+ for(a=a0; a<a1; a++) {
+ if (mdatoms->cFREEZE && grpnrFREEZE) {
+ gf = grpnrFREEZE[i];
+ }
+ for(m=0; m<DIM; m++) {
+ if (!opts->nFreeze[gf][m]) {
+ partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
+ }
+ }
+ i++;
+ }
+ }
+
+ sfree(fmg);
+
+ return partsum;
+}
+
+static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
+ gmx_mtop_t *mtop,
+ em_state_t *s_min,em_state_t *s_b)
+{
+ rvec *fm,*fb;
+ double sum;
+ int gf,i,m;
+
+ /* This is just the classical Polak-Ribiere calculation of beta;
+ * it looks a bit complicated since we take freeze groups into account,
+ * and might have to sum it in parallel runs.
+ */
+
+ if (!DOMAINDECOMP(cr) ||
+ (s_min->s.ddp_count == cr->dd->ddp_count &&
+ s_b->s.ddp_count == cr->dd->ddp_count)) {
+ fm = s_min->f;
+ fb = s_b->f;
+ sum = 0;
+ gf = 0;
+ /* This part of code can be incorrect with DD,
+ * since the atom ordering in s_b and s_min might differ.
+ */
+ for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
+ if (mdatoms->cFREEZE)
+ gf = mdatoms->cFREEZE[i];
+ for(m=0; m<DIM; m++)
+ if (!opts->nFreeze[gf][m]) {
+ sum += (fb[i][m] - fm[i][m])*fb[i][m];
+ }
+ }
+ } else {
+ /* We need to reorder cgs while summing */
+ sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
+ }
+ if (PAR(cr))
+ gmx_sumd(1,&sum,cr);
+
+ return sum/sqr(s_min->fnorm);
+}
+
+double do_cg(FILE *fplog,t_commrec *cr,
+ int nfile,const t_filenm fnm[],
+ const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *top_global,t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,
+ t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ gmx_membed_t membed,
+ real cpt_period,real max_hours,
+ const char *deviceOptions,
+ unsigned long Flags,
+ gmx_runtime_t *runtime)
+{
+ const char *CG="Polak-Ribiere Conjugate Gradients";
+
+ em_state_t *s_min,*s_a,*s_b,*s_c;
+ gmx_localtop_t *top;
+ gmx_enerdata_t *enerd;
+ rvec *f;
+ gmx_global_stat_t gstat;
+ t_graph *graph;
+ rvec *f_global,*p,*sf,*sfm;
+ double gpa,gpb,gpc,tmp,sum[2],minstep;
+ real fnormn;
+ real stepsize;
+ real a,b,c,beta=0.0;
+ real epot_repl=0;
+ real pnorm;
+ t_mdebin *mdebin;
+ gmx_bool converged,foundlower;
+ rvec mu_tot;
+ gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
+ tensor vir,pres;
+ int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
+ gmx_mdoutf_t *outf;
+ int i,m,gf,step,nminstep;
+ real terminate=0;
+
+ step=0;
+
+ s_min = init_em_state();
+ s_a = init_em_state();
+ s_b = init_em_state();
+ s_c = init_em_state();
+
+ /* Init em and store the local state in s_min */
+ init_em(fplog,CG,cr,inputrec,
+ state_global,top_global,s_min,&top,&f,&f_global,
+ nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
+ nfile,fnm,&outf,&mdebin);
+
+ /* Print to log file */
+ print_em_start(fplog,cr,runtime,wcycle,CG);
+
+ /* Max number of steps */
+ number_steps=inputrec->nsteps;
+
+ if (MASTER(cr))
+ sp_header(stderr,CG,inputrec->em_tol,number_steps);
+ if (fplog)
+ sp_header(fplog,CG,inputrec->em_tol,number_steps);
+
+ /* Call the force routine and some auxiliary (neighboursearching etc.) */
+ /* do_force always puts the charge groups in the box and shifts again
+ * We do not unshift, so molecules are always whole in congrad.c
+ */
+ evaluate_energy(fplog,bVerbose,cr,
+ state_global,top_global,s_min,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,-1,TRUE);
+ where();
+
+ if (MASTER(cr)) {
+ /* Copy stuff to the energy bin for easy printing etc. */
+ upd_mdebin(mdebin,FALSE,FALSE,(double)step,
+ mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
+ NULL,NULL,vir,pres,NULL,mu_tot,constr);
+
+ print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
+ print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
+ TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
+ }
+ where();
+
+ /* Estimate/guess the initial stepsize */
+ stepsize = inputrec->em_stepsize/s_min->fnorm;
+
+ if (MASTER(cr)) {
+ fprintf(stderr," F-max = %12.5e on atom %d\n",
+ s_min->fmax,s_min->a_fmax+1);
+ fprintf(stderr," F-Norm = %12.5e\n",
+ s_min->fnorm/sqrt(state_global->natoms));
+ fprintf(stderr,"\n");
+ /* and copy to the log file too... */
+ fprintf(fplog," F-max = %12.5e on atom %d\n",
+ s_min->fmax,s_min->a_fmax+1);
+ fprintf(fplog," F-Norm = %12.5e\n",
+ s_min->fnorm/sqrt(state_global->natoms));
+ fprintf(fplog,"\n");
+ }
+ /* Start the loop over CG steps.
+ * Each successful step is counted, and we continue until
+ * we either converge or reach the max number of steps.
+ */
+ converged = FALSE;
+ for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
+
+ /* start taking steps in a new direction
+ * First time we enter the routine, beta=0, and the direction is
+ * simply the negative gradient.
+ */
+
+ /* Calculate the new direction in p, and the gradient in this direction, gpa */
+ p = s_min->s.cg_p;
+ sf = s_min->f;
+ gpa = 0;
+ gf = 0;
+ for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
+ if (mdatoms->cFREEZE)
+ gf = mdatoms->cFREEZE[i];
+ for(m=0; m<DIM; m++) {
+ if (!inputrec->opts.nFreeze[gf][m]) {
+ p[i][m] = sf[i][m] + beta*p[i][m];
+ gpa -= p[i][m]*sf[i][m];
+ /* f is negative gradient, thus the sign */
+ } else {
+ p[i][m] = 0;
+ }
+ }
+ }
+
+ /* Sum the gradient along the line across CPUs */
+ if (PAR(cr))
+ gmx_sumd(1,&gpa,cr);
+
+ /* Calculate the norm of the search vector */
+ get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
+
+ /* Just in case stepsize reaches zero due to numerical precision... */
+ if(stepsize<=0)
+ stepsize = inputrec->em_stepsize/pnorm;
+
+ /*
+ * Double check the value of the derivative in the search direction.
+ * If it is positive it must be due to the old information in the
+ * CG formula, so just remove that and start over with beta=0.
+ * This corresponds to a steepest descent step.
+ */
+ if(gpa>0) {
+ beta = 0;
+ step--; /* Don't count this step since we are restarting */
+ continue; /* Go back to the beginning of the big for-loop */
+ }
+
+ /* Calculate minimum allowed stepsize, before the average (norm)
+ * relative change in coordinate is smaller than precision
+ */
+ minstep=0;
+ for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
+ for(m=0; m<DIM; m++) {
+ tmp = fabs(s_min->s.x[i][m]);
+ if(tmp < 1.0)
+ tmp = 1.0;
+ tmp = p[i][m]/tmp;
+ minstep += tmp*tmp;
+ }
+ }
+ /* Add up from all CPUs */
+ if(PAR(cr))
+ gmx_sumd(1,&minstep,cr);
+
+ minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
+
+ if(stepsize<minstep) {
+ converged=TRUE;
+ break;
+ }
+
+ /* Write coordinates if necessary */
+ do_x = do_per_step(step,inputrec->nstxout);
+ do_f = do_per_step(step,inputrec->nstfout);
+
+ write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
+ top_global,inputrec,step,
+ s_min,state_global,f_global);
+
+ /* Take a step downhill.
+ * In theory, we should minimize the function along this direction.
+ * That is quite possible, but it turns out to take 5-10 function evaluations
+ * for each line. However, we don't really need to find the exact minimum -
+ * it is much better to start a new CG step in a modified direction as soon
+ * as we are close to it. This will save a lot of energy evaluations.
+ *
+ * In practice, we just try to take a single step.
+ * If it worked (i.e. lowered the energy), we increase the stepsize but
+ * then continue straight to the next CG step without trying to find any minimum.
+ * If it didn't work (higher energy), there must be a minimum somewhere between
+ * the old position and the new one.
+ *
+ * Due to the finite numerical accuracy, it turns out that it is a good idea
+ * to even accept a SMALL increase in energy, if the derivative is still downhill.
+ * This leads to lower final energies in the tests I've done. / Erik
+ */
+ s_a->epot = s_min->epot;
+ a = 0.0;
+ c = a + stepsize; /* reference position along line is zero */
+
+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
+ em_dd_partition_system(fplog,step,cr,top_global,inputrec,
+ s_min,top,mdatoms,fr,vsite,constr,
+ nrnb,wcycle);
+ }
+
+ /* Take a trial step (new coords in s_c) */
- do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
- constr,top,nrnb,wcycle,-1);
++ do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
++ constr,top,nrnb,wcycle,-1);
+
+ neval++;
+ /* Calculate energy for the trial step */
+ evaluate_energy(fplog,bVerbose,cr,
+ state_global,top_global,s_c,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,-1,FALSE);
+
+ /* Calc derivative along line */
+ p = s_c->s.cg_p;
+ sf = s_c->f;
+ gpc=0;
+ for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
+ for(m=0; m<DIM; m++)
+ gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
+ }
+ /* Sum the gradient along the line across CPUs */
+ if (PAR(cr))
+ gmx_sumd(1,&gpc,cr);
+
+ /* This is the max amount of increase in energy we tolerate */
+ tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
+
+ /* Accept the step if the energy is lower, or if it is not significantly higher
+ * and the line derivative is still negative.
+ */
+ if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
+ foundlower = TRUE;
+ /* Great, we found a better energy. Increase step for next iteration
+ * if we are still going down, decrease it otherwise
+ */
+ if(gpc<0)
+ stepsize *= 1.618034; /* The golden section */
+ else
+ stepsize *= 0.618034; /* 1/golden section */
+ } else {
+ /* New energy is the same or higher. We will have to do some work
+ * to find a smaller value in the interval. Take smaller step next time!
+ */
+ foundlower = FALSE;
+ stepsize *= 0.618034;
+ }
+
+
+
+
+ /* OK, if we didn't find a lower value we will have to locate one now - there must
+ * be one in the interval [a=0,c].
+ * The same thing is valid here, though: Don't spend dozens of iterations to find
+ * the line minimum. We try to interpolate based on the derivative at the endpoints,
+ * and only continue until we find a lower value. In most cases this means 1-2 iterations.
+ *
+ * I also have a safeguard for potentially really pathological functions so we never
+ * take more than 20 steps before we give up ...
+ *
+ * If we already found a lower value we just skip this step and continue to the update.
+ */
+ if (!foundlower) {
+ nminstep=0;
+
+ do {
+ /* Select a new trial point.
+ * If the derivatives at points a & c have different sign we interpolate to zero,
+ * otherwise just do a bisection.
+ */
+ if(gpa<0 && gpc>0)
+ b = a + gpa*(a-c)/(gpc-gpa);
+ else
+ b = 0.5*(a+c);
+
+ /* safeguard if interpolation close to machine accuracy causes errors:
+ * never go outside the interval
+ */
+ if(b<=a || b>=c)
+ b = 0.5*(a+c);
+
+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
+ /* Reload the old state */
+ em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
+ s_min,top,mdatoms,fr,vsite,constr,
+ nrnb,wcycle);
+ }
+
+ /* Take a trial step to this new point - new coords in s_b */
- do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
- constr,top,nrnb,wcycle,count);
++ do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
++ constr,top,nrnb,wcycle,-1);
+
+ neval++;
+ /* Calculate energy for the trial step */
+ evaluate_energy(fplog,bVerbose,cr,
+ state_global,top_global,s_b,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,-1,FALSE);
+
+ /* p does not change within a step, but since the domain decomposition
+ * might change, we have to use cg_p of s_b here.
+ */
+ p = s_b->s.cg_p;
+ sf = s_b->f;
+ gpb=0;
+ for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
+ for(m=0; m<DIM; m++)
+ gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
+ }
+ /* Sum the gradient along the line across CPUs */
+ if (PAR(cr))
+ gmx_sumd(1,&gpb,cr);
+
+ if (debug)
+ fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
+ s_a->epot,s_b->epot,s_c->epot,gpb);
+
+ epot_repl = s_b->epot;
+
+ /* Keep one of the intervals based on the value of the derivative at the new point */
+ if (gpb > 0) {
+ /* Replace c endpoint with b */
+ swap_em_state(s_b,s_c);
+ c = b;
+ gpc = gpb;
+ } else {
+ /* Replace a endpoint with b */
+ swap_em_state(s_b,s_a);
+ a = b;
+ gpa = gpb;
+ }
+
+ /*
+ * Stop search as soon as we find a value smaller than the endpoints.
+ * Never run more than 20 steps, no matter what.
+ */
+ nminstep++;
+ } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
+ (nminstep < 20));
+
+ if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
+ nminstep >= 20) {
+ /* OK. We couldn't find a significantly lower energy.
+ * If beta==0 this was steepest descent, and then we give up.
+ * If not, set beta=0 and restart with steepest descent before quitting.
+ */
+ if (beta == 0.0) {
+ /* Converged */
+ converged = TRUE;
+ break;
+ } else {
+ /* Reset memory before giving up */
+ beta = 0.0;
+ continue;
+ }
+ }
+
+ /* Select min energy state of A & C, put the best in B.
+ */
+ if (s_c->epot < s_a->epot) {
+ if (debug)
+ fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
+ s_c->epot,s_a->epot);
+ swap_em_state(s_b,s_c);
+ gpb = gpc;
+ b = c;
+ } else {
+ if (debug)
+ fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
+ s_a->epot,s_c->epot);
+ swap_em_state(s_b,s_a);
+ gpb = gpa;
+ b = a;
+ }
+
+ } else {
+ if (debug)
+ fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
+ s_c->epot);
+ swap_em_state(s_b,s_c);
+ gpb = gpc;
+ b = c;
+ }
+
+ /* new search direction */
+ /* beta = 0 means forget all memory and restart with steepest descents. */
+ if (nstcg && ((step % nstcg)==0))
+ beta = 0.0;
+ else {
+ /* s_min->fnorm cannot be zero, because then we would have converged
+ * and broken out.
+ */
+
+ /* Polak-Ribiere update.
+ * Change to fnorm2/fnorm2_old for Fletcher-Reeves
+ */
+ beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
+ }
+ /* Limit beta to prevent oscillations */
+ if (fabs(beta) > 5.0)
+ beta = 0.0;
+
+
+ /* update positions */
+ swap_em_state(s_min,s_b);
+ gpa = gpb;
+
+ /* Print it if necessary */
+ if (MASTER(cr)) {
+ if(bVerbose)
+ fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
+ step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
+ s_min->fmax,s_min->a_fmax+1);
+ /* Store the new (lower) energies */
+ upd_mdebin(mdebin,FALSE,FALSE,(double)step,
+ mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
+ NULL,NULL,vir,pres,NULL,mu_tot,constr);
+
+ do_log = do_per_step(step,inputrec->nstlog);
+ do_ene = do_per_step(step,inputrec->nstenergy);
+ if(do_log)
+ print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
+ print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
+ do_log ? fplog : NULL,step,step,eprNORMAL,
+ TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
+ }
+
+ /* Stop when the maximum force lies below tolerance.
+ * If we have reached machine precision, converged is already set to true.
+ */
+ converged = converged || (s_min->fmax < inputrec->em_tol);
+
+ } /* End of the loop */
+
+ if (converged)
+ step--; /* we never took that last step in this case */
+
+ if (s_min->fmax > inputrec->em_tol)
+ {
+ if (MASTER(cr))
+ {
+ warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
+ warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
+ }
+ converged = FALSE;
+ }
+
+ if (MASTER(cr)) {
+ /* If we printed energy and/or logfile last step (which was the last step)
+ * we don't have to do it again, but otherwise print the final values.
+ */
+ if(!do_log) {
+ /* Write final value to log since we didn't do anything the last step */
+ print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
+ }
+ if (!do_ene || !do_log) {
+ /* Write final energy file entries */
+ print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
+ !do_log ? fplog : NULL,step,step,eprNORMAL,
+ TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
+ }
+ }
+
+ /* Print some stuff... */
+ if (MASTER(cr))
+ fprintf(stderr,"\nwriting lowest energy coordinates.\n");
+
+ /* IMPORTANT!
+ * For accurate normal mode calculation it is imperative that we
+ * store the last conformation into the full precision binary trajectory.
+ *
+ * However, we should only do it if we did NOT already write this step
+ * above (which we did if do_x or do_f was true).
+ */
+ do_x = !do_per_step(step,inputrec->nstxout);
+ do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
+
+ write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
+ top_global,inputrec,step,
+ s_min,state_global,f_global);
+
+ fnormn = s_min->fnorm/sqrt(state_global->natoms);
+
+ if (MASTER(cr)) {
+ print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
+ s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
+ print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
+ s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
+
+ fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
+ }
+
+ finish_em(fplog,cr,outf,runtime,wcycle);
+
+ /* To print the actual number of steps we needed somewhere */
+ runtime->nsteps_done = step;
+
+ return 0;
+} /* That's all folks */
+
+
+double do_lbfgs(FILE *fplog,t_commrec *cr,
+ int nfile,const t_filenm fnm[],
+ const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *top_global,t_fcdata *fcd,
+ t_state *state,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,
+ t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ gmx_membed_t membed,
+ real cpt_period,real max_hours,
+ const char *deviceOptions,
+ unsigned long Flags,
+ gmx_runtime_t *runtime)
+{
+ static const char *LBFGS="Low-Memory BFGS Minimizer";
+ em_state_t ems;
+ gmx_localtop_t *top;
+ gmx_enerdata_t *enerd;
+ rvec *f;
+ gmx_global_stat_t gstat;
+ t_graph *graph;
+ rvec *f_global;
+ int ncorr,nmaxcorr,point,cp,neval,nminstep;
+ double stepsize,gpa,gpb,gpc,tmp,minstep;
+ real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
+ real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
+ real a,b,c,maxdelta,delta;
+ real diag,Epot0,Epot,EpotA,EpotB,EpotC;
+ real dgdx,dgdg,sq,yr,beta;
+ t_mdebin *mdebin;
+ gmx_bool converged,first;
+ rvec mu_tot;
+ real fnorm,fmax;
+ gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
+ tensor vir,pres;
+ int start,end,number_steps;
+ gmx_mdoutf_t *outf;
+ int i,k,m,n,nfmax,gf,step;
+ int mdof_flags;
+ /* not used */
+ real terminate;
+
+ if (PAR(cr))
+ gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
+
+ n = 3*state->natoms;
+ nmaxcorr = inputrec->nbfgscorr;
+
+ /* Allocate memory */
+ /* Use pointers to real so we dont have to loop over both atoms and
+ * dimensions all the time...
+ * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
+ * that point to the same memory.
+ */
+ snew(xa,n);
+ snew(xb,n);
+ snew(xc,n);
+ snew(fa,n);
+ snew(fb,n);
+ snew(fc,n);
+ snew(frozen,n);
+
+ snew(p,n);
+ snew(lastx,n);
+ snew(lastf,n);
+ snew(rho,nmaxcorr);
+ snew(alpha,nmaxcorr);
+
+ snew(dx,nmaxcorr);
+ for(i=0;i<nmaxcorr;i++)
+ snew(dx[i],n);
+
+ snew(dg,nmaxcorr);
+ for(i=0;i<nmaxcorr;i++)
+ snew(dg[i],n);
+
+ step = 0;
+ neval = 0;
+
+ /* Init em */
+ init_em(fplog,LBFGS,cr,inputrec,
+ state,top_global,&ems,&top,&f,&f_global,
+ nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
+ nfile,fnm,&outf,&mdebin);
+ /* Do_lbfgs is not completely updated like do_steep and do_cg,
+ * so we free some memory again.
+ */
+ sfree(ems.s.x);
+ sfree(ems.f);
+
+ xx = (real *)state->x;
+ ff = (real *)f;
+
+ start = mdatoms->start;
+ end = mdatoms->homenr + start;
+
+ /* Print to log file */
+ print_em_start(fplog,cr,runtime,wcycle,LBFGS);
+
+ do_log = do_ene = do_x = do_f = TRUE;
+
+ /* Max number of steps */
+ number_steps=inputrec->nsteps;
+
+ /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
+ gf = 0;
+ for(i=start; i<end; i++) {
+ if (mdatoms->cFREEZE)
+ gf = mdatoms->cFREEZE[i];
+ for(m=0; m<DIM; m++)
+ frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
+ }
+ if (MASTER(cr))
+ sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
+ if (fplog)
+ sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
+
+ if (vsite)
+ construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
+ top->idef.iparams,top->idef.il,
+ fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+
+ /* Call the force routine and some auxiliary (neighboursearching etc.) */
+ /* do_force always puts the charge groups in the box and shifts again
+ * We do not unshift, so molecules are always whole
+ */
+ neval++;
+ ems.s.x = state->x;
+ ems.f = f;
+ evaluate_energy(fplog,bVerbose,cr,
+ state,top_global,&ems,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,-1,TRUE);
+ where();
+
+ if (MASTER(cr)) {
+ /* Copy stuff to the energy bin for easy printing etc. */
+ upd_mdebin(mdebin,FALSE,FALSE,(double)step,
+ mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
+ NULL,NULL,vir,pres,NULL,mu_tot,constr);
+
+ print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
+ print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
+ TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
+ }
+ where();
+
+ /* This is the starting energy */
+ Epot = enerd->term[F_EPOT];
+
+ fnorm = ems.fnorm;
+ fmax = ems.fmax;
+ nfmax = ems.a_fmax;
+
+ /* Set the initial step.
+ * since it will be multiplied by the non-normalized search direction
+ * vector (force vector the first time), we scale it by the
+ * norm of the force.
+ */
+
+ if (MASTER(cr)) {
+ fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
+ fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
+ fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
+ fprintf(stderr,"\n");
+ /* and copy to the log file too... */
+ fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
+ fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
+ fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
+ fprintf(fplog,"\n");
+ }
+
+ point=0;
+ for(i=0;i<n;i++)
+ if(!frozen[i])
+ dx[point][i] = ff[i]; /* Initial search direction */
+ else
+ dx[point][i] = 0;
+
+ stepsize = 1.0/fnorm;
+ converged = FALSE;
+
+ /* Start the loop over BFGS steps.
+ * Each successful step is counted, and we continue until
+ * we either converge or reach the max number of steps.
+ */
+
+ ncorr=0;
+
+ /* Set the gradient from the force */
+ converged = FALSE;
+ for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
+
+ /* Write coordinates if necessary */
+ do_x = do_per_step(step,inputrec->nstxout);
+ do_f = do_per_step(step,inputrec->nstfout);
+
+ mdof_flags = 0;
+ if (do_x)
+ {
+ mdof_flags |= MDOF_X;
+ }
+
+ if (do_f)
+ {
+ mdof_flags |= MDOF_F;
+ }
+
+ write_traj(fplog,cr,outf,mdof_flags,
+ top_global,step,(real)step,state,state,f,f,NULL,NULL);
+
+ /* Do the linesearching in the direction dx[point][0..(n-1)] */
+
+ /* pointer to current direction - point=0 first time here */
+ s=dx[point];
+
+ /* calculate line gradient */
+ for(gpa=0,i=0;i<n;i++)
+ gpa-=s[i]*ff[i];
+
+ /* Calculate minimum allowed stepsize, before the average (norm)
+ * relative change in coordinate is smaller than precision
+ */
+ for(minstep=0,i=0;i<n;i++) {
+ tmp=fabs(xx[i]);
+ if(tmp<1.0)
+ tmp=1.0;
+ tmp = s[i]/tmp;
+ minstep += tmp*tmp;
+ }
+ minstep = GMX_REAL_EPS/sqrt(minstep/n);
+
+ if(stepsize<minstep) {
+ converged=TRUE;
+ break;
+ }
+
+ /* Store old forces and coordinates */
+ for(i=0;i<n;i++) {
+ lastx[i]=xx[i];
+ lastf[i]=ff[i];
+ }
+ Epot0=Epot;
+
+ first=TRUE;
+
+ for(i=0;i<n;i++)
+ xa[i]=xx[i];
+
+ /* Take a step downhill.
+ * In theory, we should minimize the function along this direction.
+ * That is quite possible, but it turns out to take 5-10 function evaluations
+ * for each line. However, we don't really need to find the exact minimum -
+ * it is much better to start a new BFGS step in a modified direction as soon
+ * as we are close to it. This will save a lot of energy evaluations.
+ *
+ * In practice, we just try to take a single step.
+ * If it worked (i.e. lowered the energy), we increase the stepsize but
+ * then continue straight to the next BFGS step without trying to find any minimum.
+ * If it didn't work (higher energy), there must be a minimum somewhere between
+ * the old position and the new one.
+ *
+ * Due to the finite numerical accuracy, it turns out that it is a good idea
+ * to even accept a SMALL increase in energy, if the derivative is still downhill.
+ * This leads to lower final energies in the tests I've done. / Erik
+ */
+ foundlower=FALSE;
+ EpotA = Epot0;
+ a = 0.0;
+ c = a + stepsize; /* reference position along line is zero */
+
+ /* Check stepsize first. We do not allow displacements
+ * larger than emstep.
+ */
+ do {
+ c = a + stepsize;
+ maxdelta=0;
+ for(i=0;i<n;i++) {
+ delta=c*s[i];
+ if(delta>maxdelta)
+ maxdelta=delta;
+ }
+ if(maxdelta>inputrec->em_stepsize)
+ stepsize*=0.1;
+ } while(maxdelta>inputrec->em_stepsize);
+
+ /* Take a trial step */
+ for (i=0; i<n; i++)
+ xc[i] = lastx[i] + c*s[i];
+
+ neval++;
+ /* Calculate energy for the trial step */
+ ems.s.x = (rvec *)xc;
+ ems.f = (rvec *)fc;
+ evaluate_energy(fplog,bVerbose,cr,
+ state,top_global,&ems,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,step,FALSE);
+ EpotC = ems.epot;
+
+ /* Calc derivative along line */
+ for(gpc=0,i=0; i<n; i++) {
+ gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
+ }
+ /* Sum the gradient along the line across CPUs */
+ if (PAR(cr))
+ gmx_sumd(1,&gpc,cr);
+
+ /* This is the max amount of increase in energy we tolerate */
+ tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
+
+ /* Accept the step if the energy is lower, or if it is not significantly higher
+ * and the line derivative is still negative.
+ */
+ if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
+ foundlower = TRUE;
+ /* Great, we found a better energy. Increase step for next iteration
+ * if we are still going down, decrease it otherwise
+ */
+ if(gpc<0)
+ stepsize *= 1.618034; /* The golden section */
+ else
+ stepsize *= 0.618034; /* 1/golden section */
+ } else {
+ /* New energy is the same or higher. We will have to do some work
+ * to find a smaller value in the interval. Take smaller step next time!
+ */
+ foundlower = FALSE;
+ stepsize *= 0.618034;
+ }
+
+ /* OK, if we didn't find a lower value we will have to locate one now - there must
+ * be one in the interval [a=0,c].
+ * The same thing is valid here, though: Don't spend dozens of iterations to find
+ * the line minimum. We try to interpolate based on the derivative at the endpoints,
+ * and only continue until we find a lower value. In most cases this means 1-2 iterations.
+ *
+ * I also have a safeguard for potentially really pathological functions so we never
+ * take more than 20 steps before we give up ...
+ *
+ * If we already found a lower value we just skip this step and continue to the update.
+ */
+
+ if(!foundlower) {
+
+ nminstep=0;
+ do {
+ /* Select a new trial point.
+ * If the derivatives at points a & c have different sign we interpolate to zero,
+ * otherwise just do a bisection.
+ */
+
+ if(gpa<0 && gpc>0)
+ b = a + gpa*(a-c)/(gpc-gpa);
+ else
+ b = 0.5*(a+c);
+
+ /* safeguard if interpolation close to machine accuracy causes errors:
+ * never go outside the interval
+ */
+ if(b<=a || b>=c)
+ b = 0.5*(a+c);
+
+ /* Take a trial step */
+ for (i=0; i<n; i++)
+ xb[i] = lastx[i] + b*s[i];
+
+ neval++;
+ /* Calculate energy for the trial step */
+ ems.s.x = (rvec *)xb;
+ ems.f = (rvec *)fb;
+ evaluate_energy(fplog,bVerbose,cr,
+ state,top_global,&ems,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,step,FALSE);
+ EpotB = ems.epot;
+
+ fnorm = ems.fnorm;
+
+ for(gpb=0,i=0; i<n; i++)
+ gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
+
+ /* Sum the gradient along the line across CPUs */
+ if (PAR(cr))
+ gmx_sumd(1,&gpb,cr);
+
+ /* Keep one of the intervals based on the value of the derivative at the new point */
+ if(gpb>0) {
+ /* Replace c endpoint with b */
+ EpotC = EpotB;
+ c = b;
+ gpc = gpb;
+ /* swap coord pointers b/c */
+ xtmp = xb;
+ ftmp = fb;
+ xb = xc;
+ fb = fc;
+ xc = xtmp;
+ fc = ftmp;
+ } else {
+ /* Replace a endpoint with b */
+ EpotA = EpotB;
+ a = b;
+ gpa = gpb;
+ /* swap coord pointers a/b */
+ xtmp = xb;
+ ftmp = fb;
+ xb = xa;
+ fb = fa;
+ xa = xtmp;
+ fa = ftmp;
+ }
+
+ /*
+ * Stop search as soon as we find a value smaller than the endpoints,
+ * or if the tolerance is below machine precision.
+ * Never run more than 20 steps, no matter what.
+ */
+ nminstep++;
+ } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
+
+ if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
+ /* OK. We couldn't find a significantly lower energy.
+ * If ncorr==0 this was steepest descent, and then we give up.
+ * If not, reset memory to restart as steepest descent before quitting.
+ */
+ if(ncorr==0) {
+ /* Converged */
+ converged=TRUE;
+ break;
+ } else {
+ /* Reset memory */
+ ncorr=0;
+ /* Search in gradient direction */
+ for(i=0;i<n;i++)
+ dx[point][i]=ff[i];
+ /* Reset stepsize */
+ stepsize = 1.0/fnorm;
+ continue;
+ }
+ }
+
+ /* Select min energy state of A & C, put the best in xx/ff/Epot
+ */
+ if(EpotC<EpotA) {
+ Epot = EpotC;
+ /* Use state C */
+ for(i=0;i<n;i++) {
+ xx[i]=xc[i];
+ ff[i]=fc[i];
+ }
+ stepsize=c;
+ } else {
+ Epot = EpotA;
+ /* Use state A */
+ for(i=0;i<n;i++) {
+ xx[i]=xa[i];
+ ff[i]=fa[i];
+ }
+ stepsize=a;
+ }
+
+ } else {
+ /* found lower */
+ Epot = EpotC;
+ /* Use state C */
+ for(i=0;i<n;i++) {
+ xx[i]=xc[i];
+ ff[i]=fc[i];
+ }
+ stepsize=c;
+ }
+
+ /* Update the memory information, and calculate a new
+ * approximation of the inverse hessian
+ */
+
+ /* Have new data in Epot, xx, ff */
+ if(ncorr<nmaxcorr)
+ ncorr++;
+
+ for(i=0;i<n;i++) {
+ dg[point][i]=lastf[i]-ff[i];
+ dx[point][i]*=stepsize;
+ }
+
+ dgdg=0;
+ dgdx=0;
+ for(i=0;i<n;i++) {
+ dgdg+=dg[point][i]*dg[point][i];
+ dgdx+=dg[point][i]*dx[point][i];
+ }
+
+ diag=dgdx/dgdg;
+
+ rho[point]=1.0/dgdx;
+ point++;
+
+ if(point>=nmaxcorr)
+ point=0;
+
+ /* Update */
+ for(i=0;i<n;i++)
+ p[i]=ff[i];
+
+ cp=point;
+
+ /* Recursive update. First go back over the memory points */
+ for(k=0;k<ncorr;k++) {
+ cp--;
+ if(cp<0)
+ cp=ncorr-1;
+
+ sq=0;
+ for(i=0;i<n;i++)
+ sq+=dx[cp][i]*p[i];
+
+ alpha[cp]=rho[cp]*sq;
+
+ for(i=0;i<n;i++)
+ p[i] -= alpha[cp]*dg[cp][i];
+ }
+
+ for(i=0;i<n;i++)
+ p[i] *= diag;
+
+ /* And then go forward again */
+ for(k=0;k<ncorr;k++) {
+ yr = 0;
+ for(i=0;i<n;i++)
+ yr += p[i]*dg[cp][i];
+
+ beta = rho[cp]*yr;
+ beta = alpha[cp]-beta;
+
+ for(i=0;i<n;i++)
+ p[i] += beta*dx[cp][i];
+
+ cp++;
+ if(cp>=ncorr)
+ cp=0;
+ }
+
+ for(i=0;i<n;i++)
+ if(!frozen[i])
+ dx[point][i] = p[i];
+ else
+ dx[point][i] = 0;
+
+ stepsize=1.0;
+
+ /* Test whether the convergence criterion is met */
+ get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
+
+ /* Print it if necessary */
+ if (MASTER(cr)) {
+ if(bVerbose)
+ fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
+ step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
+ /* Store the new (lower) energies */
+ upd_mdebin(mdebin,FALSE,FALSE,(double)step,
+ mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
+ NULL,NULL,vir,pres,NULL,mu_tot,constr);
+ do_log = do_per_step(step,inputrec->nstlog);
+ do_ene = do_per_step(step,inputrec->nstenergy);
+ if(do_log)
+ print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
+ print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
+ do_log ? fplog : NULL,step,step,eprNORMAL,
+ TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
+ }
+
+ /* Stop when the maximum force lies below tolerance.
+ * If we have reached machine precision, converged is already set to true.
+ */
+
+ converged = converged || (fmax < inputrec->em_tol);
+
+ } /* End of the loop */
+
+ if(converged)
+ step--; /* we never took that last step in this case */
+
+ if(fmax>inputrec->em_tol)
+ {
+ if (MASTER(cr))
+ {
+ warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
+ warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
+ }
+ converged = FALSE;
+ }
+
+ /* If we printed energy and/or logfile last step (which was the last step)
+ * we don't have to do it again, but otherwise print the final values.
+ */
+ if(!do_log) /* Write final value to log since we didn't do anything last step */
+ print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
+ if(!do_ene || !do_log) /* Write final energy file entries */
+ print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
+ !do_log ? fplog : NULL,step,step,eprNORMAL,
+ TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
+
+ /* Print some stuff... */
+ if (MASTER(cr))
+ fprintf(stderr,"\nwriting lowest energy coordinates.\n");
+
+ /* IMPORTANT!
+ * For accurate normal mode calculation it is imperative that we
+ * store the last conformation into the full precision binary trajectory.
+ *
+ * However, we should only do it if we did NOT already write this step
+ * above (which we did if do_x or do_f was true).
+ */
+ do_x = !do_per_step(step,inputrec->nstxout);
+ do_f = !do_per_step(step,inputrec->nstfout);
+ write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
+ top_global,inputrec,step,
+ &ems,state,f);
+
+ if (MASTER(cr)) {
+ print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
+ number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
+ print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
+ number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
+
+ fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
+ }
+
+ finish_em(fplog,cr,outf,runtime,wcycle);
+
+ /* To print the actual number of steps we needed somewhere */
+ runtime->nsteps_done = step;
+
+ return 0;
+} /* That's all folks */
+
+
+double do_steep(FILE *fplog,t_commrec *cr,
+ int nfile, const t_filenm fnm[],
+ const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *top_global,t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,
+ t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ gmx_membed_t membed,
+ real cpt_period,real max_hours,
+ const char *deviceOptions,
+ unsigned long Flags,
+ gmx_runtime_t *runtime)
+{
+ const char *SD="Steepest Descents";
+ em_state_t *s_min,*s_try;
+ rvec *f_global;
+ gmx_localtop_t *top;
+ gmx_enerdata_t *enerd;
+ rvec *f;
+ gmx_global_stat_t gstat;
+ t_graph *graph;
+ real stepsize,constepsize;
+ real ustep,dvdlambda,fnormn;
+ gmx_mdoutf_t *outf;
+ t_mdebin *mdebin;
+ gmx_bool bDone,bAbort,do_x,do_f;
+ tensor vir,pres;
+ rvec mu_tot;
+ int nsteps;
+ int count=0;
+ int steps_accepted=0;
+ /* not used */
+ real terminate=0;
+
+ s_min = init_em_state();
+ s_try = init_em_state();
+
+ /* Init em and store the local state in s_try */
+ init_em(fplog,SD,cr,inputrec,
+ state_global,top_global,s_try,&top,&f,&f_global,
+ nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
+ nfile,fnm,&outf,&mdebin);
+
+ /* Print to log file */
+ print_em_start(fplog,cr,runtime,wcycle,SD);
+
+ /* Set variables for stepsize (in nm). This is the largest
+ * step that we are going to make in any direction.
+ */
+ ustep = inputrec->em_stepsize;
+ stepsize = 0;
+
+ /* Max number of steps */
+ nsteps = inputrec->nsteps;
+
+ if (MASTER(cr))
+ /* Print to the screen */
+ sp_header(stderr,SD,inputrec->em_tol,nsteps);
+ if (fplog)
+ sp_header(fplog,SD,inputrec->em_tol,nsteps);
+
+ /**** HERE STARTS THE LOOP ****
+ * count is the counter for the number of steps
+ * bDone will be TRUE when the minimization has converged
+ * bAbort will be TRUE when nsteps steps have been performed or when
+ * the stepsize becomes smaller than is reasonable for machine precision
+ */
+ count = 0;
+ bDone = FALSE;
+ bAbort = FALSE;
+ while( !bDone && !bAbort ) {
+ bAbort = (nsteps >= 0) && (count == nsteps);
+
+ /* set new coordinates, except for first step */
+ if (count > 0) {
++ do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
++ s_min,stepsize,s_min->f,s_try,
++ constr,top,nrnb,wcycle,count);
+ }
+
+ evaluate_energy(fplog,bVerbose,cr,
+ state_global,top_global,s_try,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,count,count==0);
+
+ if (MASTER(cr))
+ print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
+
+ if (count == 0)
+ s_min->epot = s_try->epot + 1;
+
+ /* Print it if necessary */
+ if (MASTER(cr)) {
+ if (bVerbose) {
+ fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
+ count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
+ (s_try->epot < s_min->epot) ? '\n' : '\r');
+ }
+
+ if (s_try->epot < s_min->epot) {
+ /* Store the new (lower) energies */
+ upd_mdebin(mdebin,FALSE,FALSE,(double)count,
+ mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
+ s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
+ print_ebin(outf->fp_ene,TRUE,
+ do_per_step(steps_accepted,inputrec->nstdisreout),
+ do_per_step(steps_accepted,inputrec->nstorireout),
+ fplog,count,count,eprNORMAL,TRUE,
+ mdebin,fcd,&(top_global->groups),&(inputrec->opts));
+ fflush(fplog);
+ }
+ }
+
+ /* Now if the new energy is smaller than the previous...
+ * or if this is the first step!
+ * or if we did random steps!
+ */
+
+ if ( (count==0) || (s_try->epot < s_min->epot) ) {
+ steps_accepted++;
+
+ /* Test whether the convergence criterion is met... */
+ bDone = (s_try->fmax < inputrec->em_tol);
+
+ /* Copy the arrays for force, positions and energy */
+ /* The 'Min' array always holds the coords and forces of the minimal
+ sampled energy */
+ swap_em_state(s_min,s_try);
+ if (count > 0)
+ ustep *= 1.2;
+
+ /* Write to trn, if necessary */
+ do_x = do_per_step(steps_accepted,inputrec->nstxout);
+ do_f = do_per_step(steps_accepted,inputrec->nstfout);
+ write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
+ top_global,inputrec,count,
+ s_min,state_global,f_global);
+ }
+ else {
+ /* If energy is not smaller make the step smaller... */
+ ustep *= 0.5;
+
+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
+ /* Reload the old state */
+ em_dd_partition_system(fplog,count,cr,top_global,inputrec,
+ s_min,top,mdatoms,fr,vsite,constr,
+ nrnb,wcycle);
+ }
+ }
+
+ /* Determine new step */
+ stepsize = ustep/s_min->fmax;
+
+ /* Check if stepsize is too small, with 1 nm as a characteristic length */
+#ifdef GMX_DOUBLE
+ if (count == nsteps || ustep < 1e-12)
+#else
+ if (count == nsteps || ustep < 1e-6)
+#endif
+ {
+ if (MASTER(cr))
+ {
+ warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
+ warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
+ }
+ bAbort=TRUE;
+ }
+
+ count++;
+ } /* End of the loop */
+
+ /* Report completion and write the lowest-energy coordinates */
+ if (MASTER(cr))
+ fprintf(stderr,"\nwriting lowest energy coordinates.\n");
+ write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
+ top_global,inputrec,count,
+ s_min,state_global,f_global);
+
+ fnormn = s_min->fnorm/sqrt(state_global->natoms);
+
+ if (MASTER(cr)) {
+ print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
+ s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
+ print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
+ s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
+ }
+
+ finish_em(fplog,cr,outf,runtime,wcycle);
+
+ /* To print the actual number of steps we needed somewhere */
+ inputrec->nsteps=count;
+
+ runtime->nsteps_done = count;
+
+ return 0;
+} /* That's all folks */
+
+
+double do_nm(FILE *fplog,t_commrec *cr,
+ int nfile,const t_filenm fnm[],
+ const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *top_global,t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,
+ t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ gmx_membed_t membed,
+ real cpt_period,real max_hours,
+ const char *deviceOptions,
+ unsigned long Flags,
+ gmx_runtime_t *runtime)
+{
+ const char *NM = "Normal Mode Analysis";
+ gmx_mdoutf_t *outf;
+ int natoms,atom,d;
+ int nnodes,node;
+ rvec *f_global;
+ gmx_localtop_t *top;
+ gmx_enerdata_t *enerd;
+ rvec *f;
+ gmx_global_stat_t gstat;
+ t_graph *graph;
+ real t,t0,lambda,lam0;
+ gmx_bool bNS;
+ tensor vir,pres;
+ rvec mu_tot;
+ rvec *fneg,*dfdx;
+ gmx_bool bSparse; /* use sparse matrix storage format */
+ size_t sz;
+ gmx_sparsematrix_t * sparse_matrix = NULL;
+ real * full_matrix = NULL;
+ em_state_t * state_work;
+
+ /* added with respect to mdrun */
+ int i,j,k,row,col;
+ real der_range=10.0*sqrt(GMX_REAL_EPS);
+ real x_min;
+ real fnorm,fmax;
+
+ if (constr != NULL)
+ {
+ gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
+ }
+
+ state_work = init_em_state();
+
+ /* Init em and store the local state in state_minimum */
+ init_em(fplog,NM,cr,inputrec,
+ state_global,top_global,state_work,&top,
+ &f,&f_global,
+ nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
+ nfile,fnm,&outf,NULL);
+
+ natoms = top_global->natoms;
+ snew(fneg,natoms);
+ snew(dfdx,natoms);
+
+#ifndef GMX_DOUBLE
+ if (MASTER(cr))
+ {
+ fprintf(stderr,
+ "NOTE: This version of Gromacs has been compiled in single precision,\n"
+ " which MIGHT not be accurate enough for normal mode analysis.\n"
+ " Gromacs now uses sparse matrix storage, so the memory requirements\n"
+ " are fairly modest even if you recompile in double precision.\n\n");
+ }
+#endif
+
+ /* Check if we can/should use sparse storage format.
+ *
+ * Sparse format is only useful when the Hessian itself is sparse, which it
+ * will be when we use a cutoff.
+ * For small systems (n<1000) it is easier to always use full matrix format, though.
+ */
+ if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
+ {
+ fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
+ bSparse = FALSE;
+ }
+ else if(top_global->natoms < 1000)
+ {
+ fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
+ bSparse = FALSE;
+ }
+ else
+ {
+ fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
+ bSparse = TRUE;
+ }
+
+ sz = DIM*top_global->natoms;
+
+ fprintf(stderr,"Allocating Hessian memory...\n\n");
+
+ if(bSparse)
+ {
+ sparse_matrix=gmx_sparsematrix_init(sz);
+ sparse_matrix->compressed_symmetric = TRUE;
+ }
+ else
+ {
+ snew(full_matrix,sz*sz);
+ }
+
+ /* Initial values */
+ t0 = inputrec->init_t;
+ lam0 = inputrec->fepvals->init_lambda;
+ t = t0;
+ lambda = lam0;
+
+ init_nrnb(nrnb);
+
+ where();
+
+ /* Write start time and temperature */
+ print_em_start(fplog,cr,runtime,wcycle,NM);
+
+ /* fudge nr of steps to nr of atoms */
+ inputrec->nsteps = natoms*2;
+
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
+ *(top_global->name),(int)inputrec->nsteps);
+ }
+
+ nnodes = cr->nnodes;
+
+ /* Make evaluate_energy do a single node force calculation */
+ cr->nnodes = 1;
+ evaluate_energy(fplog,bVerbose,cr,
+ state_global,top_global,state_work,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,-1,TRUE);
+ cr->nnodes = nnodes;
+
+ /* if forces are not small, warn user */
+ get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
+
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
+ if (state_work->fmax > 1.0e-3)
+ {
+ fprintf(stderr,"Maximum force probably not small enough to");
+ fprintf(stderr," ensure that you are in an \nenergy well. ");
+ fprintf(stderr,"Be aware that negative eigenvalues may occur");
+ fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
+ }
+ }
+
+ /***********************************************************
+ *
+ * Loop over all pairs in matrix
+ *
+ * do_force called twice. Once with positive and
+ * once with negative displacement
+ *
+ ************************************************************/
+
+ /* Steps are divided one by one over the nodes */
+ for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
+ {
+
+ for (d=0; d<DIM; d++)
+ {
+ x_min = state_work->s.x[atom][d];
+
+ state_work->s.x[atom][d] = x_min - der_range;
+
+ /* Make evaluate_energy do a single node force calculation */
+ cr->nnodes = 1;
+ evaluate_energy(fplog,bVerbose,cr,
+ state_global,top_global,state_work,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,atom*2,FALSE);
+
+ for(i=0; i<natoms; i++)
+ {
+ copy_rvec(state_work->f[i], fneg[i]);
+ }
+
+ state_work->s.x[atom][d] = x_min + der_range;
+
+ evaluate_energy(fplog,bVerbose,cr,
+ state_global,top_global,state_work,top,
+ inputrec,nrnb,wcycle,gstat,
+ vsite,constr,fcd,graph,mdatoms,fr,
+ mu_tot,enerd,vir,pres,atom*2+1,FALSE);
+ cr->nnodes = nnodes;
+
+ /* x is restored to original */
+ state_work->s.x[atom][d] = x_min;
+
+ for(j=0; j<natoms; j++)
+ {
+ for (k=0; (k<DIM); k++)
+ {
+ dfdx[j][k] =
+ -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
+ }
+ }
+
+ if (!MASTER(cr))
+ {
+#ifdef GMX_MPI
+#ifdef GMX_DOUBLE
+#define mpi_type MPI_DOUBLE
+#else
+#define mpi_type MPI_FLOAT
+#endif
+ MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
+ cr->mpi_comm_mygroup);
+#endif
+ }
+ else
+ {
+ for(node=0; (node<nnodes && atom+node<natoms); node++)
+ {
+ if (node > 0)
+ {
+#ifdef GMX_MPI
+ MPI_Status stat;
+ MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
+ cr->mpi_comm_mygroup,&stat);
+#undef mpi_type
+#endif
+ }
+
+ row = (atom + node)*DIM + d;
+
+ for(j=0; j<natoms; j++)
+ {
+ for(k=0; k<DIM; k++)
+ {
+ col = j*DIM + k;
+
+ if (bSparse)
+ {
+ if (col >= row && dfdx[j][k] != 0.0)
+ {
+ gmx_sparsematrix_increment_value(sparse_matrix,
+ row,col,dfdx[j][k]);
+ }
+ }
+ else
+ {
+ full_matrix[row*sz+col] = dfdx[j][k];
+ }
+ }
+ }
+ }
+ }
+
+ if (bVerbose && fplog)
+ {
+ fflush(fplog);
+ }
+ }
+ /* write progress */
+ if (MASTER(cr) && bVerbose)
+ {
+ fprintf(stderr,"\rFinished step %d out of %d",
+ min(atom+nnodes,natoms),natoms);
+ fflush(stderr);
+ }
+ }
+
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"\n\nWriting Hessian...\n");
+ gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
+ }
+
+ finish_em(fplog,cr,outf,runtime,wcycle);
+
+ runtime->nsteps_done = natoms*2;
+
+ return 0;
+}
+
--- /dev/null
--- /dev/null
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
++ *
++ * This source code is part of
++ *
++ * G R O M A C S
++ *
++ * GROningen MAchine for Chemical Simulations
++ *
++ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
++ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
++ * Copyright (c) 2001-2012, The GROMACS development team,
++ * check out http://www.gromacs.org for more information.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * If you want to redistribute modifications, please consider that
++ * scientific software is very special. Version control is crucial -
++ * bugs must be traceable. We will be happy to consider code for
++ * inclusion in the official distribution, but derived work must not
++ * be called official GROMACS. Details are found in the README & COPYING
++ * files - if they are missing, get the official version at www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the papers on the package - you can find them in the top README file.
++ *
++ * For more info, check our website at http://www.gromacs.org
++ *
++ * And Hey:
++ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
++ */
++
++#ifndef NBNXN_CUDA_TYPES_H
++#define NBNXN_CUDA_TYPES_H
++
++#include "types/nbnxn_pairlist.h"
++#include "types/nbnxn_cuda_types_ext.h"
++#include "../../gmxlib/cuda_tools/cudautils.cuh"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/** Types of electrostatics available in the CUDA nonbonded force kernels. */
++enum { eelCuEWALD, eelCuEWALD_TWIN, eelCuRF, eelCuCUT, eelCuNR };
++
++enum { eNbnxnCuKDefault, eNbnxnCuKLegacy, eNbnxnCuKOld, eNbnxnCuKNR };
++
++#define NBNXN_KVER_OLD(k) (k == eNbnxnCuKOld)
++#define NBNXN_KVER_LEGACY(k) (k == eNbnxnCuKLegacy)
++#define NBNXN_KVER_DEFAULT(k) (k == eNbnxnCuKDefault)
++
++/* Non-bonded kernel versions. */
++
++/* All structs prefixed with "cu_" hold data used in GPU calculations and
++ * are passed to the kernels, except cu_timers_t. */
++typedef struct cu_plist cu_plist_t;
++typedef struct cu_atomdata cu_atomdata_t;
++typedef struct cu_nbparam cu_nbparam_t;
++typedef struct cu_timers cu_timers_t;
++typedef struct nb_staging nb_staging_t;
++
++
++/** Staging area for temporary data. The energies get downloaded here first,
++ * before getting added to the CPU-side aggregate values.
++ */
++struct nb_staging
++{
++ float *e_lj; /**< LJ energy */
++ float *e_el; /**< electrostatic energy */
++ float3 *fshift; /**< shift forces */
++};
++
++/** Nonbonded atom data -- both inputs and outputs. */
++struct cu_atomdata
++{
++ int natoms; /**< number of atoms */
++ int natoms_local; /**< number of local atoms */
++ int nalloc; /**< allocation size for the atom data (xq, f) */
++
++ float4 *xq; /**< atom coordinates + charges, size natoms */
++ float3 *f; /**< force output array, size natoms */
++ /* TODO: try float2 for the energies */
++ float *e_lj, /**< LJ energy output, size 1 */
++ *e_el; /**< Electrostatics energy output, size 1 */
++
++ float3 *fshift; /**< shift forces */
++
++ int ntypes; /**< number of atom types */
++ int *atom_types; /**< atom type indices, size natoms */
++
++ float3 *shift_vec; /**< shifts */
++ bool bShiftVecUploaded; /**< true if the shift vector has been uploaded */
++};
++
++/** Parameters required for the CUDA nonbonded calculations. */
++struct cu_nbparam
++{
++ int eeltype; /**< type of electrostatics */
++
++ float epsfac; /**< charge multiplication factor */
++ float c_rf, /**< Reaction-field/plain cutoff electrostatics const. */
++ two_k_rf; /**< Reaction-field electrostatics constant */
++ float ewald_beta; /**< Ewald/PME parameter */
++ float sh_ewald; /**< Ewald/PME correction term */
++ float rvdw_sq; /**< VdW cut-off */
++ float rcoulomb_sq; /**< Coulomb cut-off */
++ float rlist_sq; /**< pair-list cut-off */
++ float sh_invrc6; /**< LJ potential correction term */
++
++ float *nbfp; /**< nonbonded parameter table with C6/C12 pairs */
++
++ /* Ewald Coulomb force table data */
++ int coulomb_tab_size; /**< table size (s.t. it fits in texture cache) */
++ float coulomb_tab_scale; /**< table scale/spacing */
++ float *coulomb_tab; /**< pointer to the table in the device memory */
++};
++
++/** Pair list data */
++struct cu_plist
++{
++ int na_c; /**< number of atoms per cluster */
++
++ int nsci; /**< size of sci, # of i clusters in the list */
++ int sci_nalloc; /**< allocation size of sci */
++ nbnxn_sci_t *sci; /**< list of i-cluster ("super-clusters") */
++
++ int ncj4; /**< total # of 4*j clusters */
++ int cj4_nalloc; /**< allocation size of cj4 */
++ nbnxn_cj4_t *cj4; /**< 4*j cluster list, contains j cluster number
++ and index into the i cluster list */
++ nbnxn_excl_t *excl; /**< atom interaction bits */
++ int nexcl; /**< count for excl */
++ int excl_nalloc;/**< allocation size of excl */
++
++ bool bDoPrune; /**< true if pair-list pruning needs to be
++ done during the current step */
++};
++
++/** CUDA events used for timing GPU kernels and H2D/D2H transfers.
++ * The two-sized arrays hold the local and non-local values and should always
++ * be indexed with eintLocal/eintNonlocal.
++ */
++struct cu_timers
++{
++ cudaEvent_t start_atdat; /**< start event for atom data transfer (every PS step) */
++ cudaEvent_t stop_atdat; /**< stop event for atom data transfer (every PS step) */
++ cudaEvent_t start_nb_h2d[2]; /**< start events for x/q H2D transfers (l/nl, every step) */
++ cudaEvent_t stop_nb_h2d[2]; /**< stop events for x/q H2D transfers (l/nl, every step) */
++ cudaEvent_t start_nb_d2h[2]; /**< start events for f D2H transfer (l/nl, every step) */
++ cudaEvent_t stop_nb_d2h[2]; /**< stop events for f D2H transfer (l/nl, every step) */
++ cudaEvent_t start_pl_h2d[2]; /**< start events for pair-list H2D transfers (l/nl, every PS step) */
++ cudaEvent_t stop_pl_h2d[2]; /**< stop events for pair-list H2D transfers (l/nl, every PS step) */
++ cudaEvent_t start_nb_k[2]; /**< start event for non-bonded kernels (l/nl, every step) */
++ cudaEvent_t stop_nb_k[2]; /**< stop event non-bonded kernels (l/nl, every step) */
++};
++
++/** Main data structure for CUDA nonbonded force calculations. */
++struct nbnxn_cuda
++{
++ cuda_dev_info_t *dev_info; /**< CUDA device information */
++ int kernel_ver; /**< The version of the kernel to be executed on the
++ device in use, possible values: eNbnxnCuK* */
++ bool bUseTwoStreams; /**< true if doing both local/non-local NB work on GPU */
++ bool bUseStreamSync; /**< true if the standard cudaStreamSynchronize is used
++ and not memory polling-based waiting */
++ cu_atomdata_t *atdat; /**< atom data */
++ cu_nbparam_t *nbparam; /**< parameters required for the non-bonded calc. */
++ cu_plist_t *plist[2]; /**< pair-list data structures (local and non-local) */
++ nb_staging_t nbst; /**< staging area where fshift/energies get downloaded */
++
++ cudaStream_t stream[2]; /**< local and non-local GPU streams */
++
++ /** events used for synchronization */
++ cudaEvent_t nonlocal_done; /**< event triggered when the non-local non-bonded kernel
++ is done (and the local transfer can proceed) */
++ cudaEvent_t misc_ops_done; /**< event triggered when the operations that precede the
++ main force calculations are done (e.g. buffer 0-ing) */
++
++ /* NOTE: With current CUDA versions (<=5.0) timing doesn't work with multiple
++ * concurrent streams, so we won't time if both l/nl work is done on GPUs.
++ * Timer init/uninit is still done even with timing off so only the condition
++ * setting bDoTime needs to be change if this CUDA "feature" gets fixed. */
++ bool bDoTime; /**< True if event-based timing is enabled. */
++ cu_timers_t *timers; /**< CUDA event-based timers. */
++ wallclock_gpu_t *timings; /**< Timing data. */
++};
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* NBNXN_CUDA_TYPES_H */
--- /dev/null
- get_nsgrid_boundaries(grid,NULL,box,NULL,NULL,NULL,
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef GMX_THREAD_SHM_FDECOMP
+#include <pthread.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+#include "sysstuff.h"
+#include "smalloc.h"
+#include "macros.h"
+#include "maths.h"
+#include "vec.h"
+#include "network.h"
+#include "nsgrid.h"
+#include "force.h"
+#include "nonbonded.h"
+#include "ns.h"
+#include "pbc.h"
+#include "names.h"
+#include "gmx_fatal.h"
+#include "nrnb.h"
+#include "txtdump.h"
+#include "mtop_util.h"
+
+#include "domdec.h"
+#include "adress.h"
+
+
+/*
+ * E X C L U S I O N H A N D L I N G
+ */
+
+#ifdef DEBUG
+static void SETEXCL_(t_excl e[],atom_id i,atom_id j)
+{ e[j] = e[j] | (1<<i); }
+static void RMEXCL_(t_excl e[],atom_id i,atom_id j)
+{ e[j]=e[j] & ~(1<<i); }
+static gmx_bool ISEXCL_(t_excl e[],atom_id i,atom_id j)
+{ return (gmx_bool)(e[j] & (1<<i)); }
+static gmx_bool NOTEXCL_(t_excl e[],atom_id i,atom_id j)
+{ return !(ISEXCL(e,i,j)); }
+#else
+#define SETEXCL(e,i,j) (e)[((atom_id) (j))] |= (1<<((atom_id) (i)))
+#define RMEXCL(e,i,j) (e)[((atom_id) (j))] &= (~(1<<((atom_id) (i))))
+#define ISEXCL(e,i,j) (gmx_bool) ((e)[((atom_id) (j))] & (1<<((atom_id) (i))))
+#define NOTEXCL(e,i,j) !(ISEXCL(e,i,j))
+#endif
+
+/************************************************
+ *
+ * U T I L I T I E S F O R N S
+ *
+ ************************************************/
+
+static void reallocate_nblist(t_nblist *nl)
+{
+ if (gmx_debug_at)
+ {
+ fprintf(debug,"reallocating neigborlist il_code=%d, maxnri=%d\n",
+ nl->il_code,nl->maxnri);
+ }
+ srenew(nl->iinr, nl->maxnri);
+ if (nl->enlist == enlistCG_CG)
+ {
+ srenew(nl->iinr_end,nl->maxnri);
+ }
+ srenew(nl->gid, nl->maxnri);
+ srenew(nl->shift, nl->maxnri);
+ srenew(nl->jindex, nl->maxnri+1);
+}
+
+/* ivdw/icoul are used to determine the type of interaction, so we
+ * can set an innerloop index here. The obvious choice for this would have
+ * been the vdwtype/coultype values in the forcerecord, but unfortunately
+ * those types are braindead - for instance both Buckingham and normal
+ * Lennard-Jones use the same value (evdwCUT), and a separate gmx_boolean variable
+ * to determine which interaction is used. There is further no special value
+ * for 'no interaction'. For backward compatibility with old TPR files we won't
+ * change this in the 3.x series, so when calling this routine you should use:
+ *
+ * icoul=0 no coulomb interaction
+ * icoul=1 cutoff standard coulomb
+ * icoul=2 reaction-field coulomb
+ * icoul=3 tabulated coulomb
+ *
+ * ivdw=0 no vdw interaction
+ * ivdw=1 standard L-J interaction
+ * ivdw=2 Buckingham
+ * ivdw=3 tabulated vdw.
+ *
+ * Kind of ugly, but it works.
+ */
+static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
+ int maxsr,int maxlr,
+ int ivdw, int icoul,
+ gmx_bool bfree, int enlist)
+{
+ t_nblist *nl;
+ int homenr;
+ int i,nn;
+
+ int inloop[20] =
+ {
+ eNR_NBKERNEL_NONE,
+ eNR_NBKERNEL010,
+ eNR_NBKERNEL020,
+ eNR_NBKERNEL030,
+ eNR_NBKERNEL100,
+ eNR_NBKERNEL110,
+ eNR_NBKERNEL120,
+ eNR_NBKERNEL130,
+ eNR_NBKERNEL200,
+ eNR_NBKERNEL210,
+ eNR_NBKERNEL220,
+ eNR_NBKERNEL230,
+ eNR_NBKERNEL300,
+ eNR_NBKERNEL310,
+ eNR_NBKERNEL320,
+ eNR_NBKERNEL330,
+ eNR_NBKERNEL400,
+ eNR_NBKERNEL410,
+ eNR_NBKERNEL_NONE,
+ eNR_NBKERNEL430
+ };
+
+ for(i=0; (i<2); i++)
+ {
+ nl = (i == 0) ? nl_sr : nl_lr;
+ homenr = (i == 0) ? maxsr : maxlr;
+
+ if (nl == NULL)
+ {
+ continue;
+ }
+
+ /* Set coul/vdw in neighborlist, and for the normal loops we determine
+ * an index of which one to call.
+ */
+ nl->ivdw = ivdw;
+ nl->icoul = icoul;
+ nl->free_energy = bfree;
+
+ if (bfree)
+ {
+ nl->enlist = enlistATOM_ATOM;
+ nl->il_code = eNR_NBKERNEL_FREE_ENERGY;
+ }
+ else
+ {
+ nl->enlist = enlist;
+
+ nn = inloop[4*icoul + ivdw];
+
+ /* solvent loops follow directly after the corresponding
+ * ordinary loops, in the order:
+ *
+ * SPC, SPC-SPC, TIP4p, TIP4p-TIP4p
+ *
+ */
+ switch (enlist) {
+ case enlistATOM_ATOM:
+ case enlistCG_CG:
+ break;
+ case enlistSPC_ATOM: nn += 1; break;
+ case enlistSPC_SPC: nn += 2; break;
+ case enlistTIP4P_ATOM: nn += 3; break;
+ case enlistTIP4P_TIP4P: nn += 4; break;
+ }
+
+ nl->il_code = nn;
+ }
+
+ if (debug)
+ fprintf(debug,"Initiating neighbourlist type %d for %s interactions,\nwith %d SR, %d LR atoms.\n",
+ nl->il_code,ENLISTTYPE(enlist),maxsr,maxlr);
+
+ /* maxnri is influenced by the number of shifts (maximum is 8)
+ * and the number of energy groups.
+ * If it is not enough, nl memory will be reallocated during the run.
+ * 4 seems to be a reasonable factor, which only causes reallocation
+ * during runs with tiny and many energygroups.
+ */
+ nl->maxnri = homenr*4;
+ nl->maxnrj = 0;
+ nl->maxlen = 0;
+ nl->nri = -1;
+ nl->nrj = 0;
+ nl->iinr = NULL;
+ nl->gid = NULL;
+ nl->shift = NULL;
+ nl->jindex = NULL;
+ reallocate_nblist(nl);
+ nl->jindex[0] = 0;
+#ifdef GMX_THREAD_SHM_FDECOMP
+ nl->counter = 0;
+ snew(nl->mtx,1);
+ pthread_mutex_init(nl->mtx,NULL);
+#endif
+ }
+}
+
+void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
+{
+ /* Make maxlr tunable! (does not seem to be a big difference though)
+ * This parameter determines the number of i particles in a long range
+ * neighbourlist. Too few means many function calls, too many means
+ * cache trashing.
+ */
+ int maxsr,maxsr_wat,maxlr,maxlr_wat;
+ int icoul,icoulf,ivdw;
+ int solvent;
+ int enlist_def,enlist_w,enlist_ww;
+ int i;
+ t_nblists *nbl;
+
+ /* maxsr = homenr-fr->nWatMol*3; */
+ maxsr = homenr;
+
+ if (maxsr < 0)
+ {
+ gmx_fatal(FARGS,"%s, %d: Negative number of short range atoms.\n"
+ "Call your Gromacs dealer for assistance.",__FILE__,__LINE__);
+ }
+ /* This is just for initial allocation, so we do not reallocate
+ * all the nlist arrays many times in a row.
+ * The numbers seem very accurate, but they are uncritical.
+ */
+ maxsr_wat = min(fr->nWatMol,(homenr+2)/3);
+ if (fr->bTwinRange)
+ {
+ maxlr = 50;
+ maxlr_wat = min(maxsr_wat,maxlr);
+ }
+ else
+ {
+ maxlr = maxlr_wat = 0;
+ }
+
+ /* Determine the values for icoul/ivdw. */
+ /* Start with GB */
+ if(fr->bGB)
+ {
+ icoul=enbcoulGB;
+ }
+ else if (fr->bcoultab)
+ {
+ icoul = enbcoulTAB;
+ }
+ else if (EEL_RF(fr->eeltype))
+ {
+ icoul = enbcoulRF;
+ }
+ else
+ {
+ icoul = enbcoulOOR;
+ }
+
+ if (fr->bvdwtab)
+ {
+ ivdw = enbvdwTAB;
+ }
+ else if (fr->bBHAM)
+ {
+ ivdw = enbvdwBHAM;
+ }
+ else
+ {
+ ivdw = enbvdwLJ;
+ }
+
+ fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0);
+ if (!fr->ns.bCGlist)
+ {
+ enlist_def = enlistATOM_ATOM;
+ }
+ else
+ {
+ enlist_def = enlistCG_CG;
+ if (log != NULL)
+ {
+ fprintf(log,"\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
+ }
+ }
+
+ if (fr->solvent_opt == esolTIP4P) {
+ enlist_w = enlistTIP4P_ATOM;
+ enlist_ww = enlistTIP4P_TIP4P;
+ } else {
+ enlist_w = enlistSPC_ATOM;
+ enlist_ww = enlistSPC_SPC;
+ }
+
+ for(i=0; i<fr->nnblists; i++)
+ {
+ nbl = &(fr->nblists[i]);
+ init_nblist(&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
+ maxsr,maxlr,ivdw,icoul,FALSE,enlist_def);
+ init_nblist(&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
+ maxsr,maxlr,ivdw,0,FALSE,enlist_def);
+ init_nblist(&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
+ maxsr,maxlr,0,icoul,FALSE,enlist_def);
+ init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
+ maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_w);
+ init_nblist(&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
+ maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_w);
+ init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
+ maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_ww);
+ init_nblist(&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
+ maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_ww);
+
+ if (fr->efep != efepNO)
+ {
+ if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */
+ {
+ icoulf = enbcoulFEWALD;
+ }
+ else
+ {
+ icoulf = icoul;
+ }
+
+ init_nblist(&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
+ maxsr,maxlr,ivdw,icoulf,TRUE,enlistATOM_ATOM);
+ init_nblist(&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
+ maxsr,maxlr,ivdw,0,TRUE,enlistATOM_ATOM);
+ init_nblist(&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
+ maxsr,maxlr,0,icoulf,TRUE,enlistATOM_ATOM);
+ }
+ }
+ /* QMMM MM list */
+ if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
+ {
+ init_nblist(&fr->QMMMlist,NULL,
+ maxsr,maxlr,0,icoul,FALSE,enlistATOM_ATOM);
+ }
+
+ fr->ns.nblist_initialized=TRUE;
+}
+
+static void reset_nblist(t_nblist *nl)
+{
+ nl->nri = -1;
+ nl->nrj = 0;
+ nl->maxlen = 0;
+ if (nl->jindex)
+ {
+ nl->jindex[0] = 0;
+ }
+}
+
+static void reset_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL)
+{
+ int n,i;
+
+ if (bLR)
+ {
+ reset_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
+ }
+ else
+ {
+ for(n=0; n<fr->nnblists; n++)
+ {
+ for(i=0; i<eNL_NR; i++)
+ {
+ reset_nblist(&(fr->nblists[n].nlist_sr[i]));
+ }
+ }
+ if (fr->bQMMM)
+ {
+ /* only reset the short-range nblist */
+ reset_nblist(&(fr->QMMMlist));
+ }
+ }
+}
+
+
+
+
+static inline void new_i_nblist(t_nblist *nlist,
+ gmx_bool bLR,atom_id i_atom,int shift,int gid)
+{
+ int i,k,nri,nshift;
+
+ nri = nlist->nri;
+
+ /* Check whether we have to increase the i counter */
+ if ((nri == -1) ||
+ (nlist->iinr[nri] != i_atom) ||
+ (nlist->shift[nri] != shift) ||
+ (nlist->gid[nri] != gid))
+ {
+ /* This is something else. Now see if any entries have
+ * been added in the list of the previous atom.
+ */
+ if ((nri == -1) ||
+ ((nlist->jindex[nri+1] > nlist->jindex[nri]) &&
+ (nlist->gid[nri] != -1)))
+ {
+ /* If so increase the counter */
+ nlist->nri++;
+ nri++;
+ if (nlist->nri >= nlist->maxnri)
+ {
+ nlist->maxnri += over_alloc_large(nlist->nri);
+ reallocate_nblist(nlist);
+ }
+ }
+ /* Set the number of neighbours and the atom number */
+ nlist->jindex[nri+1] = nlist->jindex[nri];
+ nlist->iinr[nri] = i_atom;
+ nlist->gid[nri] = gid;
+ nlist->shift[nri] = shift;
+ }
+}
+
+static inline void close_i_nblist(t_nblist *nlist)
+{
+ int nri = nlist->nri;
+ int len;
+
+ if (nri >= 0)
+ {
+ nlist->jindex[nri+1] = nlist->nrj;
+
+ len=nlist->nrj - nlist->jindex[nri];
+
+ /* nlist length for water i molecules is treated statically
+ * in the innerloops
+ */
+ if (len > nlist->maxlen)
+ {
+ nlist->maxlen = len;
+ }
+ }
+}
+
+static inline void close_nblist(t_nblist *nlist)
+{
+ /* Only close this nblist when it has been initialized.
+ * Avoid the creation of i-lists with no j-particles.
+ */
+ if (nlist->nrj == 0)
+ {
+ /* Some assembly kernels do not support empty lists,
+ * make sure here that we don't generate any empty lists.
+ * With the current ns code this branch is taken in two cases:
+ * No i-particles at all: nri=-1 here
+ * There are i-particles, but no j-particles; nri=0 here
+ */
+ nlist->nri = 0;
+ }
+ else
+ {
+ /* Close list number nri by incrementing the count */
+ nlist->nri++;
+ }
+}
+
+static inline void close_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL,
+ gmx_bool bMakeQMMMnblist)
+{
+ int n,i;
+
+ if (bMakeQMMMnblist) {
+ if (!bLR)
+ {
+ close_nblist(&(fr->QMMMlist));
+ }
+ }
+ else
+ {
+ if (bLR)
+ {
+ close_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
+ }
+ else
+ {
+ for(n=0; n<fr->nnblists; n++)
+ {
+ for(i=0; (i<eNL_NR); i++)
+ {
+ close_nblist(&(fr->nblists[n].nlist_sr[i]));
+ }
+ }
+ }
+ }
+}
+
+static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
+{
+ int nrj=nlist->nrj;
+
+ if (nlist->nrj >= nlist->maxnrj)
+ {
+ nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
+ if (gmx_debug_at)
+ fprintf(debug,"Increasing %s nblist %s j size to %d\n",
+ bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
+
+ srenew(nlist->jjnr,nlist->maxnrj);
+ }
+
+ nlist->jjnr[nrj] = j_atom;
+ nlist->nrj ++;
+}
+
+static inline void add_j_to_nblist_cg(t_nblist *nlist,
+ atom_id j_start,int j_end,
+ t_excl *bexcl,gmx_bool i_is_j,
+ gmx_bool bLR)
+{
+ int nrj=nlist->nrj;
+ int j;
+
+ if (nlist->nrj >= nlist->maxnrj)
+ {
+ nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
+ if (gmx_debug_at)
+ fprintf(debug,"Increasing %s nblist %s j size to %d\n",
+ bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
+
+ srenew(nlist->jjnr ,nlist->maxnrj);
+ srenew(nlist->jjnr_end,nlist->maxnrj);
+ srenew(nlist->excl ,nlist->maxnrj*MAX_CGCGSIZE);
+ }
+
+ nlist->jjnr[nrj] = j_start;
+ nlist->jjnr_end[nrj] = j_end;
+
+ if (j_end - j_start > MAX_CGCGSIZE)
+ {
+ gmx_fatal(FARGS,"The charge-group - charge-group neighborlist do not support charge groups larger than %d, found a charge group of size %d",MAX_CGCGSIZE,j_end-j_start);
+ }
+
+ /* Set the exclusions */
+ for(j=j_start; j<j_end; j++)
+ {
+ nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j];
+ }
+ if (i_is_j)
+ {
+ /* Avoid double counting of intra-cg interactions */
+ for(j=1; j<j_end-j_start; j++)
+ {
+ nlist->excl[nrj*MAX_CGCGSIZE + j] |= (1<<j) - 1;
+ }
+ }
+
+ nlist->nrj ++;
+}
+
+typedef void
+put_in_list_t(gmx_bool bHaveVdW[],
+ int ngid,
+ t_mdatoms * md,
+ int icg,
+ int jgid,
+ int nj,
+ atom_id jjcg[],
+ atom_id index[],
+ t_excl bExcl[],
+ int shift,
+ t_forcerec * fr,
+ gmx_bool bLR,
+ gmx_bool bDoVdW,
+ gmx_bool bDoCoul);
+
/* Atom-based put_in_list implementation: distributes all interacting
 * atom pairs between i charge group icg and the nj j charge groups in
 * jjcg[] over the per-interaction-type neighbor lists (VdW-only,
 * Coulomb-only, combined, water-optimized and free-energy variants).
 */
static void
put_in_list_at(gmx_bool              bHaveVdW[],
               int                   ngid,
               t_mdatoms *           md,
               int                   icg,
               int                   jgid,
               int                   nj,
               atom_id               jjcg[],
               atom_id               index[],
               t_excl                bExcl[],
               int                   shift,
               t_forcerec *          fr,
               gmx_bool              bLR,
               gmx_bool              bDoVdW,
               gmx_bool              bDoCoul)
{
    /* The a[] index has been removed,
     * to put it back in i_atom should be a[i0] and jj should be a[jj].
     */
    t_nblist *   vdwc;
    t_nblist *   vdw;
    t_nblist *   coul;
    t_nblist *   vdwc_free  = NULL;
    t_nblist *   vdw_free   = NULL;
    t_nblist *   coul_free  = NULL;
    t_nblist *   vdwc_ww    = NULL;
    t_nblist *   coul_ww    = NULL;

    int       i,j,jcg,igid,gid,nbl_ind,ind_ij;
    atom_id   jj,jj0,jj1,i_atom;
    int       i0,nicg,len;

    int       *cginfo;
    int       *type,*typeB;
    real      *charge,*chargeB;
    real      qi,qiB,qq,rlj;
    gmx_bool  bFreeEnergy,bFree,bFreeJ,bNotEx,*bPert;
    gmx_bool  bDoVdW_i,bDoCoul_i,bDoCoul_i_sol;
    int       iwater,jwater;
    t_nblist  *nlist;

    /* Copy some pointers */
    cginfo  = fr->cginfo;
    charge  = md->chargeA;
    chargeB = md->chargeB;
    type    = md->typeA;
    typeB   = md->typeB;
    bPert   = md->bPerturbed;

    /* Get atom range */
    i0     = index[icg];
    nicg   = index[icg+1]-i0;

    /* Get the i charge group info */
    igid   = GET_CGINFO_GID(cginfo[icg]);
    iwater = GET_CGINFO_SOLOPT(cginfo[icg]);

    bFreeEnergy = FALSE;
    if (md->nPerturbed)
    {
        /* Check if any of the particles involved are perturbed.
         * If not we can do the cheaper normal put_in_list
         * and use more solvent optimization.
         */
        for(i=0; i<nicg; i++)
        {
            bFreeEnergy |= bPert[i0+i];
        }
        /* Loop over the j charge groups */
        for(j=0; (j<nj && !bFreeEnergy); j++)
        {
            jcg = jjcg[j];
            jj0 = index[jcg];
            jj1 = index[jcg+1];
            /* Finally loop over the atoms in the j-charge group */
            for(jj=jj0; jj<jj1; jj++)
            {
                bFreeEnergy |= bPert[jj];
            }
        }
    }

    /* Unpack pointers to neighbourlist structs */
    if (fr->nnblists == 1)
    {
        nbl_ind = 0;
    }
    else
    {
        nbl_ind = fr->gid2nblists[GID(igid,jgid,ngid)];
    }
    if (bLR)
    {
        nlist = fr->nblists[nbl_ind].nlist_lr;
    }
    else
    {
        nlist = fr->nblists[nbl_ind].nlist_sr;
    }

    /* Select the lists to fill: water-optimized lists when the i charge
     * group is a recognized solvent type, generic lists otherwise.
     */
    if (iwater != esolNO)
    {
        vdwc = &nlist[eNL_VDWQQ_WATER];
        vdw  = &nlist[eNL_VDW];
        coul = &nlist[eNL_QQ_WATER];
#ifndef DISABLE_WATERWATER_NLIST
        vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER];
        coul_ww = &nlist[eNL_QQ_WATERWATER];
#endif
    }
    else
    {
        vdwc = &nlist[eNL_VDWQQ];
        vdw  = &nlist[eNL_VDW];
        coul = &nlist[eNL_QQ];
    }

    if (!bFreeEnergy)
    {
        if (iwater != esolNO)
        {
            /* Solvent-optimized case: one i entry for the whole water
             * molecule, keyed on its first atom.
             */
            /* Loop over the atoms in the i charge group */
            i_atom  = i0;
            gid     = GID(igid,jgid,ngid);
            /* Create new i_atom for each energy group */
            if (bDoCoul && bDoVdW)
            {
                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
#ifndef DISABLE_WATERWATER_NLIST
                new_i_nblist(vdwc_ww,bLR,i_atom,shift,gid);
#endif
            }
            if (bDoVdW)
            {
                new_i_nblist(vdw,bLR,i_atom,shift,gid);
            }
            if (bDoCoul)
            {
                new_i_nblist(coul,bLR,i_atom,shift,gid);
#ifndef DISABLE_WATERWATER_NLIST
                new_i_nblist(coul_ww,bLR,i_atom,shift,gid);
#endif
            }
            /* Loop over the j charge groups */
            for(j=0; (j<nj); j++)
            {
                jcg=jjcg[j];

                if (jcg == icg)
                {
                    continue;
                }

                jj0    = index[jcg];
                jwater = GET_CGINFO_SOLOPT(cginfo[jcg]);

                if (iwater == esolSPC && jwater == esolSPC)
                {
                    /* Interaction between two SPC molecules */
                    if (!bDoCoul)
                    {
                        /* VdW only - only first atoms in each water interact */
                        add_j_to_nblist(vdw,jj0,bLR);
                    }
                    else
                    {
#ifdef DISABLE_WATERWATER_NLIST
                        /* Add entries for the three atoms - only do VdW if we need to */
                        if (!bDoVdW)
                        {
                            add_j_to_nblist(coul,jj0,bLR);
                        }
                        else
                        {
                            add_j_to_nblist(vdwc,jj0,bLR);
                        }
                        add_j_to_nblist(coul,jj0+1,bLR);
                        add_j_to_nblist(coul,jj0+2,bLR);
#else
                        /* One entry for the entire water-water interaction */
                        if (!bDoVdW)
                        {
                            add_j_to_nblist(coul_ww,jj0,bLR);
                        }
                        else
                        {
                            add_j_to_nblist(vdwc_ww,jj0,bLR);
                        }
#endif
                    }
                }
                else if (iwater == esolTIP4P && jwater == esolTIP4P)
                {
                    /* Interaction between two TIP4p molecules */
                    if (!bDoCoul)
                    {
                        /* VdW only - only first atoms in each water interact */
                        add_j_to_nblist(vdw,jj0,bLR);
                    }
                    else
                    {
#ifdef DISABLE_WATERWATER_NLIST
                        /* Add entries for the four atoms - only do VdW if we need to */
                        /* NOTE(review): unlike the SPC branch above, jj0 goes to
                         * the vdw list only and gets no Coulomb entry -
                         * presumably because the TIP4P first atom carries no
                         * charge; confirm.
                         */
                        if (bDoVdW)
                        {
                            add_j_to_nblist(vdw,jj0,bLR);
                        }
                        add_j_to_nblist(coul,jj0+1,bLR);
                        add_j_to_nblist(coul,jj0+2,bLR);
                        add_j_to_nblist(coul,jj0+3,bLR);
#else
                        /* One entry for the entire water-water interaction */
                        if (!bDoVdW)
                        {
                            add_j_to_nblist(coul_ww,jj0,bLR);
                        }
                        else
                        {
                            add_j_to_nblist(vdwc_ww,jj0,bLR);
                        }
#endif
                    }
                }
                else
                {
                    /* j charge group is not water, but i is.
                     * Add entries to the water-other_atom lists; the geometry of the water
                     * molecule doesn't matter - that is taken care of in the nonbonded kernel,
                     * so we don't care if it is SPC or TIP4P...
                     */

                    jj1 = index[jcg+1];

                    if (!bDoVdW)
                    {
                        for(jj=jj0; (jj<jj1); jj++)
                        {
                            if (charge[jj] != 0)
                            {
                                add_j_to_nblist(coul,jj,bLR);
                            }
                        }
                    }
                    else if (!bDoCoul)
                    {
                        for(jj=jj0; (jj<jj1); jj++)
                        {
                            if (bHaveVdW[type[jj]])
                            {
                                add_j_to_nblist(vdw,jj,bLR);
                            }
                        }
                    }
                    else
                    {
                        /* _charge_ _groups_ interact with both coulomb and LJ */
                        /* Check which atoms we should add to the lists! */
                        for(jj=jj0; (jj<jj1); jj++)
                        {
                            if (bHaveVdW[type[jj]])
                            {
                                if (charge[jj] != 0)
                                {
                                    add_j_to_nblist(vdwc,jj,bLR);
                                }
                                else
                                {
                                    add_j_to_nblist(vdw,jj,bLR);
                                }
                            }
                            else if (charge[jj] != 0)
                            {
                                add_j_to_nblist(coul,jj,bLR);
                            }
                        }
                    }
                }
            }
            close_i_nblist(vdw);
            close_i_nblist(coul);
            close_i_nblist(vdwc);
#ifndef DISABLE_WATERWATER_NLIST
            close_i_nblist(coul_ww);
            close_i_nblist(vdwc_ww);
#endif
        }
        else
        {
            /* no solvent as i charge group */
            /* Loop over the atoms in the i charge group */
            for(i=0; i<nicg; i++)
            {
                i_atom = i0+i;
                gid    = GID(igid,jgid,ngid);
                qi     = charge[i_atom];

                /* Create new i_atom for each energy group */
                if (bDoVdW && bDoCoul)
                {
                    new_i_nblist(vdwc,bLR,i_atom,shift,gid);
                }
                if (bDoVdW)
                {
                    new_i_nblist(vdw,bLR,i_atom,shift,gid);
                }
                if (bDoCoul)
                {
                    new_i_nblist(coul,bLR,i_atom,shift,gid);
                }
                /* Which interaction types does atom i actually need? */
                bDoVdW_i  = (bDoVdW  && bHaveVdW[type[i_atom]]);
                bDoCoul_i = (bDoCoul && qi!=0);

                if (bDoVdW_i || bDoCoul_i)
                {
                    /* Loop over the j charge groups */
                    for(j=0; (j<nj); j++)
                    {
                        jcg=jjcg[j];

                        /* For icg==jcg only pair with higher-index atoms
                         * to avoid double counting.
                         */
                        if (jcg == icg)
                        {
                            jj0 = i0 + i + 1;
                        }
                        else
                        {
                            jj0 = index[jcg];
                        }

                        jj1=index[jcg+1];
                        /* Finally loop over the atoms in the j-charge group */
                        for(jj=jj0; jj<jj1; jj++)
                        {
                            bNotEx = NOTEXCL(bExcl,i,jj);

                            if (bNotEx)
                            {
                                if (!bDoVdW_i)
                                {
                                    if (charge[jj] != 0)
                                    {
                                        add_j_to_nblist(coul,jj,bLR);
                                    }
                                }
                                else if (!bDoCoul_i)
                                {
                                    if (bHaveVdW[type[jj]])
                                    {
                                        add_j_to_nblist(vdw,jj,bLR);
                                    }
                                }
                                else
                                {
                                    if (bHaveVdW[type[jj]])
                                    {
                                        if (charge[jj] != 0)
                                        {
                                            add_j_to_nblist(vdwc,jj,bLR);
                                        }
                                        else
                                        {
                                            add_j_to_nblist(vdw,jj,bLR);
                                        }
                                    }
                                    else if (charge[jj] != 0)
                                    {
                                        add_j_to_nblist(coul,jj,bLR);
                                    }
                                }
                            }
                        }
                    }
                }
                close_i_nblist(vdw);
                close_i_nblist(coul);
                close_i_nblist(vdwc);
            }
        }
    }
    else
    {
        /* we are doing free energy */
        vdwc_free = &nlist[eNL_VDWQQ_FREE];
        vdw_free  = &nlist[eNL_VDW_FREE];
        coul_free = &nlist[eNL_QQ_FREE];
        /* Loop over the atoms in the i charge group */
        for(i=0; i<nicg; i++)
        {
            i_atom = i0+i;
            gid    = GID(igid,jgid,ngid);
            qi     = charge[i_atom];
            qiB    = chargeB[i_atom];

            /* Create new i_atom for each energy group */
            if (bDoVdW && bDoCoul)
                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
            if (bDoVdW)
                new_i_nblist(vdw,bLR,i_atom,shift,gid);
            if (bDoCoul)
                new_i_nblist(coul,bLR,i_atom,shift,gid);

            new_i_nblist(vdw_free,bLR,i_atom,shift,gid);
            new_i_nblist(coul_free,bLR,i_atom,shift,gid);
            new_i_nblist(vdwc_free,bLR,i_atom,shift,gid);

            /* i needs an interaction type if either the A or B topology
             * state has it.
             */
            bDoVdW_i  = (bDoVdW  &&
                         (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]]));
            bDoCoul_i = (bDoCoul && (qi!=0 || qiB!=0));
            /* For TIP4P the first atom does not have a charge,
             * but the last three do. So we should still put an atom
             * without LJ but with charge in the water-atom neighborlist
             * for a TIP4p i charge group.
             * For SPC type water the first atom has LJ and charge,
             * so there is no such problem.
             */
            if (iwater == esolNO)
            {
                bDoCoul_i_sol = bDoCoul_i;
            }
            else
            {
                bDoCoul_i_sol = bDoCoul;
            }

            if (bDoVdW_i || bDoCoul_i_sol)
            {
                /* Loop over the j charge groups */
                for(j=0; (j<nj); j++)
                {
                    jcg=jjcg[j];

                    /* For icg==jcg only pair with higher-index atoms
                     * to avoid double counting.
                     */
                    if (jcg == icg)
                    {
                        jj0 = i0 + i + 1;
                    }
                    else
                    {
                        jj0 = index[jcg];
                    }

                    jj1=index[jcg+1];
                    /* Finally loop over the atoms in the j-charge group */
                    bFree = bPert[i_atom];
                    for(jj=jj0; (jj<jj1); jj++)
                    {
                        bFreeJ = bFree || bPert[jj];
                        /* Complicated if, because the water H's should also
                         * see perturbed j-particles
                         */
                        if (iwater==esolNO || i==0 || bFreeJ)
                        {
                            bNotEx = NOTEXCL(bExcl,i,jj);

                            if (bNotEx)
                            {
                                if (bFreeJ)
                                {
                                    /* Perturbed pair: goes to the
                                     * free-energy lists.
                                     */
                                    if (!bDoVdW_i)
                                    {
                                        if (charge[jj]!=0 || chargeB[jj]!=0)
                                        {
                                            add_j_to_nblist(coul_free,jj,bLR);
                                        }
                                    }
                                    else if (!bDoCoul_i)
                                    {
                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
                                        {
                                            add_j_to_nblist(vdw_free,jj,bLR);
                                        }
                                    }
                                    else
                                    {
                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
                                        {
                                            if (charge[jj]!=0 || chargeB[jj]!=0)
                                            {
                                                add_j_to_nblist(vdwc_free,jj,bLR);
                                            }
                                            else
                                            {
                                                add_j_to_nblist(vdw_free,jj,bLR);
                                            }
                                        }
                                        else if (charge[jj]!=0 || chargeB[jj]!=0)
                                            add_j_to_nblist(coul_free,jj,bLR);
                                    }
                                }
                                else if (!bDoVdW_i)
                                {
                                    /* This is done whether or not bWater is set */
                                    if (charge[jj] != 0)
                                    {
                                        add_j_to_nblist(coul,jj,bLR);
                                    }
                                }
                                else if (!bDoCoul_i_sol)
                                {
                                    if (bHaveVdW[type[jj]])
                                    {
                                        add_j_to_nblist(vdw,jj,bLR);
                                    }
                                }
                                else
                                {
                                    if (bHaveVdW[type[jj]])
                                    {
                                        if (charge[jj] != 0)
                                        {
                                            add_j_to_nblist(vdwc,jj,bLR);
                                        }
                                        else
                                        {
                                            add_j_to_nblist(vdw,jj,bLR);
                                        }
                                    }
                                    else if (charge[jj] != 0)
                                    {
                                        add_j_to_nblist(coul,jj,bLR);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            close_i_nblist(vdw);
            close_i_nblist(coul);
            close_i_nblist(vdwc);
            close_i_nblist(vdw_free);
            close_i_nblist(coul_free);
            close_i_nblist(vdwc_free);
        }
    }
}
+
+static void
+put_in_list_qmmm(gmx_bool bHaveVdW[],
+ int ngid,
+ t_mdatoms * md,
+ int icg,
+ int jgid,
+ int nj,
+ atom_id jjcg[],
+ atom_id index[],
+ t_excl bExcl[],
+ int shift,
+ t_forcerec * fr,
+ gmx_bool bLR,
+ gmx_bool bDoVdW,
+ gmx_bool bDoCoul)
+{
+ t_nblist * coul;
+ int i,j,jcg,igid,gid;
+ atom_id jj,jj0,jj1,i_atom;
+ int i0,nicg;
+ gmx_bool bNotEx;
+
+ /* Get atom range */
+ i0 = index[icg];
+ nicg = index[icg+1]-i0;
+
+ /* Get the i charge group info */
+ igid = GET_CGINFO_GID(fr->cginfo[icg]);
+
+ coul = &fr->QMMMlist;
+
+ /* Loop over atoms in the ith charge group */
+ for (i=0;i<nicg;i++)
+ {
+ i_atom = i0+i;
+ gid = GID(igid,jgid,ngid);
+ /* Create new i_atom for each energy group */
+ new_i_nblist(coul,bLR,i_atom,shift,gid);
+
+ /* Loop over the j charge groups */
+ for (j=0;j<nj;j++)
+ {
+ jcg=jjcg[j];
+
+ /* Charge groups cannot have QM and MM atoms simultaneously */
+ if (jcg!=icg)
+ {
+ jj0 = index[jcg];
+ jj1 = index[jcg+1];
+ /* Finally loop over the atoms in the j-charge group */
+ for(jj=jj0; jj<jj1; jj++)
+ {
+ bNotEx = NOTEXCL(bExcl,i,jj);
+ if(bNotEx)
+ add_j_to_nblist(coul,jj,bLR);
+ }
+ }
+ }
+ close_i_nblist(coul);
+ }
+}
+
/* Charge-group - charge-group put_in_list: adds the nj j charge groups
 * in jjcg[] for i charge group icg to a single combined VdW+Coulomb
 * CG-CG neighbor list.  bHaveVdW, md, bDoVdW and bDoCoul are accepted
 * for signature compatibility with put_in_list_t but are not used.
 */
static void
put_in_list_cg(gmx_bool              bHaveVdW[],
               int                   ngid,
               t_mdatoms *           md,
               int                   icg,
               int                   jgid,
               int                   nj,
               atom_id               jjcg[],
               atom_id               index[],
               t_excl                bExcl[],
               int                   shift,
               t_forcerec *          fr,
               gmx_bool              bLR,
               gmx_bool              bDoVdW,
               gmx_bool              bDoCoul)
{
    int          cginfo;
    int          igid,gid,nbl_ind;
    t_nblist *   vdwc;
    int          j,jcg;

    cginfo = fr->cginfo[icg];

    igid = GET_CGINFO_GID(cginfo);
    gid  = GID(igid,jgid,ngid);

    /* Unpack pointers to neighbourlist structs */
    if (fr->nnblists == 1)
    {
        nbl_ind = 0;
    }
    else
    {
        nbl_ind = fr->gid2nblists[gid];
    }
    if (bLR)
    {
        vdwc = &fr->nblists[nbl_ind].nlist_lr[eNL_VDWQQ];
    }
    else
    {
        vdwc = &fr->nblists[nbl_ind].nlist_sr[eNL_VDWQQ];
    }

    /* Make a new neighbor list for charge group icg.
     * Currently simply one neighbor list is made with LJ and Coulomb.
     * If required, zero interactions could be removed here
     * or in the force loop.
     */
    new_i_nblist(vdwc,bLR,index[icg],shift,gid);
    /* The i entry spans the whole charge group, not a single atom */
    vdwc->iinr_end[vdwc->nri] = index[icg+1];

    for(j=0; (j<nj); j++)
    {
        jcg = jjcg[j];
        /* Skip the icg-icg pairs if all self interactions are excluded */
        if (!(jcg == icg && GET_CGINFO_EXCL_INTRA(cginfo)))
        {
            /* Here we add the j charge group jcg to the list,
             * exclusions are also added to the list.
             */
            add_j_to_nblist_cg(vdwc,index[jcg],index[jcg+1],bexcl,icg==jcg,bLR);
        }
    }

    close_i_nblist(vdwc);
}
+
+static void setexcl(atom_id start,atom_id end,t_blocka *excl,gmx_bool b,
+ t_excl bexcl[])
+{
+ atom_id i,k;
+
+ if (b)
+ {
+ for(i=start; i<end; i++)
+ {
+ for(k=excl->index[i]; k<excl->index[i+1]; k++)
+ {
+ SETEXCL(bexcl,i-start,excl->a[k]);
+ }
+ }
+ }
+ else
+ {
+ for(i=start; i<end; i++)
+ {
+ for(k=excl->index[i]; k<excl->index[i+1]; k++)
+ {
+ RMEXCL(bexcl,i-start,excl->a[k]);
+ }
+ }
+ }
+}
+
/* Return the number of j charge groups that i charge group icg "owns"
 * in the half-shell all-pairs scheme, given cgtot charge groups total.
 * For odd cgtot every group gets 1+cgtot/2 partners; for even cgtot the
 * parity rules below hand the extra partner to alternating groups so
 * each pair is generated exactly once.
 */
int calc_naaj(int icg,int cgtot)
{
    int half,naaj,icg_is_even;

    half        = cgtot/2;
    icg_is_even = ((icg % 2) == 0);

    if ((cgtot % 2) == 1)
    {
        /* Odd number of charge groups, easy */
        naaj = half + 1;
    }
    else if ((cgtot % 4) == 0 && icg >= half)
    {
        /* Multiple of four with icg in the upper half:
         * the parity rule is inverted.
         */
        naaj = (icg_is_even ? half : half + 1);
    }
    else
    {
        /* Even cgtot: lower half of a multiple of four,
         * or cgtot/2 odd.
         */
        naaj = (icg_is_even ? half + 1 : half);
    }
#ifdef DEBUG
    fprintf(log,"naaj=%d\n",naaj);
#endif

    return naaj;
}
+
+/************************************************
+ *
+ * S I M P L E C O R E S T U F F
+ *
+ ************************************************/
+
+static real calc_image_tric(rvec xi,rvec xj,matrix box,
+ rvec b_inv,int *shift)
+{
+ /* This code assumes that the cut-off is smaller than
+ * a half times the smallest diagonal element of the box.
+ */
+ const real h25=2.5;
+ real dx,dy,dz;
+ real r2;
+ int tx,ty,tz;
+
+ /* Compute diff vector */
+ dz = xj[ZZ] - xi[ZZ];
+ dy = xj[YY] - xi[YY];
+ dx = xj[XX] - xi[XX];
+
+ /* Perform NINT operation, using trunc operation, therefore
+ * we first add 2.5 then subtract 2 again
+ */
+ tz = dz*b_inv[ZZ] + h25;
+ tz -= 2;
+ dz -= tz*box[ZZ][ZZ];
+ dy -= tz*box[ZZ][YY];
+ dx -= tz*box[ZZ][XX];
+
+ ty = dy*b_inv[YY] + h25;
+ ty -= 2;
+ dy -= ty*box[YY][YY];
+ dx -= ty*box[YY][XX];
+
+ tx = dx*b_inv[XX]+h25;
+ tx -= 2;
+ dx -= tx*box[XX][XX];
+
+ /* Distance squared */
+ r2 = (dx*dx) + (dy*dy) + (dz*dz);
+
+ *shift = XYZ2IS(tx,ty,tz);
+
+ return r2;
+}
+
+static real calc_image_rect(rvec xi,rvec xj,rvec box_size,
+ rvec b_inv,int *shift)
+{
+ const real h15=1.5;
+ real ddx,ddy,ddz;
+ real dx,dy,dz;
+ real r2;
+ int tx,ty,tz;
+
+ /* Compute diff vector */
+ dx = xj[XX] - xi[XX];
+ dy = xj[YY] - xi[YY];
+ dz = xj[ZZ] - xi[ZZ];
+
+ /* Perform NINT operation, using trunc operation, therefore
+ * we first add 1.5 then subtract 1 again
+ */
+ tx = dx*b_inv[XX] + h15;
+ ty = dy*b_inv[YY] + h15;
+ tz = dz*b_inv[ZZ] + h15;
+ tx--;
+ ty--;
+ tz--;
+
+ /* Correct diff vector for translation */
+ ddx = tx*box_size[XX] - dx;
+ ddy = ty*box_size[YY] - dy;
+ ddz = tz*box_size[ZZ] - dz;
+
+ /* Distance squared */
+ r2 = (ddx*ddx) + (ddy*ddy) + (ddz*ddz);
+
+ *shift = XYZ2IS(tx,ty,tz);
+
+ return r2;
+}
+
+static void add_simple(t_ns_buf *nsbuf,int nrj,atom_id cg_j,
+ gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
+ int icg,int jgid,t_block *cgs,t_excl bexcl[],
+ int shift,t_forcerec *fr,put_in_list_t *put_in_list)
+{
+ if (nsbuf->nj + nrj > MAX_CG)
+ {
+ put_in_list(bHaveVdW,ngid,md,icg,jgid,nsbuf->ncg,nsbuf->jcg,
+ cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE);
+ /* Reset buffer contents */
+ nsbuf->ncg = nsbuf->nj = 0;
+ }
+ nsbuf->jcg[nsbuf->ncg++] = cg_j;
+ nsbuf->nj += nrj;
+}
+
+static void ns_inner_tric(rvec x[],int icg,int *i_egp_flags,
+ int njcg,atom_id jcg[],
+ matrix box,rvec b_inv,real rcut2,
+ t_block *cgs,t_ns_buf **ns_buf,
+ gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
+ t_excl bexcl[],t_forcerec *fr,
+ put_in_list_t *put_in_list)
+{
+ int shift;
+ int j,nrj,jgid;
+ int *cginfo=fr->cginfo;
+ atom_id cg_j,*cgindex;
+ t_ns_buf *nsbuf;
+
+ cgindex = cgs->index;
+ shift = CENTRAL;
+ for(j=0; (j<njcg); j++)
+ {
+ cg_j = jcg[j];
+ nrj = cgindex[cg_j+1]-cgindex[cg_j];
+ if (calc_image_tric(x[icg],x[cg_j],box,b_inv,&shift) < rcut2)
+ {
+ jgid = GET_CGINFO_GID(cginfo[cg_j]);
+ if (!(i_egp_flags[jgid] & EGP_EXCL))
+ {
+ add_simple(&ns_buf[jgid][shift],nrj,cg_j,
+ bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
+ put_in_list);
+ }
+ }
+ }
+}
+
+static void ns_inner_rect(rvec x[],int icg,int *i_egp_flags,
+ int njcg,atom_id jcg[],
+ gmx_bool bBox,rvec box_size,rvec b_inv,real rcut2,
+ t_block *cgs,t_ns_buf **ns_buf,
+ gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
+ t_excl bexcl[],t_forcerec *fr,
+ put_in_list_t *put_in_list)
+{
+ int shift;
+ int j,nrj,jgid;
+ int *cginfo=fr->cginfo;
+ atom_id cg_j,*cgindex;
+ t_ns_buf *nsbuf;
+
+ cgindex = cgs->index;
+ if (bBox)
+ {
+ shift = CENTRAL;
+ for(j=0; (j<njcg); j++)
+ {
+ cg_j = jcg[j];
+ nrj = cgindex[cg_j+1]-cgindex[cg_j];
+ if (calc_image_rect(x[icg],x[cg_j],box_size,b_inv,&shift) < rcut2)
+ {
+ jgid = GET_CGINFO_GID(cginfo[cg_j]);
+ if (!(i_egp_flags[jgid] & EGP_EXCL))
+ {
+ add_simple(&ns_buf[jgid][shift],nrj,cg_j,
+ bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
+ put_in_list);
+ }
+ }
+ }
+ }
+ else
+ {
+ for(j=0; (j<njcg); j++)
+ {
+ cg_j = jcg[j];
+ nrj = cgindex[cg_j+1]-cgindex[cg_j];
+ if ((rcut2 == 0) || (distance2(x[icg],x[cg_j]) < rcut2)) {
+ jgid = GET_CGINFO_GID(cginfo[cg_j]);
+ if (!(i_egp_flags[jgid] & EGP_EXCL))
+ {
+ add_simple(&ns_buf[jgid][CENTRAL],nrj,cg_j,
+ bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,CENTRAL,fr,
+ put_in_list);
+ }
+ }
+ }
+ }
+}
+
+/* ns_simple_core needs to be adapted for QMMM still 2005 */
+
+static int ns_simple_core(t_forcerec *fr,
+ gmx_localtop_t *top,
+ t_mdatoms *md,
+ matrix box,rvec box_size,
+ t_excl bexcl[],atom_id *aaj,
+ int ngid,t_ns_buf **ns_buf,
+ put_in_list_t *put_in_list,gmx_bool bHaveVdW[])
+{
+ int naaj,k;
+ real rlist2;
+ int nsearch,icg,jcg,igid,i0,nri,nn;
+ int *cginfo;
+ t_ns_buf *nsbuf;
+ /* atom_id *i_atoms; */
+ t_block *cgs=&(top->cgs);
+ t_blocka *excl=&(top->excls);
+ rvec b_inv;
+ int m;
+ gmx_bool bBox,bTriclinic;
+ int *i_egp_flags;
+
+ rlist2 = sqr(fr->rlist);
+
+ bBox = (fr->ePBC != epbcNONE);
+ if (bBox)
+ {
+ for(m=0; (m<DIM); m++)
+ {
+ b_inv[m] = divide_err(1.0,box_size[m]);
+ }
+ bTriclinic = TRICLINIC(box);
+ }
+ else
+ {
+ bTriclinic = FALSE;
+ }
+
+ cginfo = fr->cginfo;
+
+ nsearch=0;
+ for (icg=fr->cg0; (icg<fr->hcg); icg++)
+ {
+ /*
+ i0 = cgs->index[icg];
+ nri = cgs->index[icg+1]-i0;
+ i_atoms = &(cgs->a[i0]);
+ i_eg_excl = fr->eg_excl + ngid*md->cENER[*i_atoms];
+ setexcl(nri,i_atoms,excl,TRUE,bexcl);
+ */
+ igid = GET_CGINFO_GID(cginfo[icg]);
+ i_egp_flags = fr->egp_flags + ngid*igid;
+ setexcl(cgs->index[icg],cgs->index[icg+1],excl,TRUE,bexcl);
+
+ naaj=calc_naaj(icg,cgs->nr);
+ if (bTriclinic)
+ {
+ ns_inner_tric(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
+ box,b_inv,rlist2,cgs,ns_buf,
+ bHaveVdW,ngid,md,bexcl,fr,put_in_list);
+ }
+ else
+ {
+ ns_inner_rect(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
+ bBox,box_size,b_inv,rlist2,cgs,ns_buf,
+ bHaveVdW,ngid,md,bexcl,fr,put_in_list);
+ }
+ nsearch += naaj;
+
+ for(nn=0; (nn<ngid); nn++)
+ {
+ for(k=0; (k<SHIFTS); k++)
+ {
+ nsbuf = &(ns_buf[nn][k]);
+ if (nsbuf->ncg > 0)
+ {
+ put_in_list(bHaveVdW,ngid,md,icg,nn,nsbuf->ncg,nsbuf->jcg,
+ cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE);
+ nsbuf->ncg=nsbuf->nj=0;
+ }
+ }
+ }
+ /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */
+ setexcl(cgs->index[icg],cgs->index[icg+1],excl,FALSE,bexcl);
+ }
+ close_neighbor_list(fr,FALSE,-1,-1,FALSE);
+
+ return nsearch;
+}
+
+/************************************************
+ *
+ * N S 5 G R I D S T U F F
+ *
+ ************************************************/
+
/* Determine the inclusive range [*dx0,*dx1] of grid cells along one
 * dimension that can contain neighbors within sqrt(rc2) of coordinate
 * x, and store in dcx2[] the squared distance from x to the closest
 * edge of each cell in the range.
 * Nx: number of cells; gridx: cell size; xgi: cell index of x, which
 * may lie outside [0,Nx) for periodically shifted images.
 */
static inline void get_dx(int Nx,real gridx,real rc2,int xgi,real x,
                          int *dx0,int *dx1,real *dcx2)
{
    real dcx,tmp;
    int  xgi0,xgi1,i;

    if (xgi < 0)
    {
        /* x lies below the grid: empty initial range, scan upward only */
        *dx0 = 0;
        xgi0 = -1;
        *dx1 = -1;
        xgi1 = 0;
    }
    else if (xgi >= Nx)
    {
        /* x lies above the grid: empty initial range, scan downward only */
        *dx0 = Nx;
        xgi0 = Nx-1;
        *dx1 = Nx-1;
        xgi1 = Nx;
    }
    else
    {
        /* x is inside the grid: its own cell is in range at distance 0 */
        dcx2[xgi] = 0;
        *dx0 = xgi;
        xgi0 = xgi-1;
        *dx1 = xgi;
        xgi1 = xgi+1;
    }

    /* Extend the range downward while the cell edge is within cut-off */
    for(i=xgi0; i>=0; i--)
    {
        dcx = (i+1)*gridx-x;
        tmp = dcx*dcx;
        if (tmp >= rc2)
            break;
        *dx0 = i;
        dcx2[i] = tmp;
    }
    /* Extend the range upward while the cell edge is within cut-off */
    for(i=xgi1; i<Nx; i++)
    {
        dcx = i*gridx-x;
        tmp = dcx*dcx;
        if (tmp >= rc2)
        {
            break;
        }
        *dx1 = i;
        dcx2[i] = tmp;
    }
}
+
/* Domain-decomposition variant of get_dx: determine the inclusive grid
 * cell range [*g0,*g1] along one dimension that can contain neighbors
 * within sqrt(rc2) of x, and fill dcx2[] with squared edge distances.
 * ncpddc apparently marks the cell index where the grid splits into a
 * home part and a shifted part (0 = no split), and shift_min/shift_max
 * bound the allowed periodic shifts for this search — confirm against
 * the DD grid setup code.
 */
static inline void get_dx_dd(int Nx,real gridx,real rc2,int xgi,real x,
                             int ncpddc,int shift_min,int shift_max,
                             int *g0,int *g1,real *dcx2)
{
    real dcx,tmp;
    int  g_min,g_max,shift_home;

    if (xgi < 0)
    {
        /* x lies below the grid: start from an empty range, scan up */
        g_min = 0;
        g_max = Nx - 1;
        *g0   = 0;
        *g1   = -1;
    }
    else if (xgi >= Nx)
    {
        /* x lies above the grid: start from an empty range, scan down */
        g_min = 0;
        g_max = Nx - 1;
        *g0   = Nx;
        *g1   = Nx - 1;
    }
    else
    {
        if (ncpddc == 0)
        {
            /* No home/shifted split in this dimension */
            g_min = 0;
            g_max = Nx - 1;
        }
        else
        {
            /* Restrict the scan to the zone matching the allowed shifts */
            if (xgi < ncpddc)
            {
                shift_home = 0;
            }
            else
            {
                shift_home = -1;
            }
            g_min = (shift_min == shift_home ? 0          : ncpddc);
            g_max = (shift_max == shift_home ? ncpddc - 1 : Nx - 1);
        }
        if (shift_min > 0)
        {
            /* Only positively shifted cells allowed:
             * empty range at the lower bound.
             */
            *g0 = g_min;
            *g1 = g_min - 1;
        }
        else if (shift_max < 0)
        {
            /* Only negatively shifted cells allowed:
             * empty range at the upper bound.
             */
            *g0 = g_max + 1;
            *g1 = g_max;
        }
        else
        {
            /* The cell of x itself is in range at distance zero */
            *g0 = xgi;
            *g1 = xgi;
            dcx2[xgi] = 0;
        }
    }

    while (*g0 > g_min)
    {
        /* Check one grid cell down */
        dcx = ((*g0 - 1) + 1)*gridx - x;
        tmp = dcx*dcx;
        if (tmp >= rc2)
        {
            break;
        }
        (*g0)--;
        dcx2[*g0] = tmp;
    }

    while (*g1 < g_max)
    {
        /* Check one grid cell up */
        dcx = (*g1 + 1)*gridx - x;
        tmp = dcx*dcx;
        if (tmp >= rc2)
        {
            break;
        }
        (*g1)++;
        dcx2[*g1] = tmp;
    }
}
+
+
/* Squared scalar, squared 3D distance, and squared cylindrical (x,y)
 * distance helpers for the grid search below.
 */
#define sqr(x) ((x)*(x))
#define calc_dx2(XI,YI,ZI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]) + sqr(ZI-y[ZZ]))
#define calc_cyl_dx2(XI,YI,y) (sqr(XI-y[XX]) + sqr(YI-y[YY]))
+/****************************************************
+ *
+ * F A S T N E I G H B O R S E A R C H I N G
+ *
+ * Optimized neighboursearching routine using grid
+ * at least 1x1x1, see GROMACS manual
+ *
+ ****************************************************/
+
/* Service the long-range (twin-range) neighbor lists.  Any long-range
 * list that is nearly full — or every list, when bEvaluateNow is set —
 * is closed, evaluated with do_nonbonded and reset.  Afterwards, unless
 * bEvaluateNow, the nlr charge groups in lr[] are appended to the
 * long-range lists through put_in_list.
 */
static void do_longrange(t_commrec *cr,gmx_localtop_t *top,t_forcerec *fr,
                         int ngid,t_mdatoms *md,int icg,
                         int jgid,int nlr,
                         atom_id lr[],t_excl bexcl[],int shift,
                         rvec x[],rvec box_size,t_nrnb *nrnb,
                         real *lambda,real *dvdlambda,
                         gmx_grppairener_t *grppener,
                         gmx_bool bDoVdW,gmx_bool bDoCoul,
                         gmx_bool bEvaluateNow,put_in_list_t *put_in_list,
                         gmx_bool bHaveVdW[],
                         gmx_bool bDoForces,rvec *f)
{
    int n,i;
    t_nblist *nl;

    for(n=0; n<fr->nnblists; n++)
    {
        for(i=0; (i<eNL_NR); i++)
        {
            nl = &fr->nblists[n].nlist_lr[i];
            /* Flush when within 32 i entries of capacity, or on request */
            if ((nl->nri > nl->maxnri-32) || bEvaluateNow)
            {
                close_neighbor_list(fr,TRUE,n,i,FALSE);
                /* Evaluate the energies and forces */
                do_nonbonded(cr,fr,x,f,md,NULL,
                             grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR],
                             grppener->ener[egCOULLR],
                             grppener->ener[egGB],box_size,
                             nrnb,lambda,dvdlambda,n,i,
                             GMX_DONB_LR | GMX_DONB_FORCES);

                reset_neighbor_list(fr,TRUE,n,i);
            }
        }
    }

    if (!bEvaluateNow)
    {
        /* Put the long range particles in a list */
        /* do_longrange is never called for QMMM */
        put_in_list(bHaveVdW,ngid,md,icg,jgid,nlr,lr,top->cgs.index,
                    bexcl,shift,fr,TRUE,bDoVdW,bDoCoul);
    }
}
+
/* Compute the squared cut-offs used by neighbor searching:
 * *rs2    short-range cut-off (rlist^2),
 * *rvdw2  effective VdW cut-off, *rcoul2 effective Coulomb cut-off,
 * *rm2    min(*rvdw2,*rcoul2), *rl2 max(*rvdw2,*rcoul2).
 * Without twin-range (or with bDoLongRange==FALSE) all equal *rs2.
 */
static void get_cutoff2(t_forcerec *fr,gmx_bool bDoLongRange,
                        real *rvdw2,real *rcoul2,
                        real *rs2,real *rm2,real *rl2)
{
    *rs2 = sqr(fr->rlist);
    if (bDoLongRange && fr->bTwinRange)
    {
        /* The VdW and elec. LR cut-off's could be different,
         * so we can not simply set them to rlistlong.
         */
        if (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(fr->vdwtype) &&
            fr->rvdw > fr->rlist)
        {
            *rvdw2 = sqr(fr->rlistlong);
        }
        else
        {
            *rvdw2 = sqr(fr->rvdw);
        }
        if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(fr->eeltype) &&
            fr->rcoulomb > fr->rlist)
        {
            *rcoul2 = sqr(fr->rlistlong);
        }
        else
        {
            *rcoul2 = sqr(fr->rcoulomb);
        }
    }
    else
    {
        /* Workaround for a gcc -O3 or -ffast-math problem */
        *rvdw2  = *rs2;
        *rcoul2 = *rs2;
    }
    *rm2 = min(*rvdw2,*rcoul2);
    *rl2 = max(*rvdw2,*rcoul2);
}
+
+static void init_nsgrid_lists(t_forcerec *fr,int ngid,gmx_ns_t *ns)
+{
+ real rvdw2,rcoul2,rs2,rm2,rl2;
+ int j;
+
+ get_cutoff2(fr,TRUE,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
+
+ /* Short range buffers */
+ snew(ns->nl_sr,ngid);
+ /* Counters */
+ snew(ns->nsr,ngid);
+ snew(ns->nlr_ljc,ngid);
+ snew(ns->nlr_one,ngid);
+
+ if (rm2 > rs2)
+ {
+ /* Long range VdW and Coul buffers */
+ snew(ns->nl_lr_ljc,ngid);
+ }
+ if (rl2 > rm2)
+ {
+ /* Long range VdW or Coul only buffers */
+ snew(ns->nl_lr_one,ngid);
+ }
+ for(j=0; (j<ngid); j++) {
+ snew(ns->nl_sr[j],MAX_CG);
+ if (rm2 > rs2)
+ {
+ snew(ns->nl_lr_ljc[j],MAX_CG);
+ }
+ if (rl2 > rm2)
+ {
+ snew(ns->nl_lr_one[j],MAX_CG);
+ }
+ }
+ if (debug)
+ {
+ fprintf(debug,
+ "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n",
+ rs2,rm2,rl2);
+ }
+}
+
+static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
+ matrix box,rvec box_size,int ngid,
+ gmx_localtop_t *top,
+ t_grid *grid,rvec x[],
+ t_excl bexcl[],gmx_bool *bExcludeAlleg,
+ t_nrnb *nrnb,t_mdatoms *md,
+ real *lambda,real *dvdlambda,
+ gmx_grppairener_t *grppener,
+ put_in_list_t *put_in_list,
+ gmx_bool bHaveVdW[],
+ gmx_bool bDoLongRange,gmx_bool bDoForces,rvec *f,
+ gmx_bool bMakeQMMMnblist)
+{
+ gmx_ns_t *ns;
+ atom_id **nl_lr_ljc,**nl_lr_one,**nl_sr;
+ int *nlr_ljc,*nlr_one,*nsr;
+ gmx_domdec_t *dd=NULL;
+ t_block *cgs=&(top->cgs);
+ int *cginfo=fr->cginfo;
+ /* atom_id *i_atoms,*cgsindex=cgs->index; */
+ ivec sh0,sh1,shp;
+ int cell_x,cell_y,cell_z;
+ int d,tx,ty,tz,dx,dy,dz,cj;
+#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
+ int zsh_ty,zsh_tx,ysh_tx;
+#endif
+ int dx0,dx1,dy0,dy1,dz0,dz1;
+ int Nx,Ny,Nz,shift=-1,j,nrj,nns,nn=-1;
+ real gridx,gridy,gridz,grid_x,grid_y,grid_z;
+ real *dcx2,*dcy2,*dcz2;
+ int zgi,ygi,xgi;
+ int cg0,cg1,icg=-1,cgsnr,i0,igid,nri,naaj,max_jcg;
+ int jcg0,jcg1,jjcg,cgj0,jgid;
+ int *grida,*gridnra,*gridind;
+ gmx_bool rvdw_lt_rcoul,rcoul_lt_rvdw;
+ rvec xi,*cgcm,grid_offset;
+ real r2,rs2,rvdw2,rcoul2,rm2,rl2,XI,YI,ZI,dcx,dcy,dcz,tmp1,tmp2;
+ int *i_egp_flags;
+ gmx_bool bDomDec,bTriclinicX,bTriclinicY;
+ ivec ncpddc;
+
+ ns = &fr->ns;
+
+ bDomDec = DOMAINDECOMP(cr);
+ if (bDomDec)
+ {
+ dd = cr->dd;
+ }
+
+ bTriclinicX = ((YY < grid->npbcdim &&
+ (!bDomDec || dd->nc[YY]==1) && box[YY][XX] != 0) ||
+ (ZZ < grid->npbcdim &&
+ (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][XX] != 0));
+ bTriclinicY = (ZZ < grid->npbcdim &&
+ (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][YY] != 0);
+
+ cgsnr = cgs->nr;
+
+ get_cutoff2(fr,bDoLongRange,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
+
+ rvdw_lt_rcoul = (rvdw2 >= rcoul2);
+ rcoul_lt_rvdw = (rcoul2 >= rvdw2);
+
+ if (bMakeQMMMnblist)
+ {
+ rm2 = rl2;
+ rs2 = rl2;
+ }
+
+ nl_sr = ns->nl_sr;
+ nsr = ns->nsr;
+ nl_lr_ljc = ns->nl_lr_ljc;
+ nl_lr_one = ns->nl_lr_one;
+ nlr_ljc = ns->nlr_ljc;
+ nlr_one = ns->nlr_one;
+
+ /* Unpack arrays */
+ cgcm = fr->cg_cm;
+ Nx = grid->n[XX];
+ Ny = grid->n[YY];
+ Nz = grid->n[ZZ];
+ grida = grid->a;
+ gridind = grid->index;
+ gridnra = grid->nra;
+ nns = 0;
+
+ gridx = grid->cell_size[XX];
+ gridy = grid->cell_size[YY];
+ gridz = grid->cell_size[ZZ];
+ grid_x = 1/gridx;
+ grid_y = 1/gridy;
+ grid_z = 1/gridz;
+ copy_rvec(grid->cell_offset,grid_offset);
+ copy_ivec(grid->ncpddc,ncpddc);
+ dcx2 = grid->dcx2;
+ dcy2 = grid->dcy2;
+ dcz2 = grid->dcz2;
+
+#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
+ zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5);
+ zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5);
+ ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5);
+ if (zsh_tx!=0 && ysh_tx!=0)
+ {
+ /* This could happen due to rounding, when both ratios are 0.5 */
+ ysh_tx = 0;
+ }
+#endif
+
+ debug_gmx();
+
+ if (fr->n_tpi)
+ {
+ /* We only want a list for the test particle */
+ cg0 = cgsnr - 1;
+ }
+ else
+ {
+ cg0 = grid->icg0;
+ }
+ cg1 = grid->icg1;
+
+ /* Set the shift range */
+ for(d=0; d<DIM; d++)
+ {
+ sh0[d] = -1;
+ sh1[d] = 1;
+ /* Check if we need periodicity shifts.
+ * Without PBC or with domain decomposition we don't need them.
+ */
+ if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1))
+ {
+ shp[d] = 0;
+ }
+ else
+ {
+ if (d == XX &&
+ box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
+ {
+ shp[d] = 2;
+ }
+ else
+ {
+ shp[d] = 1;
+ }
+ }
+ }
+
+ /* Loop over charge groups */
+ for(icg=cg0; (icg < cg1); icg++)
+ {
+ igid = GET_CGINFO_GID(cginfo[icg]);
+ /* Skip this charge group if all energy groups are excluded! */
+ if (bExcludeAlleg[igid])
+ {
+ continue;
+ }
+
+ i0 = cgs->index[icg];
+
+ if (bMakeQMMMnblist)
+ {
+ /* Skip this charge group if it is not a QM atom while making a
+ * QM/MM neighbourlist
+ */
+ if (md->bQM[i0]==FALSE)
+ {
+ continue; /* MM particle, go to next particle */
+ }
+
+ /* Compute the number of charge groups that fall within the control
+ * of this one (icg)
+ */
+ naaj = calc_naaj(icg,cgsnr);
+ jcg0 = icg;
+ jcg1 = icg + naaj;
+ max_jcg = cgsnr;
+ }
+ else
+ {
+ /* make a normal neighbourlist */
+
+ if (bDomDec)
+ {
+ /* Get the j charge-group and dd cell shift ranges */
+ dd_get_ns_ranges(cr->dd,icg,&jcg0,&jcg1,sh0,sh1);
+ max_jcg = 0;
+ }
+ else
+ {
+ /* Compute the number of charge groups that fall within the control
+ * of this one (icg)
+ */
+ naaj = calc_naaj(icg,cgsnr);
+ jcg0 = icg;
+ jcg1 = icg + naaj;
+
+ if (fr->n_tpi)
+ {
+                        /* The i-particle is always the test particle,
+ * so we want all j-particles
+ */
+ max_jcg = cgsnr - 1;
+ }
+ else
+ {
+ max_jcg = jcg1 - cgsnr;
+ }
+ }
+ }
+
+ i_egp_flags = fr->egp_flags + igid*ngid;
+
+ /* Set the exclusions for the atoms in charge group icg using a bitmask */
+ setexcl(i0,cgs->index[icg+1],&top->excls,TRUE,bexcl);
+
+ ci2xyz(grid,icg,&cell_x,&cell_y,&cell_z);
+
+ /* Changed iicg to icg, DvdS 990115
+ * (but see consistency check above, DvdS 990330)
+ */
+#ifdef NS5DB
+ fprintf(log,"icg=%5d, naaj=%5d, cell %d %d %d\n",
+ icg,naaj,cell_x,cell_y,cell_z);
+#endif
+ /* Loop over shift vectors in three dimensions */
+ for (tz=-shp[ZZ]; tz<=shp[ZZ]; tz++)
+ {
+ ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ];
+ /* Calculate range of cells in Z direction that have the shift tz */
+ zgi = cell_z + tz*Nz;
+#define FAST_DD_NS
+#ifndef FAST_DD_NS
+ get_dx(Nz,gridz,rl2,zgi,ZI,&dz0,&dz1,dcz2);
+#else
+ get_dx_dd(Nz,gridz,rl2,zgi,ZI-grid_offset[ZZ],
+ ncpddc[ZZ],sh0[ZZ],sh1[ZZ],&dz0,&dz1,dcz2);
+#endif
+ if (dz0 > dz1)
+ {
+ continue;
+ }
+ for (ty=-shp[YY]; ty<=shp[YY]; ty++)
+ {
+ YI = cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY];
+ /* Calculate range of cells in Y direction that have the shift ty */
+ if (bTriclinicY)
+ {
+ ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny;
+ }
+ else
+ {
+ ygi = cell_y + ty*Ny;
+ }
+#ifndef FAST_DD_NS
+ get_dx(Ny,gridy,rl2,ygi,YI,&dy0,&dy1,dcy2);
+#else
+ get_dx_dd(Ny,gridy,rl2,ygi,YI-grid_offset[YY],
+ ncpddc[YY],sh0[YY],sh1[YY],&dy0,&dy1,dcy2);
+#endif
+ if (dy0 > dy1)
+ {
+ continue;
+ }
+ for (tx=-shp[XX]; tx<=shp[XX]; tx++)
+ {
+ XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
+ /* Calculate range of cells in X direction that have the shift tx */
+ if (bTriclinicX)
+ {
+ xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx;
+ }
+ else
+ {
+ xgi = cell_x + tx*Nx;
+ }
+#ifndef FAST_DD_NS
+ get_dx(Nx,gridx,rl2,xgi*Nx,XI,&dx0,&dx1,dcx2);
+#else
+ get_dx_dd(Nx,gridx,rl2,xgi,XI-grid_offset[XX],
+ ncpddc[XX],sh0[XX],sh1[XX],&dx0,&dx1,dcx2);
+#endif
+ if (dx0 > dx1)
+ {
+ continue;
+ }
+                    /* AdResS: an explicit cg that has a weighting function of 0 is excluded
+                     *  from the neighbour list as it will not interact  */
+ if (fr->adress_type != eAdressOff){
+ if (md->wf[cgs->index[icg]]==0 && egp_explicit(fr, igid)){
+ continue;
+ }
+ }
+ /* Get shift vector */
+ shift=XYZ2IS(tx,ty,tz);
+#ifdef NS5DB
+ range_check(shift,0,SHIFTS);
+#endif
+ for(nn=0; (nn<ngid); nn++)
+ {
+ nsr[nn] = 0;
+ nlr_ljc[nn] = 0;
+ nlr_one[nn] = 0;
+ }
+#ifdef NS5DB
+ fprintf(log,"shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n",
+ shift,dx0,dx1,dy0,dy1,dz0,dz1);
+ fprintf(log,"cgcm: %8.3f %8.3f %8.3f\n",cgcm[icg][XX],
+ cgcm[icg][YY],cgcm[icg][ZZ]);
+ fprintf(log,"xi: %8.3f %8.3f %8.3f\n",XI,YI,ZI);
+#endif
+ for (dx=dx0; (dx<=dx1); dx++)
+ {
+ tmp1 = rl2 - dcx2[dx];
+ for (dy=dy0; (dy<=dy1); dy++)
+ {
+ tmp2 = tmp1 - dcy2[dy];
+ if (tmp2 > 0)
+ {
+ for (dz=dz0; (dz<=dz1); dz++) {
+ if (tmp2 > dcz2[dz]) {
+ /* Find grid-cell cj in which possible neighbours are */
+ cj = xyz2ci(Ny,Nz,dx,dy,dz);
+
+ /* Check out how many cgs (nrj) there in this cell */
+ nrj = gridnra[cj];
+
+ /* Find the offset in the cg list */
+ cgj0 = gridind[cj];
+
+ /* Check if all j's are out of range so we
+ * can skip the whole cell.
+ * Should save some time, especially with DD.
+ */
+ if (nrj == 0 ||
+ (grida[cgj0] >= max_jcg &&
+ (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0)))
+ {
+ continue;
+ }
+
+ /* Loop over cgs */
+ for (j=0; (j<nrj); j++)
+ {
+ jjcg = grida[cgj0+j];
+
+ /* check whether this guy is in range! */
+ if ((jjcg >= jcg0 && jjcg < jcg1) ||
+ (jjcg < max_jcg))
+ {
+ r2=calc_dx2(XI,YI,ZI,cgcm[jjcg]);
+ if (r2 < rl2) {
+ /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */
+ jgid = GET_CGINFO_GID(cginfo[jjcg]);
+ /* check energy group exclusions */
+ if (!(i_egp_flags[jgid] & EGP_EXCL))
+ {
+ if (r2 < rs2)
+ {
+ if (nsr[jgid] >= MAX_CG)
+ {
+ put_in_list(bHaveVdW,ngid,md,icg,jgid,
+ nsr[jgid],nl_sr[jgid],
+ cgs->index,/* cgsatoms, */ bexcl,
+ shift,fr,FALSE,TRUE,TRUE);
+ nsr[jgid]=0;
+ }
+ nl_sr[jgid][nsr[jgid]++]=jjcg;
+ }
+ else if (r2 < rm2)
+ {
+ if (nlr_ljc[jgid] >= MAX_CG)
+ {
+ do_longrange(cr,top,fr,ngid,md,icg,jgid,
+ nlr_ljc[jgid],
+ nl_lr_ljc[jgid],bexcl,shift,x,
+ box_size,nrnb,
+ lambda,dvdlambda,
+ grppener,
+ TRUE,TRUE,FALSE,
+ put_in_list,
+ bHaveVdW,
+ bDoForces,f);
+ nlr_ljc[jgid]=0;
+ }
+ nl_lr_ljc[jgid][nlr_ljc[jgid]++]=jjcg;
+ }
+ else
+ {
+ if (nlr_one[jgid] >= MAX_CG) {
+ do_longrange(cr,top,fr,ngid,md,icg,jgid,
+ nlr_one[jgid],
+ nl_lr_one[jgid],bexcl,shift,x,
+ box_size,nrnb,
+ lambda,dvdlambda,
+ grppener,
+ rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
+ put_in_list,
+ bHaveVdW,
+ bDoForces,f);
+ nlr_one[jgid]=0;
+ }
+ nl_lr_one[jgid][nlr_one[jgid]++]=jjcg;
+ }
+ }
+ }
+ nns++;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ /* CHECK whether there is anything left in the buffers */
+ for(nn=0; (nn<ngid); nn++)
+ {
+ if (nsr[nn] > 0)
+ {
+ put_in_list(bHaveVdW,ngid,md,icg,nn,nsr[nn],nl_sr[nn],
+ cgs->index, /* cgsatoms, */ bexcl,
+ shift,fr,FALSE,TRUE,TRUE);
+ }
+
+ if (nlr_ljc[nn] > 0)
+ {
+ do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_ljc[nn],
+ nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
+ lambda,dvdlambda,grppener,TRUE,TRUE,FALSE,
+ put_in_list,bHaveVdW,bDoForces,f);
+ }
+
+ if (nlr_one[nn] > 0)
+ {
+ do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_one[nn],
+ nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
+ lambda,dvdlambda,grppener,
+ rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
+ put_in_list,bHaveVdW,bDoForces,f);
+ }
+ }
+ }
+ }
+ }
+ /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
+ setexcl(cgs->index[icg],cgs->index[icg+1],&top->excls,FALSE,bexcl);
+ }
+ /* Perform any left over force calculations */
+ for (nn=0; (nn<ngid); nn++)
+ {
+ if (rm2 > rs2)
+ {
+ do_longrange(cr,top,fr,0,md,icg,nn,nlr_ljc[nn],
+ nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
+ lambda,dvdlambda,grppener,
+ TRUE,TRUE,TRUE,put_in_list,bHaveVdW,bDoForces,f);
+ }
+ if (rl2 > rm2) {
+ do_longrange(cr,top,fr,0,md,icg,nn,nlr_one[nn],
+ nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
+ lambda,dvdlambda,grppener,
+ rvdw_lt_rcoul,rcoul_lt_rvdw,
+ TRUE,put_in_list,bHaveVdW,bDoForces,f);
+ }
+ }
+ debug_gmx();
+
+ /* Close off short range neighbourlists */
+ close_neighbor_list(fr,FALSE,-1,-1,bMakeQMMMnblist);
+
+ return nns;
+}
+
+void ns_realloc_natoms(gmx_ns_t *ns,int natoms)
+{
+ int i;
+
+ if (natoms > ns->nra_alloc)
+ {
+ ns->nra_alloc = over_alloc_dd(natoms);
+ srenew(ns->bexcl,ns->nra_alloc);
+ for(i=0; i<ns->nra_alloc; i++)
+ {
+ ns->bexcl[i] = 0;
+ }
+ }
+}
+
+void init_ns(FILE *fplog,const t_commrec *cr,
+ gmx_ns_t *ns,t_forcerec *fr,
+ const gmx_mtop_t *mtop,
+ matrix box)
+{
+ int mt,icg,nr_in_cg,maxcg,i,j,jcg,ngid,ncg;
+ t_block *cgs;
+ char *ptr;
+
+ /* Compute largest charge groups size (# atoms) */
+ nr_in_cg=1;
+ for(mt=0; mt<mtop->nmoltype; mt++) {
+ cgs = &mtop->moltype[mt].cgs;
+ for (icg=0; (icg < cgs->nr); icg++)
+ {
+ nr_in_cg=max(nr_in_cg,(int)(cgs->index[icg+1]-cgs->index[icg]));
+ }
+ }
+
+ /* Verify whether largest charge group is <= max cg.
+ * This is determined by the type of the local exclusion type
+ * Exclusions are stored in bits. (If the type is not large
+ * enough, enlarge it, unsigned char -> unsigned short -> unsigned long)
+ */
+ maxcg = sizeof(t_excl)*8;
+ if (nr_in_cg > maxcg)
+ {
+ gmx_fatal(FARGS,"Max #atoms in a charge group: %d > %d\n",
+ nr_in_cg,maxcg);
+ }
+
+ ngid = mtop->groups.grps[egcENER].nr;
+ snew(ns->bExcludeAlleg,ngid);
+ for(i=0; i<ngid; i++) {
+ ns->bExcludeAlleg[i] = TRUE;
+ for(j=0; j<ngid; j++)
+ {
+ if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL))
+ {
+ ns->bExcludeAlleg[i] = FALSE;
+ }
+ }
+ }
+
+ if (fr->bGrid) {
+ /* Grid search */
+ ns->grid = init_grid(fplog,fr);
+ init_nsgrid_lists(fr,ngid,ns);
+ }
+ else
+ {
+ /* Simple search */
+ snew(ns->ns_buf,ngid);
+ for(i=0; (i<ngid); i++)
+ {
+ snew(ns->ns_buf[i],SHIFTS);
+ }
+ ncg = ncg_mtop(mtop);
+ snew(ns->simple_aaj,2*ncg);
+ for(jcg=0; (jcg<ncg); jcg++)
+ {
+ ns->simple_aaj[jcg] = jcg;
+ ns->simple_aaj[jcg+ncg] = jcg;
+ }
+ }
+
+ /* Create array that determines whether or not atoms have VdW */
+ snew(ns->bHaveVdW,fr->ntype);
+ for(i=0; (i<fr->ntype); i++)
+ {
+ for(j=0; (j<fr->ntype); j++)
+ {
+ ns->bHaveVdW[i] = (ns->bHaveVdW[i] ||
+ (fr->bBHAM ?
+ ((BHAMA(fr->nbfp,fr->ntype,i,j) != 0) ||
+ (BHAMB(fr->nbfp,fr->ntype,i,j) != 0) ||
+ (BHAMC(fr->nbfp,fr->ntype,i,j) != 0)) :
+ ((C6(fr->nbfp,fr->ntype,i,j) != 0) ||
+ (C12(fr->nbfp,fr->ntype,i,j) != 0))));
+ }
+ }
+ if (debug)
+ pr_bvec(debug,0,"bHaveVdW",ns->bHaveVdW,fr->ntype,TRUE);
+
+ ns->nra_alloc = 0;
+ ns->bexcl = NULL;
+ if (!DOMAINDECOMP(cr))
+ {
+ /* This could be reduced with particle decomposition */
+ ns_realloc_natoms(ns,mtop->natoms);
+ }
+
+ ns->nblist_initialized=FALSE;
+
+ /* nbr list debug dump */
+ {
+ char *ptr=getenv("GMX_DUMP_NL");
+ if (ptr)
+ {
+ ns->dump_nl=strtol(ptr,NULL,10);
+ if (fplog)
+ {
+ fprintf(fplog, "GMX_DUMP_NL = %d", ns->dump_nl);
+ }
+ }
+ else
+ {
+ ns->dump_nl=0;
+ }
+ }
+}
+
+
+int search_neighbours(FILE *log,t_forcerec *fr,
+ rvec x[],matrix box,
+ gmx_localtop_t *top,
+ gmx_groups_t *groups,
+ t_commrec *cr,
+ t_nrnb *nrnb,t_mdatoms *md,
+ real *lambda,real *dvdlambda,
+ gmx_grppairener_t *grppener,
+ gmx_bool bFillGrid,
+ gmx_bool bDoLongRange,
+ gmx_bool bDoForces,rvec *f)
+{
+ t_block *cgs=&(top->cgs);
+ rvec box_size,grid_x0,grid_x1;
+ int i,j,m,ngid;
+ real min_size,grid_dens;
+ int nsearch;
+ gmx_bool bGrid;
+ char *ptr;
+ gmx_bool *i_egp_flags;
+ int cg_start,cg_end,start,end;
+ gmx_ns_t *ns;
+ t_grid *grid;
+ gmx_domdec_zones_t *dd_zones;
+ put_in_list_t *put_in_list;
+
+ ns = &fr->ns;
+
+ /* Set some local variables */
+ bGrid = fr->bGrid;
+ ngid = groups->grps[egcENER].nr;
+
+ for(m=0; (m<DIM); m++)
+ {
+ box_size[m] = box[m][m];
+ }
+
+ if (fr->ePBC != epbcNONE)
+ {
+ if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC,box))
+ {
+ gmx_fatal(FARGS,"One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off.");
+ }
+ if (!bGrid)
+ {
+ min_size = min(box_size[XX],min(box_size[YY],box_size[ZZ]));
+ if (2*fr->rlistlong >= min_size)
+ gmx_fatal(FARGS,"One of the box diagonal elements has become smaller than twice the cut-off length.");
+ }
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ ns_realloc_natoms(ns,cgs->index[cgs->nr]);
+ }
+ debug_gmx();
+
+ /* Reset the neighbourlists */
+ reset_neighbor_list(fr,FALSE,-1,-1);
+
+ if (bGrid && bFillGrid)
+ {
+
+ grid = ns->grid;
+ if (DOMAINDECOMP(cr))
+ {
+ dd_zones = domdec_zones(cr->dd);
+ }
+ else
+ {
+ dd_zones = NULL;
+
++ get_nsgrid_boundaries(grid->nboundeddim,box,NULL,NULL,NULL,NULL,
+ cgs->nr,fr->cg_cm,grid_x0,grid_x1,&grid_dens);
+
+ grid_first(log,grid,NULL,NULL,fr->ePBC,box,grid_x0,grid_x1,
+ fr->rlistlong,grid_dens);
+ }
+ debug_gmx();
+
+ /* Don't know why this all is... (DvdS 3/99) */
+#ifndef SEGV
+ start = 0;
+ end = cgs->nr;
+#else
+ start = fr->cg0;
+ end = (cgs->nr+1)/2;
+#endif
+
+ if (DOMAINDECOMP(cr))
+ {
+ end = cgs->nr;
+ fill_grid(log,dd_zones,grid,end,-1,end,fr->cg_cm);
+ grid->icg0 = 0;
+ grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1;
+ }
+ else
+ {
+ fill_grid(log,NULL,grid,cgs->nr,fr->cg0,fr->hcg,fr->cg_cm);
+ grid->icg0 = fr->cg0;
+ grid->icg1 = fr->hcg;
+ debug_gmx();
+
+ if (PARTDECOMP(cr))
+ mv_grid(cr,grid);
+ debug_gmx();
+ }
+
+ calc_elemnr(log,grid,start,end,cgs->nr);
+ calc_ptrs(grid);
+ grid_last(log,grid,start,end,cgs->nr);
+
+ if (gmx_debug_at)
+ {
+ check_grid(debug,grid);
+ print_grid(debug,grid);
+ }
+ }
+ else if (fr->n_tpi)
+ {
+ /* Set the grid cell index for the test particle only.
+ * The cell to cg index is not corrected, but that does not matter.
+ */
+ fill_grid(log,NULL,ns->grid,fr->hcg,fr->hcg-1,fr->hcg,fr->cg_cm);
+ }
+ debug_gmx();
+
+ if (!fr->ns.bCGlist)
+ {
+ put_in_list = put_in_list_at;
+ }
+ else
+ {
+ put_in_list = put_in_list_cg;
+ }
+
+ /* Do the core! */
+ if (bGrid)
+ {
+ grid = ns->grid;
+ nsearch = nsgrid_core(log,cr,fr,box,box_size,ngid,top,
+ grid,x,ns->bexcl,ns->bExcludeAlleg,
+ nrnb,md,lambda,dvdlambda,grppener,
+ put_in_list,ns->bHaveVdW,
+ bDoLongRange,bDoForces,f,
+ FALSE);
+
+ /* neighbour searching withouth QMMM! QM atoms have zero charge in
+ * the classical calculation. The charge-charge interaction
+ * between QM and MM atoms is handled in the QMMM core calculation
+ * (see QMMM.c). The VDW however, we'd like to compute classically
+ * and the QM MM atom pairs have just been put in the
+ * corresponding neighbourlists. in case of QMMM we still need to
+ * fill a special QMMM neighbourlist that contains all neighbours
+ * of the QM atoms. If bQMMM is true, this list will now be made:
+ */
+ if (fr->bQMMM && fr->qr->QMMMscheme!=eQMMMschemeoniom)
+ {
+ nsearch += nsgrid_core(log,cr,fr,box,box_size,ngid,top,
+ grid,x,ns->bexcl,ns->bExcludeAlleg,
+ nrnb,md,lambda,dvdlambda,grppener,
+ put_in_list_qmmm,ns->bHaveVdW,
+ bDoLongRange,bDoForces,f,
+ TRUE);
+ }
+ }
+ else
+ {
+ nsearch = ns_simple_core(fr,top,md,box,box_size,
+ ns->bexcl,ns->simple_aaj,
+ ngid,ns->ns_buf,put_in_list,ns->bHaveVdW);
+ }
+ debug_gmx();
+
+#ifdef DEBUG
+ pr_nsblock(log);
+#endif
+
+ inc_nrnb(nrnb,eNR_NS,nsearch);
+ /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
+
+ return nsearch;
+}
+
+int natoms_beyond_ns_buffer(t_inputrec *ir,t_forcerec *fr,t_block *cgs,
+ matrix scale_tot,rvec *x)
+{
+ int cg0,cg1,cg,a0,a1,a,i,j;
+ real rint,hbuf2,scale;
+ rvec *cg_cm,cgsc;
+ gmx_bool bIsotropic;
+ int nBeyond;
+
+ nBeyond = 0;
+
+ rint = max(ir->rcoulomb,ir->rvdw);
+ if (ir->rlist < rint)
+ {
+ gmx_fatal(FARGS,"The neighbor search buffer has negative size: %f nm",
+ ir->rlist - rint);
+ }
+ cg_cm = fr->cg_cm;
+
+ cg0 = fr->cg0;
+ cg1 = fr->hcg;
+
+ if (!EI_DYNAMICS(ir->eI) || !DYNAMIC_BOX(*ir))
+ {
+ hbuf2 = sqr(0.5*(ir->rlist - rint));
+ for(cg=cg0; cg<cg1; cg++)
+ {
+ a0 = cgs->index[cg];
+ a1 = cgs->index[cg+1];
+ for(a=a0; a<a1; a++)
+ {
+ if (distance2(cg_cm[cg],x[a]) > hbuf2)
+ {
+ nBeyond++;
+ }
+ }
+ }
+ }
+ else
+ {
+ bIsotropic = TRUE;
+ scale = scale_tot[0][0];
+ for(i=1; i<DIM; i++)
+ {
+ /* With anisotropic scaling, the original spherical ns volumes become
+ * ellipsoids. To avoid costly transformations we use the minimum
+ * eigenvalue of the scaling matrix for determining the buffer size.
+ * Since the lower half is 0, the eigenvalues are the diagonal elements.
+ */
+ scale = min(scale,scale_tot[i][i]);
+ if (scale_tot[i][i] != scale_tot[i-1][i-1])
+ {
+ bIsotropic = FALSE;
+ }
+ for(j=0; j<i; j++)
+ {
+ if (scale_tot[i][j] != 0)
+ {
+ bIsotropic = FALSE;
+ }
+ }
+ }
+ hbuf2 = sqr(0.5*(scale*ir->rlist - rint));
+ if (bIsotropic)
+ {
+ for(cg=cg0; cg<cg1; cg++)
+ {
+ svmul(scale,cg_cm[cg],cgsc);
+ a0 = cgs->index[cg];
+ a1 = cgs->index[cg+1];
+ for(a=a0; a<a1; a++)
+ {
+ if (distance2(cgsc,x[a]) > hbuf2)
+ {
+ nBeyond++;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* Anistropic scaling */
+ for(cg=cg0; cg<cg1; cg++)
+ {
+ /* Since scale_tot contains the transpose of the scaling matrix,
+ * we need to multiply with the transpose.
+ */
+ tmvmul_ur0(scale_tot,cg_cm[cg],cgsc);
+ a0 = cgs->index[cg];
+ a1 = cgs->index[cg+1];
+ for(a=a0; a<a1; a++)
+ {
+ if (distance2(cgsc,x[a]) > hbuf2)
+ {
+ nBeyond++;
+ }
+ }
+ }
+ }
+ }
+
+ return nBeyond;
+}
--- /dev/null
- /*
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2008, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "perf_est.h"
+#include "physics.h"
+#include "vec.h"
+#include "mtop_util.h"
++#include "types/commrec.h"
++#include "nbnxn_search.h"
++#include "nbnxn_consts.h"
++
++
++/* Computational cost of bonded, non-bonded and PME calculations.
++ * This will be machine dependent.
++ * The numbers here are accurate for Intel Core2 and AMD Athlon 64
++ * in single precision. In double precision PME mesh is slightly cheaper,
++ * although not so much that the numbers need to be adjusted.
++ */
++
++/* Cost of a pair interaction in the "group" cut-off scheme" */
++#define C_GR_FQ 1.5
++#define C_GR_QLJ_CUT 1.5
++#define C_GR_QLJ_TAB 2.0
++#define C_GR_LJ_CUT 1.0
++#define C_GR_LJ_TAB 1.75
++/* Cost of 1 water with one Q/LJ atom */
++#define C_GR_QLJW_CUT 2.0
++#define C_GR_QLJW_TAB 2.25
++/* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
++#define C_GR_QW 1.75
++
++/* Cost of a pair interaction in the "Verlet" cut-off scheme" */
++#define C_VT_LJ 0.30
++#define C_VT_QLJ_RF 0.40
++#define C_VT_Q_RF 0.30
++#define C_VT_QLJ_TAB 0.55
++#define C_VT_Q_TAB 0.50
++
++/* Cost of PME, with all components running with SSE instructions */
++/* Cost of particle reordering and redistribution */
++#define C_PME_REDIST 12.0
++/* Cost of q spreading and force interpolation per charge (mainly memory) */
++#define C_PME_SPREAD 0.30
++/* Cost of fft's, will be multiplied with N log(N) */
++#define C_PME_FFT 0.20
++/* Cost of pme_solve, will be multiplied with N */
++#define C_PME_SOLVE 0.50
++
++/* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
++#define C_BOND 5.0
+
+int n_bonded_dx(gmx_mtop_t *mtop,gmx_bool bExcl)
+{
+ int mb,nmol,ftype,ndxb,ndx_excl;
+ int ndx;
+ gmx_moltype_t *molt;
+
+ /* Count the number of pbc_rvec_sub calls required for bonded interactions.
+ * This number is also roughly proportional to the computational cost.
+ */
+ ndx = 0;
+ ndx_excl = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ molt = &mtop->moltype[mtop->molblock[mb].type];
+ nmol = mtop->molblock[mb].nmol;
+ for(ftype=0; ftype<F_NRE; ftype++) {
+ if (interaction_function[ftype].flags & IF_BOND) {
+ switch (ftype) {
+ case F_POSRES:
+ case F_FBPOSRES: ndxb = 1; break;
+ case F_CONNBONDS: ndxb = 0; break;
+ default: ndxb = NRAL(ftype) - 1; break;
+ }
+ ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype));
+ }
+ }
+ if (bExcl) {
+ ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
+ } else {
+ ndx_excl = 0;
+ }
+ }
+
+ if (debug)
+ fprintf(debug,"ndx bonded %d exclusions %d\n",ndx,ndx_excl);
+
+ ndx += ndx_excl;
+
+ return ndx;
+}
+
++static void pp_group_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
++ int *nq_tot,
++ double *cost_pp,
++ gmx_bool *bChargePerturbed)
++{
++ t_atom *atom;
++ int mb,nmol,atnr,cg,a,a0,ncqlj,ncq,nclj;
++ gmx_bool bBHAM,bLJcut,bWater,bQ,bLJ;
++ int nw,nqlj,nq,nlj;
++ float fq,fqlj,flj,fljtab,fqljw,fqw;
++ t_iparams *iparams;
++ gmx_moltype_t *molt;
++
++ bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
++
++ bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
++
++ /* Computational cost of bonded, non-bonded and PME calculations.
++ * This will be machine dependent.
++ * The numbers here are accurate for Intel Core2 and AMD Athlon 64
++ * in single precision. In double precision PME mesh is slightly cheaper,
++ * although not so much that the numbers need to be adjusted.
++ */
++ fq = C_GR_FQ;
++ fqlj = (bLJcut ? C_GR_QLJ_CUT : C_GR_QLJ_TAB);
++ flj = (bLJcut ? C_GR_LJ_CUT : C_GR_LJ_TAB);
++ /* Cost of 1 water with one Q/LJ atom */
++ fqljw = (bLJcut ? C_GR_QLJW_CUT : C_GR_QLJW_TAB);
++ /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
++ fqw = C_GR_QW;
++
++ iparams = mtop->ffparams.iparams;
++ atnr = mtop->ffparams.atnr;
++ nw = 0;
++ nqlj = 0;
++ nq = 0;
++ nlj = 0;
++ *bChargePerturbed = FALSE;
++ for(mb=0; mb<mtop->nmolblock; mb++)
++ {
++ molt = &mtop->moltype[mtop->molblock[mb].type];
++ atom = molt->atoms.atom;
++ nmol = mtop->molblock[mb].nmol;
++ a = 0;
++ for(cg=0; cg<molt->cgs.nr; cg++)
++ {
++ bWater = !bBHAM;
++ ncqlj = 0;
++ ncq = 0;
++ nclj = 0;
++ a0 = a;
++ while (a < molt->cgs.index[cg+1])
++ {
++ bQ = (atom[a].q != 0 || atom[a].qB != 0);
++ bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
++ iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
++ if (atom[a].q != atom[a].qB)
++ {
++ *bChargePerturbed = TRUE;
++ }
++ /* This if this atom fits into water optimization */
++ if (!((a == a0 && bQ && bLJ) ||
++ (a == a0+1 && bQ && !bLJ) ||
++ (a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) ||
++ (a == a0+3 && !bQ && bLJ)))
++ bWater = FALSE;
++ if (bQ && bLJ)
++ {
++ ncqlj++;
++ }
++ else
++ {
++ if (bQ)
++ {
++ ncq++;
++ }
++ if (bLJ)
++ {
++ nclj++;
++ }
++ }
++ a++;
++ }
++ if (bWater)
++ {
++ nw += nmol;
++ }
++ else
++ {
++ nqlj += nmol*ncqlj;
++ nq += nmol*ncq;
++ nlj += nmol*nclj;
++ }
++ }
++ }
++
++ *nq_tot = nq + nqlj + nw*3;
++
++ if (debug)
++ {
++ fprintf(debug,"nw %d nqlj %d nq %d nlj %d\n",nw,nqlj,nq,nlj);
++ }
++
++ /* For the PP non-bonded cost it is (unrealistically) assumed
++ * that all atoms are distributed homogeneously in space.
++ * Factor 3 is used because a water molecule has 3 atoms
++ * (and TIP4P effectively has 3 interactions with (water) atoms)).
++ */
++ *cost_pp = 0.5*(fqljw*nw*nqlj +
++ fqw *nw*(3*nw + nq) +
++ fqlj *nqlj*nqlj +
++ fq *nq*(3*nw + nqlj + nq) +
++ flj *nlj*(nw + nqlj + nlj))
++ *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
++}
++
++static void pp_verlet_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
++ int *nq_tot,
++ double *cost_pp,
++ gmx_bool *bChargePerturbed)
++{
++ t_atom *atom;
++ int mb,nmol,atnr,cg,a,a0,nqlj,nq,nlj;
++ gmx_bool bQRF;
++ t_iparams *iparams;
++ gmx_moltype_t *molt;
++ float r_eff;
++ double nat;
++
++ bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
++
++ iparams = mtop->ffparams.iparams;
++ atnr = mtop->ffparams.atnr;
++ nqlj = 0;
++ nq = 0;
++ *bChargePerturbed = FALSE;
++ for(mb=0; mb<mtop->nmolblock; mb++)
++ {
++ molt = &mtop->moltype[mtop->molblock[mb].type];
++ atom = molt->atoms.atom;
++ nmol = mtop->molblock[mb].nmol;
++ a = 0;
++ for(a=0; a<molt->atoms.nr; a++)
++ {
++ if (atom[a].q != 0 || atom[a].qB != 0)
++ {
++ if (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
++ iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
++ {
++ nqlj += nmol;
++ }
++ else
++ {
++ nq += nmol;
++ }
++ }
++ if (atom[a].q != atom[a].qB)
++ {
++ *bChargePerturbed = TRUE;
++ }
++ }
++ }
++
++ nlj = mtop->natoms - nqlj - nq;
++
++ *nq_tot = nqlj + nq;
++
++ /* Effective cut-off for cluster pair list of 4x4 atoms */
++ r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE,mtop->natoms/det(box));
++
++ if (debug)
++ {
++ fprintf(debug,"nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
++ nqlj,nq,nlj,ir->rlist,r_eff);
++ }
++
++ /* For the PP non-bonded cost it is (unrealistically) assumed
++ * that all atoms are distributed homogeneously in space.
++ */
++ /* Convert mtop->natoms to double to avoid int overflow */
++ nat = mtop->natoms;
++ *cost_pp = 0.5*(nqlj*nat*(bQRF ? C_VT_QLJ_RF : C_VT_QLJ_TAB) +
++ nq*nat*(bQRF ? C_VT_Q_RF : C_VT_Q_TAB) +
++ nlj*nat*C_VT_LJ)
++ *4/3*M_PI*r_eff*r_eff*r_eff/det(box);
++}
++
+float pme_load_estimate(gmx_mtop_t *mtop,t_inputrec *ir,matrix box)
+{
+ t_atom *atom;
- int mb,nmol,atnr,cg,a,a0,ncqlj,ncq,nclj;
++ int mb,nmol,atnr,cg,a,a0,nq_tot;
+ gmx_bool bBHAM,bLJcut,bChargePerturbed,bWater,bQ,bLJ;
- double nw,nqlj,nq,nlj;
- double cost_bond,cost_pp,cost_spread,cost_fft,cost_solve,cost_pme;
- float fq,fqlj,flj,fljtab,fqljw,fqw,fqspread,ffft,fsolve,fbond;
++ double cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve,cost_pme;
+ float ratio;
+ t_iparams *iparams;
+ gmx_moltype_t *molt;
+
- bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
-
- bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
-
+ /* Computational cost of bonded, non-bonded and PME calculations.
+ * This will be machine dependent.
+ * The numbers here are accurate for Intel Core2 and AMD Athlon 64
+ * in single precision. In double precision PME mesh is slightly cheaper,
+ * although not so much that the numbers need to be adjusted.
+ */
- fq = 1.5;
- fqlj = (bLJcut ? 1.5 : 2.0 );
- flj = (bLJcut ? 1.0 : 1.75);
- /* Cost of 1 water with one Q/LJ atom */
- fqljw = (bLJcut ? 2.0 : 2.25);
- /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
- fqw = 1.75;
- /* Cost of q spreading and force interpolation per charge (mainly memory) */
- fqspread = 0.55;
- /* Cost of fft's, will be multiplied with N log(N) */
- ffft = 0.20;
- /* Cost of pme_solve, will be multiplied with N */
- fsolve = 0.80;
- /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
- fbond = 5.0;
+
+ iparams = mtop->ffparams.iparams;
+ atnr = mtop->ffparams.atnr;
- nw = 0;
- nqlj = 0;
- nq = 0;
- nlj = 0;
- bChargePerturbed = FALSE;
- for(mb=0; mb<mtop->nmolblock; mb++) {
- molt = &mtop->moltype[mtop->molblock[mb].type];
- atom = molt->atoms.atom;
- nmol = mtop->molblock[mb].nmol;
- a = 0;
- for(cg=0; cg<molt->cgs.nr; cg++) {
- bWater = !bBHAM;
- ncqlj = 0;
- ncq = 0;
- nclj = 0;
- a0 = a;
- while (a < molt->cgs.index[cg+1]) {
- bQ = (atom[a].q != 0 || atom[a].qB != 0);
- bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
- iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
- if (atom[a].q != atom[a].qB) {
- bChargePerturbed = TRUE;
- }
- /* This if this atom fits into water optimization */
- if (!((a == a0 && bQ && bLJ) ||
- (a == a0+1 && bQ && !bLJ) ||
- (a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) ||
- (a == a0+3 && !bQ && bLJ)))
- bWater = FALSE;
- if (bQ && bLJ) {
- ncqlj++;
- } else {
- if (bQ)
- ncq++;
- if (bLJ)
- nclj++;
- }
- a++;
- }
- if (bWater) {
- nw += nmol;
- } else {
- nqlj += nmol*ncqlj;
- nq += nmol*ncq;
- nlj += nmol*nclj;
- }
- }
- }
- if (debug)
- fprintf(debug,"nw %g nqlj %g nq %g nlj %g\n",nw,nqlj,nq,nlj);
+
- cost_bond = fbond*n_bonded_dx(mtop,TRUE);
++ cost_bond = C_BOND*n_bonded_dx(mtop,TRUE);
+
- /* For the PP non-bonded cost it is (unrealistically) assumed
- * that all atoms are distributed homogeneously in space.
- */
- cost_pp = 0.5*(fqljw*nw*nqlj +
- fqw *nw*(3*nw + nq) +
- fqlj *nqlj*nqlj +
- fq *nq*(3*nw + nqlj + nq) +
- flj *nlj*(nw + nqlj + nlj))
- *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
++ if (ir->cutoff_scheme == ecutsGROUP)
++ {
++ pp_group_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
++ }
++ else
++ {
++ pp_verlet_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
++ }
+
- cost_spread = fqspread*(3*nw + nqlj + nq)*pow(ir->pme_order,3);
- cost_fft = ffft*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
- cost_solve = fsolve*ir->nkx*ir->nky*ir->nkz;
++ cost_redist = C_PME_REDIST*nq_tot;
++ cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order,3);
++ cost_fft = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
++ cost_solve = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
+
+ if (ir->efep != efepNO && bChargePerturbed) {
- /* All PME work, except the spline coefficient calculation, doubles */
++ /* All PME work, except redist & spline coefficient calculation, doubles */
+ cost_spread *= 2;
+ cost_fft *= 2;
+ cost_solve *= 2;
+ }
+
- cost_pme = cost_spread + cost_fft + cost_solve;
++ cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
+
+ ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
+
+ if (debug) {
+ fprintf(debug,
- "cost_bond %f\n"
- "cost_pp %f\n"
- "cost_spread %f\n"
- "cost_fft %f\n"
- "cost_solve %f\n",
- cost_bond,cost_pp,cost_spread,cost_fft,cost_solve);
++ "cost_bond %f\n"
++ "cost_pp %f\n"
++ "cost_redist %f\n"
++ "cost_spread %f\n"
++ "cost_fft %f\n"
++ "cost_solve %f\n",
++ cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve);
+
+ fprintf(debug,"Estimate for relative PME load: %.3f\n",ratio);
+ }
+
+ return ratio;
+}
--- /dev/null
- #include "gmx_x86_sse2.h"
- #include "gmx_math_x86_sse2_single.h"
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+/* IMPORTANT FOR DEVELOPERS:
+ *
+ * Triclinic pme stuff isn't entirely trivial, and we've experienced
+ * some bugs during development (many of them due to me). To avoid
+ * this in the future, please check the following things if you make
+ * changes in this file:
+ *
+ * 1. You should obtain identical (at least to the PME precision)
+ * energies, forces, and virial for
+ * a rectangular box and a triclinic one where the z (or y) axis is
+ * tilted a whole box side. For instance you could use these boxes:
+ *
+ * rectangular triclinic
+ * 2 0 0 2 0 0
+ * 0 2 0 0 2 0
+ * 0 0 6 2 2 6
+ *
+ * 2. You should check the energy conservation in a triclinic box.
+ *
+ * It might seem an overkill, but better safe than sorry.
+ * /Erik 001109
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#include "typedefs.h"
+#include "txtdump.h"
+#include "vec.h"
+#include "gmxcomplex.h"
+#include "smalloc.h"
+#include "futil.h"
+#include "coulomb.h"
+#include "gmx_fatal.h"
+#include "pme.h"
+#include "network.h"
+#include "physics.h"
+#include "nrnb.h"
+#include "copyrite.h"
+#include "gmx_wallcycle.h"
+#include "gmx_parallel_3dfft.h"
+#include "pdbio.h"
+#include "gmx_cyclecounter.h"
+#include "macros.h"
+
+/* Single precision, with SSE2 or higher available */
+#if defined(GMX_X86_SSE2) && !defined(GMX_DOUBLE)
+
- ivec ci; /* The spatial location of this grid */
- ivec n; /* The size of *grid, including order-1 */
- ivec offset; /* The grid offset from the full node grid */
- int order; /* PME spreading order */
- real *grid; /* The grid local thread, size n */
++#include "gmx_x86_simd_single.h"
+
+#define PME_SSE
+/* Some old AMD processors could have problems with unaligned loads+stores */
+#ifndef GMX_FAHCORE
+#define PME_SSE_UNALIGNED
+#endif
+#endif
+
+#define DFT_TOL 1e-7
+/* #define PRT_FORCE */
+/* conditions for on the fly time-measurement */
+/* #define TAKETIME (step > 1 && timesteps < 10) */
+#define TAKETIME FALSE
+
+/* #define PME_TIME_THREADS */
+
+#ifdef GMX_DOUBLE
+#define mpi_type MPI_DOUBLE
+#else
+#define mpi_type MPI_FLOAT
+#endif
+
+/* GMX_CACHE_SEP should be a multiple of 16 to preserve alignment */
+#define GMX_CACHE_SEP 64
+
+/* We only define a maximum to be able to use local arrays without allocation.
+ * An order larger than 12 should never be needed, even for test cases.
+ * If needed it can be changed here.
+ */
+#define PME_ORDER_MAX 12
+
+/* Internal datastructures */
+typedef struct {
+ int send_index0;
+ int send_nindex;
+ int recv_index0;
+ int recv_nindex;
++ int recv_size; /* Receive buffer width, used with OpenMP */
+} pme_grid_comm_t;
+
+typedef struct {
+#ifdef GMX_MPI
+ MPI_Comm mpi_comm;
+#endif
+ int nnodes,nodeid;
+ int *s2g0;
+ int *s2g1;
+ int noverlap_nodes;
+ int *send_id,*recv_id;
++ int send_size; /* Send buffer width, used with OpenMP */
+ pme_grid_comm_t *comm_data;
+ real *sendbuf;
+ real *recvbuf;
+} pme_overlap_t;
+
+typedef struct {
+ int *n; /* Cumulative counts of the number of particles per thread */
+ int nalloc; /* Allocation size of i */
+ int *i; /* Particle indices ordered on thread index (n) */
+} thread_plist_t;
+
+typedef struct {
++ int *thread_one;
+ int n;
+ int *ind;
+ splinevec theta;
++ real *ptr_theta_z;
+ splinevec dtheta;
++ real *ptr_dtheta_z;
+} splinedata_t;
+
+typedef struct {
+ int dimind; /* The index of the dimension, 0=x, 1=y */
+ int nslab;
+ int nodeid;
+#ifdef GMX_MPI
+ MPI_Comm mpi_comm;
+#endif
+
+ int *node_dest; /* The nodes to send x and q to with DD */
+ int *node_src; /* The nodes to receive x and q from with DD */
+ int *buf_index; /* Index for commnode into the buffers */
+
+ int maxshift;
+
+ int npd;
+ int pd_nalloc;
+ int *pd;
+ int *count; /* The number of atoms to send to each node */
+ int **count_thread;
+ int *rcount; /* The number of atoms to receive */
+
+ int n;
+ int nalloc;
+ rvec *x;
+ real *q;
+ rvec *f;
+ gmx_bool bSpread; /* These coordinates are used for spreading */
+ int pme_order;
+ ivec *idx;
+ rvec *fractx; /* Fractional coordinate relative to the
+ * lower cell boundary
+ */
+ int nthread;
+ int *thread_idx; /* Which thread should spread which charge */
+ thread_plist_t *thread_plist;
+ splinedata_t *spline;
+} pme_atomcomm_t;
+
+#define FLBS 3
+#define FLBSZ 4
+
+typedef struct {
- for(d=0;d<DIM;d++)
- {
- srenew(spline->theta[d] ,atc->pme_order*atc->nalloc);
- srenew(spline->dtheta[d],atc->pme_order*atc->nalloc);
- }
++ ivec ci; /* The spatial location of this grid */
++ ivec n; /* The used size of *grid, including order-1 */
++ ivec offset; /* The grid offset from the full node grid */
++ int order; /* PME spreading order */
++ ivec s; /* The allocated size of *grid, s >= n */
++ real *grid; /* The grid local thread, size n */
+} pmegrid_t;
+
+typedef struct {
+ pmegrid_t grid; /* The full node grid (non thread-local) */
+ int nthread; /* The number of threads operating on this grid */
+ ivec nc; /* The local spatial decomposition over the threads */
+ pmegrid_t *grid_th; /* Array of grids for each thread */
++ real *grid_all; /* Allocated array for the grids in *grid_th */
+ int **g2t; /* The grid to thread index */
+ ivec nthread_comm; /* The number of threads to communicate with */
+} pmegrids_t;
+
+
+typedef struct {
+#ifdef PME_SSE
+ /* Masks for SSE aligned spreading and gathering */
+ __m128 mask_SSE0[6],mask_SSE1[6];
+#else
+ int dummy; /* C89 requires that struct has at least one member */
+#endif
+} pme_spline_work_t;
+
+typedef struct {
+ /* work data for solve_pme */
+ int nalloc;
+ real * mhx;
+ real * mhy;
+ real * mhz;
+ real * m2;
+ real * denom;
+ real * tmp1_alloc;
+ real * tmp1;
+ real * eterm;
+ real * m2inv;
+
+ real energy;
+ matrix vir;
+} pme_work_t;
+
+typedef struct gmx_pme {
+ int ndecompdim; /* The number of decomposition dimensions */
+ int nodeid; /* Our nodeid in mpi->mpi_comm */
+ int nodeid_major;
+ int nodeid_minor;
+ int nnodes; /* The number of nodes doing PME */
+ int nnodes_major;
+ int nnodes_minor;
+
+ MPI_Comm mpi_comm;
+ MPI_Comm mpi_comm_d[2]; /* Indexed on dimension, 0=x, 1=y */
+#ifdef GMX_MPI
+ MPI_Datatype rvec_mpi; /* the pme vector's MPI type */
+#endif
+
+ int nthread; /* The number of threads doing PME */
+
+ gmx_bool bPPnode; /* Node also does particle-particle forces */
+ gmx_bool bFEP; /* Compute Free energy contribution */
+ int nkx,nky,nkz; /* Grid dimensions */
+ gmx_bool bP3M; /* Do P3M: optimize the influence function */
+ int pme_order;
+ real epsilon_r;
+
+ pmegrids_t pmegridA; /* Grids on which we do spreading/interpolation, includes overlap */
+ pmegrids_t pmegridB;
+ /* The PME charge spreading grid sizes/strides, includes pme_order-1 */
+ int pmegrid_nx,pmegrid_ny,pmegrid_nz;
+ /* pmegrid_nz might be larger than strictly necessary to ensure
+ * memory alignment, pmegrid_nz_base gives the real base size.
+ */
+ int pmegrid_nz_base;
+ /* The local PME grid starting indices */
+ int pmegrid_start_ix,pmegrid_start_iy,pmegrid_start_iz;
+
+ /* Work data for spreading and gathering */
+ pme_spline_work_t *spline_work;
+
+ real *fftgridA; /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
+ real *fftgridB; /* inside the interpolation grid, but separate for 2D PME decomp. */
+ int fftgrid_nx,fftgrid_ny,fftgrid_nz;
+
+ t_complex *cfftgridA; /* Grids for complex FFT data */
+ t_complex *cfftgridB;
+ int cfftgrid_nx,cfftgrid_ny,cfftgrid_nz;
+
+ gmx_parallel_3dfft_t pfft_setupA;
+ gmx_parallel_3dfft_t pfft_setupB;
+
+ int *nnx,*nny,*nnz;
+ real *fshx,*fshy,*fshz;
+
+ pme_atomcomm_t atc[2]; /* Indexed on decomposition index */
+ matrix recipbox;
+ splinevec bsp_mod;
+
+ pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
+
+ pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
+
+ rvec *bufv; /* Communication buffer */
+ real *bufr; /* Communication buffer */
+ int buf_nalloc; /* The communication buffer size */
+
+ /* thread local work data for solve_pme */
+ pme_work_t *work;
+
+ /* Work data for PME_redist */
+ gmx_bool redist_init;
+ int * scounts;
+ int * rcounts;
+ int * sdispls;
+ int * rdispls;
+ int * sidx;
+ int * idxa;
+ real * redist_buf;
+ int redist_buf_nalloc;
+
+ /* Work data for sum_qgrid */
+ real * sum_qgrid_tmp;
+ real * sum_qgrid_dd_tmp;
+} t_gmx_pme;
+
+
+static void calc_interpolation_idx(gmx_pme_t pme,pme_atomcomm_t *atc,
+ int start,int end,int thread)
+{
+ int i;
+ int *idxptr,tix,tiy,tiz;
+ real *xptr,*fptr,tx,ty,tz;
+ real rxx,ryx,ryy,rzx,rzy,rzz;
+ int nx,ny,nz;
+ int start_ix,start_iy,start_iz;
+ int *g2tx,*g2ty,*g2tz;
+ gmx_bool bThreads;
+ int *thread_idx=NULL;
+ thread_plist_t *tpl=NULL;
+ int *tpl_n=NULL;
+ int thread_i;
+
+ nx = pme->nkx;
+ ny = pme->nky;
+ nz = pme->nkz;
+
+ start_ix = pme->pmegrid_start_ix;
+ start_iy = pme->pmegrid_start_iy;
+ start_iz = pme->pmegrid_start_iz;
+
+ rxx = pme->recipbox[XX][XX];
+ ryx = pme->recipbox[YY][XX];
+ ryy = pme->recipbox[YY][YY];
+ rzx = pme->recipbox[ZZ][XX];
+ rzy = pme->recipbox[ZZ][YY];
+ rzz = pme->recipbox[ZZ][ZZ];
+
+ g2tx = pme->pmegridA.g2t[XX];
+ g2ty = pme->pmegridA.g2t[YY];
+ g2tz = pme->pmegridA.g2t[ZZ];
+
+ bThreads = (atc->nthread > 1);
+ if (bThreads)
+ {
+ thread_idx = atc->thread_idx;
+
+ tpl = &atc->thread_plist[thread];
+ tpl_n = tpl->n;
+ for(i=0; i<atc->nthread; i++)
+ {
+ tpl_n[i] = 0;
+ }
+ }
+
+ for(i=start; i<end; i++) {
+ xptr = atc->x[i];
+ idxptr = atc->idx[i];
+ fptr = atc->fractx[i];
+
+ /* Fractional coordinates along box vectors, add 2.0 to make 100% sure we are positive for triclinic boxes */
+ tx = nx * ( xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx + 2.0 );
+ ty = ny * ( xptr[YY] * ryy + xptr[ZZ] * rzy + 2.0 );
+ tz = nz * ( xptr[ZZ] * rzz + 2.0 );
+
+ tix = (int)(tx);
+ tiy = (int)(ty);
+ tiz = (int)(tz);
+
+ /* Because decomposition only occurs in x and y,
+ * we never have a fraction correction in z.
+ */
+ fptr[XX] = tx - tix + pme->fshx[tix];
+ fptr[YY] = ty - tiy + pme->fshy[tiy];
+ fptr[ZZ] = tz - tiz;
+
+ idxptr[XX] = pme->nnx[tix];
+ idxptr[YY] = pme->nny[tiy];
+ idxptr[ZZ] = pme->nnz[tiz];
+
+#ifdef DEBUG
+ range_check(idxptr[XX],0,pme->pmegrid_nx);
+ range_check(idxptr[YY],0,pme->pmegrid_ny);
+ range_check(idxptr[ZZ],0,pme->pmegrid_nz);
+#endif
+
+ if (bThreads)
+ {
+ thread_i = g2tx[idxptr[XX]] + g2ty[idxptr[YY]] + g2tz[idxptr[ZZ]];
+ thread_idx[i] = thread_i;
+ tpl_n[thread_i]++;
+ }
+ }
+
+ if (bThreads)
+ {
+ /* Make a list of particle indices sorted on thread */
+
+ /* Get the cumulative count */
+ for(i=1; i<atc->nthread; i++)
+ {
+ tpl_n[i] += tpl_n[i-1];
+ }
+ /* The current implementation distributes particles equally
+ * over the threads, so we could actually allocate for that
+ * in pme_realloc_atomcomm_things.
+ */
+ if (tpl_n[atc->nthread-1] > tpl->nalloc)
+ {
+ tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
+ srenew(tpl->i,tpl->nalloc);
+ }
+ /* Set tpl_n to the cumulative start */
+ for(i=atc->nthread-1; i>=1; i--)
+ {
+ tpl_n[i] = tpl_n[i-1];
+ }
+ tpl_n[0] = 0;
+
+ /* Fill our thread local array with indices sorted on thread */
+ for(i=start; i<end; i++)
+ {
+ tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
+ }
+ /* Now tpl_n contains the cummulative count again */
+ }
+}
+
+static void make_thread_local_ind(pme_atomcomm_t *atc,
+ int thread,splinedata_t *spline)
+{
+ int n,t,i,start,end;
+ thread_plist_t *tpl;
+
+ /* Combine the indices made by each thread into one index */
+
+ n = 0;
+ start = 0;
+ for(t=0; t<atc->nthread; t++)
+ {
+ tpl = &atc->thread_plist[t];
+ /* Copy our part (start - end) from the list of thread t */
+ if (thread > 0)
+ {
+ start = tpl->n[thread-1];
+ }
+ end = tpl->n[thread];
+ for(i=start; i<end; i++)
+ {
+ spline->ind[n++] = tpl->i[i];
+ }
+ }
+
+ spline->n = n;
+}
+
+
+static void pme_calc_pidx(int start, int end,
+ matrix recipbox, rvec x[],
+ pme_atomcomm_t *atc, int *count)
+{
+ int nslab,i;
+ int si;
+ real *xptr,s;
+ real rxx,ryx,rzx,ryy,rzy;
+ int *pd;
+
+ /* Calculate PME task index (pidx) for each grid index.
+ * Here we always assign equally sized slabs to each node
+ * for load balancing reasons (the PME grid spacing is not used).
+ */
+
+ nslab = atc->nslab;
+ pd = atc->pd;
+
+ /* Reset the count */
+ for(i=0; i<nslab; i++)
+ {
+ count[i] = 0;
+ }
+
+ if (atc->dimind == 0)
+ {
+ rxx = recipbox[XX][XX];
+ ryx = recipbox[YY][XX];
+ rzx = recipbox[ZZ][XX];
+ /* Calculate the node index in x-dimension */
+ for(i=start; i<end; i++)
+ {
+ xptr = x[i];
+ /* Fractional coordinates along box vectors */
+ s = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
+ si = (int)(s + 2*nslab) % nslab;
+ pd[i] = si;
+ count[si]++;
+ }
+ }
+ else
+ {
+ ryy = recipbox[YY][YY];
+ rzy = recipbox[ZZ][YY];
+ /* Calculate the node index in y-dimension */
+ for(i=start; i<end; i++)
+ {
+ xptr = x[i];
+ /* Fractional coordinates along box vectors */
+ s = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
+ si = (int)(s + 2*nslab) % nslab;
+ pd[i] = si;
+ count[si]++;
+ }
+ }
+}
+
+static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
+ pme_atomcomm_t *atc)
+{
+ int nthread,thread,slab;
+
+ nthread = atc->nthread;
+
+#pragma omp parallel for num_threads(nthread) schedule(static)
+ for(thread=0; thread<nthread; thread++)
+ {
+ pme_calc_pidx(natoms* thread /nthread,
+ natoms*(thread+1)/nthread,
+ recipbox,x,atc,atc->count_thread[thread]);
+ }
+ /* Non-parallel reduction, since nslab is small */
+
+ for(thread=1; thread<nthread; thread++)
+ {
+ for(slab=0; slab<atc->nslab; slab++)
+ {
+ atc->count_thread[0][slab] += atc->count_thread[thread][slab];
+ }
+ }
+}
+
++static void realloc_splinevec(splinevec th,real **ptr_z,int nalloc)
++{
++ const int padding=4;
++ int i;
++
++ srenew(th[XX],nalloc);
++ srenew(th[YY],nalloc);
++ /* In z we add padding, this is only required for the aligned SSE code */
++ srenew(*ptr_z,nalloc+2*padding);
++ th[ZZ] = *ptr_z + padding;
++
++ for(i=0; i<padding; i++)
++ {
++ (*ptr_z)[ i] = 0;
++ (*ptr_z)[padding+nalloc+i] = 0;
++ }
++}
++
+static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
+{
+ int i,d;
+
+ srenew(spline->ind,atc->nalloc);
+ /* Initialize the index to identity so it works without threads */
+ for(i=0; i<atc->nalloc; i++)
+ {
+ spline->ind[i] = i;
+ }
+
- pnx = pmegrid->n[XX];
- pny = pmegrid->n[YY];
- pnz = pmegrid->n[ZZ];
++ realloc_splinevec(spline->theta,&spline->ptr_theta_z,
++ atc->pme_order*atc->nalloc);
++ realloc_splinevec(spline->dtheta,&spline->ptr_dtheta_z,
++ atc->pme_order*atc->nalloc);
+}
+
+static void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
+{
+ int nalloc_old,i,j,nalloc_tpl;
+
+ /* We have to avoid a NULL pointer for atc->x to avoid
+ * possible fatal errors in MPI routines.
+ */
+ if (atc->n > atc->nalloc || atc->nalloc == 0)
+ {
+ nalloc_old = atc->nalloc;
+ atc->nalloc = over_alloc_dd(max(atc->n,1));
+
+ if (atc->nslab > 1) {
+ srenew(atc->x,atc->nalloc);
+ srenew(atc->q,atc->nalloc);
+ srenew(atc->f,atc->nalloc);
+ for(i=nalloc_old; i<atc->nalloc; i++)
+ {
+ clear_rvec(atc->f[i]);
+ }
+ }
+ if (atc->bSpread) {
+ srenew(atc->fractx,atc->nalloc);
+ srenew(atc->idx ,atc->nalloc);
+
+ if (atc->nthread > 1)
+ {
+ srenew(atc->thread_idx,atc->nalloc);
+ }
+
+ for(i=0; i<atc->nthread; i++)
+ {
+ pme_realloc_splinedata(&atc->spline[i],atc);
+ }
+ }
+ }
+}
+
+static void pmeredist_pd(gmx_pme_t pme, gmx_bool forw,
+ int n, gmx_bool bXF, rvec *x_f, real *charge,
+ pme_atomcomm_t *atc)
+/* Redistribute particle data for PME calculation */
+/* domain decomposition by x coordinate */
+{
+ int *idxa;
+ int i, ii;
+
+ if(FALSE == pme->redist_init) {
+ snew(pme->scounts,atc->nslab);
+ snew(pme->rcounts,atc->nslab);
+ snew(pme->sdispls,atc->nslab);
+ snew(pme->rdispls,atc->nslab);
+ snew(pme->sidx,atc->nslab);
+ pme->redist_init = TRUE;
+ }
+ if (n > pme->redist_buf_nalloc) {
+ pme->redist_buf_nalloc = over_alloc_dd(n);
+ srenew(pme->redist_buf,pme->redist_buf_nalloc*DIM);
+ }
+
+ pme->idxa = atc->pd;
+
+#ifdef GMX_MPI
+ if (forw && bXF) {
+ /* forward, redistribution from pp to pme */
+
+ /* Calculate send counts and exchange them with other nodes */
+ for(i=0; (i<atc->nslab); i++) pme->scounts[i]=0;
+ for(i=0; (i<n); i++) pme->scounts[pme->idxa[i]]++;
+ MPI_Alltoall( pme->scounts, 1, MPI_INT, pme->rcounts, 1, MPI_INT, atc->mpi_comm);
+
+ /* Calculate send and receive displacements and index into send
+ buffer */
+ pme->sdispls[0]=0;
+ pme->rdispls[0]=0;
+ pme->sidx[0]=0;
+ for(i=1; i<atc->nslab; i++) {
+ pme->sdispls[i]=pme->sdispls[i-1]+pme->scounts[i-1];
+ pme->rdispls[i]=pme->rdispls[i-1]+pme->rcounts[i-1];
+ pme->sidx[i]=pme->sdispls[i];
+ }
+ /* Total # of particles to be received */
+ atc->n = pme->rdispls[atc->nslab-1] + pme->rcounts[atc->nslab-1];
+
+ pme_realloc_atomcomm_things(atc);
+
+ /* Copy particle coordinates into send buffer and exchange*/
+ for(i=0; (i<n); i++) {
+ ii=DIM*pme->sidx[pme->idxa[i]];
+ pme->sidx[pme->idxa[i]]++;
+ pme->redist_buf[ii+XX]=x_f[i][XX];
+ pme->redist_buf[ii+YY]=x_f[i][YY];
+ pme->redist_buf[ii+ZZ]=x_f[i][ZZ];
+ }
+ MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls,
+ pme->rvec_mpi, atc->x, pme->rcounts, pme->rdispls,
+ pme->rvec_mpi, atc->mpi_comm);
+ }
+ if (forw) {
+ /* Copy charge into send buffer and exchange*/
+ for(i=0; i<atc->nslab; i++) pme->sidx[i]=pme->sdispls[i];
+ for(i=0; (i<n); i++) {
+ ii=pme->sidx[pme->idxa[i]];
+ pme->sidx[pme->idxa[i]]++;
+ pme->redist_buf[ii]=charge[i];
+ }
+ MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls, mpi_type,
+ atc->q, pme->rcounts, pme->rdispls, mpi_type,
+ atc->mpi_comm);
+ }
+ else { /* backward, redistribution from pme to pp */
+ MPI_Alltoallv(atc->f, pme->rcounts, pme->rdispls, pme->rvec_mpi,
+ pme->redist_buf, pme->scounts, pme->sdispls,
+ pme->rvec_mpi, atc->mpi_comm);
+
+ /* Copy data from receive buffer */
+ for(i=0; i<atc->nslab; i++)
+ pme->sidx[i] = pme->sdispls[i];
+ for(i=0; (i<n); i++) {
+ ii = DIM*pme->sidx[pme->idxa[i]];
+ x_f[i][XX] += pme->redist_buf[ii+XX];
+ x_f[i][YY] += pme->redist_buf[ii+YY];
+ x_f[i][ZZ] += pme->redist_buf[ii+ZZ];
+ pme->sidx[pme->idxa[i]]++;
+ }
+ }
+#endif
+}
+
+static void pme_dd_sendrecv(pme_atomcomm_t *atc,
+ gmx_bool bBackward,int shift,
+ void *buf_s,int nbyte_s,
+ void *buf_r,int nbyte_r)
+{
+#ifdef GMX_MPI
+ int dest,src;
+ MPI_Status stat;
+
+ if (bBackward == FALSE) {
+ dest = atc->node_dest[shift];
+ src = atc->node_src[shift];
+ } else {
+ dest = atc->node_src[shift];
+ src = atc->node_dest[shift];
+ }
+
+ if (nbyte_s > 0 && nbyte_r > 0) {
+ MPI_Sendrecv(buf_s,nbyte_s,MPI_BYTE,
+ dest,shift,
+ buf_r,nbyte_r,MPI_BYTE,
+ src,shift,
+ atc->mpi_comm,&stat);
+ } else if (nbyte_s > 0) {
+ MPI_Send(buf_s,nbyte_s,MPI_BYTE,
+ dest,shift,
+ atc->mpi_comm);
+ } else if (nbyte_r > 0) {
+ MPI_Recv(buf_r,nbyte_r,MPI_BYTE,
+ src,shift,
+ atc->mpi_comm,&stat);
+ }
+#endif
+}
+
+static void dd_pmeredist_x_q(gmx_pme_t pme,
+ int n, gmx_bool bX, rvec *x, real *charge,
+ pme_atomcomm_t *atc)
+{
+ int *commnode,*buf_index;
+ int nnodes_comm,i,nsend,local_pos,buf_pos,node,scount,rcount;
+
+ commnode = atc->node_dest;
+ buf_index = atc->buf_index;
+
+ nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
+
+ nsend = 0;
+ for(i=0; i<nnodes_comm; i++) {
+ buf_index[commnode[i]] = nsend;
+ nsend += atc->count[commnode[i]];
+ }
+ if (bX) {
+ if (atc->count[atc->nodeid] + nsend != n)
+ gmx_fatal(FARGS,"%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
+ "This usually means that your system is not well equilibrated.",
+ n - (atc->count[atc->nodeid] + nsend),
+ pme->nodeid,'x'+atc->dimind);
+
+ if (nsend > pme->buf_nalloc) {
+ pme->buf_nalloc = over_alloc_dd(nsend);
+ srenew(pme->bufv,pme->buf_nalloc);
+ srenew(pme->bufr,pme->buf_nalloc);
+ }
+
+ atc->n = atc->count[atc->nodeid];
+ for(i=0; i<nnodes_comm; i++) {
+ scount = atc->count[commnode[i]];
+ /* Communicate the count */
+ if (debug)
+ fprintf(debug,"dimind %d PME node %d send to node %d: %d\n",
+ atc->dimind,atc->nodeid,commnode[i],scount);
+ pme_dd_sendrecv(atc,FALSE,i,
+ &scount,sizeof(int),
+ &atc->rcount[i],sizeof(int));
+ atc->n += atc->rcount[i];
+ }
+
+ pme_realloc_atomcomm_things(atc);
+ }
+
+ local_pos = 0;
+ for(i=0; i<n; i++) {
+ node = atc->pd[i];
+ if (node == atc->nodeid) {
+ /* Copy direct to the receive buffer */
+ if (bX) {
+ copy_rvec(x[i],atc->x[local_pos]);
+ }
+ atc->q[local_pos] = charge[i];
+ local_pos++;
+ } else {
+ /* Copy to the send buffer */
+ if (bX) {
+ copy_rvec(x[i],pme->bufv[buf_index[node]]);
+ }
+ pme->bufr[buf_index[node]] = charge[i];
+ buf_index[node]++;
+ }
+ }
+
+ buf_pos = 0;
+ for(i=0; i<nnodes_comm; i++) {
+ scount = atc->count[commnode[i]];
+ rcount = atc->rcount[i];
+ if (scount > 0 || rcount > 0) {
+ if (bX) {
+ /* Communicate the coordinates */
+ pme_dd_sendrecv(atc,FALSE,i,
+ pme->bufv[buf_pos],scount*sizeof(rvec),
+ atc->x[local_pos],rcount*sizeof(rvec));
+ }
+ /* Communicate the charges */
+ pme_dd_sendrecv(atc,FALSE,i,
+ pme->bufr+buf_pos,scount*sizeof(real),
+ atc->q+local_pos,rcount*sizeof(real));
+ buf_pos += scount;
+ local_pos += atc->rcount[i];
+ }
+ }
+}
+
+static void dd_pmeredist_f(gmx_pme_t pme, pme_atomcomm_t *atc,
+ int n, rvec *f,
+ gmx_bool bAddF)
+{
+ int *commnode,*buf_index;
+ int nnodes_comm,local_pos,buf_pos,i,scount,rcount,node;
+
+ commnode = atc->node_dest;
+ buf_index = atc->buf_index;
+
+ nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
+
+ local_pos = atc->count[atc->nodeid];
+ buf_pos = 0;
+ for(i=0; i<nnodes_comm; i++) {
+ scount = atc->rcount[i];
+ rcount = atc->count[commnode[i]];
+ if (scount > 0 || rcount > 0) {
+ /* Communicate the forces */
+ pme_dd_sendrecv(atc,TRUE,i,
+ atc->f[local_pos],scount*sizeof(rvec),
+ pme->bufv[buf_pos],rcount*sizeof(rvec));
+ local_pos += scount;
+ }
+ buf_index[commnode[i]] = buf_pos;
+ buf_pos += rcount;
+ }
+
+ local_pos = 0;
+ if (bAddF)
+ {
+ for(i=0; i<n; i++)
+ {
+ node = atc->pd[i];
+ if (node == atc->nodeid)
+ {
+ /* Add from the local force array */
+ rvec_inc(f[i],atc->f[local_pos]);
+ local_pos++;
+ }
+ else
+ {
+ /* Add from the receive buffer */
+ rvec_inc(f[i],pme->bufv[buf_index[node]]);
+ buf_index[node]++;
+ }
+ }
+ }
+ else
+ {
+ for(i=0; i<n; i++)
+ {
+ node = atc->pd[i];
+ if (node == atc->nodeid)
+ {
+ /* Copy from the local force array */
+ copy_rvec(atc->f[local_pos],f[i]);
+ local_pos++;
+ }
+ else
+ {
+ /* Copy from the receive buffer */
+ copy_rvec(pme->bufv[buf_index[node]],f[i]);
+ buf_index[node]++;
+ }
+ }
+ }
+}
+
+#ifdef GMX_MPI
+static void
+gmx_sum_qgrid_dd(gmx_pme_t pme, real *grid, int direction)
+{
+ pme_overlap_t *overlap;
+ int send_index0,send_nindex;
+ int recv_index0,recv_nindex;
+ MPI_Status stat;
+ int i,j,k,ix,iy,iz,icnt;
+ int ipulse,send_id,recv_id,datasize;
+ real *p;
+ real *sendptr,*recvptr;
+
+ /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
+ overlap = &pme->overlap[1];
+
+ for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
+ {
+ /* Since we have already (un)wrapped the overlap in the z-dimension,
+ * we only have to communicate 0 to nkz (not pmegrid_nz).
+ */
+ if (direction==GMX_SUM_QGRID_FORWARD)
+ {
+ send_id = overlap->send_id[ipulse];
+ recv_id = overlap->recv_id[ipulse];
+ send_index0 = overlap->comm_data[ipulse].send_index0;
+ send_nindex = overlap->comm_data[ipulse].send_nindex;
+ recv_index0 = overlap->comm_data[ipulse].recv_index0;
+ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
+ }
+ else
+ {
+ send_id = overlap->recv_id[ipulse];
+ recv_id = overlap->send_id[ipulse];
+ send_index0 = overlap->comm_data[ipulse].recv_index0;
+ send_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recv_index0 = overlap->comm_data[ipulse].send_index0;
+ recv_nindex = overlap->comm_data[ipulse].send_nindex;
+ }
+
+ /* Copy data to contiguous send buffer */
+ if (debug)
+ {
+ fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ pme->nodeid,overlap->nodeid,send_id,
+ pme->pmegrid_start_iy,
+ send_index0-pme->pmegrid_start_iy,
+ send_index0-pme->pmegrid_start_iy+send_nindex);
+ }
+ icnt = 0;
+ for(i=0;i<pme->pmegrid_nx;i++)
+ {
+ ix = i;
+ for(j=0;j<send_nindex;j++)
+ {
+ iy = j + send_index0 - pme->pmegrid_start_iy;
+ for(k=0;k<pme->nkz;k++)
+ {
+ iz = k;
+ overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
+ }
+ }
+ }
+
+ datasize = pme->pmegrid_nx * pme->nkz;
+
+ MPI_Sendrecv(overlap->sendbuf,send_nindex*datasize,GMX_MPI_REAL,
+ send_id,ipulse,
+ overlap->recvbuf,recv_nindex*datasize,GMX_MPI_REAL,
+ recv_id,ipulse,
+ overlap->mpi_comm,&stat);
+
+ /* Get data from contiguous recv buffer */
+ if (debug)
+ {
+ fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ pme->nodeid,overlap->nodeid,recv_id,
+ pme->pmegrid_start_iy,
+ recv_index0-pme->pmegrid_start_iy,
+ recv_index0-pme->pmegrid_start_iy+recv_nindex);
+ }
+ icnt = 0;
+ for(i=0;i<pme->pmegrid_nx;i++)
+ {
+ ix = i;
+ for(j=0;j<recv_nindex;j++)
+ {
+ iy = j + recv_index0 - pme->pmegrid_start_iy;
+ for(k=0;k<pme->nkz;k++)
+ {
+ iz = k;
+ if(direction==GMX_SUM_QGRID_FORWARD)
+ {
+ grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
+ }
+ else
+ {
+ grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] = overlap->recvbuf[icnt++];
+ }
+ }
+ }
+ }
+ }
+
+ /* Major dimension is easier, no copying required,
+ * but we might have to sum to separate array.
+ * Since we don't copy, we have to communicate up to pmegrid_nz,
+ * not nkz as for the minor direction.
+ */
+ overlap = &pme->overlap[0];
+
+ for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
+ {
+ if(direction==GMX_SUM_QGRID_FORWARD)
+ {
+ send_id = overlap->send_id[ipulse];
+ recv_id = overlap->recv_id[ipulse];
+ send_index0 = overlap->comm_data[ipulse].send_index0;
+ send_nindex = overlap->comm_data[ipulse].send_nindex;
+ recv_index0 = overlap->comm_data[ipulse].recv_index0;
+ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recvptr = overlap->recvbuf;
+ }
+ else
+ {
+ send_id = overlap->recv_id[ipulse];
+ recv_id = overlap->send_id[ipulse];
+ send_index0 = overlap->comm_data[ipulse].recv_index0;
+ send_nindex = overlap->comm_data[ipulse].recv_nindex;
+ recv_index0 = overlap->comm_data[ipulse].send_index0;
+ recv_nindex = overlap->comm_data[ipulse].send_nindex;
+ recvptr = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
+ }
+
+ sendptr = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
+ datasize = pme->pmegrid_ny * pme->pmegrid_nz;
+
+ if (debug)
+ {
+ fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
+ pme->nodeid,overlap->nodeid,send_id,
+ pme->pmegrid_start_ix,
+ send_index0-pme->pmegrid_start_ix,
+ send_index0-pme->pmegrid_start_ix+send_nindex);
+ fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
+ pme->nodeid,overlap->nodeid,recv_id,
+ pme->pmegrid_start_ix,
+ recv_index0-pme->pmegrid_start_ix,
+ recv_index0-pme->pmegrid_start_ix+recv_nindex);
+ }
+
+ MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
+ send_id,ipulse,
+ recvptr,recv_nindex*datasize,GMX_MPI_REAL,
+ recv_id,ipulse,
+ overlap->mpi_comm,&stat);
+
+ /* ADD data from contiguous recv buffer */
+ if(direction==GMX_SUM_QGRID_FORWARD)
+ {
+ p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
+ for(i=0;i<recv_nindex*datasize;i++)
+ {
+ p[i] += overlap->recvbuf[i];
+ }
+ }
+ }
+}
+#endif
+
+
+static int
+copy_pmegrid_to_fftgrid(gmx_pme_t pme, real *pmegrid, real *fftgrid)
+{
+ ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ ivec local_pme_size;
+ int i,ix,iy,iz;
+ int pmeidx,fftidx;
+
+ /* Dimensions should be identical for A/B grid, so we just use A here */
+ gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
+ local_fft_ndata,
+ local_fft_offset,
+ local_fft_size);
+
+ local_pme_size[0] = pme->pmegrid_nx;
+ local_pme_size[1] = pme->pmegrid_ny;
+ local_pme_size[2] = pme->pmegrid_nz;
+
+ /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
+ the offset is identical, and the PME grid always has more data (due to overlap)
+ */
+ {
+#ifdef DEBUG_PME
+ FILE *fp,*fp2;
+ char fn[STRLEN],format[STRLEN];
+ real val;
+ sprintf(fn,"pmegrid%d.pdb",pme->nodeid);
+ fp = ffopen(fn,"w");
+ sprintf(fn,"pmegrid%d.txt",pme->nodeid);
+ fp2 = ffopen(fn,"w");
+ sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
+#endif
+
+ for(ix=0;ix<local_fft_ndata[XX];ix++)
+ {
+ for(iy=0;iy<local_fft_ndata[YY];iy++)
+ {
+ for(iz=0;iz<local_fft_ndata[ZZ];iz++)
+ {
+ pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
+ fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
+ fftgrid[fftidx] = pmegrid[pmeidx];
+#ifdef DEBUG_PME
+ val = 100*pmegrid[pmeidx];
+ if (pmegrid[pmeidx] != 0)
+ fprintf(fp,format,"ATOM",pmeidx,"CA","GLY",' ',pmeidx,' ',
+ 5.0*ix,5.0*iy,5.0*iz,1.0,val);
+ if (pmegrid[pmeidx] != 0)
+ fprintf(fp2,"%-12s %5d %5d %5d %12.5e\n",
+ "qgrid",
+ pme->pmegrid_start_ix + ix,
+ pme->pmegrid_start_iy + iy,
+ pme->pmegrid_start_iz + iz,
+ pmegrid[pmeidx]);
+#endif
+ }
+ }
+ }
+#ifdef DEBUG_PME
+ ffclose(fp);
+ ffclose(fp2);
+#endif
+ }
+ return 0;
+}
+
+
+static gmx_cycles_t omp_cyc_start()
+{
+ return gmx_cycles_read();
+}
+
+static gmx_cycles_t omp_cyc_end(gmx_cycles_t c)
+{
+ return gmx_cycles_read() - c;
+}
+
+
+static int
+copy_fftgrid_to_pmegrid(gmx_pme_t pme, const real *fftgrid, real *pmegrid,
+ int nthread,int thread)
+{
+ ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ ivec local_pme_size;
+ int ixy0,ixy1,ixy,ix,iy,iz;
+ int pmeidx,fftidx;
+#ifdef PME_TIME_THREADS
+ gmx_cycles_t c1;
+ static double cs1=0;
+ static int cnt=0;
+#endif
+
+#ifdef PME_TIME_THREADS
+ c1 = omp_cyc_start();
+#endif
+ /* Dimensions should be identical for A/B grid, so we just use A here */
+ gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
+ local_fft_ndata,
+ local_fft_offset,
+ local_fft_size);
+
+ local_pme_size[0] = pme->pmegrid_nx;
+ local_pme_size[1] = pme->pmegrid_ny;
+ local_pme_size[2] = pme->pmegrid_nz;
+
+ /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
+ the offset is identical, and the PME grid always has more data (due to overlap)
+ */
+ ixy0 = ((thread )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
+ ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
+
+ for(ixy=ixy0;ixy<ixy1;ixy++)
+ {
+ ix = ixy/local_fft_ndata[YY];
+ iy = ixy - ix*local_fft_ndata[YY];
+
+ pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
+ fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
+ for(iz=0;iz<local_fft_ndata[ZZ];iz++)
+ {
+ pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
+ }
+ }
+
+#ifdef PME_TIME_THREADS
+ c1 = omp_cyc_end(c1);
+ cs1 += (double)c1;
+ cnt++;
+ if (cnt % 20 == 0)
+ {
+ printf("copy %.2f\n",cs1*1e-9);
+ }
+#endif
+
+ return 0;
+}
+
+
+static void
+wrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
+{
+ int nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix,iy,iz;
+
+ nx = pme->nkx;
+ ny = pme->nky;
+ nz = pme->nkz;
+
+ pnx = pme->pmegrid_nx;
+ pny = pme->pmegrid_ny;
+ pnz = pme->pmegrid_nz;
+
+ overlap = pme->pme_order - 1;
+
+ /* Add periodic overlap in z */
+ for(ix=0; ix<pme->pmegrid_nx; ix++)
+ {
+ for(iy=0; iy<pme->pmegrid_ny; iy++)
+ {
+ for(iz=0; iz<overlap; iz++)
+ {
+ pmegrid[(ix*pny+iy)*pnz+iz] +=
+ pmegrid[(ix*pny+iy)*pnz+nz+iz];
+ }
+ }
+ }
+
+ if (pme->nnodes_minor == 1)
+ {
+ for(ix=0; ix<pme->pmegrid_nx; ix++)
+ {
+ for(iy=0; iy<overlap; iy++)
+ {
+ for(iz=0; iz<nz; iz++)
+ {
+ pmegrid[(ix*pny+iy)*pnz+iz] +=
+ pmegrid[(ix*pny+ny+iy)*pnz+iz];
+ }
+ }
+ }
+ }
+
+ if (pme->nnodes_major == 1)
+ {
+ ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
+
+ for(ix=0; ix<overlap; ix++)
+ {
+ for(iy=0; iy<ny_x; iy++)
+ {
+ for(iz=0; iz<nz; iz++)
+ {
+ pmegrid[(ix*pny+iy)*pnz+iz] +=
+ pmegrid[((nx+ix)*pny+iy)*pnz+iz];
+ }
+ }
+ }
+ }
+}
+
+
+static void
+unwrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
+{
+ int nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix;
+
+ nx = pme->nkx;
+ ny = pme->nky;
+ nz = pme->nkz;
+
+ pnx = pme->pmegrid_nx;
+ pny = pme->pmegrid_ny;
+ pnz = pme->pmegrid_nz;
+
+ overlap = pme->pme_order - 1;
+
+ if (pme->nnodes_major == 1)
+ {
+ ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
+
+ for(ix=0; ix<overlap; ix++)
+ {
+ int iy,iz;
+
+ for(iy=0; iy<ny_x; iy++)
+ {
+ for(iz=0; iz<nz; iz++)
+ {
+ pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
+ pmegrid[(ix*pny+iy)*pnz+iz];
+ }
+ }
+ }
+ }
+
+ if (pme->nnodes_minor == 1)
+ {
+#pragma omp parallel for num_threads(pme->nthread) schedule(static)
+ for(ix=0; ix<pme->pmegrid_nx; ix++)
+ {
+ int iy,iz;
+
+ for(iy=0; iy<overlap; iy++)
+ {
+ for(iz=0; iz<nz; iz++)
+ {
+ pmegrid[(ix*pny+ny+iy)*pnz+iz] =
+ pmegrid[(ix*pny+iy)*pnz+iz];
+ }
+ }
+ }
+ }
+
+ /* Copy periodic overlap in z */
+#pragma omp parallel for num_threads(pme->nthread) schedule(static)
+ for(ix=0; ix<pme->pmegrid_nx; ix++)
+ {
+ int iy,iz;
+
+ for(iy=0; iy<pme->pmegrid_ny; iy++)
+ {
+ for(iz=0; iz<overlap; iz++)
+ {
+ pmegrid[(ix*pny+iy)*pnz+nz+iz] =
+ pmegrid[(ix*pny+iy)*pnz+iz];
+ }
+ }
+ }
+}
+
+static void clear_grid(int nx,int ny,int nz,real *grid,
+ ivec fs,int *flag,
+ int fx,int fy,int fz,
+ int order)
+{
+ int nc,ncz;
+ int fsx,fsy,fsz,gx,gy,gz,g0x,g0y,x,y,z;
+ int flind;
+
+ nc = 2 + (order - 2)/FLBS;
+ ncz = 2 + (order - 2)/FLBSZ;
+
+ for(fsx=fx; fsx<fx+nc; fsx++)
+ {
+ for(fsy=fy; fsy<fy+nc; fsy++)
+ {
+ for(fsz=fz; fsz<fz+ncz; fsz++)
+ {
+ flind = (fsx*fs[YY] + fsy)*fs[ZZ] + fsz;
+ if (flag[flind] == 0)
+ {
+ gx = fsx*FLBS;
+ gy = fsy*FLBS;
+ gz = fsz*FLBSZ;
+ g0x = (gx*ny + gy)*nz + gz;
+ for(x=0; x<FLBS; x++)
+ {
+ g0y = g0x;
+ for(y=0; y<FLBS; y++)
+ {
+ for(z=0; z<FLBSZ; z++)
+ {
+ grid[g0y+z] = 0;
+ }
+ g0y += nz;
+ }
+ g0x += ny*nz;
+ }
+
+ flag[flind] = 1;
+ }
+ }
+ }
+ }
+}
+
+/* This has to be a macro to enable full compiler optimization with xlC (and probably others too) */
+#define DO_BSPLINE(order) \
+for(ithx=0; (ithx<order); ithx++) \
+{ \
+ index_x = (i0+ithx)*pny*pnz; \
+ valx = qn*thx[ithx]; \
+ \
+ for(ithy=0; (ithy<order); ithy++) \
+ { \
+ valxy = valx*thy[ithy]; \
+ index_xy = index_x+(j0+ithy)*pnz; \
+ \
+ for(ithz=0; (ithz<order); ithz++) \
+ { \
+ index_xyz = index_xy+(k0+ithz); \
+ grid[index_xyz] += valxy*thz[ithz]; \
+ } \
+ } \
+}
+
+
+static void spread_q_bsplines_thread(pmegrid_t *pmegrid,
+ pme_atomcomm_t *atc, splinedata_t *spline,
+ pme_spline_work_t *work)
+{
+
+ /* spread charges from home atoms to local grid */
+ real *grid;
+ pme_overlap_t *ol;
+ int b,i,nn,n,ithx,ithy,ithz,i0,j0,k0;
+ int * idxptr;
+ int order,norder,index_x,index_xy,index_xyz;
+ real valx,valxy,qn;
+ real *thx,*thy,*thz;
+ int localsize, bndsize;
+ int pnx,pny,pnz,ndatatot;
+ int offx,offy,offz;
+
-
++ pnx = pmegrid->s[XX];
++ pny = pmegrid->s[YY];
++ pnz = pmegrid->s[ZZ];
+
+ offx = pmegrid->offset[XX];
+ offy = pmegrid->offset[YY];
+ offz = pmegrid->offset[ZZ];
+
+ ndatatot = pnx*pny*pnz;
+ grid = pmegrid->grid;
+ for(i=0;i<ndatatot;i++)
+ {
+ grid[i] = 0;
+ }
- nz = grid->n[ZZ];
++
+ order = pmegrid->order;
+
+ for(nn=0; nn<spline->n; nn++)
+ {
+ n = spline->ind[nn];
+ qn = atc->q[n];
+
+ if (qn != 0)
+ {
+ idxptr = atc->idx[n];
+ norder = nn*order;
+
+ i0 = idxptr[XX] - offx;
+ j0 = idxptr[YY] - offy;
+ k0 = idxptr[ZZ] - offz;
+
+ thx = spline->theta[XX] + norder;
+ thy = spline->theta[YY] + norder;
+ thz = spline->theta[ZZ] + norder;
+
+ switch (order) {
+ case 4:
+#ifdef PME_SSE
+#ifdef PME_SSE_UNALIGNED
+#define PME_SPREAD_SSE_ORDER4
+#else
+#define PME_SPREAD_SSE_ALIGNED
+#define PME_ORDER 4
+#endif
+#include "pme_sse_single.h"
+#else
+ DO_BSPLINE(4);
+#endif
+ break;
+ case 5:
+#ifdef PME_SSE
+#define PME_SPREAD_SSE_ALIGNED
+#define PME_ORDER 5
+#include "pme_sse_single.h"
+#else
+ DO_BSPLINE(5);
+#endif
+ break;
+ default:
+ DO_BSPLINE(order);
+ break;
+ }
+ }
+ }
+}
+
+static void set_grid_alignment(int *pmegrid_nz,int pme_order)
+{
+#ifdef PME_SSE
+ if (pme_order == 5
+#ifndef PME_SSE_UNALIGNED
+ || pme_order == 4
+#endif
+ )
+ {
+ /* Round nz up to a multiple of 4 to ensure alignment */
+ *pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
+ }
+#endif
+}
+
+static void set_gridsize_alignment(int *gridsize,int pme_order)
+{
+#ifdef PME_SSE
+#ifndef PME_SSE_UNALIGNED
+ if (pme_order == 4)
+ {
+ /* Add extra elements to ensured aligned operations do not go
+ * beyond the allocated grid size.
+ * Note that for pme_order=5, the pme grid z-size alignment
+ * ensures that we will not go beyond the grid size.
+ */
+ *gridsize += 4;
+ }
+#endif
+#endif
+}
+
+static void pmegrid_init(pmegrid_t *grid,
+ int cx, int cy, int cz,
+ int x0, int y0, int z0,
+ int x1, int y1, int z1,
+ gmx_bool set_alignment,
+ int pme_order,
+ real *ptr)
+{
+ int nz,gridsize;
+
+ grid->ci[XX] = cx;
+ grid->ci[YY] = cy;
+ grid->ci[ZZ] = cz;
+ grid->offset[XX] = x0;
+ grid->offset[YY] = y0;
+ grid->offset[ZZ] = z0;
+ grid->n[XX] = x1 - x0 + pme_order - 1;
+ grid->n[YY] = y1 - y0 + pme_order - 1;
+ grid->n[ZZ] = z1 - z0 + pme_order - 1;
++ copy_ivec(grid->n,grid->s);
+
- grid->n[ZZ] = nz;
++ nz = grid->s[ZZ];
+ set_grid_alignment(&nz,pme_order);
+ if (set_alignment)
+ {
- else if (nz != grid->n[ZZ])
++ grid->s[ZZ] = nz;
+ }
- gridsize = grid->n[XX]*grid->n[YY]*grid->n[ZZ];
++ else if (nz != grid->s[ZZ])
+ {
+ gmx_incons("pmegrid_init call with an unaligned z size");
+ }
+
+ grid->order = pme_order;
+ if (ptr == NULL)
+ {
- int max_comm_lines;
++ gridsize = grid->s[XX]*grid->s[YY]*grid->s[ZZ];
+ set_gridsize_alignment(&gridsize,pme_order);
+ snew_aligned(grid->grid,gridsize,16);
+ }
+ else
+ {
+ grid->grid = ptr;
+ }
+}
+
+static int div_round_up(int enumerator,int denominator)
+{
+ return (enumerator + denominator - 1)/denominator;
+}
+
+static void make_subgrid_division(const ivec n,int ovl,int nthread,
+ ivec nsub)
+{
+ int gsize_opt,gsize;
+ int nsx,nsy,nsz;
+ char *env;
+
+ gsize_opt = -1;
+ for(nsx=1; nsx<=nthread; nsx++)
+ {
+ if (nthread % nsx == 0)
+ {
+ for(nsy=1; nsy<=nthread; nsy++)
+ {
+ if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
+ {
+ nsz = nthread/(nsx*nsy);
+
+ /* Determine the number of grid points per thread */
+ gsize =
+ (div_round_up(n[XX],nsx) + ovl)*
+ (div_round_up(n[YY],nsy) + ovl)*
+ (div_round_up(n[ZZ],nsz) + ovl);
+
+ /* Minimize the number of grids points per thread
+ * and, secondarily, the number of cuts in minor dimensions.
+ */
+ if (gsize_opt == -1 ||
+ gsize < gsize_opt ||
+ (gsize == gsize_opt &&
+ (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
+ {
+ nsub[XX] = nsx;
+ nsub[YY] = nsy;
+ nsub[ZZ] = nsz;
+ gsize_opt = gsize;
+ }
+ }
+ }
+ }
+ }
+
+ env = getenv("GMX_PME_THREAD_DIVISION");
+ if (env != NULL)
+ {
+ sscanf(env,"%d %d %d",&nsub[XX],&nsub[YY],&nsub[ZZ]);
+ }
+
+ if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
+ {
+ gmx_fatal(FARGS,"PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)",nsub[XX],nsub[YY],nsub[ZZ],nthread);
+ }
+}
+
+static void pmegrids_init(pmegrids_t *grids,
+ int nx,int ny,int nz,int nz_base,
+ int pme_order,
+ int nthread,
+ int overlap_x,
+ int overlap_y)
+{
+ ivec n,n_base,g0,g1;
+ int t,x,y,z,d,i,tfac;
- real *grid_all;
++ int max_comm_lines=-1;
+
+ n[XX] = nx - (pme_order - 1);
+ n[YY] = ny - (pme_order - 1);
+ n[ZZ] = nz - (pme_order - 1);
+
+ copy_ivec(n,n_base);
+ n_base[ZZ] = nz_base;
+
+ pmegrid_init(&grids->grid,0,0,0,0,0,0,n[XX],n[YY],n[ZZ],FALSE,pme_order,
+ NULL);
+
+ grids->nthread = nthread;
+
+ make_subgrid_division(n_base,pme_order-1,grids->nthread,grids->nc);
+
+ if (grids->nthread > 1)
+ {
+ ivec nst;
+ int gridsize;
- snew_aligned(grid_all,
+
+ for(d=0; d<DIM; d++)
+ {
+ nst[d] = div_round_up(n[d],grids->nc[d]) + pme_order - 1;
+ }
+ set_grid_alignment(&nst[ZZ],pme_order);
+
+ if (debug)
+ {
+ fprintf(debug,"pmegrid thread local division: %d x %d x %d\n",
+ grids->nc[XX],grids->nc[YY],grids->nc[ZZ]);
+ fprintf(debug,"pmegrid %d %d %d max thread pmegrid %d %d %d\n",
+ nx,ny,nz,
+ nst[XX],nst[YY],nst[ZZ]);
+ }
+
+ snew(grids->grid_th,grids->nthread);
+ t = 0;
+ gridsize = nst[XX]*nst[YY]*nst[ZZ];
+ set_gridsize_alignment(&gridsize,pme_order);
- grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
++ snew_aligned(grids->grid_all,
+ grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
+ 16);
+
+ for(x=0; x<grids->nc[XX]; x++)
+ {
+ for(y=0; y<grids->nc[YY]; y++)
+ {
+ for(z=0; z<grids->nc[ZZ]; z++)
+ {
+ pmegrid_init(&grids->grid_th[t],
+ x,y,z,
+ (n[XX]*(x ))/grids->nc[XX],
+ (n[YY]*(y ))/grids->nc[YY],
+ (n[ZZ]*(z ))/grids->nc[ZZ],
+ (n[XX]*(x+1))/grids->nc[XX],
+ (n[YY]*(y+1))/grids->nc[YY],
+ (n[ZZ]*(z+1))/grids->nc[ZZ],
+ TRUE,
+ pme_order,
- while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines)
++ grids->grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
+ t++;
+ }
+ }
+ }
+ }
+
+ snew(grids->g2t,DIM);
+ tfac = 1;
+ for(d=DIM-1; d>=0; d--)
+ {
+ snew(grids->g2t[d],n[d]);
+ t = 0;
+ for(i=0; i<n[d]; i++)
+ {
+ /* The second check should match the parameters
+ * of the pmegrid_init call above.
+ */
+ while (t + 1 < grids->nc[d] && i >= (n[d]*(t+1))/grids->nc[d])
+ {
+ t++;
+ }
+ grids->g2t[d][i] = t*tfac;
+ }
+
+ tfac *= grids->nc[d];
+
+ switch (d)
+ {
+ case XX: max_comm_lines = overlap_x; break;
+ case YY: max_comm_lines = overlap_y; break;
+ case ZZ: max_comm_lines = pme_order - 1; break;
+ }
+ grids->nthread_comm[d] = 0;
-
++ while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines &&
++ grids->nthread_comm[d] < grids->nc[d])
+ {
+ grids->nthread_comm[d]++;
+ }
+ if (debug != NULL)
+ {
+ fprintf(debug,"pmegrid thread grid communication range in %c: %d\n",
+ 'x'+d,grids->nthread_comm[d]);
+ }
+ /* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
+ * work, but this is not a problematic restriction.
+ */
+ if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
+ {
+ gmx_fatal(FARGS,"Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME",grids->nthread);
+ }
+ }
+}
+
+
+static void pmegrids_destroy(pmegrids_t *grids)
+{
+ int t;
+
+ if (grids->grid.grid != NULL)
+ {
+ sfree(grids->grid.grid);
+
+ if (grids->nthread > 0)
+ {
+ for(t=0; t<grids->nthread; t++)
+ {
+ sfree(grids->grid_th[t].grid);
+ }
+ sfree(grids->grid_th);
+ }
+ }
+}
+
+
+static void realloc_work(pme_work_t *work,int nkx)
+{
+ if (nkx > work->nalloc)
+ {
+ work->nalloc = nkx;
+ srenew(work->mhx ,work->nalloc);
+ srenew(work->mhy ,work->nalloc);
+ srenew(work->mhz ,work->nalloc);
+ srenew(work->m2 ,work->nalloc);
+ /* Allocate an aligned pointer for SSE operations, including 3 extra
+ * elements at the end since SSE operates on 4 elements at a time.
+ */
+ sfree_aligned(work->denom);
+ sfree_aligned(work->tmp1);
+ sfree_aligned(work->eterm);
+ snew_aligned(work->denom,work->nalloc+3,16);
+ snew_aligned(work->tmp1 ,work->nalloc+3,16);
+ snew_aligned(work->eterm,work->nalloc+3,16);
+ srenew(work->m2inv,work->nalloc);
+ }
+}
+
+
+static void free_work(pme_work_t *work)
+{
+ sfree(work->mhx);
+ sfree(work->mhy);
+ sfree(work->mhz);
+ sfree(work->m2);
+ sfree_aligned(work->denom);
+ sfree_aligned(work->tmp1);
+ sfree_aligned(work->eterm);
+ sfree(work->m2inv);
+}
+
+
+#ifdef PME_SSE
+ /* Calculate exponentials through SSE in float precision */
+inline static void calc_exponentials(int start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
+{
+ {
+ const __m128 two = _mm_set_ps(2.0f,2.0f,2.0f,2.0f);
+ __m128 f_sse;
+ __m128 lu;
+ __m128 tmp_d1,d_inv,tmp_r,tmp_e;
+ int kx;
+ f_sse = _mm_load1_ps(&f);
+ for(kx=0; kx<end; kx+=4)
+ {
+ tmp_d1 = _mm_load_ps(d_aligned+kx);
+ lu = _mm_rcp_ps(tmp_d1);
+ d_inv = _mm_mul_ps(lu,_mm_sub_ps(two,_mm_mul_ps(lu,tmp_d1)));
+ tmp_r = _mm_load_ps(r_aligned+kx);
+ tmp_r = gmx_mm_exp_ps(tmp_r);
+ tmp_e = _mm_mul_ps(f_sse,d_inv);
+ tmp_e = _mm_mul_ps(tmp_e,tmp_r);
+ _mm_store_ps(e_aligned+kx,tmp_e);
+ }
+ }
+}
+#else
+inline static void calc_exponentials(int start, int end, real f, real *d, real *r, real *e)
+{
+ int kx;
+ for(kx=start; kx<end; kx++)
+ {
+ d[kx] = 1.0/d[kx];
+ }
+ for(kx=start; kx<end; kx++)
+ {
+ r[kx] = exp(r[kx]);
+ }
+ for(kx=start; kx<end; kx++)
+ {
+ e[kx] = f*r[kx]*d[kx];
+ }
+}
+#endif
+
+
+static int solve_pme_yzx(gmx_pme_t pme,t_complex *grid,
+ real ewaldcoeff,real vol,
+ gmx_bool bEnerVir,
+ int nthread,int thread)
+{
+ /* do recip sum over local cells in grid */
+ /* y major, z middle, x minor or continuous */
+ t_complex *p0;
+ int kx,ky,kz,maxkx,maxky,maxkz;
+ int nx,ny,nz,iyz0,iyz1,iyz,iy,iz,kxstart,kxend;
+ real mx,my,mz;
+ real factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
+ real ets2,struct2,vfactor,ets2vf;
+ real d1,d2,energy=0;
+ real by,bz;
+ real virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
+ real rxx,ryx,ryy,rzx,rzy,rzz;
+ pme_work_t *work;
+ real *mhx,*mhy,*mhz,*m2,*denom,*tmp1,*eterm,*m2inv;
+ real mhxk,mhyk,mhzk,m2k;
+ real corner_fac;
+ ivec complex_order;
+ ivec local_ndata,local_offset,local_size;
+ real elfac;
+
+ elfac = ONE_4PI_EPS0/pme->epsilon_r;
+
+ nx = pme->nkx;
+ ny = pme->nky;
+ nz = pme->nkz;
+
+ /* Dimensions should be identical for A/B grid, so we just use A here */
+ gmx_parallel_3dfft_complex_limits(pme->pfft_setupA,
+ complex_order,
+ local_ndata,
+ local_offset,
+ local_size);
+
+ rxx = pme->recipbox[XX][XX];
+ ryx = pme->recipbox[YY][XX];
+ ryy = pme->recipbox[YY][YY];
+ rzx = pme->recipbox[ZZ][XX];
+ rzy = pme->recipbox[ZZ][YY];
+ rzz = pme->recipbox[ZZ][ZZ];
+
+ maxkx = (nx+1)/2;
+ maxky = (ny+1)/2;
+ maxkz = nz/2+1;
+
+ work = &pme->work[thread];
+ mhx = work->mhx;
+ mhy = work->mhy;
+ mhz = work->mhz;
+ m2 = work->m2;
+ denom = work->denom;
+ tmp1 = work->tmp1;
+ eterm = work->eterm;
+ m2inv = work->m2inv;
+
+ iyz0 = local_ndata[YY]*local_ndata[ZZ]* thread /nthread;
+ iyz1 = local_ndata[YY]*local_ndata[ZZ]*(thread+1)/nthread;
+
+ for(iyz=iyz0; iyz<iyz1; iyz++)
+ {
+ iy = iyz/local_ndata[ZZ];
+ iz = iyz - iy*local_ndata[ZZ];
+
+ ky = iy + local_offset[YY];
+
+ if (ky < maxky)
+ {
+ my = ky;
+ }
+ else
+ {
+ my = (ky - ny);
+ }
+
+ by = M_PI*vol*pme->bsp_mod[YY][ky];
+
+ kz = iz + local_offset[ZZ];
+
+ mz = kz;
+
+ bz = pme->bsp_mod[ZZ][kz];
+
+ /* 0.5 correction for corner points */
+ corner_fac = 1;
+ if (kz == 0 || kz == (nz+1)/2)
+ {
+ corner_fac = 0.5;
+ }
+
+ p0 = grid + iy*local_size[ZZ]*local_size[XX] + iz*local_size[XX];
+
+ /* We should skip the k-space point (0,0,0) */
+ if (local_offset[XX] > 0 || ky > 0 || kz > 0)
+ {
+ kxstart = local_offset[XX];
+ }
+ else
+ {
+ kxstart = local_offset[XX] + 1;
+ p0++;
+ }
+ kxend = local_offset[XX] + local_ndata[XX];
+
+ if (bEnerVir)
+ {
+ /* More expensive inner loop, especially because of the storage
+ * of the mh elements in array's.
+ * Because x is the minor grid index, all mh elements
+ * depend on kx for triclinic unit cells.
+ */
+
+ /* Two explicit loops to avoid a conditional inside the loop */
+ for(kx=kxstart; kx<maxkx; kx++)
+ {
+ mx = kx;
+
+ mhxk = mx * rxx;
+ mhyk = mx * ryx + my * ryy;
+ mhzk = mx * rzx + my * rzy + mz * rzz;
+ m2k = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
+ mhx[kx] = mhxk;
+ mhy[kx] = mhyk;
+ mhz[kx] = mhzk;
+ m2[kx] = m2k;
+ denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
+ tmp1[kx] = -factor*m2k;
+ }
+
+ for(kx=maxkx; kx<kxend; kx++)
+ {
+ mx = (kx - nx);
+
+ mhxk = mx * rxx;
+ mhyk = mx * ryx + my * ryy;
+ mhzk = mx * rzx + my * rzy + mz * rzz;
+ m2k = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
+ mhx[kx] = mhxk;
+ mhy[kx] = mhyk;
+ mhz[kx] = mhzk;
+ m2[kx] = m2k;
+ denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
+ tmp1[kx] = -factor*m2k;
+ }
+
+ for(kx=kxstart; kx<kxend; kx++)
+ {
+ m2inv[kx] = 1.0/m2[kx];
+ }
+
+ calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
+
+ for(kx=kxstart; kx<kxend; kx++,p0++)
+ {
+ d1 = p0->re;
+ d2 = p0->im;
+
+ p0->re = d1*eterm[kx];
+ p0->im = d2*eterm[kx];
+
+ struct2 = 2.0*(d1*d1+d2*d2);
+
+ tmp1[kx] = eterm[kx]*struct2;
+ }
+
+ for(kx=kxstart; kx<kxend; kx++)
+ {
+ ets2 = corner_fac*tmp1[kx];
+ vfactor = (factor*m2[kx] + 1.0)*2.0*m2inv[kx];
+ energy += ets2;
+
+ ets2vf = ets2*vfactor;
+ virxx += ets2vf*mhx[kx]*mhx[kx] - ets2;
+ virxy += ets2vf*mhx[kx]*mhy[kx];
+ virxz += ets2vf*mhx[kx]*mhz[kx];
+ viryy += ets2vf*mhy[kx]*mhy[kx] - ets2;
+ viryz += ets2vf*mhy[kx]*mhz[kx];
+ virzz += ets2vf*mhz[kx]*mhz[kx] - ets2;
+ }
+ }
+ else
+ {
+ /* We don't need to calculate the energy and the virial.
+ * In this case the triclinic overhead is small.
+ */
+
+ /* Two explicit loops to avoid a conditional inside the loop */
+
+ for(kx=kxstart; kx<maxkx; kx++)
+ {
+ mx = kx;
+
+ mhxk = mx * rxx;
+ mhyk = mx * ryx + my * ryy;
+ mhzk = mx * rzx + my * rzy + mz * rzz;
+ m2k = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
+ denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
+ tmp1[kx] = -factor*m2k;
+ }
+
+ for(kx=maxkx; kx<kxend; kx++)
+ {
+ mx = (kx - nx);
+
+ mhxk = mx * rxx;
+ mhyk = mx * ryx + my * ryy;
+ mhzk = mx * rzx + my * rzy + mz * rzz;
+ m2k = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
+ denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
+ tmp1[kx] = -factor*m2k;
+ }
+
+ calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
+
+ for(kx=kxstart; kx<kxend; kx++,p0++)
+ {
+ d1 = p0->re;
+ d2 = p0->im;
+
+ p0->re = d1*eterm[kx];
+ p0->im = d2*eterm[kx];
+ }
+ }
+ }
+
+ if (bEnerVir)
+ {
+ /* Update virial with local values.
+ * The virial is symmetric by definition.
+ * this virial seems ok for isotropic scaling, but I'm
+ * experiencing problems on semiisotropic membranes.
+ * IS THAT COMMENT STILL VALID??? (DvdS, 2001/02/07).
+ */
+ work->vir[XX][XX] = 0.25*virxx;
+ work->vir[YY][YY] = 0.25*viryy;
+ work->vir[ZZ][ZZ] = 0.25*virzz;
+ work->vir[XX][YY] = work->vir[YY][XX] = 0.25*virxy;
+ work->vir[XX][ZZ] = work->vir[ZZ][XX] = 0.25*virxz;
+ work->vir[YY][ZZ] = work->vir[ZZ][YY] = 0.25*viryz;
+
+ /* This energy should be corrected for a charged system */
+ work->energy = 0.5*energy;
+ }
+
+ /* Return the loop count */
+ return local_ndata[YY]*local_ndata[XX];
+}
+
+static void get_pme_ener_vir(const gmx_pme_t pme,int nthread,
+ real *mesh_energy,matrix vir)
+{
+ /* This function sums output over threads
+ * and should therefore only be called after thread synchronization.
+ */
+ int thread;
+
+ *mesh_energy = pme->work[0].energy;
+ copy_mat(pme->work[0].vir,vir);
+
+ for(thread=1; thread<nthread; thread++)
+ {
+ *mesh_energy += pme->work[thread].energy;
+ m_add(vir,pme->work[thread].vir,vir);
+ }
+}
+
+#define DO_FSPLINE(order) \
+for(ithx=0; (ithx<order); ithx++) \
+{ \
+ index_x = (i0+ithx)*pny*pnz; \
+ tx = thx[ithx]; \
+ dx = dthx[ithx]; \
+ \
+ for(ithy=0; (ithy<order); ithy++) \
+ { \
+ index_xy = index_x+(j0+ithy)*pnz; \
+ ty = thy[ithy]; \
+ dy = dthy[ithy]; \
+ fxy1 = fz1 = 0; \
+ \
+ for(ithz=0; (ithz<order); ithz++) \
+ { \
+ gval = grid[index_xy+(k0+ithz)]; \
+ fxy1 += thz[ithz]*gval; \
+ fz1 += dthz[ithz]*gval; \
+ } \
+ fx += dx*ty*fxy1; \
+ fy += tx*dy*fxy1; \
+ fz += tx*ty*fz1; \
+ } \
+}
+
+
+static void gather_f_bsplines(gmx_pme_t pme,real *grid,
+ gmx_bool bClearF,pme_atomcomm_t *atc,
+ splinedata_t *spline,
+ real scale)
+{
+ /* sum forces for local particles */
+ int nn,n,ithx,ithy,ithz,i0,j0,k0;
+ int index_x,index_xy;
+ int nx,ny,nz,pnx,pny,pnz;
+ int * idxptr;
+ real tx,ty,dx,dy,qn;
+ real fx,fy,fz,gval;
+ real fxy1,fz1;
+ real *thx,*thy,*thz,*dthx,*dthy,*dthz;
+ int norder;
+ real rxx,ryx,ryy,rzx,rzy,rzz;
+ int order;
+
+ pme_spline_work_t *work;
+
+ work = pme->spline_work;
+
+ order = pme->pme_order;
+ thx = spline->theta[XX];
+ thy = spline->theta[YY];
+ thz = spline->theta[ZZ];
+ dthx = spline->dtheta[XX];
+ dthy = spline->dtheta[YY];
+ dthz = spline->dtheta[ZZ];
+ nx = pme->nkx;
+ ny = pme->nky;
+ nz = pme->nkz;
+ pnx = pme->pmegrid_nx;
+ pny = pme->pmegrid_ny;
+ pnz = pme->pmegrid_nz;
+
+ rxx = pme->recipbox[XX][XX];
+ ryx = pme->recipbox[YY][XX];
+ ryy = pme->recipbox[YY][YY];
+ rzx = pme->recipbox[ZZ][XX];
+ rzy = pme->recipbox[ZZ][YY];
+ rzz = pme->recipbox[ZZ][ZZ];
+
+ for(nn=0; nn<spline->n; nn++)
+ {
+ n = spline->ind[nn];
+ qn = scale*atc->q[n];
+
+ if (bClearF)
+ {
+ atc->f[n][XX] = 0;
+ atc->f[n][YY] = 0;
+ atc->f[n][ZZ] = 0;
+ }
+ if (qn != 0)
+ {
+ fx = 0;
+ fy = 0;
+ fz = 0;
+ idxptr = atc->idx[n];
+ norder = nn*order;
+
+ i0 = idxptr[XX];
+ j0 = idxptr[YY];
+ k0 = idxptr[ZZ];
+
+ /* Pointer arithmetic alert, next six statements */
+ thx = spline->theta[XX] + norder;
+ thy = spline->theta[YY] + norder;
+ thz = spline->theta[ZZ] + norder;
+ dthx = spline->dtheta[XX] + norder;
+ dthy = spline->dtheta[YY] + norder;
+ dthz = spline->dtheta[ZZ] + norder;
+
+ switch (order) {
+ case 4:
+#ifdef PME_SSE
+#ifdef PME_SSE_UNALIGNED
+#define PME_GATHER_F_SSE_ORDER4
+#else
+#define PME_GATHER_F_SSE_ALIGNED
+#define PME_ORDER 4
+#endif
+#include "pme_sse_single.h"
+#else
+ DO_FSPLINE(4);
+#endif
+ break;
+ case 5:
+#ifdef PME_SSE
+#define PME_GATHER_F_SSE_ALIGNED
+#define PME_ORDER 5
+#include "pme_sse_single.h"
+#else
+ DO_FSPLINE(5);
+#endif
+ break;
+ default:
+ DO_FSPLINE(order);
+ break;
+ }
+
+ atc->f[n][XX] += -qn*( fx*nx*rxx );
+ atc->f[n][YY] += -qn*( fx*nx*ryx + fy*ny*ryy );
+ atc->f[n][ZZ] += -qn*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
+ }
+ }
+ /* Since the energy and not forces are interpolated
+ * the net force might not be exactly zero.
+ * This can be solved by also interpolating F, but
+ * that comes at a cost.
+ * A better hack is to remove the net force every
+ * step, but that must be done at a higher level
+ * since this routine doesn't see all atoms if running
+ * in parallel. Don't know how important it is? EL 990726
+ */
+}
+
+
+static real gather_energy_bsplines(gmx_pme_t pme,real *grid,
+ pme_atomcomm_t *atc)
+{
+ splinedata_t *spline;
+ int n,ithx,ithy,ithz,i0,j0,k0;
+ int index_x,index_xy;
+ int * idxptr;
+ real energy,pot,tx,ty,qn,gval;
+ real *thx,*thy,*thz;
+ int norder;
+ int order;
+
+ spline = &atc->spline[0];
+
+ order = pme->pme_order;
+
+ energy = 0;
+ for(n=0; (n<atc->n); n++) {
+ qn = atc->q[n];
+
+ if (qn != 0) {
+ idxptr = atc->idx[n];
+ norder = n*order;
+
+ i0 = idxptr[XX];
+ j0 = idxptr[YY];
+ k0 = idxptr[ZZ];
+
+ /* Pointer arithmetic alert, next three statements */
+ thx = spline->theta[XX] + norder;
+ thy = spline->theta[YY] + norder;
+ thz = spline->theta[ZZ] + norder;
+
+ pot = 0;
+ for(ithx=0; (ithx<order); ithx++)
+ {
+ index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
+ tx = thx[ithx];
+
+ for(ithy=0; (ithy<order); ithy++)
+ {
+ index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
+ ty = thy[ithy];
+
+ for(ithz=0; (ithz<order); ithz++)
+ {
+ gval = grid[index_xy+(k0+ithz)];
+ pot += tx*ty*thz[ithz]*gval;
+ }
+
+ }
+ }
+
+ energy += pot*qn;
+ }
+ }
+
+ return energy;
+}
+
+/* Macro to force loop unrolling by fixing order.
+ * This gives a significant performance gain.
+ */
+#define CALC_SPLINE(order) \
+{ \
+ int j,k,l; \
+ real dr,div; \
+ real data[PME_ORDER_MAX]; \
+ real ddata[PME_ORDER_MAX]; \
+ \
+ for(j=0; (j<DIM); j++) \
+ { \
+ dr = xptr[j]; \
+ \
+ /* dr is relative offset from lower cell limit */ \
+ data[order-1] = 0; \
+ data[1] = dr; \
+ data[0] = 1 - dr; \
+ \
+ for(k=3; (k<order); k++) \
+ { \
+ div = 1.0/(k - 1.0); \
+ data[k-1] = div*dr*data[k-2]; \
+ for(l=1; (l<(k-1)); l++) \
+ { \
+ data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)* \
+ data[k-l-1]); \
+ } \
+ data[0] = div*(1-dr)*data[0]; \
+ } \
+ /* differentiate */ \
+ ddata[0] = -data[0]; \
+ for(k=1; (k<order); k++) \
+ { \
+ ddata[k] = data[k-1] - data[k]; \
+ } \
+ \
+ div = 1.0/(order - 1); \
+ data[order-1] = div*dr*data[order-2]; \
+ for(l=1; (l<(order-1)); l++) \
+ { \
+ data[order-l-1] = div*((dr+l)*data[order-l-2]+ \
+ (order-l-dr)*data[order-l-1]); \
+ } \
+ data[0] = div*(1 - dr)*data[0]; \
+ \
+ for(k=0; k<order; k++) \
+ { \
+ theta[j][i*order+k] = data[k]; \
+ dtheta[j][i*order+k] = ddata[k]; \
+ } \
+ } \
+}
+
+void make_bsplines(splinevec theta,splinevec dtheta,int order,
+ rvec fractx[],int nr,int ind[],real charge[],
+ gmx_bool bFreeEnergy)
+{
+ /* construct splines for local atoms */
+ int i,ii;
+ real *xptr;
+
+ for(i=0; i<nr; i++)
+ {
+ /* With free energy we do not use the charge check.
+ * In most cases this will be more efficient than calling make_bsplines
+ * twice, since usually more than half the particles have charges.
+ */
+ ii = ind[i];
+ if (bFreeEnergy || charge[ii] != 0.0) {
+ xptr = fractx[ii];
+ switch(order) {
+ case 4: CALC_SPLINE(4); break;
+ case 5: CALC_SPLINE(5); break;
+ default: CALC_SPLINE(order); break;
+ }
+ }
+ }
+}
+
+
+void make_dft_mod(real *mod,real *data,int ndata)
+{
+ int i,j;
+ real sc,ss,arg;
+
+ for(i=0;i<ndata;i++) {
+ sc=ss=0;
+ for(j=0;j<ndata;j++) {
+ arg=(2.0*M_PI*i*j)/ndata;
+ sc+=data[j]*cos(arg);
+ ss+=data[j]*sin(arg);
+ }
+ mod[i]=sc*sc+ss*ss;
+ }
+ for(i=0;i<ndata;i++)
+ if(mod[i]<1e-7)
+ mod[i]=(mod[i-1]+mod[i+1])*0.5;
+}
+
+
+static void make_bspline_moduli(splinevec bsp_mod,
+ int nx,int ny,int nz,int order)
+{
+ int nmax=max(nx,max(ny,nz));
+ real *data,*ddata,*bsp_data;
+ int i,k,l;
+ real div;
+
+ snew(data,order);
+ snew(ddata,order);
+ snew(bsp_data,nmax);
+
+ data[order-1]=0;
+ data[1]=0;
+ data[0]=1;
+
+ for(k=3;k<order;k++) {
+ div=1.0/(k-1.0);
+ data[k-1]=0;
+ for(l=1;l<(k-1);l++)
+ data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
+ data[0]=div*data[0];
+ }
+ /* differentiate */
+ ddata[0]=-data[0];
+ for(k=1;k<order;k++)
+ ddata[k]=data[k-1]-data[k];
+ div=1.0/(order-1);
+ data[order-1]=0;
+ for(l=1;l<(order-1);l++)
+ data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
+ data[0]=div*data[0];
+
+ for(i=0;i<nmax;i++)
+ bsp_data[i]=0;
+ for(i=1;i<=order;i++)
+ bsp_data[i]=data[i-1];
+
+ make_dft_mod(bsp_mod[XX],bsp_data,nx);
+ make_dft_mod(bsp_mod[YY],bsp_data,ny);
+ make_dft_mod(bsp_mod[ZZ],bsp_data,nz);
+
+ sfree(data);
+ sfree(ddata);
+ sfree(bsp_data);
+}
+
+
+/* Return the P3M optimal influence function */
+static double do_p3m_influence(double z, int order)
+{
+ double z2,z4;
+
+ z2 = z*z;
+ z4 = z2*z2;
+
+ /* The formula and most constants can be found in:
+ * Ballenegger et al., JCTC 8, 936 (2012)
+ */
+ switch(order)
+ {
+ case 2:
+ return 1.0 - 2.0*z2/3.0;
+ break;
+ case 3:
+ return 1.0 - z2 + 2.0*z4/15.0;
+ break;
+ case 4:
+ return 1.0 - 4.0*z2/3.0 + 2.0*z4/5.0 + 4.0*z2*z4/315.0;
+ break;
+ case 5:
+ return 1.0 - 5.0*z2/3.0 + 7.0*z4/9.0 - 17.0*z2*z4/189.0 + 2.0*z4*z4/2835.0;
+ break;
+ case 6:
+ return 1.0 - 2.0*z2 + 19.0*z4/15.0 - 256.0*z2*z4/945.0 + 62.0*z4*z4/4725.0 + 4.0*z2*z4*z4/155925.0;
+ break;
+ case 7:
+ return 1.0 - 7.0*z2/3.0 + 28.0*z4/15.0 - 16.0*z2*z4/27.0 + 26.0*z4*z4/405.0 - 2.0*z2*z4*z4/1485.0 + 4.0*z4*z4*z4/6081075.0;
+ case 8:
+ return 1.0 - 8.0*z2/3.0 + 116.0*z4/45.0 - 344.0*z2*z4/315.0 + 914.0*z4*z4/4725.0 - 248.0*z4*z4*z2/22275.0 + 21844.0*z4*z4*z4/212837625.0 - 8.0*z4*z4*z4*z2/638512875.0;
+ break;
+ }
+
+ return 0.0;
+}
+
+/* Calculate the P3M B-spline moduli for one dimension */
+static void make_p3m_bspline_moduli_dim(real *bsp_mod,int n,int order)
+{
+ double zarg,zai,sinzai,infl;
+ int maxk,i;
+
+ if (order > 8)
+ {
+ gmx_fatal(FARGS,"The current P3M code only supports orders up to 8");
+ }
+
+ zarg = M_PI/n;
+
+ maxk = (n + 1)/2;
+
+ for(i=-maxk; i<0; i++)
+ {
+ zai = zarg*i;
+ sinzai = sin(zai);
+ infl = do_p3m_influence(sinzai,order);
+ bsp_mod[n+i] = infl*infl*pow(sinzai/zai,-2.0*order);
+ }
+ bsp_mod[0] = 1.0;
+ for(i=1; i<maxk; i++)
+ {
+ zai = zarg*i;
+ sinzai = sin(zai);
+ infl = do_p3m_influence(sinzai,order);
+ bsp_mod[i] = infl*infl*pow(sinzai/zai,-2.0*order);
+ }
+}
+
+/* Calculate the P3M B-spline moduli */
+static void make_p3m_bspline_moduli(splinevec bsp_mod,
+ int nx,int ny,int nz,int order)
+{
+ make_p3m_bspline_moduli_dim(bsp_mod[XX],nx,order);
+ make_p3m_bspline_moduli_dim(bsp_mod[YY],ny,order);
+ make_p3m_bspline_moduli_dim(bsp_mod[ZZ],nz,order);
+}
+
+
+static void setup_coordinate_communication(pme_atomcomm_t *atc)
+{
+ int nslab,n,i;
+ int fw,bw;
+
+ nslab = atc->nslab;
+
+ n = 0;
+ for(i=1; i<=nslab/2; i++) {
+ fw = (atc->nodeid + i) % nslab;
+ bw = (atc->nodeid - i + nslab) % nslab;
+ if (n < nslab - 1) {
+ atc->node_dest[n] = fw;
+ atc->node_src[n] = bw;
+ n++;
+ }
+ if (n < nslab - 1) {
+ atc->node_dest[n] = bw;
+ atc->node_src[n] = fw;
+ n++;
+ }
+ }
+}
+
+int gmx_pme_destroy(FILE *log,gmx_pme_t *pmedata)
+{
+ int thread;
+
+ if(NULL != log)
+ {
+ fprintf(log,"Destroying PME data structures.\n");
+ }
+
+ sfree((*pmedata)->nnx);
+ sfree((*pmedata)->nny);
+ sfree((*pmedata)->nnz);
+
+ pmegrids_destroy(&(*pmedata)->pmegridA);
+
+ sfree((*pmedata)->fftgridA);
+ sfree((*pmedata)->cfftgridA);
+ gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupA);
+
+ if ((*pmedata)->pmegridB.grid.grid != NULL)
+ {
+ pmegrids_destroy(&(*pmedata)->pmegridB);
+ sfree((*pmedata)->fftgridB);
+ sfree((*pmedata)->cfftgridB);
+ gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupB);
+ }
+ for(thread=0; thread<(*pmedata)->nthread; thread++)
+ {
+ free_work(&(*pmedata)->work[thread]);
+ }
+ sfree((*pmedata)->work);
+
+ sfree(*pmedata);
+ *pmedata = NULL;
+
+ return 0;
+}
+
+static int mult_up(int n,int f)
+{
+ return ((n + f - 1)/f)*f;
+}
+
+
+static double pme_load_imbalance(gmx_pme_t pme)
+{
+ int nma,nmi;
+ double n1,n2,n3;
+
+ nma = pme->nnodes_major;
+ nmi = pme->nnodes_minor;
+
+ n1 = mult_up(pme->nkx,nma)*mult_up(pme->nky,nmi)*pme->nkz;
+ n2 = mult_up(pme->nkx,nma)*mult_up(pme->nkz,nmi)*pme->nky;
+ n3 = mult_up(pme->nky,nma)*mult_up(pme->nkz,nmi)*pme->nkx;
+
+ /* pme_solve is roughly double the cost of an fft */
+
+ return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
+}
+
+static void init_atomcomm(gmx_pme_t pme,pme_atomcomm_t *atc, t_commrec *cr,
+ int dimind,gmx_bool bSpread)
+{
+ int nk,k,s,thread;
+
+ atc->dimind = dimind;
+ atc->nslab = 1;
+ atc->nodeid = 0;
+ atc->pd_nalloc = 0;
+#ifdef GMX_MPI
+ if (pme->nnodes > 1)
+ {
+ atc->mpi_comm = pme->mpi_comm_d[dimind];
+ MPI_Comm_size(atc->mpi_comm,&atc->nslab);
+ MPI_Comm_rank(atc->mpi_comm,&atc->nodeid);
+ }
+ if (debug)
+ {
+ fprintf(debug,"For PME atom communication in dimind %d: nslab %d rank %d\n",atc->dimind,atc->nslab,atc->nodeid);
+ }
+#endif
+
+ atc->bSpread = bSpread;
+ atc->pme_order = pme->pme_order;
+
+ if (atc->nslab > 1)
+ {
+ /* These three allocations are not required for particle decomp. */
+ snew(atc->node_dest,atc->nslab);
+ snew(atc->node_src,atc->nslab);
+ setup_coordinate_communication(atc);
+
+ snew(atc->count_thread,pme->nthread);
+ for(thread=0; thread<pme->nthread; thread++)
+ {
+ snew(atc->count_thread[thread],atc->nslab);
+ }
+ atc->count = atc->count_thread[0];
+ snew(atc->rcount,atc->nslab);
+ snew(atc->buf_index,atc->nslab);
+ }
+
+ atc->nthread = pme->nthread;
+ if (atc->nthread > 1)
+ {
+ snew(atc->thread_plist,atc->nthread);
+ }
+ snew(atc->spline,atc->nthread);
+ for(thread=0; thread<atc->nthread; thread++)
+ {
+ if (atc->nthread > 1)
+ {
+ snew(atc->thread_plist[thread].n,atc->nthread+2*GMX_CACHE_SEP);
+ atc->thread_plist[thread].n += GMX_CACHE_SEP;
+ }
++ snew(atc->spline[thread].thread_one,pme->nthread);
++ atc->spline[thread].thread_one[thread] = 1;
+ }
+}
+
+static void
+init_overlap_comm(pme_overlap_t * ol,
+ int norder,
+#ifdef GMX_MPI
+ MPI_Comm comm,
+#endif
+ int nnodes,
+ int nodeid,
+ int ndata,
+ int commplainsize)
+{
+ int lbnd,rbnd,maxlr,b,i;
+ int exten;
+ int nn,nk;
+ pme_grid_comm_t *pgc;
+ gmx_bool bCont;
+ int fft_start,fft_end,send_index1,recv_index1;
- /* Linear translation of the PME grid wo'nt affect reciprocal space
+#ifdef GMX_MPI
++ MPI_Status stat;
++
+ ol->mpi_comm = comm;
+#endif
+
+ ol->nnodes = nnodes;
+ ol->nodeid = nodeid;
+
- gmx_fatal(FARGS,"The pme grid dimensions need to be larger than pme_order (%d) and in parallel larger than 2*pme_ordern for x and/or y",pme->pme_order);
++ /* Linear translation of the PME grid won't affect reciprocal space
+ * calculations, so to optimize we only interpolate "upwards",
+ * which also means we only have to consider overlap in one direction.
+ * I.e., particles on this node might also be spread to grid indices
+ * that belong to higher nodes (modulo nnodes)
+ */
+
+ snew(ol->s2g0,ol->nnodes+1);
+ snew(ol->s2g1,ol->nnodes);
+ if (debug) { fprintf(debug,"PME slab boundaries:"); }
+ for(i=0; i<nnodes; i++)
+ {
+ /* s2g0 the local interpolation grid start.
+ * s2g1 the local interpolation grid end.
+ * Because grid overlap communication only goes forward,
+ * the grid the slabs for fft's should be rounded down.
+ */
+ ol->s2g0[i] = ( i *ndata + 0 )/nnodes;
+ ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
+
+ if (debug)
+ {
+ fprintf(debug," %3d %3d",ol->s2g0[i],ol->s2g1[i]);
+ }
+ }
+ ol->s2g0[nnodes] = ndata;
+ if (debug) { fprintf(debug,"\n"); }
+
+ /* Determine with how many nodes we need to communicate the grid overlap */
+ b = 0;
+ do
+ {
+ b++;
+ bCont = FALSE;
+ for(i=0; i<nnodes; i++)
+ {
+ if ((i+b < nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
+ (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
+ {
+ bCont = TRUE;
+ }
+ }
+ }
+ while (bCont && b < nnodes);
+ ol->noverlap_nodes = b - 1;
+
+ snew(ol->send_id,ol->noverlap_nodes);
+ snew(ol->recv_id,ol->noverlap_nodes);
+ for(b=0; b<ol->noverlap_nodes; b++)
+ {
+ ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
+ ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
+ }
+ snew(ol->comm_data, ol->noverlap_nodes);
+
++ ol->send_size = 0;
+ for(b=0; b<ol->noverlap_nodes; b++)
+ {
+ pgc = &ol->comm_data[b];
+ /* Send */
+ fft_start = ol->s2g0[ol->send_id[b]];
+ fft_end = ol->s2g0[ol->send_id[b]+1];
+ if (ol->send_id[b] < nodeid)
+ {
+ fft_start += ndata;
+ fft_end += ndata;
+ }
+ send_index1 = ol->s2g1[nodeid];
+ send_index1 = min(send_index1,fft_end);
+ pgc->send_index0 = fft_start;
+ pgc->send_nindex = max(0,send_index1 - pgc->send_index0);
++ ol->send_size += pgc->send_nindex;
+
+ /* We always start receiving to the first index of our slab */
+ fft_start = ol->s2g0[ol->nodeid];
+ fft_end = ol->s2g0[ol->nodeid+1];
+ recv_index1 = ol->s2g1[ol->recv_id[b]];
+ if (ol->recv_id[b] > nodeid)
+ {
+ recv_index1 -= ndata;
+ }
+ recv_index1 = min(recv_index1,fft_end);
+ pgc->recv_index0 = fft_start;
+ pgc->recv_nindex = max(0,recv_index1 - pgc->recv_index0);
+ }
+
++#ifdef GMX_MPI
++ /* Communicate the buffer sizes to receive */
++ for(b=0; b<ol->noverlap_nodes; b++)
++ {
++ MPI_Sendrecv(&ol->send_size ,1,MPI_INT,ol->send_id[b],b,
++ &ol->comm_data[b].recv_size,1,MPI_INT,ol->recv_id[b],b,
++ ol->mpi_comm,&stat);
++ }
++#endif
++
+ /* For non-divisible grid we need pme_order iso pme_order-1 */
+ snew(ol->sendbuf,norder*commplainsize);
+ snew(ol->recvbuf,norder*commplainsize);
+}
+
+static void
+make_gridindex5_to_localindex(int n,int local_start,int local_range,
+ int **global_to_local,
+ real **fraction_shift)
+{
+ int i;
+ int * gtl;
+ real * fsh;
+
+ snew(gtl,5*n);
+ snew(fsh,5*n);
+ for(i=0; (i<5*n); i++)
+ {
+ /* Determine the global to local grid index */
+ gtl[i] = (i - local_start + n) % n;
+ /* For coordinates that fall within the local grid the fraction
+ * is correct, we don't need to shift it.
+ */
+ fsh[i] = 0;
+ if (local_range < n)
+ {
+ /* Due to rounding issues i could be 1 beyond the lower or
+ * upper boundary of the local grid. Correct the index for this.
+ * If we shift the index, we need to shift the fraction by
+ * the same amount in the other direction to not affect
+ * the weights.
+ * Note that due to this shifting the weights at the end of
+ * the spline might change, but that will only involve values
+ * between zero and values close to the precision of a real,
+ * which is anyhow the accuracy of the whole mesh calculation.
+ */
+ /* With local_range=0 we should not change i=local_start */
+ if (i % n != local_start)
+ {
+ if (gtl[i] == n-1)
+ {
+ gtl[i] = 0;
+ fsh[i] = -1;
+ }
+ else if (gtl[i] == local_range)
+ {
+ gtl[i] = local_range - 1;
+ fsh[i] = 1;
+ }
+ }
+ }
+ }
+
+ *global_to_local = gtl;
+ *fraction_shift = fsh;
+}
+
+static pme_spline_work_t *make_pme_spline_work(int order)
+{
+ pme_spline_work_t *work;
+
+#ifdef PME_SSE
+ float tmp[8];
+ __m128 zero_SSE;
+ int of,i;
+
+ snew_aligned(work,1,16);
+
+ zero_SSE = _mm_setzero_ps();
+
+ /* Generate bit masks to mask out the unused grid entries,
+ * as we only operate on order of the 8 grid entries that are
+ * load into 2 SSE float registers.
+ */
+ for(of=0; of<8-(order-1); of++)
+ {
+ for(i=0; i<8; i++)
+ {
+ tmp[i] = (i >= of && i < of+order ? 1 : 0);
+ }
+ work->mask_SSE0[of] = _mm_loadu_ps(tmp);
+ work->mask_SSE1[of] = _mm_loadu_ps(tmp+4);
+ work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of],zero_SSE);
+ work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of],zero_SSE);
+ }
+#else
+ work = NULL;
+#endif
+
+ return work;
+}
+
+/* Check that the PME grid size *nk in dimension 'dim' is divisible by the
+ * number of PME nodes 'nnodes' in that dimension, as required by the fft5d
+ * code. If not, round *nk up to the next multiple of nnodes (and log a NOTE
+ * to fplog when non-NULL), or abort with gmx_fatal when that would grow the
+ * grid by 50% or more.
+ */
+static void
+gmx_pme_check_grid_restrictions(FILE *fplog,char dim,int nnodes,int *nk)
+{
+ int nk_new;
+
+ if (*nk % nnodes != 0)
+ {
+ /* Smallest multiple of nnodes that is >= *nk */
+ nk_new = nnodes*(*nk/nnodes + 1);
+
+ if (2*nk_new >= 3*(*nk))
+ {
+ gmx_fatal(FARGS,"The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
+ dim,*nk,dim,nnodes,dim);
+ }
+
+ if (fplog != NULL)
+ {
+ fprintf(fplog,"\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
+ dim,*nk,dim,nnodes,dim,nk_new,dim);
+ }
+
+ *nk = nk_new;
+ }
+}
+
+/* Initialize all PME data structures for this rank and return them in
+ * *pmedata.
+ * cr: communication record; nnodes_major/nnodes_minor: the 1D or 2D PME
+ * node decomposition; ir: input record supplying grid sizes, pme_order,
+ * coulombtype etc.; homenr: number of local atoms (used for the serial
+ * case); bFreeEnergy: also set up the B-state charge grid; bReproducible:
+ * request binary-reproducible FFTs; nthread: number of OpenMP threads.
+ * Returns 0 on success; aborts via gmx_fatal/gmx_incons on invalid setups.
+ */
+int gmx_pme_init(gmx_pme_t * pmedata,
+ t_commrec * cr,
+ int nnodes_major,
+ int nnodes_minor,
+ t_inputrec * ir,
+ int homenr,
+ gmx_bool bFreeEnergy,
+ gmx_bool bReproducible,
+ int nthread)
+{
+ gmx_pme_t pme=NULL;
+
+ pme_atomcomm_t *atc;
+ ivec ndata;
+
+ if (debug)
+ fprintf(debug,"Creating PME data structures.\n");
+ snew(pme,1);
+
+ pme->redist_init = FALSE;
+ pme->sum_qgrid_tmp = NULL;
+ pme->sum_qgrid_dd_tmp = NULL;
+ pme->buf_nalloc = 0;
+ pme->redist_buf_nalloc = 0;
+
+ pme->nnodes = 1;
+ pme->bPPnode = TRUE;
+
+ pme->nnodes_major = nnodes_major;
+ pme->nnodes_minor = nnodes_minor;
+
+#ifdef GMX_MPI
+ if (nnodes_major*nnodes_minor > 1)
+ {
+ pme->mpi_comm = cr->mpi_comm_mygroup;
+
+ MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
+ MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
+ if (pme->nnodes != nnodes_major*nnodes_minor)
+ {
+ gmx_incons("PME node count mismatch");
+ }
+ }
+ else
+ {
+ pme->mpi_comm = MPI_COMM_NULL;
+ }
+#endif
+
+ /* Set up the (possibly 2D) decomposition communicators */
+ if (pme->nnodes == 1)
+ {
+#ifdef GMX_MPI
+ pme->mpi_comm_d[0] = MPI_COMM_NULL;
+ pme->mpi_comm_d[1] = MPI_COMM_NULL;
+#endif
+ pme->ndecompdim = 0;
+ pme->nodeid_major = 0;
+ pme->nodeid_minor = 0;
+#ifdef GMX_MPI
+ pme->mpi_comm_d[0] = pme->mpi_comm_d[1] = MPI_COMM_NULL;
+#endif
+ }
+ else
+ {
+ if (nnodes_minor == 1)
+ {
+#ifdef GMX_MPI
+ pme->mpi_comm_d[0] = pme->mpi_comm;
+ pme->mpi_comm_d[1] = MPI_COMM_NULL;
+#endif
+ pme->ndecompdim = 1;
+ pme->nodeid_major = pme->nodeid;
+ pme->nodeid_minor = 0;
+
+ }
+ else if (nnodes_major == 1)
+ {
+#ifdef GMX_MPI
+ pme->mpi_comm_d[0] = MPI_COMM_NULL;
+ pme->mpi_comm_d[1] = pme->mpi_comm;
+#endif
+ pme->ndecompdim = 1;
+ pme->nodeid_major = 0;
+ pme->nodeid_minor = pme->nodeid;
+ }
+ else
+ {
+ if (pme->nnodes % nnodes_major != 0)
+ {
+ gmx_incons("For 2D PME decomposition, #PME nodes must be divisible by the number of nodes in the major dimension");
+ }
+ pme->ndecompdim = 2;
+
+#ifdef GMX_MPI
+ MPI_Comm_split(pme->mpi_comm,pme->nodeid % nnodes_minor,
+ pme->nodeid,&pme->mpi_comm_d[0]); /* My communicator along major dimension */
+ MPI_Comm_split(pme->mpi_comm,pme->nodeid/nnodes_minor,
+ pme->nodeid,&pme->mpi_comm_d[1]); /* My communicator along minor dimension */
+
+ MPI_Comm_rank(pme->mpi_comm_d[0],&pme->nodeid_major);
+ MPI_Comm_size(pme->mpi_comm_d[0],&pme->nnodes_major);
+ MPI_Comm_rank(pme->mpi_comm_d[1],&pme->nodeid_minor);
+ MPI_Comm_size(pme->mpi_comm_d[1],&pme->nnodes_minor);
+#endif
+ }
+ pme->bPPnode = (cr->duty & DUTY_PP);
+ }
+
+ pme->nthread = nthread;
+
+ if (ir->ePBC == epbcSCREW)
+ {
+ gmx_fatal(FARGS,"pme does not (yet) work with pbc = screw");
+ }
+
+ pme->bFEP = ((ir->efep != efepNO) && bFreeEnergy);
+ pme->nkx = ir->nkx;
+ pme->nky = ir->nky;
+ pme->nkz = ir->nkz;
+ pme->bP3M = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != NULL);
+ pme->pme_order = ir->pme_order;
+ pme->epsilon_r = ir->epsilon_r;
+
+ if (pme->pme_order > PME_ORDER_MAX)
+ {
+ gmx_fatal(FARGS,"pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
+ pme->pme_order,PME_ORDER_MAX);
+ }
+
+ /* Currently pme.c supports only the fft5d FFT code.
+ * Therefore the grid always needs to be divisible by nnodes.
+ * When the old 1D code is also supported again, change this check.
+ *
+ * This check should be done before calling gmx_pme_init
+ * and fplog should be passed instead of stderr.
+ *
+ if (pme->ndecompdim >= 2)
+ */
+ if (pme->ndecompdim >= 1)
+ {
+ /*
+ gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
+ 'x',nnodes_major,&pme->nkx);
+ gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
+ 'y',nnodes_minor,&pme->nky);
+ */
+ }
+
+ if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
+ pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
+ pme->nkz <= pme->pme_order)
+ {
- (div_round_up(pme->nkx,pme->nnodes_major)+pme->pme_order)*pme->nkz);
++ gmx_fatal(FARGS,"The PME grid sizes need to be larger than pme_order (%d) and for dimensions with domain decomposition larger than 2*pme_order",pme->pme_order);
+ }
+
+ if (pme->nnodes > 1) {
+ double imbal;
+
+#ifdef GMX_MPI
+ MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
+ MPI_Type_commit(&(pme->rvec_mpi));
+#endif
+
+ /* Note that the charge spreading and force gathering, which usually
+ * takes about the same amount of time as FFT+solve_pme,
+ * is always fully load balanced
+ * (unless the charge distribution is inhomogeneous).
+ */
+
+ imbal = pme_load_imbalance(pme);
+ if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
+ {
+ fprintf(stderr,
+ "\n"
+ "NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
+ " For optimal PME load balancing\n"
+ " PME grid_x (%d) and grid_y (%d) should be divisible by #PME_nodes_x (%d)\n"
+ " and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
+ "\n",
+ (int)((imbal-1)*100 + 0.5),
+ pme->nkx,pme->nky,pme->nnodes_major,
+ pme->nky,pme->nkz,pme->nnodes_minor);
+ }
+ }
+
+ /* For non-divisible grid we need pme_order instead of pme_order-1 */
+ /* In sum_qgrid_dd x overlap is copied in place: take padding into account.
+ * y is always copied through a buffer: we don't need padding in z,
+ * but we do need the overlap in x because of the communication order.
+ */
+ init_overlap_comm(&pme->overlap[0],pme->pme_order,
+#ifdef GMX_MPI
+ pme->mpi_comm_d[0],
+#endif
+ pme->nnodes_major,pme->nodeid_major,
+ pme->nkx,
+ (div_round_up(pme->nky,pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
+
++ /* Along overlap dim 1 we can send in multiple pulses in sum_fftgrid_dd.
++ * We do this with an offset buffer of equal size, so we need to allocate
++ * extra for the offset. That's what the (+1)*pme->nkz is for.
++ */
+ init_overlap_comm(&pme->overlap[1],pme->pme_order,
+#ifdef GMX_MPI
+ pme->mpi_comm_d[1],
+#endif
+ pme->nnodes_minor,pme->nodeid_minor,
+ pme->nky,
- /* Check for a limitation of the (current) sum_fftgrid_dd code */
- if (pme->nthread > 1 &&
- (pme->overlap[0].noverlap_nodes > 1 ||
- pme->overlap[1].noverlap_nodes > 1))
++ (div_round_up(pme->nkx,pme->nnodes_major)+pme->pme_order+1)*pme->nkz);
+
- gmx_fatal(FARGS,"With threads the number of grid lines per node along x and or y should be pme_order (%d) or more or exactly pme_order-1",pme->pme_order);
++ /* Check for a limitation of the (current) sum_fftgrid_dd code.
++ * We only allow multiple communication pulses in dim 1, not in dim 0.
++ */
++ if (pme->nthread > 1 && (pme->overlap[0].noverlap_nodes > 1 ||
++ pme->nkx < pme->nnodes_major*pme->pme_order))
+ {
-
++ gmx_fatal(FARGS,"The number of PME grid lines per node along x is %g. But when using OpenMP threads, the number of grid lines per node along x and should be >= pme_order (%d). To resolve this issue, use less nodes along x (and possibly more along y and/or z) by specifying -dd manually.",
++ pme->nkx/(double)pme->nnodes_major,pme->pme_order);
+ }
+
+ snew(pme->bsp_mod[XX],pme->nkx);
+ snew(pme->bsp_mod[YY],pme->nky);
+ snew(pme->bsp_mod[ZZ],pme->nkz);
+
+ /* The required size of the interpolation grid, including overlap.
+ * The allocated size (pmegrid_n?) might be slightly larger.
+ */
+ pme->pmegrid_nx = pme->overlap[0].s2g1[pme->nodeid_major] -
+ pme->overlap[0].s2g0[pme->nodeid_major];
+ pme->pmegrid_ny = pme->overlap[1].s2g1[pme->nodeid_minor] -
+ pme->overlap[1].s2g0[pme->nodeid_minor];
+ pme->pmegrid_nz_base = pme->nkz;
+ pme->pmegrid_nz = pme->pmegrid_nz_base + pme->pme_order - 1;
+ set_grid_alignment(&pme->pmegrid_nz,pme->pme_order);
+
+ pme->pmegrid_start_ix = pme->overlap[0].s2g0[pme->nodeid_major];
+ pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
+ pme->pmegrid_start_iz = 0;
+
+ /* Build global-to-local index and fraction-shift tables per dimension */
+ make_gridindex5_to_localindex(pme->nkx,
+ pme->pmegrid_start_ix,
+ pme->pmegrid_nx - (pme->pme_order-1),
+ &pme->nnx,&pme->fshx);
+ make_gridindex5_to_localindex(pme->nky,
+ pme->pmegrid_start_iy,
+ pme->pmegrid_ny - (pme->pme_order-1),
+ &pme->nny,&pme->fshy);
+ make_gridindex5_to_localindex(pme->nkz,
+ pme->pmegrid_start_iz,
+ pme->pmegrid_nz_base,
+ &pme->nnz,&pme->fshz);
+
+ pmegrids_init(&pme->pmegridA,
+ pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
+ pme->pmegrid_nz_base,
+ pme->pme_order,
+ pme->nthread,
+ pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
+ pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
+
+ pme->spline_work = make_pme_spline_work(pme->pme_order);
+
+ ndata[0] = pme->nkx;
+ ndata[1] = pme->nky;
+ ndata[2] = pme->nkz;
+
+ /* This routine will allocate the grid data to fit the FFTs */
+ gmx_parallel_3dfft_init(&pme->pfft_setupA,ndata,
+ &pme->fftgridA,&pme->cfftgridA,
+ pme->mpi_comm_d,
+ pme->overlap[0].s2g0,pme->overlap[1].s2g0,
+ bReproducible,pme->nthread);
+
+ if (bFreeEnergy)
+ {
+ pmegrids_init(&pme->pmegridB,
+ pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
+ pme->pmegrid_nz_base,
+ pme->pme_order,
+ pme->nthread,
+ pme->nkx % pme->nnodes_major != 0,
+ pme->nky % pme->nnodes_minor != 0);
+
+ gmx_parallel_3dfft_init(&pme->pfft_setupB,ndata,
+ &pme->fftgridB,&pme->cfftgridB,
+ pme->mpi_comm_d,
+ pme->overlap[0].s2g0,pme->overlap[1].s2g0,
+ bReproducible,pme->nthread);
+ }
+ else
+ {
+ pme->pmegridB.grid.grid = NULL;
+ pme->fftgridB = NULL;
+ pme->cfftgridB = NULL;
+ }
+
+ if (!pme->bP3M)
+ {
+ /* Use plain SPME B-spline interpolation */
+ make_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
+ }
+ else
+ {
+ /* Use the P3M grid-optimized influence function */
+ make_p3m_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
+ }
+
+ /* Use atc[0] for spreading */
+ init_atomcomm(pme,&pme->atc[0],cr,nnodes_major > 1 ? 0 : 1,TRUE);
+ if (pme->ndecompdim >= 2)
+ {
+ init_atomcomm(pme,&pme->atc[1],cr,1,FALSE);
+ }
+
+ if (pme->nnodes == 1) {
+ pme->atc[0].n = homenr;
+ pme_realloc_atomcomm_things(&pme->atc[0]);
+ }
+
+ {
+ int thread;
+
+ /* Use fft5d, order after FFT is y major, z, x minor */
+
+ snew(pme->work,pme->nthread);
+ for(thread=0; thread<pme->nthread; thread++)
+ {
+ realloc_work(&pme->work[thread],pme->nkx);
+ }
+ }
+
+ *pmedata = pme;
- nsx = pmegrid->n[XX];
- nsy = pmegrid->n[YY];
- nsz = pmegrid->n[ZZ];
++
+ return 0;
+}
+
++/* Re-point the grid storage of 'new' at the (larger or equal) allocations
++ * of 'old', freeing new's own buffers, to avoid reallocating when switching
++ * between PME grid sizes. Does nothing when any dimension of new is larger
++ * than old. Thread-local grids are only reused when the thread counts match.
++ * NOTE(review): 'new' is a C++ keyword; this identifier is valid C only and
++ * would break compilation as C++.
++ */
++static void reuse_pmegrids(const pmegrids_t *old,pmegrids_t *new)
++{
++ int d,t;
++
++ for(d=0; d<DIM; d++)
++ {
++ if (new->grid.n[d] > old->grid.n[d])
++ {
++ /* old's allocation is too small to hold new: cannot reuse */
++ return;
++ }
++ }
++
++ sfree_aligned(new->grid.grid);
++ new->grid.grid = old->grid.grid;
++
++ if (new->nthread > 1 && new->nthread == old->nthread)
++ {
++ sfree_aligned(new->grid_all);
++ for(t=0; t<new->nthread; t++)
++ {
++ new->grid_th[t].grid = old->grid_th[t].grid;
++ }
++ }
++}
++
++/* Re-initialize PME for a new grid size, reusing the node decomposition,
++ * FEP setting and thread count of pme_src, and where possible the grid
++ * allocations of pme_src. Used by PME grid tuning/switching.
++ * Returns the return value of gmx_pme_init (0 on success).
++ */
++int gmx_pme_reinit(gmx_pme_t * pmedata,
++ t_commrec * cr,
++ gmx_pme_t pme_src,
++ const t_inputrec * ir,
++ ivec grid_size)
++{
++ t_inputrec irc;
++ int homenr;
++ int ret;
++
++ /* Work on a copy of ir with only the grid size overridden */
++ irc = *ir;
++ irc.nkx = grid_size[XX];
++ irc.nky = grid_size[YY];
++ irc.nkz = grid_size[ZZ];
++
++ if (pme_src->nnodes == 1)
++ {
++ homenr = pme_src->atc[0].n;
++ }
++ else
++ {
++ /* -1 presumably means "not used in parallel" -- gmx_pme_init only
++ * reads homenr when nnodes == 1 (see its serial branch); confirm.
++ */
++ homenr = -1;
++ }
++
++ ret = gmx_pme_init(pmedata,cr,pme_src->nnodes_major,pme_src->nnodes_minor,
++ &irc,homenr,pme_src->bFEP,FALSE,pme_src->nthread);
++
++ if (ret == 0)
++ {
++ /* We can easily reuse the allocated pme grids in pme_src */
++ reuse_pmegrids(&pme_src->pmegridA,&(*pmedata)->pmegridA);
++ /* We would like to reuse the fft grids, but that's harder */
++ }
++
++ return ret;
++}
++
+
+/* Copy the non-overlapping interior of this thread's local spread grid
+ * into the node-level FFT grid. Called after spreading when multiple
+ * threads are used; the overlapping parts are handled separately by
+ * reduce_threadgrid_overlap.
+ */
+static void copy_local_grid(gmx_pme_t pme,
+ pmegrids_t *pmegrids,int thread,real *fftgrid)
+{
+ ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ int fft_my,fft_mz;
+ int nsx,nsy,nsz;
+ ivec nf;
+ int offx,offy,offz,x,y,z,i0,i0t;
+ int d;
+ pmegrid_t *pmegrid;
+ real *grid_th;
+
+ gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
+ local_fft_ndata,
+ local_fft_offset,
+ local_fft_size);
+ /* y/z strides of the node-level fft grid */
+ fft_my = local_fft_size[YY];
+ fft_mz = local_fft_size[ZZ];
+
+ pmegrid = &pmegrids->grid_th[thread];
+
- static void print_sendbuf(gmx_pme_t pme,real *sendbuf)
- {
- ivec local_fft_ndata,local_fft_offset,local_fft_size;
- pme_overlap_t *overlap;
- int datasize,nind;
- int i,x,y,z,n;
-
- gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
- local_fft_ndata,
- local_fft_offset,
- local_fft_size);
- /* Major dimension */
- overlap = &pme->overlap[0];
-
- nind = overlap->comm_data[0].send_nindex;
-
- for(y=0; y<local_fft_ndata[YY]; y++) {
- printf(" %2d",y);
- }
- printf("\n");
-
- i = 0;
- for(x=0; x<nind; x++) {
- for(y=0; y<local_fft_ndata[YY]; y++) {
- n = 0;
- for(z=0; z<local_fft_ndata[ZZ]; z++) {
- if (sendbuf[i] != 0) n++;
- i++;
- }
- printf(" %2d",n);
- }
- printf("\n");
- }
- }
-
++ /* Allocated (padded) strides of the thread-local grid */
++ nsx = pmegrid->s[XX];
++ nsy = pmegrid->s[YY];
++ nsz = pmegrid->s[ZZ];
+
+ for(d=0; d<DIM; d++)
+ {
+ /* Interior size: local grid minus the order-1 spread overlap,
+ * clipped to the node grid extent.
+ */
+ nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
+ local_fft_ndata[d] - pmegrid->offset[d]);
+ }
+
+ offx = pmegrid->offset[XX];
+ offy = pmegrid->offset[YY];
+ offz = pmegrid->offset[ZZ];
+
+ /* Directly copy the non-overlapping parts of the local grids.
+ * This also initializes the full grid.
+ */
+ grid_th = pmegrid->grid;
+ for(x=0; x<nf[XX]; x++)
+ {
+ for(y=0; y<nf[YY]; y++)
+ {
+ i0 = ((offx + x)*fft_my + (offy + y))*fft_mz + offz;
+ i0t = (x*nsy + y)*nsz;
+ for(z=0; z<nf[ZZ]; z++)
+ {
+ fftgrid[i0+z] = grid_th[i0t+z];
+ }
+ }
+ }
+}
+
- nsx = pmegrid_f->n[XX];
- nsy = pmegrid_f->n[YY];
- nsz = pmegrid_f->n[ZZ];
+/* Sum the overlapping edges of the per-thread spread grids into this
+ * thread's part of the node-level fftgrid, or -- for overlap that belongs
+ * to a neighboring node -- into the node communication buffers commbuf_x
+ * (major/x dim) and commbuf_y (minor/y dim) for later MPI exchange in
+ * sum_fftgrid_dd. Must be called only after all threads finished spreading.
+ */
+static void
+reduce_threadgrid_overlap(gmx_pme_t pme,
+ const pmegrids_t *pmegrids,int thread,
+ real *fftgrid,real *commbuf_x,real *commbuf_y)
+{
+ ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ int fft_nx,fft_ny,fft_nz;
+ int fft_my,fft_mz;
+ int buf_my=-1;
+ int nsx,nsy,nsz;
+ ivec ne;
+ int offx,offy,offz,x,y,z,i0,i0t;
+ int sx,sy,sz,fx,fy,fz,tx1,ty1,tz1,ox,oy,oz;
+ gmx_bool bClearBufX,bClearBufY,bClearBufXY,bClearBuf;
+ gmx_bool bCommX,bCommY;
+ int d;
+ int thread_f;
+ const pmegrid_t *pmegrid,*pmegrid_g,*pmegrid_f;
+ const real *grid_th;
+ real *commbuf=NULL;
+
+ gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
+ local_fft_ndata,
+ local_fft_offset,
+ local_fft_size);
+ fft_nx = local_fft_ndata[XX];
+ fft_ny = local_fft_ndata[YY];
+ fft_nz = local_fft_ndata[ZZ];
+
+ fft_my = local_fft_size[YY];
+ fft_mz = local_fft_size[ZZ];
+
+ /* This routine is called when all threads have finished spreading.
+ * Here each thread sums grid contributions calculated by other threads
+ * to the thread local grid volume.
+ * To minimize the number of grid copying operations,
+ * this routine sums immediately from the pmegrid to the fftgrid.
+ */
+
+ /* Determine which part of the full node grid we should operate on,
+ * this is our thread local part of the full grid.
+ */
+ pmegrid = &pmegrids->grid_th[thread];
+
+ for(d=0; d<DIM; d++)
+ {
+ ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
+ local_fft_ndata[d]);
+ }
+
+ offx = pmegrid->offset[XX];
+ offy = pmegrid->offset[YY];
+ offz = pmegrid->offset[ZZ];
+
+
+ bClearBufX = TRUE;
+ bClearBufY = TRUE;
+ bClearBufXY = TRUE;
+
+ /* Now loop over all the thread data blocks that contribute
+ * to the grid region we (our thread) are operating on.
+ */
+ /* Note that fft_nx/y is equal to the number of grid points
+ * between the first point of our node grid and the one of the next node.
+ */
+ for(sx=0; sx>=-pmegrids->nthread_comm[XX]; sx--)
+ {
+ fx = pmegrid->ci[XX] + sx;
+ ox = 0;
+ bCommX = FALSE;
+ if (fx < 0) {
+ /* Wrap around: this contribution comes from across the lower
+ * x edge, possibly belonging to the neighboring node.
+ */
+ fx += pmegrids->nc[XX];
+ ox -= fft_nx;
+ bCommX = (pme->nnodes_major > 1);
+ }
+ pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
+ ox += pmegrid_g->offset[XX];
+ if (!bCommX)
+ {
+ tx1 = min(ox + pmegrid_g->n[XX],ne[XX]);
+ }
+ else
+ {
+ tx1 = min(ox + pmegrid_g->n[XX],pme->pme_order);
+ }
+
+ for(sy=0; sy>=-pmegrids->nthread_comm[YY]; sy--)
+ {
+ fy = pmegrid->ci[YY] + sy;
+ oy = 0;
+ bCommY = FALSE;
+ if (fy < 0) {
+ fy += pmegrids->nc[YY];
+ oy -= fft_ny;
+ bCommY = (pme->nnodes_minor > 1);
+ }
+ pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
+ oy += pmegrid_g->offset[YY];
+ if (!bCommY)
+ {
+ ty1 = min(oy + pmegrid_g->n[YY],ne[YY]);
+ }
+ else
+ {
+ ty1 = min(oy + pmegrid_g->n[YY],pme->pme_order);
+ }
+
+ for(sz=0; sz>=-pmegrids->nthread_comm[ZZ]; sz--)
+ {
+ fz = pmegrid->ci[ZZ] + sz;
+ oz = 0;
+ if (fz < 0)
+ {
+ /* z is not decomposed over nodes, only wrap in-grid */
+ fz += pmegrids->nc[ZZ];
+ oz -= fft_nz;
+ }
+ pmegrid_g = &pmegrids->grid_th[fz];
+ oz += pmegrid_g->offset[ZZ];
+ tz1 = min(oz + pmegrid_g->n[ZZ],ne[ZZ]);
+
+ if (sx == 0 && sy == 0 && sz == 0)
+ {
+ /* We have already added our local contribution
+ * before calling this routine, so skip it here.
+ */
+ continue;
+ }
+
+ thread_f = (fx*pmegrids->nc[YY] + fy)*pmegrids->nc[ZZ] + fz;
+
+ pmegrid_f = &pmegrids->grid_th[thread_f];
+
+ grid_th = pmegrid_f->grid;
+
++ /* Padded strides of the contributing thread's grid */
++ nsx = pmegrid_f->s[XX];
++ nsy = pmegrid_f->s[YY];
++ nsz = pmegrid_f->s[ZZ];
+
+#ifdef DEBUG_PME_REDUCE
+ printf("n%d t%d add %d %2d %2d %2d %2d %2d %2d %2d-%2d %2d-%2d, %2d-%2d %2d-%2d, %2d-%2d %2d-%2d\n",
+ pme->nodeid,thread,thread_f,
+ pme->pmegrid_start_ix,
+ pme->pmegrid_start_iy,
+ pme->pmegrid_start_iz,
+ sx,sy,sz,
+ offx-ox,tx1-ox,offx,tx1,
+ offy-oy,ty1-oy,offy,ty1,
+ offz-oz,tz1-oz,offz,tz1);
+#endif
+
+ if (!(bCommX || bCommY))
+ {
+ /* Copy from the thread local grid to the node grid */
+ for(x=offx; x<tx1; x++)
+ {
+ for(y=offy; y<ty1; y++)
+ {
+ i0 = (x*fft_my + y)*fft_mz;
+ i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
+ for(z=offz; z<tz1; z++)
+ {
+ fftgrid[i0+z] += grid_th[i0t+z];
+ }
+ }
+ }
+ }
+ else
+ {
+ /* The order of this conditional decides
+ * where the corner volume gets stored with x+y decomp.
+ */
+ if (bCommY)
+ {
+ commbuf = commbuf_y;
+ buf_my = ty1 - offy;
+ if (bCommX)
+ {
+ /* We index commbuf modulo the local grid size */
+ commbuf += buf_my*fft_nx*fft_nz;
+
+ bClearBuf = bClearBufXY;
+ bClearBufXY = FALSE;
+ }
+ else
+ {
+ bClearBuf = bClearBufY;
+ bClearBufY = FALSE;
+ }
+ }
+ else
+ {
+ commbuf = commbuf_x;
+ buf_my = fft_ny;
+ bClearBuf = bClearBufX;
+ bClearBufX = FALSE;
+ }
+
+ /* Copy to the communication buffer */
+ for(x=offx; x<tx1; x++)
+ {
+ for(y=offy; y<ty1; y++)
+ {
+ i0 = (x*buf_my + y)*fft_nz;
+ i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
+
+ if (bClearBuf)
+ {
+ /* First access of commbuf, initialize it */
+ for(z=offz; z<tz1; z++)
+ {
+ commbuf[i0+z] = grid_th[i0t+z];
+ }
+ }
+ else
+ {
+ for(z=offz; z<tz1; z++)
+ {
+ commbuf[i0+z] += grid_th[i0t+z];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+
+/* Exchange and sum the overlap regions of the fftgrid between neighboring
+ * PME nodes after threaded spreading: first along the minor (y) dimension,
+ * possibly in multiple pulses, then along the major (x) dimension in a
+ * single pulse. Only used for the forward (spreading) direction.
+ */
+static void sum_fftgrid_dd(gmx_pme_t pme,real *fftgrid)
+{
+ ivec local_fft_ndata,local_fft_offset,local_fft_size;
+ pme_overlap_t *overlap;
- /* Currently supports only a single communication pulse */
-
- /* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
++ int send_index0,send_nindex;
++ int recv_nindex;
+#ifdef GMX_MPI
+ MPI_Status stat;
+#endif
++ int send_size_y,recv_size_y;
+ int ipulse,send_id,recv_id,datasize,gridsize,size_yx;
+ real *sendptr,*recvptr;
+ int x,y,z,indg,indb;
+
+ /* Note that this routine is only used for forward communication.
+ * Since the force gathering, unlike the charge spreading,
+ * can be trivially parallelized over the particles,
+ * the backwards process is much simpler and can use the "old"
+ * communication setup.
+ */
+
+ gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
+ local_fft_ndata,
+ local_fft_offset,
+ local_fft_size);
+
- datasize = (local_fft_ndata[XX]+size_yx)*local_fft_ndata[ZZ];
+ if (pme->nnodes_minor > 1)
+ {
+ /* Minor dimension */
+ overlap = &pme->overlap[1];
+
+ if (pme->nnodes_major > 1)
+ {
+ /* Extra rows in y that belong to the x overlap of the neighbor
+ * in the major dimension; they ride along in the y exchange.
+ */
+ size_yx = pme->overlap[0].comm_data[0].send_nindex;
+ }
+ else
+ {
+ size_yx = 0;
+ }
- ipulse = 0;
++ datasize = (local_fft_ndata[XX] + size_yx)*local_fft_ndata[ZZ];
+
- send_id = overlap->send_id[ipulse];
- recv_id = overlap->recv_id[ipulse];
- send_nindex = overlap->comm_data[ipulse].send_nindex;
- /* recv_index0 = overlap->comm_data[ipulse].recv_index0; */
- recv_index0 = 0;
- recv_nindex = overlap->comm_data[ipulse].recv_nindex;
-
- sendptr = overlap->sendbuf;
- recvptr = overlap->recvbuf;
++ send_size_y = overlap->send_size;
+
- /*
- printf("node %d comm %2d x %2d x %2d\n",pme->nodeid,
- local_fft_ndata[XX]+size_yx,send_nindex,local_fft_ndata[ZZ]);
- printf("node %d send %f, %f\n",pme->nodeid,
- sendptr[0],sendptr[send_nindex*datasize-1]);
- */
++ for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
++ {
++ send_id = overlap->send_id[ipulse];
++ recv_id = overlap->recv_id[ipulse];
++ send_index0 =
++ overlap->comm_data[ipulse].send_index0 -
++ overlap->comm_data[0].send_index0;
++ send_nindex = overlap->comm_data[ipulse].send_nindex;
++ /* We don't use recv_index0, as we always receive starting at 0 */
++ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
++ recv_size_y = overlap->comm_data[ipulse].recv_size;
+
- MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
- send_id,ipulse,
- recvptr,recv_nindex*datasize,GMX_MPI_REAL,
- recv_id,ipulse,
- overlap->mpi_comm,&stat);
++ sendptr = overlap->sendbuf + send_index0*local_fft_ndata[ZZ];
++ recvptr = overlap->recvbuf;
+
+#ifdef GMX_MPI
- for(x=0; x<local_fft_ndata[XX]; x++)
- {
- for(y=0; y<recv_nindex; y++)
++ MPI_Sendrecv(sendptr,send_size_y*datasize,GMX_MPI_REAL,
++ send_id,ipulse,
++ recvptr,recv_size_y*datasize,GMX_MPI_REAL,
++ recv_id,ipulse,
++ overlap->mpi_comm,&stat);
+#endif
+
- indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
- indb = (x*recv_nindex + y)*local_fft_ndata[ZZ];
- for(z=0; z<local_fft_ndata[ZZ]; z++)
++ for(x=0; x<local_fft_ndata[XX]; x++)
+ {
- fftgrid[indg+z] += recvptr[indb+z];
++ for(y=0; y<recv_nindex; y++)
+ {
- }
- if (pme->nnodes_major > 1)
- {
- sendptr = pme->overlap[0].sendbuf;
- for(x=0; x<size_yx; x++)
++ indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
++ indb = (x*recv_size_y + y)*local_fft_ndata[ZZ];
++ for(z=0; z<local_fft_ndata[ZZ]; z++)
++ {
++ fftgrid[indg+z] += recvptr[indb+z];
++ }
+ }
+ }
- for(y=0; y<recv_nindex; y++)
++
++ if (pme->nnodes_major > 1)
+ {
- indg = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
- indb = ((local_fft_ndata[XX] + x)*recv_nindex +y)*local_fft_ndata[ZZ];
- for(z=0; z<local_fft_ndata[ZZ]; z++)
++ /* Copy from the received buffer to the send buffer for dim 0 */
++ sendptr = pme->overlap[0].sendbuf;
++ for(x=0; x<size_yx; x++)
+ {
- sendptr[indg+z] += recvptr[indb+z];
++ for(y=0; y<recv_nindex; y++)
+ {
- /* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
++ indg = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
++ indb = ((local_fft_ndata[XX] + x)*recv_size_y + y)*local_fft_ndata[ZZ];
++ for(z=0; z<local_fft_ndata[ZZ]; z++)
++ {
++ sendptr[indg+z] += recvptr[indb+z];
++ }
+ }
+ }
+ }
+ }
+ }
+
- /* recv_index0 = overlap->comm_data[ipulse].recv_index0; */
- recv_index0 = 0;
++ /* We only support a single pulse here.
++ * This is not a severe limitation, as this code is only used
++ * with OpenMP and with OpenMP the (PME) domains can be larger.
++ */
+ if (pme->nnodes_major > 1)
+ {
+ /* Major dimension */
+ overlap = &pme->overlap[0];
+
+ datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
+ gridsize = local_fft_size[YY] *local_fft_size[ZZ];
+
+ ipulse = 0;
+
+ send_id = overlap->send_id[ipulse];
+ recv_id = overlap->recv_id[ipulse];
+ send_nindex = overlap->comm_data[ipulse].send_nindex;
- #ifdef PRINT_PME_SENDBUF
- print_sendbuf(pme,pme->overlap[0].sendbuf);
- #endif
++ /* We don't use recv_index0, as we always receive starting at 0 */
+ recv_nindex = overlap->comm_data[ipulse].recv_nindex;
+
+ sendptr = overlap->sendbuf;
+ recvptr = overlap->recvbuf;
+
+ if (debug != NULL)
+ {
+ fprintf(debug,"PME fftgrid comm %2d x %2d x %2d\n",
+ send_nindex,local_fft_ndata[YY],local_fft_ndata[ZZ]);
+ }
+
+#ifdef GMX_MPI
+ MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
+ send_id,ipulse,
+ recvptr,recv_nindex*datasize,GMX_MPI_REAL,
+ recv_id,ipulse,
+ overlap->mpi_comm,&stat);
+#endif
+
+ for(x=0; x<recv_nindex; x++)
+ {
+ for(y=0; y<local_fft_ndata[YY]; y++)
+ {
+ indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
+ indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
+ for(z=0; z<local_fft_ndata[ZZ]; z++)
+ {
+ fftgrid[indg+z] += recvptr[indb+z];
+ }
+ }
+ }
+ }
+}
+
+
+/* Spread the charges of the atoms in atc onto the PME grids.
+ * bCalcSplines: (re)compute interpolation indices and B-splines;
+ * bSpread: actually spread onto the grid(s). With multiple threads each
+ * thread spreads onto its own grid, after which the thread grids are
+ * reduced into fftgrid and, in parallel runs, node overlap is communicated.
+ */
+static void spread_on_grid(gmx_pme_t pme,
+ pme_atomcomm_t *atc,pmegrids_t *grids,
+ gmx_bool bCalcSplines,gmx_bool bSpread,
+ real *fftgrid)
+{
+ int nthread,thread;
+#ifdef PME_TIME_THREADS
+ gmx_cycles_t c1,c2,c3,ct1a,ct1b,ct1c;
+ static double cs1=0,cs2=0,cs3=0;
+ static double cs1a[6]={0,0,0,0,0,0};
+ static int cnt=0;
+#endif
+
+ nthread = pme->nthread;
+ assert(nthread>0);
+
+#ifdef PME_TIME_THREADS
+ c1 = omp_cyc_start();
+#endif
+ if (bCalcSplines)
+ {
+#pragma omp parallel for num_threads(nthread) schedule(static)
+ for(thread=0; thread<nthread; thread++)
+ {
+ int start,end;
+
+ /* Static partition of the atoms over the threads */
+ start = atc->n* thread /nthread;
+ end = atc->n*(thread+1)/nthread;
+
+ /* Compute fftgrid index for all atoms,
+ * with help of some extra variables.
+ */
+ calc_interpolation_idx(pme,atc,start,end,thread);
+ }
+ }
+#ifdef PME_TIME_THREADS
+ c1 = omp_cyc_end(c1);
+ cs1 += (double)c1;
+#endif
+
+#ifdef PME_TIME_THREADS
+ c2 = omp_cyc_start();
+#endif
+#pragma omp parallel for num_threads(nthread) schedule(static)
+ for(thread=0; thread<nthread; thread++)
+ {
+ splinedata_t *spline;
+ pmegrid_t *grid;
+
+ /* make local bsplines */
+ if (grids == NULL || grids->nthread == 1)
+ {
+ spline = &atc->spline[0];
+
+ spline->n = atc->n;
+
+ grid = &grids->grid;
+ }
+ else
+ {
+ spline = &atc->spline[thread];
+
+ make_thread_local_ind(atc,thread,spline);
+
+ grid = &grids->grid_th[thread];
+ }
+
+ if (bCalcSplines)
+ {
+ make_bsplines(spline->theta,spline->dtheta,pme->pme_order,
+ atc->fractx,spline->n,spline->ind,atc->q,pme->bFEP);
+ }
+
+ if (bSpread)
+ {
+ /* put local atoms on grid. */
+#ifdef PME_TIME_SPREAD
+ ct1a = omp_cyc_start();
+#endif
+ spread_q_bsplines_thread(grid,atc,spline,pme->spline_work);
+
+ if (grids->nthread > 1)
+ {
+ copy_local_grid(pme,grids,thread,fftgrid);
+ }
+#ifdef PME_TIME_SPREAD
+ ct1a = omp_cyc_end(ct1a);
+ cs1a[thread] += (double)ct1a;
+#endif
+ }
+ }
+#ifdef PME_TIME_THREADS
+ c2 = omp_cyc_end(c2);
+ cs2 += (double)c2;
+#endif
+
+ if (bSpread && grids->nthread > 1)
+ {
+#ifdef PME_TIME_THREADS
+ c3 = omp_cyc_start();
+#endif
+ /* Sum the thread-grid overlaps into fftgrid / the comm buffers */
+#pragma omp parallel for num_threads(grids->nthread) schedule(static)
+ for(thread=0; thread<grids->nthread; thread++)
+ {
+ reduce_threadgrid_overlap(pme,grids,thread,
+ fftgrid,
+ pme->overlap[0].sendbuf,
+ pme->overlap[1].sendbuf);
- /* Domain decomposition */
- natoms = gmx_pme_recv_q_x(pme_pp,
- &chargeA,&chargeB,box,&x_pp,&f_pp,
- &maxshift_x,&maxshift_y,
- &pme->bFEP,&lambda,
- &bEnerVir,
- &step);
-
- if (natoms == -1) {
+ }
+#ifdef PME_TIME_THREADS
+ c3 = omp_cyc_end(c3);
+ cs3 += (double)c3;
+#endif
+
+ if (pme->nnodes > 1)
+ {
+ /* Communicate the overlapping part of the fftgrid */
+ sum_fftgrid_dd(pme,fftgrid);
+ }
+ }
+
+#ifdef PME_TIME_THREADS
+ cnt++;
+ if (cnt % 20 == 0)
+ {
+ printf("idx %.2f spread %.2f red %.2f",
+ cs1*1e-9,cs2*1e-9,cs3*1e-9);
+#ifdef PME_TIME_SPREAD
+ for(thread=0; thread<nthread; thread++)
+ printf(" %.2f",cs1a[thread]*1e-9);
+#endif
+ printf("\n");
+ }
+#endif
+}
+
+
+/* Debug helper: print an nx x ny x nz sub-grid of g to fp, one value per
+ * line as "x y z value", with sx/sy/sz the global start indices and
+ * my/mz the allocated y/z strides of g.
+ */
+static void dump_grid(FILE *fp,
+ int sx,int sy,int sz,int nx,int ny,int nz,
+ int my,int mz,const real *g)
+{
+ int x,y,z;
+
+ for(x=0; x<nx; x++)
+ {
+ for(y=0; y<ny; y++)
+ {
+ for(z=0; z<nz; z++)
+ {
+ fprintf(fp,"%2d %2d %2d %6.3f\n",
+ sx+x,sy+y,sz+z,g[(x*my + y)*mz + z]);
+ }
+ }
+ }
+}
+
+/* Debug helper: dump this node's local part of fftgrid (without the
+ * order-1 spread overlap) to stderr via dump_grid.
+ */
+static void dump_local_fftgrid(gmx_pme_t pme,const real *fftgrid)
+{
+ ivec local_fft_ndata,local_fft_offset,local_fft_size;
+
+ gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
+ local_fft_ndata,
+ local_fft_offset,
+ local_fft_size);
+
+ dump_grid(stderr,
+ pme->pmegrid_start_ix,
+ pme->pmegrid_start_iy,
+ pme->pmegrid_start_iz,
+ pme->pmegrid_nx-pme->pme_order+1,
+ pme->pmegrid_ny-pme->pme_order+1,
+ pme->pmegrid_nz-pme->pme_order+1,
+ local_fft_size[YY],
+ local_fft_size[ZZ],
+ fftgrid);
+}
+
+
+/* Calculate the PME grid energy V of n particles at positions x with
+ * charges q, by spreading on the A grid and gathering the energy with
+ * B-splines. Serial only; aborts when called in parallel or with FEP.
+ * Uses the dedicated atc_energy atom-communication struct.
+ */
+void gmx_pme_calc_energy(gmx_pme_t pme,int n,rvec *x,real *q,real *V)
+{
+ pme_atomcomm_t *atc;
+ pmegrids_t *grid;
+
+ if (pme->nnodes > 1)
+ {
+ gmx_incons("gmx_pme_calc_energy called in parallel");
+ }
+ /* NOTE(review): '> 1' on a gmx_bool looks odd -- presumably meant as
+ * "if (pme->bFEP)"; with TRUE==1 this branch never triggers. Confirm.
+ */
+ if (pme->bFEP > 1)
+ {
+ gmx_incons("gmx_pme_calc_energy with free energy");
+ }
+
+ atc = &pme->atc_energy;
+ atc->nthread = 1;
+ if (atc->spline == NULL)
+ {
+ snew(atc->spline,atc->nthread);
+ }
+ atc->nslab = 1;
+ atc->bSpread = TRUE;
+ atc->pme_order = pme->pme_order;
+ atc->n = n;
+ pme_realloc_atomcomm_things(atc);
+ atc->x = x;
+ atc->q = q;
+
+ /* We only use the A-charges grid */
+ grid = &pme->pmegridA;
+
+ /* Spread without computing forces; splines are (re)calculated */
+ spread_on_grid(pme,atc,NULL,TRUE,FALSE,pme->fftgridA);
+
+ *V = gather_energy_bsplines(pme,grid->grid.grid,atc);
+}
+
+
+/* Reset all performance counters (wallcycle, nrnb) of a PME-only rank at
+ * relative step step_rel, and shift ir->init_step/nsteps so that the
+ * remaining run is counted from zero.
+ */
+static void reset_pmeonly_counters(t_commrec *cr,gmx_wallcycle_t wcycle,
+ t_nrnb *nrnb,t_inputrec *ir, gmx_large_int_t step_rel)
+{
+ /* Reset all the counters related to performance over the run */
+ wallcycle_stop(wcycle,ewcRUN);
+ wallcycle_reset_all(wcycle);
+ init_nrnb(nrnb);
+ ir->init_step += step_rel;
+ ir->nsteps -= step_rel;
+ wallcycle_start(wcycle,ewcRUN);
+}
+
+
++/* Return in *pme_ret a PME structure matching grid_size. Searches the
++ * cache of *npmedata structures in *pmedata first; on a miss, appends a
++ * new structure created with gmx_pme_reinit (reusing grids of the last
++ * cached entry). Used by the PME-only rank when the PP ranks tune grids.
++ */
++static void gmx_pmeonly_switch(int *npmedata, gmx_pme_t **pmedata,
++ ivec grid_size,
++ t_commrec *cr, t_inputrec *ir,
++ gmx_pme_t *pme_ret)
++{
++ int ind;
++ gmx_pme_t pme = NULL;
++
++ ind = 0;
++ while (ind < *npmedata)
++ {
++ pme = (*pmedata)[ind];
++ if (pme->nkx == grid_size[XX] &&
++ pme->nky == grid_size[YY] &&
++ pme->nkz == grid_size[ZZ])
++ {
++ /* Cache hit: reuse the existing structure */
++ *pme_ret = pme;
++
++ return;
++ }
++
++ ind++;
++ }
++
++ (*npmedata)++;
++ srenew(*pmedata,*npmedata);
++
++ /* Generate a new PME data structure, copying part of the old pointers */
++ gmx_pme_reinit(&((*pmedata)[ind]),cr,pme,ir,grid_size);
++
++ *pme_ret = (*pmedata)[ind];
++}
++
++
+/* Main loop of a dedicated PME-only rank: repeatedly receive coordinates
+ * and charges from the PP ranks, compute PME mesh forces/energy/virial,
+ * and send the results back. Handles PME grid switching requests
+ * (natoms == -2) and terminates on a stop signal (natoms == -1).
+ * Returns 0 on normal termination.
+ */
+int gmx_pmeonly(gmx_pme_t pme,
+ t_commrec *cr, t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle,
+ real ewaldcoeff, gmx_bool bGatherOnly,
+ t_inputrec *ir)
+{
++ int npmedata;
++ gmx_pme_t *pmedata;
+ gmx_pme_pp_t pme_pp;
+ int natoms;
+ matrix box;
+ rvec *x_pp=NULL,*f_pp=NULL;
+ real *chargeA=NULL,*chargeB=NULL;
+ real lambda=0;
+ int maxshift_x=0,maxshift_y=0;
+ real energy,dvdlambda;
+ matrix vir;
+ float cycles;
+ int count;
+ gmx_bool bEnerVir;
+ gmx_large_int_t step,step_rel;
++ ivec grid_switch;
+
++ /* This data will only be used with PME tuning, i.e. switching PME grids */
++ npmedata = 1;
++ snew(pmedata,npmedata);
++ pmedata[0] = pme;
+
+ pme_pp = gmx_pme_pp_init(cr);
+
+ init_nrnb(nrnb);
+
+ count = 0;
+ do /****** this is a quasi-loop over time steps! */
+ {
++ /* The reason for having a loop here is PME grid tuning/switching */
++ do
++ {
++ /* Domain decomposition */
++ natoms = gmx_pme_recv_q_x(pme_pp,
++ &chargeA,&chargeB,box,&x_pp,&f_pp,
++ &maxshift_x,&maxshift_y,
++ &pme->bFEP,&lambda,
++ &bEnerVir,
++ &step,
++ grid_switch,&ewaldcoeff);
++
++ if (natoms == -2)
++ {
++ /* Switch the PME grid to grid_switch */
++ gmx_pmeonly_switch(&npmedata,&pmedata,grid_switch,cr,ir,&pme);
++ }
++ }
++ while (natoms == -2);
++
++ if (natoms == -1)
++ {
+ /* We should stop: break out of the loop */
+ break;
+ }
+
+ step_rel = step - ir->init_step;
+
+ if (count == 0)
+ wallcycle_start(wcycle,ewcRUN);
+
+ wallcycle_start(wcycle,ewcPMEMESH);
+
+ dvdlambda = 0;
+ clear_mat(vir);
+ gmx_pme_do(pme,0,natoms,x_pp,f_pp,chargeA,chargeB,box,
+ cr,maxshift_x,maxshift_y,nrnb,wcycle,vir,ewaldcoeff,
+ &energy,lambda,&dvdlambda,
+ GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));
+
+ cycles = wallcycle_stop(wcycle,ewcPMEMESH);
+
+ /* Return the mesh forces, virial and energy to the PP ranks */
+ gmx_pme_send_force_vir_ener(pme_pp,
+ f_pp,vir,energy,dvdlambda,
+ cycles);
+
+ count++;
+
+ if (step_rel == wcycle_get_reset_counters(wcycle))
+ {
+ /* Reset all the counters related to performance over the run */
+ reset_pmeonly_counters(cr,wcycle,nrnb,ir,step_rel);
+ wcycle_set_reset_counters(wcycle, 0);
+ }
+
+ } /***** end of quasi-loop, we stop with the break above */
+ while (TRUE);
+
+ return 0;
+}
+
+int gmx_pme_do(gmx_pme_t pme,
+ int start, int homenr,
+ rvec x[], rvec f[],
+ real *chargeA, real *chargeB,
+ matrix box, t_commrec *cr,
+ int maxshift_x, int maxshift_y,
+ t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ matrix vir, real ewaldcoeff,
+ real *energy, real lambda,
+ real *dvdlambda, int flags)
+{
+ int q,d,i,j,ntot,npme;
+ int nx,ny,nz;
+ int n_d,local_ny;
+ pme_atomcomm_t *atc=NULL;
+ pmegrids_t *pmegrid=NULL;
+ real *grid=NULL;
+ real *ptr;
+ rvec *x_d,*f_d;
+ real *charge=NULL,*q_d;
+ real energy_AB[2];
+ matrix vir_AB[2];
+ gmx_bool bClearF;
+ gmx_parallel_3dfft_t pfft_setup;
+ real * fftgrid;
+ t_complex * cfftgrid;
+ int thread;
+ const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
+ const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
+
+ assert(pme->nnodes > 0);
+ assert(pme->nnodes == 1 || pme->ndecompdim > 0);
+
+ if (pme->nnodes > 1) {
+ atc = &pme->atc[0];
+ atc->npd = homenr;
+ if (atc->npd > atc->pd_nalloc) {
+ atc->pd_nalloc = over_alloc_dd(atc->npd);
+ srenew(atc->pd,atc->pd_nalloc);
+ }
+ atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
+ }
+ else
+ {
+ /* This could be necessary for TPI */
+ pme->atc[0].n = homenr;
+ }
+
+ for(q=0; q<(pme->bFEP ? 2 : 1); q++) {
+ if (q == 0) {
+ pmegrid = &pme->pmegridA;
+ fftgrid = pme->fftgridA;
+ cfftgrid = pme->cfftgridA;
+ pfft_setup = pme->pfft_setupA;
+ charge = chargeA+start;
+ } else {
+ pmegrid = &pme->pmegridB;
+ fftgrid = pme->fftgridB;
+ cfftgrid = pme->cfftgridB;
+ pfft_setup = pme->pfft_setupB;
+ charge = chargeB+start;
+ }
+ grid = pmegrid->grid.grid;
+ /* Unpack structure */
+ if (debug) {
+ fprintf(debug,"PME: nnodes = %d, nodeid = %d\n",
+ cr->nnodes,cr->nodeid);
+ fprintf(debug,"Grid = %p\n",(void*)grid);
+ if (grid == NULL)
+ gmx_fatal(FARGS,"No grid!");
+ }
+ where();
+
+ m_inv_ur0(box,pme->recipbox);
+
+ if (pme->nnodes == 1) {
+ atc = &pme->atc[0];
+ if (DOMAINDECOMP(cr)) {
+ atc->n = homenr;
+ pme_realloc_atomcomm_things(atc);
+ }
+ atc->x = x;
+ atc->q = charge;
+ atc->f = f;
+ } else {
+ wallcycle_start(wcycle,ewcPME_REDISTXF);
+ for(d=pme->ndecompdim-1; d>=0; d--)
+ {
+ if (d == pme->ndecompdim-1)
+ {
+ n_d = homenr;
+ x_d = x + start;
+ q_d = charge;
+ }
+ else
+ {
+ n_d = pme->atc[d+1].n;
+ x_d = atc->x;
+ q_d = atc->q;
+ }
+ atc = &pme->atc[d];
+ atc->npd = n_d;
+ if (atc->npd > atc->pd_nalloc) {
+ atc->pd_nalloc = over_alloc_dd(atc->npd);
+ srenew(atc->pd,atc->pd_nalloc);
+ }
+ atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
+ pme_calc_pidx_wrapper(n_d,pme->recipbox,x_d,atc);
+ where();
+
+ /* Redistribute x (only once) and qA or qB */
+ if (DOMAINDECOMP(cr)) {
+ dd_pmeredist_x_q(pme, n_d, q==0, x_d, q_d, atc);
+ } else {
+ pmeredist_pd(pme, TRUE, n_d, q==0, x_d, q_d, atc);
+ }
+ }
+ where();
+
+ wallcycle_stop(wcycle,ewcPME_REDISTXF);
+ }
+
+ if (debug)
+ fprintf(debug,"Node= %6d, pme local particles=%6d\n",
+ cr->nodeid,atc->n);
+
+ if (flags & GMX_PME_SPREAD_Q)
+ {
+ wallcycle_start(wcycle,ewcPME_SPREADGATHER);
+
+ /* Spread the charges on a grid */
+ spread_on_grid(pme,&pme->atc[0],pmegrid,q==0,TRUE,fftgrid);
+
+ if (q == 0)
+ {
+ inc_nrnb(nrnb,eNR_WEIGHTS,DIM*atc->n);
+ }
+ inc_nrnb(nrnb,eNR_SPREADQBSP,
+ pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
+
+ if (pme->nthread == 1)
+ {
+ wrap_periodic_pmegrid(pme,grid);
+
+ /* sum contributions to local grid from other nodes */
+#ifdef GMX_MPI
+ if (pme->nnodes > 1)
+ {
+ gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_FORWARD);
+ where();
+ }
+#endif
+
+ copy_pmegrid_to_fftgrid(pme,grid,fftgrid);
+ }
+
+ wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
+
+ /*
+ dump_local_fftgrid(pme,fftgrid);
+ exit(0);
+ */
+ }
+
+ /* Here we start a large thread parallel region */
+#pragma omp parallel for num_threads(pme->nthread) schedule(static)
+ for(thread=0; thread<pme->nthread; thread++)
+ {
+ if (flags & GMX_PME_SOLVE)
+ {
+ int loop_count;
+
+ /* do 3d-fft */
+ if (thread == 0)
+ {
+ wallcycle_start(wcycle,ewcPME_FFT);
+ }
+ gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,
+ fftgrid,cfftgrid,thread,wcycle);
+ if (thread == 0)
+ {
+ wallcycle_stop(wcycle,ewcPME_FFT);
+ }
+ where();
+
+ /* solve in k-space for our local cells */
+ if (thread == 0)
+ {
+ wallcycle_start(wcycle,ewcPME_SOLVE);
+ }
+ loop_count =
+ solve_pme_yzx(pme,cfftgrid,ewaldcoeff,
+ box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
+ bCalcEnerVir,
+ pme->nthread,thread);
+ if (thread == 0)
+ {
+ wallcycle_stop(wcycle,ewcPME_SOLVE);
+ where();
+ inc_nrnb(nrnb,eNR_SOLVEPME,loop_count);
+ }
+ }
+
+ if (bCalcF)
+ {
+ /* do 3d-invfft */
+ if (thread == 0)
+ {
+ where();
+ wallcycle_start(wcycle,ewcPME_FFT);
+ }
+ gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,
+ cfftgrid,fftgrid,thread,wcycle);
+ if (thread == 0)
+ {
+ wallcycle_stop(wcycle,ewcPME_FFT);
++
+ where();
+
+ if (pme->nodeid == 0)
+ {
+ ntot = pme->nkx*pme->nky*pme->nkz;
+ npme = ntot*log((real)ntot)/log(2.0);
+ inc_nrnb(nrnb,eNR_FFT,2*npme);
+ }
+
+ wallcycle_start(wcycle,ewcPME_SPREADGATHER);
+ }
+
+ copy_fftgrid_to_pmegrid(pme,fftgrid,grid,pme->nthread,thread);
+ }
+ }
+ /* End of thread parallel section.
+ * With MPI we have to synchronize here before gmx_sum_qgrid_dd.
+ */
+
+ if (bCalcF)
+ {
+ /* distribute local grid to all nodes */
+#ifdef GMX_MPI
+ if (pme->nnodes > 1) {
+ gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_BACKWARD);
+ }
+#endif
+ where();
+
+ unwrap_periodic_pmegrid(pme,grid);
+
+ /* interpolate forces for our local atoms */
+
+ where();
+
+ /* If we are running without parallelization,
+ * atc->f is the actual force array, not a buffer,
+ * therefore we should not clear it.
+ */
+ bClearF = (q == 0 && PAR(cr));
+#pragma omp parallel for num_threads(pme->nthread) schedule(static)
+ for(thread=0; thread<pme->nthread; thread++)
+ {
+ gather_f_bsplines(pme,grid,bClearF,atc,
+ &atc->spline[thread],
+ pme->bFEP ? (q==0 ? 1.0-lambda : lambda) : 1.0);
+ }
+
+ where();
+
+ inc_nrnb(nrnb,eNR_GATHERFBSP,
+ pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
+ wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
+ }
+
+ if (bCalcEnerVir)
+ {
+ /* This should only be called on the master thread
+ * and after the threads have synchronized.
+ */
+ get_pme_ener_vir(pme,pme->nthread,&energy_AB[q],vir_AB[q]);
+ }
+ } /* of q-loop */
+
+ if (bCalcF && pme->nnodes > 1) {
+ wallcycle_start(wcycle,ewcPME_REDISTXF);
+ for(d=0; d<pme->ndecompdim; d++)
+ {
+ atc = &pme->atc[d];
+ if (d == pme->ndecompdim - 1)
+ {
+ n_d = homenr;
+ f_d = f + start;
+ }
+ else
+ {
+ n_d = pme->atc[d+1].n;
+ f_d = pme->atc[d+1].f;
+ }
+ if (DOMAINDECOMP(cr)) {
+ dd_pmeredist_f(pme,atc,n_d,f_d,
+ d==pme->ndecompdim-1 && pme->bPPnode);
+ } else {
+ pmeredist_pd(pme, FALSE, n_d, TRUE, f_d, NULL, atc);
+ }
+ }
+
+ wallcycle_stop(wcycle,ewcPME_REDISTXF);
+ }
+ where();
+
+ if (bCalcEnerVir)
+ {
+ if (!pme->bFEP) {
+ *energy = energy_AB[0];
+ m_add(vir,vir_AB[0],vir);
+ } else {
+ *energy = (1.0-lambda)*energy_AB[0] + lambda*energy_AB[1];
+ *dvdlambda += energy_AB[1] - energy_AB[0];
+ for(i=0; i<DIM; i++)
+ {
+ for(j=0; j<DIM; j++)
+ {
+ vir[i][j] += (1.0-lambda)*vir_AB[0][i][j] +
+ lambda*vir_AB[1][i][j];
+ }
+ }
+ }
+ }
+ else
+ {
+ *energy = 0;
+ }
+
+ if (debug)
+ {
+ fprintf(debug,"PME mesh energy: %g\n",*energy);
+ }
+
+ return 0;
+}
--- /dev/null
- /*
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "typedefs.h"
+#include "smalloc.h"
+#include "gmx_fatal.h"
+#include "vec.h"
+#include "pme.h"
+#include "network.h"
+#include "domdec.h"
+#include "sighandler.h"
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#define PP_PME_CHARGE (1<<0)
+#define PP_PME_CHARGEB (1<<1)
+#define PP_PME_COORD (1<<2)
+#define PP_PME_FEP (1<<3)
+#define PP_PME_ENER_VIR (1<<4)
+#define PP_PME_FINISH (1<<5)
++#define PP_PME_SWITCH (1<<6)
+
+#define PME_PP_SIGSTOP (1<<0)
+#define PME_PP_SIGSTOPNSS (1<<1)
+
+typedef struct gmx_pme_pp {
+#ifdef GMX_MPI
+ MPI_Comm mpi_comm_mysim;
+#endif
+ int nnode; /* The number of PP node to communicate with */
+ int *node; /* The PP node ranks */
+ int node_peer; /* The peer PP node rank */
+ int *nat; /* The number of atom for each PP node */
+ int flags_charge; /* The flags sent along with the last charges */
+ real *chargeA;
+ real *chargeB;
+ rvec *x;
+ rvec *f;
+ int nalloc;
+#ifdef GMX_MPI
+ MPI_Request *req;
+ MPI_Status *stat;
+#endif
+} t_gmx_pme_pp;
+
+typedef struct gmx_pme_comm_n_box {
- int natoms;
- matrix box;
- int maxshift_x;
- int maxshift_y;
- real lambda;
- int flags;
- gmx_large_int_t step;
++ int natoms;
++ matrix box;
++ int maxshift_x;
++ int maxshift_y;
++ real lambda;
++ int flags;
++ gmx_large_int_t step;
++ ivec grid_size; /* For PME grid tuning */
++ real ewaldcoeff; /* For PME grid tuning */
+} gmx_pme_comm_n_box_t;
+
+typedef struct {
+ matrix vir;
+ real energy;
+ real dvdlambda;
+ float cycles;
+ gmx_stop_cond_t stop_cond;
+} gmx_pme_comm_vir_ene_t;
+
+
+
+
+gmx_pme_pp_t gmx_pme_pp_init(t_commrec *cr)
+{
+ struct gmx_pme_pp *pme_pp;
+ int rank;
+
+ snew(pme_pp,1);
+
+#ifdef GMX_MPI
+ pme_pp->mpi_comm_mysim = cr->mpi_comm_mysim;
+ MPI_Comm_rank(cr->mpi_comm_mygroup,&rank);
+ get_pme_ddnodes(cr,rank,&pme_pp->nnode,&pme_pp->node,&pme_pp->node_peer);
+ snew(pme_pp->nat,pme_pp->nnode);
+ snew(pme_pp->req,2*pme_pp->nnode);
+ snew(pme_pp->stat,2*pme_pp->nnode);
+ pme_pp->nalloc = 0;
+ pme_pp->flags_charge = 0;
+#endif
+
+ return pme_pp;
+}
+
+/* This should be faster with a real non-blocking MPI implementation */
+/* #define GMX_PME_DELAYED_WAIT */
+
+static void gmx_pme_send_q_x_wait(gmx_domdec_t *dd)
+{
+#ifdef GMX_MPI
+ if (dd->nreq_pme) {
+ MPI_Waitall(dd->nreq_pme,dd->req_pme,MPI_STATUSES_IGNORE);
+ dd->nreq_pme = 0;
+ }
+#endif
+}
+
+static void gmx_pme_send_q_x(t_commrec *cr, int flags,
+ real *chargeA, real *chargeB,
+ matrix box, rvec *x,
+ real lambda,
+ int maxshift_x, int maxshift_y,
+ gmx_large_int_t step)
+{
+ gmx_domdec_t *dd;
+ gmx_pme_comm_n_box_t *cnb;
+ int n;
+
+ dd = cr->dd;
+ n = dd->nat_home;
+
+ if (debug)
+ fprintf(debug,"PP node %d sending to PME node %d: %d%s%s\n",
+ cr->sim_nodeid,dd->pme_nodeid,n,
+ flags & PP_PME_CHARGE ? " charges" : "",
+ flags & PP_PME_COORD ? " coordinates" : "");
+
+#ifdef GMX_PME_DELAYED_WAIT
+ /* When can not use cnb until pending communication has finished */
+ gmx_pme_send_x_q_wait(dd);
+#endif
+
+ if (dd->pme_receive_vir_ener) {
+ /* Peer PP node: communicate all data */
+ if (dd->cnb == NULL)
+ snew(dd->cnb,1);
+ cnb = dd->cnb;
+
+ cnb->flags = flags;
+ cnb->natoms = n;
+ cnb->maxshift_x = maxshift_x;
+ cnb->maxshift_y = maxshift_y;
+ cnb->lambda = lambda;
+ cnb->step = step;
+ if (flags & PP_PME_COORD)
+ copy_mat(box,cnb->box);
+#ifdef GMX_MPI
+ MPI_Isend(cnb,sizeof(*cnb),MPI_BYTE,
+ dd->pme_nodeid,0,cr->mpi_comm_mysim,
+ &dd->req_pme[dd->nreq_pme++]);
+#endif
+ } else if (flags & PP_PME_CHARGE) {
+#ifdef GMX_MPI
+ /* Communicate only the number of atoms */
+ MPI_Isend(&n,sizeof(n),MPI_BYTE,
+ dd->pme_nodeid,0,cr->mpi_comm_mysim,
+ &dd->req_pme[dd->nreq_pme++]);
+#endif
+ }
+
+#ifdef GMX_MPI
+ if (n > 0) {
+ if (flags & PP_PME_CHARGE) {
+ MPI_Isend(chargeA,n*sizeof(real),MPI_BYTE,
+ dd->pme_nodeid,1,cr->mpi_comm_mysim,
+ &dd->req_pme[dd->nreq_pme++]);
+ }
+ if (flags & PP_PME_CHARGEB) {
+ MPI_Isend(chargeB,n*sizeof(real),MPI_BYTE,
+ dd->pme_nodeid,2,cr->mpi_comm_mysim,
+ &dd->req_pme[dd->nreq_pme++]);
+ }
+ if (flags & PP_PME_COORD) {
+ MPI_Isend(x[0],n*sizeof(rvec),MPI_BYTE,
+ dd->pme_nodeid,3,cr->mpi_comm_mysim,
+ &dd->req_pme[dd->nreq_pme++]);
+ }
+ }
+
+#ifndef GMX_PME_DELAYED_WAIT
+ /* Wait for the data to arrive */
+ /* We can skip this wait as we are sure x and q will not be modified
+ * before the next call to gmx_pme_send_x_q or gmx_pme_receive_f.
+ */
+ gmx_pme_send_q_x_wait(dd);
+#endif
+#endif
+}
+
+void gmx_pme_send_q(t_commrec *cr,
+ gmx_bool bFreeEnergy, real *chargeA, real *chargeB,
+ int maxshift_x, int maxshift_y)
+{
+ int flags;
+
+ flags = PP_PME_CHARGE;
+ if (bFreeEnergy)
+ flags |= PP_PME_CHARGEB;
+
+ gmx_pme_send_q_x(cr,flags,
+ chargeA,chargeB,NULL,NULL,0,maxshift_x,maxshift_y,-1);
+}
+
+void gmx_pme_send_x(t_commrec *cr, matrix box, rvec *x,
+ gmx_bool bFreeEnergy, real lambda,
+ gmx_bool bEnerVir,
+ gmx_large_int_t step)
+{
+ int flags;
+
+ flags = PP_PME_COORD;
+ if (bFreeEnergy)
+ flags |= PP_PME_FEP;
+ if (bEnerVir)
+ flags |= PP_PME_ENER_VIR;
+
+ gmx_pme_send_q_x(cr,flags,NULL,NULL,box,x,lambda,0,0,step);
+}
+
- void gmx_pme_finish(t_commrec *cr)
++void gmx_pme_send_finish(t_commrec *cr)
+{
+ int flags;
+
+ flags = PP_PME_FINISH;
+
+ gmx_pme_send_q_x(cr,flags,NULL,NULL,NULL,NULL,0,0,0,-1);
+}
+
++void gmx_pme_send_switch(t_commrec *cr, ivec grid_size, real ewaldcoeff)
++{
++#ifdef GMX_MPI
++ gmx_pme_comm_n_box_t cnb;
++
++ if (cr->dd->pme_receive_vir_ener)
++ {
++ cnb.flags = PP_PME_SWITCH;
++ copy_ivec(grid_size,cnb.grid_size);
++ cnb.ewaldcoeff = ewaldcoeff;
++
++ /* We send this, uncommon, message blocking to simplify the code */
++ MPI_Send(&cnb,sizeof(cnb),MPI_BYTE,
++ cr->dd->pme_nodeid,0,cr->mpi_comm_mysim);
++ }
++#endif
++}
++
+int gmx_pme_recv_q_x(struct gmx_pme_pp *pme_pp,
+ real **chargeA, real **chargeB,
+ matrix box, rvec **x,rvec **f,
+ int *maxshift_x, int *maxshift_y,
+ gmx_bool *bFreeEnergy,real *lambda,
- gmx_bool *bEnerVir,
- gmx_large_int_t *step)
++ gmx_bool *bEnerVir,
++ gmx_large_int_t *step,
++ ivec grid_size, real *ewaldcoeff)
+{
+ gmx_pme_comm_n_box_t cnb;
+ int nat=0,q,messages,sender;
+ real *charge_pp;
+
+ messages = 0;
+
+ /* avoid compiler warning about unused variable without MPI support */
+ cnb.flags = 0;
+#ifdef GMX_MPI
+ do {
+ /* Receive the send count, box and time step from the peer PP node */
+ MPI_Recv(&cnb,sizeof(cnb),MPI_BYTE,
+ pme_pp->node_peer,0,
+ pme_pp->mpi_comm_mysim,MPI_STATUS_IGNORE);
+
+ if (debug)
- fprintf(debug,"PME only node receiving:%s%s%s\n",
++ {
++ fprintf(debug,"PME only node receiving:%s%s%s%s\n",
+ (cnb.flags & PP_PME_CHARGE) ? " charges" : "",
- (cnb.flags & PP_PME_COORD ) ? " coordinates" : "",
- (cnb.flags & PP_PME_FINISH) ? " finish" : "");
++ (cnb.flags & PP_PME_COORD ) ? " coordinates" : "",
++ (cnb.flags & PP_PME_FINISH) ? " finish" : "",
++ (cnb.flags & PP_PME_SWITCH) ? " switch" : "");
++ }
++
++ if (cnb.flags & PP_PME_SWITCH)
++ {
++ /* Special case, receive the new parameters and return */
++ copy_ivec(cnb.grid_size,grid_size);
++ *ewaldcoeff = cnb.ewaldcoeff;
++
++ return -2;
++ }
+
+ if (cnb.flags & PP_PME_CHARGE) {
+ /* Receive the send counts from the other PP nodes */
+ for(sender=0; sender<pme_pp->nnode; sender++) {
+ if (pme_pp->node[sender] == pme_pp->node_peer) {
+ pme_pp->nat[sender] = cnb.natoms;
+ } else {
+ MPI_Irecv(&(pme_pp->nat[sender]),sizeof(pme_pp->nat[0]),
+ MPI_BYTE,
+ pme_pp->node[sender],0,
+ pme_pp->mpi_comm_mysim,&pme_pp->req[messages++]);
+ }
+ }
+ MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
+ messages = 0;
+
+ nat = 0;
+ for(sender=0; sender<pme_pp->nnode; sender++)
+ nat += pme_pp->nat[sender];
+
+ if (nat > pme_pp->nalloc) {
+ pme_pp->nalloc = over_alloc_dd(nat);
+ srenew(pme_pp->chargeA,pme_pp->nalloc);
+ if (cnb.flags & PP_PME_CHARGEB)
+ srenew(pme_pp->chargeB,pme_pp->nalloc);
+ srenew(pme_pp->x,pme_pp->nalloc);
+ srenew(pme_pp->f,pme_pp->nalloc);
+ }
+
+ /* maxshift is sent when the charges are sent */
+ *maxshift_x = cnb.maxshift_x;
+ *maxshift_y = cnb.maxshift_y;
+
+ /* Receive the charges in place */
+ for(q=0; q<((cnb.flags & PP_PME_CHARGEB) ? 2 : 1); q++) {
+ if (q == 0)
+ charge_pp = pme_pp->chargeA;
+ else
+ charge_pp = pme_pp->chargeB;
+ nat = 0;
+ for(sender=0; sender<pme_pp->nnode; sender++) {
+ if (pme_pp->nat[sender] > 0) {
+ MPI_Irecv(charge_pp+nat,
+ pme_pp->nat[sender]*sizeof(real),
+ MPI_BYTE,
+ pme_pp->node[sender],1+q,
+ pme_pp->mpi_comm_mysim,
+ &pme_pp->req[messages++]);
+ nat += pme_pp->nat[sender];
+ if (debug)
+ fprintf(debug,"Received from PP node %d: %d "
+ "charges\n",
+ pme_pp->node[sender],pme_pp->nat[sender]);
+ }
+ }
+ }
+
+ pme_pp->flags_charge = cnb.flags;
+ }
+
+ if (cnb.flags & PP_PME_COORD) {
+ if (!(pme_pp->flags_charge & PP_PME_CHARGE))
+ gmx_incons("PME-only node received coordinates before charges"
+ );
+
+ /* The box, FE flag and lambda are sent along with the coordinates
+ * */
+ copy_mat(cnb.box,box);
+ *bFreeEnergy = (cnb.flags & PP_PME_FEP);
+ *lambda = cnb.lambda;
+ *bEnerVir = (cnb.flags & PP_PME_ENER_VIR);
+
+ if (*bFreeEnergy && !(pme_pp->flags_charge & PP_PME_CHARGEB))
+ gmx_incons("PME-only node received free energy request, but "
+ "did not receive B-state charges");
+
+ /* Receive the coordinates in place */
+ nat = 0;
+ for(sender=0; sender<pme_pp->nnode; sender++) {
+ if (pme_pp->nat[sender] > 0) {
+ MPI_Irecv(pme_pp->x[nat],pme_pp->nat[sender]*sizeof(rvec),
+ MPI_BYTE,
+ pme_pp->node[sender],3,
+ pme_pp->mpi_comm_mysim,&pme_pp->req[messages++]);
+ nat += pme_pp->nat[sender];
+ if (debug)
+ fprintf(debug,"Received from PP node %d: %d "
+ "coordinates\n",
+ pme_pp->node[sender],pme_pp->nat[sender]);
+ }
+ }
+ }
+
+ /* Wait for the coordinates and/or charges to arrive */
+ MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
+ messages = 0;
+ } while (!(cnb.flags & (PP_PME_COORD | PP_PME_FINISH)));
+
+ *step = cnb.step;
+#endif
+
+ *chargeA = pme_pp->chargeA;
+ *chargeB = pme_pp->chargeB;
+ *x = pme_pp->x;
+ *f = pme_pp->f;
+
+
+ return ((cnb.flags & PP_PME_FINISH) ? -1 : nat);
+}
+
+static void receive_virial_energy(t_commrec *cr,
+ matrix vir,real *energy,real *dvdlambda,
+ float *pme_cycles)
+{
+ gmx_pme_comm_vir_ene_t cve;
+
+ if (cr->dd->pme_receive_vir_ener) {
+ if (debug)
+ fprintf(debug,
+ "PP node %d receiving from PME node %d: virial and energy\n",
+ cr->sim_nodeid,cr->dd->pme_nodeid);
+#ifdef GMX_MPI
+ MPI_Recv(&cve,sizeof(cve),MPI_BYTE,cr->dd->pme_nodeid,1,cr->mpi_comm_mysim,
+ MPI_STATUS_IGNORE);
+#else
+ memset(&cve,0,sizeof(cve));
+#endif
+
+ m_add(vir,cve.vir,vir);
+ *energy = cve.energy;
+ *dvdlambda += cve.dvdlambda;
+ *pme_cycles = cve.cycles;
+
+ if ( cve.stop_cond != gmx_stop_cond_none )
+ {
+ gmx_set_stop_condition(cve.stop_cond);
+ }
+ } else {
+ *energy = 0;
+ *pme_cycles = 0;
+ }
+}
+
+void gmx_pme_receive_f(t_commrec *cr,
+ rvec f[], matrix vir,
+ real *energy, real *dvdlambda,
+ float *pme_cycles)
+{
+ int natoms,i;
+
+#ifdef GMX_PME_DELAYED_WAIT
+ /* Wait for the x request to finish */
+ gmx_pme_send_q_x_wait(cr->dd);
+#endif
+
+ natoms = cr->dd->nat_home;
+
+ if (natoms > cr->dd->pme_recv_f_alloc)
+ {
+ cr->dd->pme_recv_f_alloc = over_alloc_dd(natoms);
+ srenew(cr->dd->pme_recv_f_buf, cr->dd->pme_recv_f_alloc);
+ }
+
+#ifdef GMX_MPI
+ MPI_Recv(cr->dd->pme_recv_f_buf[0],
+ natoms*sizeof(rvec),MPI_BYTE,
+ cr->dd->pme_nodeid,0,cr->mpi_comm_mysim,
+ MPI_STATUS_IGNORE);
+#endif
+
+ for(i=0; i<natoms; i++)
+ rvec_inc(f[i],cr->dd->pme_recv_f_buf[i]);
+
+
+ receive_virial_energy(cr,vir,energy,dvdlambda,pme_cycles);
+}
+
+void gmx_pme_send_force_vir_ener(struct gmx_pme_pp *pme_pp,
+ rvec *f, matrix vir,
+ real energy, real dvdlambda,
+ float cycles)
+{
+ gmx_pme_comm_vir_ene_t cve;
+ int messages,ind_start,ind_end,receiver;
+
+ cve.cycles = cycles;
+
+ /* Now the evaluated forces have to be transferred to the PP nodes */
+ messages = 0;
+ ind_end = 0;
+ for (receiver=0; receiver<pme_pp->nnode; receiver++) {
+ ind_start = ind_end;
+ ind_end = ind_start + pme_pp->nat[receiver];
+#ifdef GMX_MPI
+ if (MPI_Isend(f[ind_start],(ind_end-ind_start)*sizeof(rvec),MPI_BYTE,
+ pme_pp->node[receiver],0,
+ pme_pp->mpi_comm_mysim,&pme_pp->req[messages++]) != 0)
+ gmx_comm("MPI_Isend failed in do_pmeonly");
+#endif
+ }
+
+ /* send virial and energy to our last PP node */
+ copy_mat(vir,cve.vir);
+ cve.energy = energy;
+ cve.dvdlambda = dvdlambda;
+ /* check for the signals to send back to a PP node */
+ cve.stop_cond = gmx_get_stop_condition();
+
+ cve.cycles = cycles;
+
+ if (debug)
+ fprintf(debug,"PME node sending to PP node %d: virial and energy\n",
+ pme_pp->node_peer);
+#ifdef GMX_MPI
+ MPI_Isend(&cve,sizeof(cve),MPI_BYTE,
+ pme_pp->node_peer,1,
+ pme_pp->mpi_comm_mysim,&pme_pp->req[messages++]);
+
+ /* Wait for the forces to arrive */
+ MPI_Waitall(messages, pme_pp->req, pme_pp->stat);
+#endif
+}
--- /dev/null
- gmx_mtop_atomnr_to_atom(mtop,ii,&atom);
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "futil.h"
+#include "rdgroup.h"
+#include "statutil.h"
+#include "gmxfio.h"
+#include "vec.h"
+#include "typedefs.h"
+#include "network.h"
+#include "filenm.h"
+#include <string.h>
+#include "smalloc.h"
+#include "pull.h"
+#include "xvgr.h"
+#include "names.h"
+#include "partdec.h"
+#include "pbc.h"
+#include "mtop_util.h"
+#include "mdrun.h"
+#include "gmx_ga2la.h"
+#include "copyrite.h"
+#include "macros.h"
+
+static void pull_print_x_grp(FILE *out,gmx_bool bRef,ivec dim,t_pullgrp *pgrp)
+{
+ int m;
+
+ for(m=0; m<DIM; m++)
+ {
+ if (dim[m])
+ {
+ fprintf(out,"\t%g",bRef ? pgrp->x[m] : pgrp->dr[m]);
+ }
+ }
+}
+
+static void pull_print_x(FILE *out,t_pull *pull,double t)
+{
+ int g;
+
+ fprintf(out, "%.4f", t);
+
+ if (PULL_CYL(pull))
+ {
+ for (g=1; g<1+pull->ngrp; g++)
+ {
+ pull_print_x_grp(out,TRUE ,pull->dim,&pull->dyna[g]);
+ pull_print_x_grp(out,FALSE,pull->dim,&pull->grp[g]);
+ }
+ }
+ else
+ {
+ for (g=0; g<1+pull->ngrp; g++)
+ {
+ if (pull->grp[g].nat > 0)
+ {
+ pull_print_x_grp(out,g==0,pull->dim,&pull->grp[g]);
+ }
+ }
+ }
+ fprintf(out,"\n");
+}
+
+static void pull_print_f(FILE *out,t_pull *pull,double t)
+{
+ int g,d;
+
+ fprintf(out, "%.4f", t);
+
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ if (pull->eGeom == epullgPOS)
+ {
+ for(d=0; d<DIM; d++)
+ {
+ if (pull->dim[d])
+ {
+ fprintf(out,"\t%g",pull->grp[g].f[d]);
+ }
+ }
+ }
+ else
+ {
+ fprintf(out,"\t%g",pull->grp[g].f_scal);
+ }
+ }
+ fprintf(out,"\n");
+}
+
+void pull_print_output(t_pull *pull, gmx_large_int_t step, double time)
+{
+ if ((pull->nstxout != 0) && (step % pull->nstxout == 0))
+ {
+ pull_print_x(pull->out_x,pull,time);
+ }
+
+ if ((pull->nstfout != 0) && (step % pull->nstfout == 0))
+ {
+ pull_print_f(pull->out_f,pull,time);
+ }
+}
+
+static FILE *open_pull_out(const char *fn,t_pull *pull,const output_env_t oenv,
+ gmx_bool bCoord, unsigned long Flags)
+{
+ FILE *fp;
+ int nsets,g,m;
+ char **setname,buf[10];
+
+ if(Flags & MD_APPENDFILES)
+ {
+ fp = gmx_fio_fopen(fn,"a+");
+ }
+ else
+ {
+ fp = gmx_fio_fopen(fn,"w+");
+ if (bCoord)
+ {
+ xvgr_header(fp,"Pull COM", "Time (ps)","Position (nm)",
+ exvggtXNY,oenv);
+ }
+ else
+ {
+ xvgr_header(fp,"Pull force","Time (ps)","Force (kJ/mol/nm)",
+ exvggtXNY,oenv);
+ }
+
+ snew(setname,(1+pull->ngrp)*DIM);
+ nsets = 0;
+ for(g=0; g<1+pull->ngrp; g++)
+ {
+ if (pull->grp[g].nat > 0 &&
+ (g > 0 || (bCoord && !PULL_CYL(pull))))
+ {
+ if (bCoord || pull->eGeom == epullgPOS)
+ {
+ if (PULL_CYL(pull))
+ {
+ for(m=0; m<DIM; m++)
+ {
+ if (pull->dim[m])
+ {
+ sprintf(buf,"%d %s%c",g,"c",'X'+m);
+ setname[nsets] = strdup(buf);
+ nsets++;
+ }
+ }
+ }
+ for(m=0; m<DIM; m++)
+ {
+ if (pull->dim[m])
+ {
+ sprintf(buf,"%d %s%c",
+ g,(bCoord && g > 0)?"d":"",'X'+m);
+ setname[nsets] = strdup(buf);
+ nsets++;
+ }
+ }
+ }
+ else
+ {
+ sprintf(buf,"%d",g);
+ setname[nsets] = strdup(buf);
+ nsets++;
+ }
+ }
+ }
+ if (bCoord || nsets > 1)
+ {
+ xvgr_legend(fp,nsets,(const char**)setname,oenv);
+ }
+ for(g=0; g<nsets; g++)
+ {
+ sfree(setname[g]);
+ }
+ sfree(setname);
+ }
+
+ return fp;
+}
+
+/* Apply forces in a mass weighted fashion */
+static void apply_forces_grp(t_pullgrp *pgrp, t_mdatoms * md,
+ gmx_ga2la_t ga2la,
+ dvec f_pull, int sign, rvec *f)
+{
+ int i,ii,m,start,end;
+ double wmass,inv_wm;
+
+ start = md->start;
+ end = md->homenr + start;
+
+ inv_wm = pgrp->wscale*pgrp->invtm;
+
+ for(i=0; i<pgrp->nat_loc; i++)
+ {
+ ii = pgrp->ind_loc[i];
+ wmass = md->massT[ii];
+ if (pgrp->weight_loc)
+ {
+ wmass *= pgrp->weight_loc[i];
+ }
+
+ for(m=0; m<DIM; m++)
+ {
+ f[ii][m] += sign * wmass * f_pull[m] * inv_wm;
+ }
+ }
+}
+
+/* Apply forces in a mass weighted fashion */
+static void apply_forces(t_pull * pull, t_mdatoms * md, gmx_ga2la_t ga2la,
+ rvec *f)
+{
+ int i;
+ t_pullgrp *pgrp;
+
+ for(i=1; i<pull->ngrp+1; i++)
+ {
+ pgrp = &(pull->grp[i]);
+ apply_forces_grp(pgrp,md,ga2la,pgrp->f,1,f);
+ if (pull->grp[0].nat)
+ {
+ if (PULL_CYL(pull))
+ {
+ apply_forces_grp(&(pull->dyna[i]),md,ga2la,pgrp->f,-1,f);
+ }
+ else
+ {
+ apply_forces_grp(&(pull->grp[0]),md,ga2la,pgrp->f,-1,f);
+ }
+ }
+ }
+}
+
+static double max_pull_distance2(const t_pull *pull,const t_pbc *pbc)
+{
+ double max_d2;
+ int m;
+
+ max_d2 = GMX_DOUBLE_MAX;
+
+ if (pull->eGeom != epullgDIRPBC)
+ {
+ for(m=0; m<pbc->ndim_ePBC; m++)
+ {
+ if (pull->dim[m] != 0)
+ {
+ max_d2 = min(max_d2,norm2(pbc->box[m]));
+ }
+ }
+ }
+
+ return 0.25*max_d2;
+}
+
+static void get_pullgrps_dr(const t_pull *pull,const t_pbc *pbc,int g,double t,
+ dvec xg,dvec xref,double max_dist2,
+ dvec dr)
+{
+ t_pullgrp *pref,*pgrp;
+ int m;
+ dvec xrefr,dref={0,0,0};
+ double dr2;
+
+ pgrp = &pull->grp[g];
+
+ copy_dvec(xref,xrefr);
+
+ if (pull->eGeom == epullgDIRPBC)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ dref[m] = (pgrp->init[0] + pgrp->rate*t)*pull->grp[g].vec[m];
+ }
+ /* Add the reference position, so we use the correct periodic image */
+ dvec_inc(xrefr,dref);
+ }
+
+ pbc_dx_d(pbc, xg, xrefr, dr);
+ dr2 = 0;
+ for(m=0; m<DIM; m++)
+ {
+ dr[m] *= pull->dim[m];
+ dr2 += dr[m]*dr[m];
+ }
+ if (max_dist2 >= 0 && dr2 > 0.98*0.98*max_dist2)
+ {
+ gmx_fatal(FARGS,"Distance of pull group %d (%f nm) is larger than 0.49 times the box size (%f)",g,sqrt(dr2),sqrt(max_dist2));
+ }
+
+ if (pull->eGeom == epullgDIRPBC)
+ {
+ dvec_inc(dr,dref);
+ }
+}
+
+static void get_pullgrp_dr(const t_pull *pull,const t_pbc *pbc,int g,double t,
+ dvec dr)
+{
+ double md2;
+
+ if (pull->eGeom == epullgDIRPBC)
+ {
+ md2 = -1;
+ }
+ else
+ {
+ md2 = max_pull_distance2(pull,pbc);
+ }
+
+ get_pullgrps_dr(pull,pbc,g,t,
+ pull->grp[g].x,
+ PULL_CYL(pull) ? pull->dyna[g].x : pull->grp[0].x,
+ md2,
+ dr);
+}
+
+void get_pullgrp_distance(t_pull *pull,t_pbc *pbc,int g,double t,
+ dvec dr,dvec dev)
+{
+ static gmx_bool bWarned=FALSE; /* TODO: this should be fixed for thread-safety,
+ but is fairly benign */
+ t_pullgrp *pgrp;
+ int m;
+ dvec ref;
+ double drs,inpr;
+
+ pgrp = &pull->grp[g];
+
+ get_pullgrp_dr(pull,pbc,g,t,dr);
+
+ if (pull->eGeom == epullgPOS)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ ref[m] = pgrp->init[m] + pgrp->rate*t*pgrp->vec[m];
+ }
+ }
+ else
+ {
+ ref[0] = pgrp->init[0] + pgrp->rate*t;
+ }
+
+ switch (pull->eGeom)
+ {
+ case epullgDIST:
+ /* Pull along the vector between the com's */
+ if (ref[0] < 0 && !bWarned)
+ {
+ fprintf(stderr,"\nPull reference distance for group %d is negative (%f)\n",g,ref[0]);
+ bWarned = TRUE;
+ }
+ drs = dnorm(dr);
+ if (drs == 0)
+ {
+ /* With no vector we can not determine the direction for the force,
+ * so we set the force to zero.
+ */
+ dev[0] = 0;
+ }
+ else
+ {
+ /* Determine the deviation */
+ dev[0] = drs - ref[0];
+ }
+ break;
+ case epullgDIR:
+ case epullgDIRPBC:
+ case epullgCYL:
+ /* Pull along vec */
+ inpr = 0;
+ for(m=0; m<DIM; m++)
+ {
+ inpr += pgrp->vec[m]*dr[m];
+ }
+ dev[0] = inpr - ref[0];
+ break;
+ case epullgPOS:
+ /* Determine the difference of dr and ref along each dimension */
+ for(m=0; m<DIM; m++)
+ {
+ dev[m] = (dr[m] - ref[m])*pull->dim[m];
+ }
+ break;
+ }
+}
+
+void clear_pull_forces(t_pull *pull)
+{
+ int i;
+
+ /* Zeroing the forces is only required for constraint pulling.
+ * It can happen that multiple constraint steps need to be applied
+ * and therefore the constraint forces need to be accumulated.
+ */
+ for(i=0; i<1+pull->ngrp; i++)
+ {
+ clear_dvec(pull->grp[i].f);
+ pull->grp[i].f_scal = 0;
+ }
+}
+
+/* Apply constraint using SHAKE */
+static void do_constraint(t_pull *pull, t_mdatoms *md, t_pbc *pbc,
+ rvec *x, rvec *v,
+ gmx_bool bMaster, tensor vir,
+ double dt, double t)
+{
+
+ dvec *r_ij; /* x[i] com of i in prev. step. Obeys constr. -> r_ij[i] */
+ dvec unc_ij; /* xp[i] com of i this step, before constr. -> unc_ij */
+
+ dvec *rinew; /* current 'new' position of group i */
+ dvec *rjnew; /* current 'new' position of group j */
+ dvec ref,vec;
+ double d0,inpr;
+ double lambda, rm, mass, invdt=0;
+ gmx_bool bConverged_all,bConverged=FALSE;
+ int niter=0,g,ii,j,m,max_iter=100;
+ double q,a,b,c; /* for solving the quadratic equation,
+ see Num. Recipes in C ed 2 p. 184 */
+ dvec *dr; /* correction for group i */
+ dvec ref_dr; /* correction for group j */
+ dvec f; /* the pull force */
+ dvec tmp,tmp3;
+ t_pullgrp *pdyna,*pgrp,*pref;
+
+ snew(r_ij,pull->ngrp+1);
+ if (PULL_CYL(pull))
+ {
+ snew(rjnew,pull->ngrp+1);
+ }
+ else
+ {
+ snew(rjnew,1);
+ }
+ snew(dr,pull->ngrp+1);
+ snew(rinew,pull->ngrp+1);
+
+ /* copy the current unconstrained positions for use in iterations. We
+ iterate until rinew[i] and rjnew[j] obey the constraints. Then
+ rinew - pull.x_unc[i] is the correction dr to group i */
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ copy_dvec(pull->grp[g].xp,rinew[g]);
+ }
+ if (PULL_CYL(pull))
+ {
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ copy_dvec(pull->dyna[g].xp,rjnew[g]);
+ }
+ }
+ else
+ {
+ copy_dvec(pull->grp[0].xp,rjnew[0]);
+ }
+
+ /* Determine the constraint directions from the old positions */
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ get_pullgrp_dr(pull,pbc,g,t,r_ij[g]);
+ /* Store the difference vector at time t for printing */
+ copy_dvec(r_ij[g],pull->grp[g].dr);
+ if (debug)
+ {
+ fprintf(debug,"Pull group %d dr %f %f %f\n",
+ g,r_ij[g][XX],r_ij[g][YY],r_ij[g][ZZ]);
+ }
+
+ if (pull->eGeom == epullgDIR || pull->eGeom == epullgDIRPBC)
+ {
+ /* Select the component along vec */
+ a = 0;
+ for(m=0; m<DIM; m++)
+ {
+ a += pull->grp[g].vec[m]*r_ij[g][m];
+ }
+ for(m=0; m<DIM; m++)
+ {
+ r_ij[g][m] = a*pull->grp[g].vec[m];
+ }
+ }
+ }
+
+ bConverged_all = FALSE;
+ while (!bConverged_all && niter < max_iter)
+ {
+ bConverged_all = TRUE;
+
+ /* loop over all constraints */
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ pgrp = &pull->grp[g];
+ if (PULL_CYL(pull))
+ pref = &pull->dyna[g];
+ else
+ pref = &pull->grp[0];
+
+ /* Get the current difference vector */
+ get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[PULL_CYL(pull) ? g : 0],
+ -1,unc_ij);
+
+ if (pull->eGeom == epullgPOS)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ ref[m] = pgrp->init[m] + pgrp->rate*t*pgrp->vec[m];
+ }
+ }
+ else
+ {
+ ref[0] = pgrp->init[0] + pgrp->rate*t;
+ /* Keep the compiler happy */
+ ref[1] = 0;
+ ref[2] = 0;
+ }
+
+ if (debug)
+ {
+ fprintf(debug,"Pull group %d, iteration %d\n",g,niter);
+ }
+
+ rm = 1.0/(pull->grp[g].invtm + pref->invtm);
+
+ switch (pull->eGeom)
+ {
+ case epullgDIST:
+ if (ref[0] <= 0)
+ {
+ gmx_fatal(FARGS,"The pull constraint reference distance for group %d is <= 0 (%f)",g,ref[0]);
+ }
+
+ a = diprod(r_ij[g],r_ij[g]);
+ b = diprod(unc_ij,r_ij[g])*2;
+ c = diprod(unc_ij,unc_ij) - dsqr(ref[0]);
+
+ if (b < 0)
+ {
+ q = -0.5*(b - sqrt(b*b - 4*a*c));
+ lambda = -q/a;
+ }
+ else
+ {
+ q = -0.5*(b + sqrt(b*b - 4*a*c));
+ lambda = -c/q;
+ }
+
+ if (debug)
+ {
+ fprintf(debug,
+ "Pull ax^2+bx+c=0: a=%e b=%e c=%e lambda=%e\n",
+ a,b,c,lambda);
+ }
+
+ /* The position corrections dr due to the constraints */
+ dsvmul(-lambda*rm*pgrp->invtm, r_ij[g], dr[g]);
+ dsvmul( lambda*rm*pref->invtm, r_ij[g], ref_dr);
+ break;
+ case epullgDIR:
+ case epullgDIRPBC:
+ case epullgCYL:
+ /* A 1-dimensional constraint along a vector */
+ a = 0;
+ for(m=0; m<DIM; m++)
+ {
+ vec[m] = pgrp->vec[m];
+ a += unc_ij[m]*vec[m];
+ }
+ /* Select only the component along the vector */
+ dsvmul(a,vec,unc_ij);
+ lambda = a - ref[0];
+ if (debug)
+ {
+ fprintf(debug,"Pull inpr %e lambda: %e\n",a,lambda);
+ }
+
+ /* The position corrections dr due to the constraints */
+ dsvmul(-lambda*rm*pull->grp[g].invtm, vec, dr[g]);
+ dsvmul( lambda*rm* pref->invtm, vec,ref_dr);
+ break;
+ case epullgPOS:
+ for(m=0; m<DIM; m++)
+ {
+ if (pull->dim[m])
+ {
+ lambda = r_ij[g][m] - ref[m];
+ /* The position corrections dr due to the constraints */
+ dr[g][m] = -lambda*rm*pull->grp[g].invtm;
+ ref_dr[m] = lambda*rm*pref->invtm;
+ }
+ else
+ {
+ dr[g][m] = 0;
+ ref_dr[m] = 0;
+ }
+ }
+ break;
+ }
+
+ /* DEBUG */
+ if (debug)
+ {
+ j = (PULL_CYL(pull) ? g : 0);
+ get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[j],-1,tmp);
+ get_pullgrps_dr(pull,pbc,g,t,dr[g] ,ref_dr ,-1,tmp3);
+ fprintf(debug,
+ "Pull cur %8.5f %8.5f %8.5f j:%8.5f %8.5f %8.5f d: %8.5f\n",
+ rinew[g][0],rinew[g][1],rinew[g][2],
+ rjnew[j][0],rjnew[j][1],rjnew[j][2], dnorm(tmp));
+ if (pull->eGeom == epullgPOS)
+ {
+ fprintf(debug,
+ "Pull ref %8.5f %8.5f %8.5f\n",
+ pgrp->vec[0],pgrp->vec[1],pgrp->vec[2]);
+ }
+ else
+ {
+ fprintf(debug,
+ "Pull ref %8s %8s %8s %8s %8s %8s d: %8.5f %8.5f %8.5f\n",
+ "","","","","","",ref[0],ref[1],ref[2]);
+ }
+ fprintf(debug,
+ "Pull cor %8.5f %8.5f %8.5f j:%8.5f %8.5f %8.5f d: %8.5f\n",
+ dr[g][0],dr[g][1],dr[g][2],
+ ref_dr[0],ref_dr[1],ref_dr[2],
+ dnorm(tmp3));
+ fprintf(debug,
+ "Pull cor %10.7f %10.7f %10.7f\n",
+ dr[g][0],dr[g][1],dr[g][2]);
+ } /* END DEBUG */
+
+ /* Update the COMs with dr */
+ dvec_inc(rinew[g], dr[g]);
+ dvec_inc(rjnew[PULL_CYL(pull) ? g : 0],ref_dr);
+ }
+
+ /* Check if all constraints are fullfilled now */
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ pgrp = &pull->grp[g];
+
+ get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[PULL_CYL(pull) ? g : 0],
+ -1,unc_ij);
+
+ switch (pull->eGeom)
+ {
+ case epullgDIST:
+ bConverged = fabs(dnorm(unc_ij) - ref[0]) < pull->constr_tol;
+ break;
+ case epullgDIR:
+ case epullgDIRPBC:
+ case epullgCYL:
+ for(m=0; m<DIM; m++)
+ {
+ vec[m] = pgrp->vec[m];
+ }
+ inpr = diprod(unc_ij,vec);
+ dsvmul(inpr,vec,unc_ij);
+ bConverged =
+ fabs(diprod(unc_ij,vec) - ref[0]) < pull->constr_tol;
+ break;
+ case epullgPOS:
+ bConverged = TRUE;
+ for(m=0; m<DIM; m++)
+ {
+ if (pull->dim[m] &&
+ fabs(unc_ij[m] - ref[m]) >= pull->constr_tol)
+ {
+ bConverged = FALSE;
+ }
+ }
+ break;
+ }
+
+ if (!bConverged)
+ {
+ if (debug)
+ {
+ fprintf(debug,"NOT CONVERGED YET: Group %d:"
+ "d_ref = %f %f %f, current d = %f\n",
+ g,ref[0],ref[1],ref[2],dnorm(unc_ij));
+ }
+
+ bConverged_all = FALSE;
+ }
+ }
+
+ niter++;
+ /* if after all constraints are dealt with and bConverged is still TRUE
+ we're finished, if not we do another iteration */
+ }
+ if (niter > max_iter)
+ {
+ gmx_fatal(FARGS,"Too many iterations for constraint run: %d",niter);
+ }
+
+ /* DONE ITERATING, NOW UPDATE COORDINATES AND CALC. CONSTRAINT FORCES */
+
+ if (v)
+ {
+ invdt = 1/dt;
+ }
+
+ /* update the normal groups */
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ pgrp = &pull->grp[g];
+ /* get the final dr and constraint force for group i */
+ dvec_sub(rinew[g],pgrp->xp,dr[g]);
+ /* select components of dr */
+ for(m=0; m<DIM; m++)
+ {
+ dr[g][m] *= pull->dim[m];
+ }
+ dsvmul(1.0/(pgrp->invtm*dt*dt),dr[g],f);
+ dvec_inc(pgrp->f,f);
+ switch (pull->eGeom)
+ {
+ case epullgDIST:
+ for(m=0; m<DIM; m++)
+ {
+ pgrp->f_scal += r_ij[g][m]*f[m]/dnorm(r_ij[g]);
+ }
+ break;
+ case epullgDIR:
+ case epullgDIRPBC:
+ case epullgCYL:
+ for(m=0; m<DIM; m++)
+ {
+ pgrp->f_scal += pgrp->vec[m]*f[m];
+ }
+ break;
+ case epullgPOS:
+ break;
+ }
+
+ if (vir && bMaster) {
+ /* Add the pull contribution to the virial */
+ for(j=0; j<DIM; j++)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ vir[j][m] -= 0.5*f[j]*r_ij[g][m];
+ }
+ }
+ }
+
+ /* update the atom positions */
+ copy_dvec(dr[g],tmp);
+ for(j=0;j<pgrp->nat_loc;j++)
+ {
+ ii = pgrp->ind_loc[j];
+ if (pgrp->weight_loc)
+ {
+ dsvmul(pgrp->wscale*pgrp->weight_loc[j],dr[g],tmp);
+ }
+ for(m=0; m<DIM; m++)
+ {
+ x[ii][m] += tmp[m];
+ }
+ if (v)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ v[ii][m] += invdt*tmp[m];
+ }
+ }
+ }
+ }
+
+ /* update the reference groups */
+ if (PULL_CYL(pull))
+ {
+ /* update the dynamic reference groups */
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ pdyna = &pull->dyna[g];
+ dvec_sub(rjnew[g],pdyna->xp,ref_dr);
+ /* select components of ref_dr */
+ for(m=0; m<DIM; m++)
+ {
+ ref_dr[m] *= pull->dim[m];
+ }
+
+ for(j=0;j<pdyna->nat_loc;j++)
+ {
+ /* reset the atoms with dr, weighted by w_i */
+ dsvmul(pdyna->wscale*pdyna->weight_loc[j],ref_dr,tmp);
+ ii = pdyna->ind_loc[j];
+ for(m=0; m<DIM; m++)
+ {
+ x[ii][m] += tmp[m];
+ }
+ if (v)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ v[ii][m] += invdt*tmp[m];
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ pgrp = &pull->grp[0];
+ /* update the reference group */
+ dvec_sub(rjnew[0],pgrp->xp, ref_dr);
+ /* select components of ref_dr */
+ for(m=0;m<DIM;m++)
+ {
+ ref_dr[m] *= pull->dim[m];
+ }
+
+ copy_dvec(ref_dr,tmp);
+ for(j=0; j<pgrp->nat_loc;j++)
+ {
+ ii = pgrp->ind_loc[j];
+ if (pgrp->weight_loc)
+ {
+ dsvmul(pgrp->wscale*pgrp->weight_loc[j],ref_dr,tmp);
+ }
+ for(m=0; m<DIM; m++)
+ {
+ x[ii][m] += tmp[m];
+ }
+ if (v)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ v[ii][m] += invdt*tmp[m];
+ }
+ }
+ }
+ }
+
+ /* finished! I hope. Give back some memory */
+ sfree(r_ij);
+ sfree(rinew);
+ sfree(rjnew);
+ sfree(dr);
+}
+
+/* Pulling with a harmonic umbrella potential or constant force */
+static void do_pull_pot(int ePull,
+ t_pull *pull, t_pbc *pbc, double t, real lambda,
+ real *V, tensor vir, real *dVdl)
+{
+ int g,j,m;
+ dvec dev;
+ double ndr,invdr;
+ real k,dkdl;
+ t_pullgrp *pgrp;
+
+ /* loop over the groups that are being pulled */
+ *V = 0;
+ *dVdl = 0;
+ for(g=1; g<1+pull->ngrp; g++)
+ {
+ pgrp = &pull->grp[g];
+ get_pullgrp_distance(pull,pbc,g,t,pgrp->dr,dev);
+
+ k = (1.0 - lambda)*pgrp->k + lambda*pgrp->kB;
+ dkdl = pgrp->kB - pgrp->k;
+
+ switch (pull->eGeom)
+ {
+ case epullgDIST:
+ ndr = dnorm(pgrp->dr);
+ invdr = 1/ndr;
+ if (ePull == epullUMBRELLA)
+ {
+ pgrp->f_scal = -k*dev[0];
+ *V += 0.5* k*dsqr(dev[0]);
+ *dVdl += 0.5*dkdl*dsqr(dev[0]);
+ }
+ else
+ {
+ pgrp->f_scal = -k;
+ *V += k*ndr;
+ *dVdl += dkdl*ndr;
+ }
+ for(m=0; m<DIM; m++)
+ {
+ pgrp->f[m] = pgrp->f_scal*pgrp->dr[m]*invdr;
+ }
+ break;
+ case epullgDIR:
+ case epullgDIRPBC:
+ case epullgCYL:
+ if (ePull == epullUMBRELLA)
+ {
+ pgrp->f_scal = -k*dev[0];
+ *V += 0.5* k*dsqr(dev[0]);
+ *dVdl += 0.5*dkdl*dsqr(dev[0]);
+ }
+ else
+ {
+ ndr = 0;
+ for(m=0; m<DIM; m++)
+ {
+ ndr += pgrp->vec[m]*pgrp->dr[m];
+ }
+ pgrp->f_scal = -k;
+ *V += k*ndr;
+ *dVdl += dkdl*ndr;
+ }
+ for(m=0; m<DIM; m++)
+ {
+ pgrp->f[m] = pgrp->f_scal*pgrp->vec[m];
+ }
+ break;
+ case epullgPOS:
+ for(m=0; m<DIM; m++)
+ {
+ if (ePull == epullUMBRELLA)
+ {
+ pgrp->f[m] = -k*dev[m];
+ *V += 0.5* k*dsqr(dev[m]);
+ *dVdl += 0.5*dkdl*dsqr(dev[m]);
+ }
+ else
+ {
+ pgrp->f[m] = -k*pull->dim[m];
+ *V += k*pgrp->dr[m]*pull->dim[m];
+ *dVdl += dkdl*pgrp->dr[m]*pull->dim[m];
+ }
+ }
+ break;
+ }
+
+ if (vir)
+ {
+ /* Add the pull contribution to the virial */
+ for(j=0; j<DIM; j++)
+ {
+ for(m=0;m<DIM;m++)
+ {
+ vir[j][m] -= 0.5*pgrp->f[j]*pgrp->dr[m];
+ }
+ }
+ }
+ }
+}
+
+real pull_potential(int ePull,t_pull *pull, t_mdatoms *md, t_pbc *pbc,
+ t_commrec *cr, double t, real lambda,
+ rvec *x, rvec *f, tensor vir, real *dvdlambda)
+{
+ real V,dVdl;
+
+ pull_calc_coms(cr,pull,md,pbc,t,x,NULL);
+
+ do_pull_pot(ePull,pull,pbc,t,lambda,
+ &V,pull->bVirial && MASTER(cr) ? vir : NULL,&dVdl);
+
+ /* Distribute forces over pulled groups */
+ apply_forces(pull, md, DOMAINDECOMP(cr) ? cr->dd->ga2la : NULL, f);
+
+ if (MASTER(cr)) {
+ *dvdlambda += dVdl;
+ }
+
+ return (MASTER(cr) ? V : 0.0);
+}
+
+void pull_constraint(t_pull *pull, t_mdatoms *md, t_pbc *pbc,
+ t_commrec *cr, double dt, double t,
+ rvec *x, rvec *xp, rvec *v, tensor vir)
+{
+ pull_calc_coms(cr,pull,md,pbc,t,x,xp);
+
+ do_constraint(pull,md,pbc,xp,v,pull->bVirial && MASTER(cr),vir,dt,t);
+}
+
+static void make_local_pull_group(gmx_ga2la_t ga2la,
+ t_pullgrp *pg,int start,int end)
+{
+ int i,ii;
+
+ pg->nat_loc = 0;
+ for(i=0; i<pg->nat; i++) {
+ ii = pg->ind[i];
+ if (ga2la) {
+ if (!ga2la_get_home(ga2la,ii,&ii)) {
+ ii = -1;
+ }
+ }
+ if (ii >= start && ii < end) {
+ /* This is a home atom, add it to the local pull group */
+ if (pg->nat_loc >= pg->nalloc_loc) {
+ pg->nalloc_loc = over_alloc_dd(pg->nat_loc+1);
+ srenew(pg->ind_loc,pg->nalloc_loc);
+ if (pg->epgrppbc == epgrppbcCOS || pg->weight) {
+ srenew(pg->weight_loc,pg->nalloc_loc);
+ }
+ }
+ pg->ind_loc[pg->nat_loc] = ii;
+ if (pg->weight) {
+ pg->weight_loc[pg->nat_loc] = pg->weight[i];
+ }
+ pg->nat_loc++;
+ }
+ }
+}
+
+void dd_make_local_pull_groups(gmx_domdec_t *dd,t_pull *pull,t_mdatoms *md)
+{
+ gmx_ga2la_t ga2la;
+ int g;
+
+ if (dd) {
+ ga2la = dd->ga2la;
+ } else {
+ ga2la = NULL;
+ }
+
+ if (pull->grp[0].nat > 0)
+ make_local_pull_group(ga2la,&pull->grp[0],md->start,md->start+md->homenr);
+ for(g=1; g<1+pull->ngrp; g++)
+ make_local_pull_group(ga2la,&pull->grp[g],md->start,md->start+md->homenr);
+}
+
+static void init_pull_group_index(FILE *fplog,t_commrec *cr,
+ int start,int end,
+ int g,t_pullgrp *pg,ivec pulldims,
+ gmx_mtop_t *mtop,t_inputrec *ir, real lambda)
+{
+ int i,ii,d,nfrozen,ndim;
+ real m,w,mbd;
+ double tmass,wmass,wwmass;
+ gmx_bool bDomDec;
+ gmx_ga2la_t ga2la=NULL;
+ gmx_groups_t *groups;
++ gmx_mtop_atomlookup_t alook;
+ t_atom *atom;
+
+ bDomDec = (cr && DOMAINDECOMP(cr));
+ if (bDomDec) {
+ ga2la = cr->dd->ga2la;
+ }
+
+ if (EI_ENERGY_MINIMIZATION(ir->eI) || ir->eI == eiBD) {
+ /* There are no masses in the integrator.
+ * But we still want to have the correct mass-weighted COMs.
+ * So we store the real masses in the weights.
+ * We do not set nweight, so these weights do not end up in the tpx file.
+ */
+ if (pg->nweight == 0) {
+ snew(pg->weight,pg->nat);
+ }
+ }
+
+ if (cr && PAR(cr)) {
+ pg->nat_loc = 0;
+ pg->nalloc_loc = 0;
+ pg->ind_loc = NULL;
+ pg->weight_loc = NULL;
+ } else {
+ pg->nat_loc = pg->nat;
+ pg->ind_loc = pg->ind;
+ if (pg->epgrppbc == epgrppbcCOS) {
+ snew(pg->weight_loc,pg->nat);
+ } else {
+ pg->weight_loc = pg->weight;
+ }
+ }
+
+ groups = &mtop->groups;
+
++ alook = gmx_mtop_atomlookup_init(mtop);
++
+ nfrozen = 0;
+ tmass = 0;
+ wmass = 0;
+ wwmass = 0;
+ for(i=0; i<pg->nat; i++) {
+ ii = pg->ind[i];
++ gmx_mtop_atomnr_to_atom(alook,ii,&atom);
+ if (cr && PAR(cr) && !bDomDec && ii >= start && ii < end)
+ pg->ind_loc[pg->nat_loc++] = ii;
+ if (ir->opts.nFreeze) {
+ for(d=0; d<DIM; d++)
+ if (pulldims[d] && ir->opts.nFreeze[ggrpnr(groups,egcFREEZE,ii)][d])
+ nfrozen++;
+ }
+ if (ir->efep == efepNO) {
+ m = atom->m;
+ } else {
+ m = (1 - lambda)*atom->m + lambda*atom->mB;
+ }
+ if (pg->nweight > 0) {
+ w = pg->weight[i];
+ } else {
+ w = 1;
+ }
+ if (EI_ENERGY_MINIMIZATION(ir->eI)) {
+ /* Move the mass to the weight */
+ w *= m;
+ m = 1;
+ pg->weight[i] = w;
+ } else if (ir->eI == eiBD) {
+ if (ir->bd_fric) {
+ mbd = ir->bd_fric*ir->delta_t;
+ } else {
+ if (groups->grpnr[egcTC] == NULL) {
+ mbd = ir->delta_t/ir->opts.tau_t[0];
+ } else {
+ mbd = ir->delta_t/ir->opts.tau_t[groups->grpnr[egcTC][ii]];
+ }
+ }
+ w *= m/mbd;
+ m = mbd;
+ pg->weight[i] = w;
+ }
+ tmass += m;
+ wmass += m*w;
+ wwmass += m*w*w;
+ }
+
++ gmx_mtop_atomlookup_destroy(alook);
++
+ if (wmass == 0) {
+ gmx_fatal(FARGS,"The total%s mass of pull group %d is zero",
+ pg->weight ? " weighted" : "",g);
+ }
+ if (fplog) {
+ fprintf(fplog,
+ "Pull group %d: %5d atoms, mass %9.3f",g,pg->nat,tmass);
+ if (pg->weight || EI_ENERGY_MINIMIZATION(ir->eI) || ir->eI == eiBD) {
+ fprintf(fplog,", weighted mass %9.3f",wmass*wmass/wwmass);
+ }
+ if (pg->epgrppbc == epgrppbcCOS) {
+ fprintf(fplog,", cosine weighting will be used");
+ }
+ fprintf(fplog,"\n");
+ }
+
+ if (nfrozen == 0) {
+ /* A value > 0 signals not frozen, it is updated later */
+ pg->invtm = 1.0;
+ } else {
+ ndim = 0;
+ for(d=0; d<DIM; d++)
+ ndim += pulldims[d]*pg->nat;
+ if (fplog && nfrozen > 0 && nfrozen < ndim) {
+ fprintf(fplog,
+ "\nWARNING: In pull group %d some, but not all of the degrees of freedom\n"
+ " that are subject to pulling are frozen.\n"
+ " For pulling the whole group will be frozen.\n\n",
+ g);
+ }
+ pg->invtm = 0.0;
+ pg->wscale = 1.0;
+ }
+}
+
+void init_pull(FILE *fplog,t_inputrec *ir,int nfile,const t_filenm fnm[],
+ gmx_mtop_t *mtop,t_commrec *cr,const output_env_t oenv, real lambda,
+ gmx_bool bOutFile, unsigned long Flags)
+{
+ t_pull *pull;
+ t_pullgrp *pgrp;
+ int g,start=0,end=0,m;
+ gmx_bool bCite;
+
+ pull = ir->pull;
+
+ pull->ePBC = ir->ePBC;
+ switch (pull->ePBC)
+ {
+ case epbcNONE: pull->npbcdim = 0; break;
+ case epbcXY: pull->npbcdim = 2; break;
+ default: pull->npbcdim = 3; break;
+ }
+
+ if (fplog)
+ {
+ fprintf(fplog,"\nWill apply %s COM pulling in geometry '%s'\n",
+ EPULLTYPE(ir->ePull),EPULLGEOM(pull->eGeom));
+ if (pull->grp[0].nat > 0)
+ {
+ fprintf(fplog,"between a reference group and %d group%s\n",
+ pull->ngrp,pull->ngrp==1 ? "" : "s");
+ }
+ else
+ {
+ fprintf(fplog,"with an absolute reference on %d group%s\n",
+ pull->ngrp,pull->ngrp==1 ? "" : "s");
+ }
+ bCite = FALSE;
+ for(g=0; g<pull->ngrp+1; g++)
+ {
+ if (pull->grp[g].nat > 1 &&
+ pull->grp[g].pbcatom < 0)
+ {
+ /* We are using cosine weighting */
+ fprintf(fplog,"Cosine weighting is used for group %d\n",g);
+ bCite = TRUE;
+ }
+ }
+ if (bCite)
+ {
+ please_cite(fplog,"Engin2010");
+ }
+ }
+
+ /* We always add the virial contribution,
+ * except for geometry = direction_periodic where this is impossible.
+ */
+ pull->bVirial = (pull->eGeom != epullgDIRPBC);
+ if (getenv("GMX_NO_PULLVIR") != NULL)
+ {
+ if (fplog)
+ {
+ fprintf(fplog,"Found env. var., will not add the virial contribution of the COM pull forces\n");
+ }
+ pull->bVirial = FALSE;
+ }
+
+ if (cr && PARTDECOMP(cr))
+ {
+ pd_at_range(cr,&start,&end);
+ }
+ pull->rbuf=NULL;
+ pull->dbuf=NULL;
+ pull->dbuf_cyl=NULL;
+ pull->bRefAt = FALSE;
+ pull->cosdim = -1;
+ for(g=0; g<pull->ngrp+1; g++)
+ {
+ pgrp = &pull->grp[g];
+ pgrp->epgrppbc = epgrppbcNONE;
+ if (pgrp->nat > 0)
+ {
+ /* Determine if we need to take PBC into account for calculating
+ * the COM's of the pull groups.
+ */
+ for(m=0; m<pull->npbcdim; m++)
+ {
+ if (pull->dim[m] && pgrp->nat > 1)
+ {
+ if (pgrp->pbcatom >= 0)
+ {
+ pgrp->epgrppbc = epgrppbcREFAT;
+ pull->bRefAt = TRUE;
+ }
+ else
+ {
+ if (pgrp->weight)
+ {
+ gmx_fatal(FARGS,"Pull groups can not have relative weights and cosine weighting at same time");
+ }
+ pgrp->epgrppbc = epgrppbcCOS;
+ if (pull->cosdim >= 0 && pull->cosdim != m)
+ {
+ gmx_fatal(FARGS,"Can only use cosine weighting with pulling in one dimension (use mdp option pull_dim)");
+ }
+ pull->cosdim = m;
+ }
+ }
+ }
+ /* Set the indices */
+ init_pull_group_index(fplog,cr,start,end,g,pgrp,pull->dim,mtop,ir,lambda);
+ if (PULL_CYL(pull) && pgrp->invtm == 0)
+ {
+ gmx_fatal(FARGS,"Can not have frozen atoms in a cylinder pull group");
+ }
+ }
+ else
+ {
+ /* Absolute reference, set the inverse mass to zero */
+ pgrp->invtm = 0;
+ pgrp->wscale = 1;
+ }
+ }
+
+ /* if we use dynamic reference groups, do some initialising for them */
+ if (PULL_CYL(pull))
+ {
+ if (pull->grp[0].nat == 0)
+ {
+ gmx_fatal(FARGS, "Dynamic reference groups are not supported when using absolute reference!\n");
+ }
+ snew(pull->dyna,pull->ngrp+1);
+ }
+
+ /* Only do I/O when we are doing dynamics and if we are the MASTER */
+ pull->out_x = NULL;
+ pull->out_f = NULL;
+ if (bOutFile)
+ {
+ if (pull->nstxout > 0)
+ {
+ pull->out_x = open_pull_out(opt2fn("-px",nfile,fnm),pull,oenv,TRUE,Flags);
+ }
+ if (pull->nstfout > 0)
+ {
+ pull->out_f = open_pull_out(opt2fn("-pf",nfile,fnm),pull,oenv,
+ FALSE,Flags);
+ }
+ }
+}
+
+void finish_pull(FILE *fplog,t_pull *pull)
+{
+ if (pull->out_x)
+ {
+ gmx_fio_fclose(pull->out_x);
+ }
+ if (pull->out_f)
+ {
+ gmx_fio_fclose(pull->out_f);
+ }
+}
--- /dev/null
- gmx_mtop_atomnr_to_atom(mtop,rotg->ind[i],&atom);
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2008, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "domdec.h"
+#include "gmx_wallcycle.h"
+#include "gmx_cyclecounter.h"
+#include "trnio.h"
+#include "smalloc.h"
+#include "network.h"
+#include "pbc.h"
+#include "futil.h"
+#include "mdrun.h"
+#include "txtdump.h"
+#include "names.h"
+#include "mtop_util.h"
+#include "names.h"
+#include "nrjac.h"
+#include "vec.h"
+#include "gmx_ga2la.h"
+#include "xvgr.h"
+#include "gmxfio.h"
+#include "groupcoord.h"
+#include "pull_rotation.h"
+#include "gmx_sort.h"
+#include "copyrite.h"
+#include "macros.h"
+
+
+static char *RotStr = {"Enforced rotation:"};
+
+
+/* Set the minimum weight for the determination of the slab centers */
+#define WEIGHT_MIN (10*GMX_FLOAT_MIN)
+
+/* Helper structure for sorting positions along rotation vector */
+typedef struct {
+ real xcproj; /* Projection of xc on the rotation vector */
+ int ind; /* Index of xc */
+ real m; /* Mass */
+ rvec x; /* Position */
+ rvec x_ref; /* Reference position */
+} sort_along_vec_t;
+
+
+/* Enforced rotation / flexible: determine the angle of each slab */
+typedef struct gmx_slabdata
+{
+ int nat; /* Number of atoms belonging to this slab */
+ rvec *x; /* The positions belonging to this slab. In
+ general, this should be all positions of the
+ whole rotation group, but we leave those away
+ that have a small enough weight */
+ rvec *ref; /* Same for reference */
+ real *weight; /* The weight for each atom */
+} t_gmx_slabdata;
+
+
+/* Helper structure for potential fitting */
+typedef struct gmx_potfit
+{
+ real *degangle; /* Set of angles for which the potential is
+ calculated. The optimum fit is determined as
+ the angle for with the potential is minimal */
+ real *V; /* Potential for the different angles */
+ matrix *rotmat; /* Rotation matrix corresponding to the angles */
+} t_gmx_potfit;
+
+
+/* Enforced rotation data for all groups */
+typedef struct gmx_enfrot
+{
+ FILE *out_rot; /* Output file for rotation data */
+ FILE *out_torque; /* Output file for torque data */
+ FILE *out_angles; /* Output file for slab angles for flexible type */
+ FILE *out_slabs; /* Output file for slab centers */
+ int bufsize; /* Allocation size of buf */
+ rvec *xbuf; /* Coordinate buffer variable for sorting */
+ real *mbuf; /* Masses buffer variable for sorting */
+ sort_along_vec_t *data; /* Buffer variable needed for position sorting */
+ real *mpi_inbuf; /* MPI buffer */
+ real *mpi_outbuf; /* MPI buffer */
+ int mpi_bufsize; /* Allocation size of in & outbuf */
+ unsigned long Flags; /* mdrun flags */
+ gmx_bool bOut; /* Used to skip first output when appending to
+ * avoid duplicate entries in rotation outfiles */
+} t_gmx_enfrot;
+
+
+/* Global enforced rotation data for a single rotation group */
+typedef struct gmx_enfrotgrp
+{
+ real degangle; /* Rotation angle in degrees */
+ matrix rotmat; /* Rotation matrix */
+ atom_id *ind_loc; /* Local rotation indices */
+ int nat_loc; /* Number of local group atoms */
+ int nalloc_loc; /* Allocation size for ind_loc and weight_loc */
+
+ real V; /* Rotation potential for this rotation group */
+ rvec *f_rot_loc; /* Array to store the forces on the local atoms
+ resulting from enforced rotation potential */
+
+ /* Collective coordinates for the whole rotation group */
+ real *xc_ref_length; /* Length of each x_rotref vector after x_rotref
+ has been put into origin */
+ int *xc_ref_ind; /* Position of each local atom in the collective
+ array */
+ rvec xc_center; /* Center of the rotation group positions, may
+ be mass weighted */
+ rvec xc_ref_center; /* dito, for the reference positions */
+ rvec *xc; /* Current (collective) positions */
+ ivec *xc_shifts; /* Current (collective) shifts */
+ ivec *xc_eshifts; /* Extra shifts since last DD step */
+ rvec *xc_old; /* Old (collective) positions */
+ rvec *xc_norm; /* Normalized form of the current positions */
+ rvec *xc_ref_sorted; /* Reference positions (sorted in the same order
+ as xc when sorted) */
+ int *xc_sortind; /* Where is a position found after sorting? */
+ real *mc; /* Collective masses */
+ real *mc_sorted;
+ real invmass; /* one over the total mass of the rotation group */
+
+ real torque_v; /* Torque in the direction of rotation vector */
+ real angle_v; /* Actual angle of the whole rotation group */
+ /* Fixed rotation only */
+ real weight_v; /* Weights for angle determination */
+ rvec *xr_loc; /* Local reference coords, correctly rotated */
+ rvec *x_loc_pbc; /* Local current coords, correct PBC image */
+ real *m_loc; /* Masses of the current local atoms */
+
+ /* Flexible rotation only */
+ int nslabs_alloc; /* For this many slabs memory is allocated */
+ int slab_first; /* Lowermost slab for that the calculation needs
+ to be performed at a given time step */
+ int slab_last; /* Uppermost slab ... */
+ int slab_first_ref; /* First slab for which ref. center is stored */
+ int slab_last_ref; /* Last ... */
+ int slab_buffer; /* Slab buffer region around reference slabs */
+ int *firstatom; /* First relevant atom for a slab */
+ int *lastatom; /* Last relevant atom for a slab */
+ rvec *slab_center; /* Gaussian-weighted slab center */
+ rvec *slab_center_ref; /* Gaussian-weighted slab center for the
+ reference positions */
+ real *slab_weights; /* Sum of gaussian weights in a slab */
+ real *slab_torque_v; /* Torque T = r x f for each slab. */
+ /* torque_v = m.v = angular momentum in the
+ direction of v */
+ real max_beta; /* min_gaussian from inputrec->rotgrp is the
+ minimum value the gaussian must have so that
+ the force is actually evaluated max_beta is
+ just another way to put it */
+ real *gn_atom; /* Precalculated gaussians for a single atom */
+ int *gn_slabind; /* Tells to which slab each precalculated gaussian
+ belongs */
+ rvec *slab_innersumvec;/* Inner sum of the flexible2 potential per slab;
+ this is precalculated for optimization reasons */
+ t_gmx_slabdata *slab_data; /* Holds atom positions and gaussian weights
+ of atoms belonging to a slab */
+
+ /* For potential fits with varying angle: */
+ t_gmx_potfit *PotAngleFit; /* Used for fit type 'potential' */
+} t_gmx_enfrotgrp;
+
+
+/* Activate output of forces for correctness checks */
+/* #define PRINT_FORCES */
+#ifdef PRINT_FORCES
+#define PRINT_FORCE_J fprintf(stderr,"f%d = %15.8f %15.8f %15.8f\n",erg->xc_ref_ind[j],erg->f_rot_loc[j][XX], erg->f_rot_loc[j][YY], erg->f_rot_loc[j][ZZ]);
+#define PRINT_POT_TAU if (MASTER(cr)) { \
+ fprintf(stderr,"potential = %15.8f\n" "torque = %15.8f\n", erg->V, erg->torque_v); \
+ }
+#else
+#define PRINT_FORCE_J
+#define PRINT_POT_TAU
+#endif
+
+/* Shortcuts for often used queries */
+#define ISFLEX(rg) ( (rg->eType==erotgFLEX) || (rg->eType==erotgFLEXT) || (rg->eType==erotgFLEX2) || (rg->eType==erotgFLEX2T) )
+#define ISCOLL(rg) ( (rg->eType==erotgFLEX) || (rg->eType==erotgFLEXT) || (rg->eType==erotgFLEX2) || (rg->eType==erotgFLEX2T) || (rg->eType==erotgRMPF) || (rg->eType==erotgRM2PF) )
+
+
+/* Does any of the rotation groups use slab decomposition? */
+static gmx_bool HaveFlexibleGroups(t_rot *rot)
+{
+ int g;
+ t_rotgrp *rotg;
+
+
+ for (g=0; g<rot->ngrp; g++)
+ {
+ rotg = &rot->grp[g];
+ if (ISFLEX(rotg))
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+/* Is for any group the fit angle determined by finding the minimum of the
+ * rotation potential? */
+static gmx_bool HavePotFitGroups(t_rot *rot)
+{
+ int g;
+ t_rotgrp *rotg;
+
+
+ for (g=0; g<rot->ngrp; g++)
+ {
+ rotg = &rot->grp[g];
+ if (erotgFitPOT == rotg->eFittype)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+static double** allocate_square_matrix(int dim)
+{
+ int i;
+ double** mat = NULL;
+
+
+ snew(mat, dim);
+ for(i=0; i<dim; i++)
+ snew(mat[i], dim);
+
+ return mat;
+}
+
+
+static void free_square_matrix(double** mat, int dim)
+{
+ int i;
+
+
+ for (i=0; i<dim; i++)
+ sfree(mat[i]);
+ sfree(mat);
+}
+
+
+/* Return the angle for which the potential is minimal */
+static real get_fitangle(t_rotgrp *rotg, gmx_enfrotgrp_t erg)
+{
+ int i;
+ real fitangle = -999.9;
+ real pot_min = GMX_FLOAT_MAX;
+ t_gmx_potfit *fit;
+
+
+ fit = erg->PotAngleFit;
+
+ for (i = 0; i < rotg->PotAngle_nstep; i++)
+ {
+ if (fit->V[i] < pot_min)
+ {
+ pot_min = fit->V[i];
+ fitangle = fit->degangle[i];
+ }
+ }
+
+ return fitangle;
+}
+
+
+/* Reduce potential angle fit data for this group at this time step? */
+static gmx_inline gmx_bool bPotAngle(t_rot *rot, t_rotgrp *rotg, gmx_large_int_t step)
+{
+ return ( (erotgFitPOT==rotg->eFittype) && (do_per_step(step, rot->nstsout) || do_per_step(step, rot->nstrout)) );
+}
+
+/* Reduce slab torqe data for this group at this time step? */
+static gmx_inline gmx_bool bSlabTau(t_rot *rot, t_rotgrp *rotg, gmx_large_int_t step)
+{
+ return ( (ISFLEX(rotg)) && do_per_step(step, rot->nstsout) );
+}
+
+/* Output rotation energy, torques, etc. for each rotation group */
+static void reduce_output(t_commrec *cr, t_rot *rot, real t, gmx_large_int_t step)
+{
+ int g,i,islab,nslabs=0;
+ int count; /* MPI element counter */
+ t_rotgrp *rotg;
+ gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */
+ gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+ real fitangle;
+ gmx_bool bFlex;
+
+
+ er=rot->enfrot;
+
+ /* Fill the MPI buffer with stuff to reduce. If items are added for reduction
+ * here, the MPI buffer size has to be enlarged also in calc_mpi_bufsize() */
+ if (PAR(cr))
+ {
+ count=0;
+ for (g=0; g < rot->ngrp; g++)
+ {
+ rotg = &rot->grp[g];
+ erg = rotg->enfrotgrp;
+ nslabs = erg->slab_last - erg->slab_first + 1;
+ er->mpi_inbuf[count++] = erg->V;
+ er->mpi_inbuf[count++] = erg->torque_v;
+ er->mpi_inbuf[count++] = erg->angle_v;
+ er->mpi_inbuf[count++] = erg->weight_v; /* weights are not needed for flex types, but this is just a single value */
+
+ if (bPotAngle(rot, rotg, step))
+ {
+ for (i = 0; i < rotg->PotAngle_nstep; i++)
+ er->mpi_inbuf[count++] = erg->PotAngleFit->V[i];
+ }
+ if (bSlabTau(rot, rotg, step))
+ {
+ for (i=0; i<nslabs; i++)
+ er->mpi_inbuf[count++] = erg->slab_torque_v[i];
+ }
+ }
+ if (count > er->mpi_bufsize)
+ gmx_fatal(FARGS, "%s MPI buffer overflow, please report this error.", RotStr);
+
+#ifdef GMX_MPI
+ MPI_Reduce(er->mpi_inbuf, er->mpi_outbuf, count, GMX_MPI_REAL, MPI_SUM, MASTERRANK(cr), cr->mpi_comm_mygroup);
+#endif
+
+ /* Copy back the reduced data from the buffer on the master */
+ if (MASTER(cr))
+ {
+ count=0;
+ for (g=0; g < rot->ngrp; g++)
+ {
+ rotg = &rot->grp[g];
+ erg = rotg->enfrotgrp;
+ nslabs = erg->slab_last - erg->slab_first + 1;
+ erg->V = er->mpi_outbuf[count++];
+ erg->torque_v = er->mpi_outbuf[count++];
+ erg->angle_v = er->mpi_outbuf[count++];
+ erg->weight_v = er->mpi_outbuf[count++];
+
+ if (bPotAngle(rot, rotg, step))
+ {
+ for (i = 0; i < rotg->PotAngle_nstep; i++)
+ erg->PotAngleFit->V[i] = er->mpi_outbuf[count++];
+ }
+ if (bSlabTau(rot, rotg, step))
+ {
+ for (i=0; i<nslabs; i++)
+ erg->slab_torque_v[i] = er->mpi_outbuf[count++];
+ }
+ }
+ }
+ }
+
+ /* Output */
+ if (MASTER(cr))
+ {
+ /* Angle and torque for each rotation group */
+ for (g=0; g < rot->ngrp; g++)
+ {
+ rotg=&rot->grp[g];
+ bFlex = ISFLEX(rotg);
+
+ erg=rotg->enfrotgrp;
+
+ /* Output to main rotation output file: */
+ if ( do_per_step(step, rot->nstrout) )
+ {
+ if (erotgFitPOT == rotg->eFittype)
+ {
+ fitangle = get_fitangle(rotg, erg);
+ }
+ else
+ {
+ if (bFlex)
+ fitangle = erg->angle_v; /* RMSD fit angle */
+ else
+ fitangle = (erg->angle_v/erg->weight_v)*180.0*M_1_PI;
+ }
+ fprintf(er->out_rot, "%12.4f", fitangle);
+ fprintf(er->out_rot, "%12.3e", erg->torque_v);
+ fprintf(er->out_rot, "%12.3e", erg->V);
+ }
+
+ if ( do_per_step(step, rot->nstsout) )
+ {
+ /* Output to torque log file: */
+ if (bFlex)
+ {
+ fprintf(er->out_torque, "%12.3e%6d", t, g);
+ for (i=erg->slab_first; i<=erg->slab_last; i++)
+ {
+ islab = i - erg->slab_first; /* slab index */
+ /* Only output if enough weight is in slab */
+ if (erg->slab_weights[islab] > rotg->min_gaussian)
+ fprintf(er->out_torque, "%6d%12.3e", i, erg->slab_torque_v[islab]);
+ }
+ fprintf(er->out_torque , "\n");
+ }
+
+ /* Output to angles log file: */
+ if (erotgFitPOT == rotg->eFittype)
+ {
+ fprintf(er->out_angles, "%12.3e%6d%12.4f", t, g, erg->degangle);
+ /* Output energies at a set of angles around the reference angle */
+ for (i = 0; i < rotg->PotAngle_nstep; i++)
+ fprintf(er->out_angles, "%12.3e", erg->PotAngleFit->V[i]);
+ fprintf(er->out_angles, "\n");
+ }
+ }
+ }
+ if ( do_per_step(step, rot->nstrout) )
+ fprintf(er->out_rot, "\n");
+ }
+}
+
+
+/* Add the forces from enforced rotation potential to the local forces.
+ * Should be called after the SR forces have been evaluated.
+ *
+ * Returns the sum of the local parts of the rotation potential energies of
+ * all rotation groups. On output steps it also triggers the reduction and
+ * file output of the per-group data (energy, torque, angles). */
+extern real add_rot_forces(t_rot *rot, rvec f[], t_commrec *cr, gmx_large_int_t step, real t)
+{
+    int g,l,ii;
+    t_rotgrp *rotg;
+    gmx_enfrot_t er;     /* Pointer to the enforced rotation buffer variables */
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    real Vrot = 0.0;     /* If more than one rotation group is present, Vrot
+                            assembles the local parts from all groups */
+
+
+    er=rot->enfrot;
+
+    /* Loop over enforced rotation groups (usually 1, though)
+     * Apply the forces from rotation potentials */
+    for (g=0; g<rot->ngrp; g++)
+    {
+        rotg = &rot->grp[g];
+        erg=rotg->enfrotgrp;
+        Vrot += erg->V;  /* add the local parts from the nodes */
+        for (l=0; l<erg->nat_loc; l++)
+        {
+            /* Get the right index of the local force */
+            ii = erg->ind_loc[l];
+            /* Add the precomputed rotation force for this local atom */
+            rvec_inc(f[ii],erg->f_rot_loc[l]);
+        }
+    }
+
+    /* Reduce energy,torque, angles etc. to get the sum values (per rotation group)
+     * on the master and output these values to file. */
+    if ( (do_per_step(step, rot->nstrout) || do_per_step(step, rot->nstsout)) && er->bOut)
+        reduce_output(cr, rot, t, step);
+
+    /* When appending, er->bOut is FALSE the first time to avoid duplicate entries */
+    er->bOut = TRUE;
+
+    PRINT_POT_TAU
+
+    return Vrot;
+}
+
+
+/* The Gaussian norm is chosen such that the sum of the gaussian functions
+ * over the slabs is approximately 1.0 everywhere */
+#define GAUSS_NORM 0.569917543430618
+
+
+/* Calculate the maximum beta that leads to a gaussian larger min_gaussian,
+ * also does some checks.
+ *
+ * min_gaussian  cutoff below which Gaussian weights are neglected (must be > 0)
+ * slab_dist     distance between adjacent slabs (must be > 0)
+ *
+ * Returns the beta value at which the Gaussian drops to exactly min_gaussian.
+ */
+static double calc_beta_max(real min_gaussian, real slab_dist)
+{
+    double sigma;
+    double arg;
+
+
+    /* Actually the next two checks are already made in grompp */
+    if (slab_dist <= 0)
+        gmx_fatal(FARGS, "Slab distance of flexible rotation groups must be > 0 !");
+    if (min_gaussian <= 0)
+        gmx_fatal(FARGS, "Cutoff value for Gaussian must be > 0. (You requested %f)", min_gaussian);
+
+    /* Define the sigma value */
+    sigma = 0.7*slab_dist;
+
+    /* Calculate the argument for the logarithm and check that the log() result is negative or 0 */
+    arg = min_gaussian/GAUSS_NORM;
+    if (arg > 1.0)
+        gmx_fatal(FARGS, "min_gaussian of flexible rotation groups must be <%g", GAUSS_NORM);
+
+    /* Invert the Gaussian: norm*exp(-beta^2/(2 sigma^2)) = min_gaussian */
+    return sqrt(-2.0*sigma*sigma*log(arg));
+}
+
+
+/* Distance (along the rotation vector) of position curr_x from the
+ * center plane of slab n */
+static gmx_inline real calc_beta(rvec curr_x, t_rotgrp *rotg, int n)
+{
+    real proj;
+
+
+    /* Project the position onto the (normalized) rotation vector */
+    proj = iprod(curr_x, rotg->vec);
+
+    return proj - n*rotg->slab_dist;
+}
+
+
+/* Gaussian weight of position curr_x with respect to slab n */
+static gmx_inline real gaussian_weight(rvec curr_x, t_rotgrp *rotg, int n)
+{
+    real sigma;  /* Width of the Gaussian */
+    real x_rel;  /* beta value in units of sigma */
+
+
+    /* The Gaussian width is 0.7 times the slab distance */
+    sigma = 0.7*rotg->slab_dist;
+
+    /* Distance from the slab center plane, in units of sigma */
+    x_rel = calc_beta(curr_x, rotg, n)/sigma;
+
+    /* Gaussian value of slab n for position curr_x */
+    return GAUSS_NORM * exp(-0.5*x_rel*x_rel);
+}
+
+
+/* Returns the weight in a single slab, also calculates the Gaussian- and mass-
+ * weighted sum of positions for that slab (returned in x_weighted_sum).
+ *
+ * j   absolute slab index
+ * xc  positions of the rotation group atoms
+ * mc  masses (or unit weights) of the rotation group atoms */
+static real get_slab_weight(int j, t_rotgrp *rotg, rvec xc[], real mc[], rvec *x_weighted_sum)
+{
+    rvec curr_x;           /* The position of an atom */
+    rvec curr_x_weighted;  /* The gaussian-weighted position */
+    real gaussian;         /* A single gaussian weight */
+    real wgauss;           /* gaussian times current mass */
+    real slabweight = 0.0; /* The sum of weights in the slab */
+    int i;
+
+
+    clear_rvec(*x_weighted_sum);
+
+    /* Loop over all atoms in the rotation group */
+    for (i=0; i<rotg->nat; i++)
+    {
+        copy_rvec(xc[i], curr_x);
+        gaussian = gaussian_weight(curr_x, rotg, j);
+        wgauss = gaussian * mc[i];
+        svmul(wgauss, curr_x, curr_x_weighted);
+        rvec_add(*x_weighted_sum, curr_x_weighted, *x_weighted_sum);
+        slabweight += wgauss;
+    } /* END of loop over rotation group atoms */
+
+    return slabweight;
+}
+
+
+/* Determine the Gaussian-weighted center of each slab of the rotation group
+ * and store them in erg->slab_center (and, for the first call, also in
+ * erg->slab_center_ref). Aborts if any slab has too little weight. */
+static void get_slab_centers(
+        t_rotgrp *rotg,       /* The rotation group information */
+        rvec *xc,             /* The rotation group positions; will
+                                 typically be enfrotgrp->xc, but at first call
+                                 it is enfrotgrp->xc_ref */
+        real *mc,             /* The masses of the rotation group atoms */
+        int g,                /* The number of the rotation group */
+        real time,            /* Used for output only */
+        FILE *out_slabs,      /* For outputting center per slab information */
+        gmx_bool bOutStep,    /* Is this an output step? */
+        gmx_bool bReference)  /* If this routine is called from
+                                 init_rot_group we need to store
+                                 the reference slab centers */
+{
+    int j,islab;
+    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Loop over slabs */
+    for (j = erg->slab_first; j <= erg->slab_last; j++)
+    {
+        islab = j - erg->slab_first;
+        /* Weighted sum of positions is returned in slab_center; total weight in slab_weights */
+        erg->slab_weights[islab] = get_slab_weight(j, rotg, xc, mc, &erg->slab_center[islab]);
+
+        /* We can do the calculations ONLY if there is weight in the slab! */
+        if (erg->slab_weights[islab] > WEIGHT_MIN)
+        {
+            /* Normalize the weighted sum to obtain the actual center */
+            svmul(1.0/erg->slab_weights[islab], erg->slab_center[islab], erg->slab_center[islab]);
+        }
+        else
+        {
+            /* We need to check this here, since we divide through slab_weights
+             * in the flexible low-level routines! */
+            gmx_fatal(FARGS, "Not enough weight in slab %d. Slab center cannot be determined!", j);
+        }
+
+        /* At first time step: save the centers of the reference structure */
+        if (bReference)
+            copy_rvec(erg->slab_center[islab], erg->slab_center_ref[islab]);
+    } /* END of loop over slabs */
+
+    /* Output on the master */
+    if ( (NULL != out_slabs) && bOutStep)
+    {
+        fprintf(out_slabs, "%12.3e%6d", time, g);
+        for (j = erg->slab_first; j <= erg->slab_last; j++)
+        {
+            islab = j - erg->slab_first;
+            fprintf(out_slabs, "%6d%12.3e%12.3e%12.3e",
+                    j,erg->slab_center[islab][XX],erg->slab_center[islab][YY],erg->slab_center[islab][ZZ]);
+        }
+        fprintf(out_slabs, "\n");
+    }
+}
+
+
+/* Construct the rotation matrix for a rotation of 'degangle' degrees around
+ * axis 'vec' (Rodrigues' rotation formula in matrix form).
+ * NOTE(review): vec is copied but not normalized here — presumably callers
+ * pass a normalized rotation vector; confirm at call sites. */
+static void calc_rotmat(
+        rvec vec,
+        real degangle,  /* Angle alpha of rotation at time t in degrees */
+        matrix rotmat)  /* Rotation matrix */
+{
+    real radangle;            /* Rotation angle in radians */
+    real cosa;                /* cosine alpha */
+    real sina;                /* sine alpha */
+    real OMcosa;              /* 1 - cos(alpha) */
+    real dumxy, dumxz, dumyz; /* save computations */
+    rvec rot_vec;             /* Rotate around rot_vec ... */
+
+
+    radangle = degangle * M_PI/180.0;
+    copy_rvec(vec , rot_vec );
+
+    /* Precompute some variables: */
+    cosa   = cos(radangle);
+    sina   = sin(radangle);
+    OMcosa = 1.0 - cosa;
+    dumxy  = rot_vec[XX]*rot_vec[YY]*OMcosa;
+    dumxz  = rot_vec[XX]*rot_vec[ZZ]*OMcosa;
+    dumyz  = rot_vec[YY]*rot_vec[ZZ]*OMcosa;
+
+    /* Construct the rotation matrix for this rotation group: */
+    /* 1st column: */
+    rotmat[XX][XX] = cosa  + rot_vec[XX]*rot_vec[XX]*OMcosa;
+    rotmat[YY][XX] = dumxy + rot_vec[ZZ]*sina;
+    rotmat[ZZ][XX] = dumxz - rot_vec[YY]*sina;
+    /* 2nd column: */
+    rotmat[XX][YY] = dumxy - rot_vec[ZZ]*sina;
+    rotmat[YY][YY] = cosa  + rot_vec[YY]*rot_vec[YY]*OMcosa;
+    rotmat[ZZ][YY] = dumyz + rot_vec[XX]*sina;
+    /* 3rd column: */
+    rotmat[XX][ZZ] = dumxz + rot_vec[YY]*sina;
+    rotmat[YY][ZZ] = dumyz - rot_vec[XX]*sina;
+    rotmat[ZZ][ZZ] = cosa  + rot_vec[ZZ]*rot_vec[ZZ]*OMcosa;
+
+#ifdef PRINTMATRIX
+    /* Debug-only dump of the matrix. NOTE(review): declarations after
+     * statements require C99 here if PRINTMATRIX is ever defined. */
+    int iii,jjj;
+
+    for (iii=0; iii<3; iii++) {
+        for (jjj=0; jjj<3; jjj++)
+            fprintf(stderr, " %10.8f ",  rotmat[iii][jjj]);
+        fprintf(stderr, "\n");
+    }
+#endif
+}
+
+
+/* Calculates the torque on the rotation axis: tau = (x - pivot) x force,
+ * projected onto the axis */
+static gmx_inline real torque(
+        rvec rotvec,  /* rotation vector; MUST be normalized! */
+        rvec force,   /* force */
+        rvec x,       /* position of atom on which the force acts */
+        rvec pivot)   /* pivot point of rotation axis */
+{
+    rvec r, tau_vec;
+
+
+    /* Lever arm: r = x - pivot */
+    rvec_sub(x, pivot, r);
+
+    /* Torque vector: tau = r x F */
+    cprod(r, force, tau_vec);
+
+    /* Return the part of the torque which is parallel to the rotation vector */
+    return iprod(tau_vec, rotvec);
+}
+
+
+/* Right-aligned output of value with standard width (12 characters) */
+static void print_aligned(FILE *fp, char *str)
+{
+    fprintf(fp, "%*s", 12, str);
+}
+
+
+/* Right-aligned output of value with standard short width (6 characters) */
+static void print_aligned_short(FILE *fp, char *str)
+{
+    fprintf(fp, "%*s", 6, str);
+}
+
+
+/* Open a new output file 'fn' for writing and print a header line stating
+ * the output interval. 'what' is a short description of the data used in the
+ * header. The caller is responsible for closing the file. */
+static FILE *open_output_file(const char *fn, int steps, const char what[])
+{
+    FILE *fp;
+
+
+    fp = ffopen(fn, "w");
+
+    fprintf(fp, "# Output of %s is written in intervals of %d time step%s.\n#\n",
+            what,steps, steps>1 ? "s":"");
+
+    return fp;
+}
+
+
+/* Open output file for slab center data. Call on master only.
+ * When appending (checkpoint restart) the existing file is reopened;
+ * otherwise a new file with a descriptive header and column legend is
+ * created. Only flexible-axis groups produce slab data. */
+static FILE *open_slab_out(const char *fn, t_rot *rot, const output_env_t oenv)
+{
+    FILE *fp;
+    int g,i;
+    t_rotgrp *rotg;
+
+
+    if (rot->enfrot->Flags & MD_APPENDFILES)
+    {
+        fp = gmx_fio_fopen(fn,"a");
+    }
+    else
+    {
+        fp = open_output_file(fn, rot->nstsout, "gaussian weighted slab centers");
+
+        for (g=0; g<rot->ngrp; g++)
+        {
+            rotg = &rot->grp[g];
+            if (ISFLEX(rotg))
+            {
+                fprintf(fp, "# Rotation group %d (%s), slab distance %f nm, %s.\n",
+                        g, erotg_names[rotg->eType], rotg->slab_dist,
+                        rotg->bMassW? "centers of mass":"geometrical centers");
+            }
+        }
+
+        fprintf(fp, "# Reference centers are listed first (t=-1).\n");
+        fprintf(fp, "# The following columns have the syntax:\n");
+        fprintf(fp, "#     ");
+        print_aligned_short(fp, "t");
+        print_aligned_short(fp, "grp");
+        /* Print legend for the first two entries only ... */
+        for (i=0; i<2; i++)
+        {
+            print_aligned_short(fp, "slab");
+            print_aligned(fp, "X center");
+            print_aligned(fp, "Y center");
+            print_aligned(fp, "Z center");
+        }
+        fprintf(fp, " ...\n");
+        fflush(fp);
+    }
+
+    return fp;
+}
+
+
+/* Adds 'buf' to 'str', growing the allocation of *str to fit.
+ * NOTE(review): assumes *str was allocated with snew/srenew and is
+ * '\0'-terminated — confirm at call sites. */
+static void add_to_string(char **str, char *buf)
+{
+    int len;
+
+
+    /* +1 for the terminating '\0' */
+    len = strlen(*str) + strlen(buf) + 1;
+    srenew(*str, len);
+    strcat(*str, buf);
+}
+
+
+/* Right-align 'buf' in a 12-character field and append it to 'str'.
+ * NOTE(review): buf_aligned is a fixed STRLEN buffer; assumes strlen(buf)
+ * stays well below STRLEN — confirm callers only pass short legend labels. */
+static void add_to_string_aligned(char **str, char *buf)
+{
+    char buf_aligned[STRLEN];
+
+    sprintf(buf_aligned, "%12s", buf);
+    add_to_string(str, buf_aligned);
+}
+
+
+/* Open output file and print some general information about the rotation groups.
+ * Call on master only.
+ * Writes per-group parameter documentation, an xvgr legend (one set of
+ * theta_ref columns followed by theta/tau/energy triples per group), and a
+ * plain-text column legend. */
+static FILE *open_rot_out(const char *fn, t_rot *rot, const output_env_t oenv)
+{
+    FILE *fp;
+    int g,nsets;
+    t_rotgrp *rotg;
+    const char **setname;
+    char buf[50], buf2[75];
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    gmx_bool bFlex;
+    char *LegendStr=NULL;
+
+
+    if (rot->enfrot->Flags & MD_APPENDFILES)
+    {
+        fp = gmx_fio_fopen(fn,"a");
+    }
+    else
+    {
+        fp = xvgropen(fn, "Rotation angles and energy", "Time (ps)", "angles (degrees) and energies (kJ/mol)", oenv);
+        fprintf(fp, "# Output of enforced rotation data is written in intervals of %d time step%s.\n#\n", rot->nstrout, rot->nstrout > 1 ? "s":"");
+        fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector v.\n");
+        fprintf(fp, "# To obtain the vectorial torque, multiply tau with the group's rot_vec.\n");
+        fprintf(fp, "# For flexible groups, tau(t,n) from all slabs n have been summed in a single value tau(t) here.\n");
+        fprintf(fp, "# The torques tau(t,n) are found in the rottorque.log (-rt) output file\n");
+
+        /* Document the parameters of each rotation group in the header */
+        for (g=0; g<rot->ngrp; g++)
+        {
+            rotg = &rot->grp[g];
+            erg=rotg->enfrotgrp;
+            bFlex = ISFLEX(rotg);
+
+            fprintf(fp, "#\n");
+            fprintf(fp, "# ROTATION GROUP %d, potential type '%s':\n" , g, erotg_names[rotg->eType]);
+            fprintf(fp, "# rot_massw%d %s\n" , g, yesno_names[rotg->bMassW]);
+            fprintf(fp, "# rot_vec%d %12.5e %12.5e %12.5e\n" , g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]);
+            fprintf(fp, "# rot_rate%d %12.5e degrees/ps\n" , g, rotg->rate);
+            fprintf(fp, "# rot_k%d %12.5e kJ/(mol*nm^2)\n" , g, rotg->k);
+            if ( rotg->eType==erotgISO || rotg->eType==erotgPM || rotg->eType==erotgRM || rotg->eType==erotgRM2)
+                fprintf(fp, "# rot_pivot%d %12.5e %12.5e %12.5e nm\n", g, rotg->pivot[XX], rotg->pivot[YY], rotg->pivot[ZZ]);
+
+            if (bFlex)
+            {
+                fprintf(fp, "# rot_slab_distance%d %f nm\n", g, rotg->slab_dist);
+                fprintf(fp, "# rot_min_gaussian%d %12.5e\n", g, rotg->min_gaussian);
+            }
+
+            /* Output the centers of the rotation groups for the pivot-free potentials */
+            if ((rotg->eType==erotgISOPF) || (rotg->eType==erotgPMPF) || (rotg->eType==erotgRMPF) || (rotg->eType==erotgRM2PF
+                || (rotg->eType==erotgFLEXT) || (rotg->eType==erotgFLEX2T)) )
+            {
+                fprintf(fp, "# ref. grp. %d center %12.5e %12.5e %12.5e\n", g,
+                        erg->xc_ref_center[XX], erg->xc_ref_center[YY], erg->xc_ref_center[ZZ]);
+
+                fprintf(fp, "# grp. %d init.center %12.5e %12.5e %12.5e\n", g,
+                        erg->xc_center[XX], erg->xc_center[YY], erg->xc_center[ZZ]);
+            }
+
+            if ( (rotg->eType == erotgRM2) || (rotg->eType==erotgFLEX2) || (rotg->eType==erotgFLEX2T) )
+            {
+                fprintf(fp, "# rot_eps%d %12.5e nm^2\n", g, rotg->eps);
+            }
+            if (erotgFitPOT == rotg->eFittype)
+            {
+                fprintf(fp, "#\n");
+                fprintf(fp, "# theta_fit%d is determined by first evaluating the potential for %d angles around theta_ref%d.\n",
+                        g, rotg->PotAngle_nstep, g);
+                fprintf(fp, "# The fit angle is the one with the smallest potential. It is given as the deviation\n");
+                fprintf(fp, "# from the reference angle, i.e. if theta_ref=X and theta_fit=Y, then the angle with\n");
+                fprintf(fp, "# minimal value of the potential is X+Y. Angular resolution is %g degrees.\n", rotg->PotAngle_step);
+            }
+        }
+
+        /* Print a nice legend */
+        snew(LegendStr, 1);
+        LegendStr[0] = '\0';
+        sprintf(buf, "#     %6s", "time");
+        add_to_string_aligned(&LegendStr, buf);
+
+        nsets = 0;
+        snew(setname, 4*rot->ngrp);
+
+        /* First all reference angles ... */
+        for (g=0; g<rot->ngrp; g++)
+        {
+            rotg = &rot->grp[g];
+            sprintf(buf, "theta_ref%d", g);
+            add_to_string_aligned(&LegendStr, buf);
+
+            sprintf(buf2, "%s (degrees)", buf);
+            setname[nsets] = strdup(buf2);
+            nsets++;
+        }
+        /* ... then (theta, tau, energy) per group */
+        for (g=0; g<rot->ngrp; g++)
+        {
+            rotg = &rot->grp[g];
+            bFlex = ISFLEX(rotg);
+
+            /* For flexible axis rotation we use RMSD fitting to determine the
+             * actual angle of the rotation group */
+            if (bFlex || erotgFitPOT == rotg->eFittype)
+                sprintf(buf, "theta_fit%d", g);
+            else
+                sprintf(buf, "theta_av%d", g);
+            add_to_string_aligned(&LegendStr, buf);
+            sprintf(buf2, "%s (degrees)", buf);
+            setname[nsets] = strdup(buf2);
+            nsets++;
+
+            sprintf(buf, "tau%d", g);
+            add_to_string_aligned(&LegendStr, buf);
+            sprintf(buf2, "%s (kJ/mol)", buf);
+            setname[nsets] = strdup(buf2);
+            nsets++;
+
+            sprintf(buf, "energy%d", g);
+            add_to_string_aligned(&LegendStr, buf);
+            sprintf(buf2, "%s (kJ/mol)", buf);
+            setname[nsets] = strdup(buf2);
+            nsets++;
+        }
+        fprintf(fp, "#\n");
+
+        if (nsets > 1)
+            xvgr_legend(fp, nsets, setname, oenv);
+        sfree(setname);
+
+        fprintf(fp, "#\n# Legend for the following data columns:\n");
+        fprintf(fp, "%s\n", LegendStr);
+        sfree(LegendStr);
+
+        fflush(fp);
+    }
+
+    return fp;
+}
+
+
+/* Open the output file for per-group angle data. Call on master only.
+ * Output is only produced for flexible groups or for groups with the
+ * potential-based fit type. */
+static FILE *open_angles_out(const char *fn, t_rot *rot, const output_env_t oenv)
+{
+    int g,i;
+    FILE *fp;
+    t_rotgrp *rotg;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    char buf[100];
+
+
+    if (rot->enfrot->Flags & MD_APPENDFILES)
+    {
+        fp = gmx_fio_fopen(fn,"a");
+    }
+    else
+    {
+        /* Open output file and write some information about it's structure: */
+        fp = open_output_file(fn, rot->nstsout, "rotation group angles");
+        fprintf(fp, "# All angles given in degrees, time in ps.\n");
+        for (g=0; g<rot->ngrp; g++)
+        {
+            rotg = &rot->grp[g];
+            erg=rotg->enfrotgrp;
+
+            /* Output for this group happens only if potential type is flexible or
+             * if fit type is potential! */
+            if ( ISFLEX(rotg) || (erotgFitPOT == rotg->eFittype) )
+            {
+                if (ISFLEX(rotg))
+                    sprintf(buf, " slab distance %f nm, ", rotg->slab_dist);
+                else
+                    buf[0] = '\0';
+
+                fprintf(fp, "#\n# ROTATION GROUP %d '%s',%s fit type '%s'.\n",
+                        g, erotg_names[rotg->eType], buf, erotg_fitnames[rotg->eFittype]);
+
+                /* Special type of fitting using the potential minimum. This is
+                 * done for the whole group only, not for the individual slabs. */
+                if (erotgFitPOT == rotg->eFittype)
+                {
+                    fprintf(fp, "# To obtain theta_fit%d, the potential is evaluated for %d angles around theta_ref%d\n", g, rotg->PotAngle_nstep, g);
+                    fprintf(fp, "# The fit angle in the rotation standard outfile is the one with minimal energy E(theta_fit) [kJ/mol].\n");
+                    fprintf(fp, "#\n");
+                }
+
+                fprintf(fp, "# Legend for the group %d data columns:\n", g);
+                fprintf(fp, "#     ");
+                print_aligned_short(fp, "time");
+                print_aligned_short(fp, "grp");
+                print_aligned(fp, "theta_ref");
+
+                if (erotgFitPOT == rotg->eFittype)
+                {
+                    /* Output the set of angles around the reference angle */
+                    for (i = 0; i < rotg->PotAngle_nstep; i++)
+                    {
+                        sprintf(buf, "E(%g)", erg->PotAngleFit->degangle[i]);
+                        print_aligned(fp, buf);
+                    }
+                }
+                else
+                {
+                    /* Output fit angle for each slab */
+                    print_aligned_short(fp, "slab");
+                    print_aligned_short(fp, "atoms");
+                    print_aligned(fp, "theta_fit");
+                    print_aligned_short(fp, "slab");
+                    print_aligned_short(fp, "atoms");
+                    print_aligned(fp, "theta_fit");
+                    fprintf(fp, " ...");
+                }
+                fprintf(fp, "\n");
+            }
+        }
+        fflush(fp);
+    }
+
+    return fp;
+}
+
+
+/* Open torque output file and write some information about it's structure.
+ * Call on master only. Only flexible groups produce per-slab torques. */
+static FILE *open_torque_out(const char *fn, t_rot *rot, const output_env_t oenv)
+{
+    FILE *fp;
+    int g;
+    t_rotgrp *rotg;
+
+
+    if (rot->enfrot->Flags & MD_APPENDFILES)
+    {
+        fp = gmx_fio_fopen(fn,"a");
+    }
+    else
+    {
+        fp = open_output_file(fn, rot->nstsout,"torques");
+
+        for (g=0; g<rot->ngrp; g++)
+        {
+            rotg = &rot->grp[g];
+            if (ISFLEX(rotg))
+            {
+                fprintf(fp, "# Rotation group %d (%s), slab distance %f nm.\n", g, erotg_names[rotg->eType], rotg->slab_dist);
+                fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector.\n");
+                fprintf(fp, "# To obtain the vectorial torque, multiply tau with\n");
+                fprintf(fp, "# rot_vec%d %10.3e %10.3e %10.3e\n", g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]);
+                fprintf(fp, "#\n");
+            }
+        }
+        fprintf(fp, "# Legend for the following data columns: (tau=torque for that slab):\n");
+        fprintf(fp, "#     ");
+        print_aligned_short(fp, "t");
+        print_aligned_short(fp, "grp");
+        print_aligned_short(fp, "slab");
+        print_aligned(fp, "tau");
+        print_aligned_short(fp, "slab");
+        print_aligned(fp, "tau");
+        fprintf(fp, " ...\n");
+        fflush(fp);
+    }
+
+    return fp;
+}
+
+
+/* Exchange the values at positions i and j of vector vec */
+static void swap_val(double* vec, int i, int j)
+{
+    double tmp;
+
+
+    tmp    = vec[i];
+    vec[i] = vec[j];
+    vec[j] = tmp;
+}
+
+
+/* Exchange columns i and j of the 3-row matrix mat */
+static void swap_col(double **mat, int i, int j)
+{
+    int k;
+    double tmp;
+
+
+    for (k = 0; k < 3; k++)
+    {
+        tmp       = mat[k][i];
+        mat[k][i] = mat[k][j];
+        mat[k][j] = tmp;
+    }
+}
+
+
+/* Diagonalize the symmetric 3x3 matrix 'matrix' with the Jacobi method.
+ * Eigenvectors are stored in columns of eigen_vec, eigenvalues in eigenval.
+ * Afterwards eigenvalues (and the corresponding eigenvector columns) are
+ * sorted in ascending order via a fixed 3-element sorting network. */
+static void diagonalize_symmetric(
+        double **matrix,
+        double **eigen_vec,
+        double eigenval[3])
+{
+    int n_rot; /* Number of Jacobi rotations performed (unused here) */
+
+
+    jacobi(matrix,3,eigenval,eigen_vec,&n_rot);
+
+    /* sort in ascending order */
+    if (eigenval[0] > eigenval[1])
+    {
+        swap_val(eigenval, 0, 1);
+        swap_col(eigen_vec, 0, 1);
+    }
+    if (eigenval[1] > eigenval[2])
+    {
+        swap_val(eigenval, 1, 2);
+        swap_col(eigen_vec, 1, 2);
+    }
+    if (eigenval[0] > eigenval[1])
+    {
+        swap_val(eigenval, 0, 1);
+        swap_col(eigen_vec, 0, 1);
+    }
+}
+
+
+/* Rotate the structure s (in place) such that the given axis points along z.
+ * 'axis' is normalized in place as a side effect. */
+static void align_with_z(
+        rvec* s,  /* Structure to align */
+        int natoms,
+        rvec axis)
+{
+    int i, j, k;
+    rvec zet = {0.0, 0.0, 1.0};
+    rvec rot_axis={0.0, 0.0, 0.0};
+    rvec *rotated_str=NULL;
+    real ooanorm;
+    real angle;
+    matrix rotmat;
+
+
+    snew(rotated_str, natoms);
+
+    /* Normalize the axis */
+    ooanorm = 1.0/norm(axis);
+    svmul(ooanorm, axis, axis);
+
+    /* Calculate the angle for the fitting procedure: rotate around the
+     * axis x z by the angle between axis and z */
+    cprod(axis, zet, rot_axis);
+    angle = acos(axis[2]);
+    /* NOTE(review): acos returns values in [0,pi], so this branch looks
+     * unreachable — confirm before removing. */
+    if (angle < 0.0)
+        angle += M_PI;
+
+    /* Calculate the rotation matrix */
+    calc_rotmat(rot_axis, angle*180.0/M_PI, rotmat);
+
+    /* Apply the rotation matrix to s */
+    for (i=0; i<natoms; i++)
+    {
+        for(j=0; j<3; j++)
+        {
+            for(k=0; k<3; k++)
+            {
+                rotated_str[i][j] += rotmat[j][k]*s[i][k];
+            }
+        }
+    }
+
+    /* Rewrite the rotated structure to s */
+    for(i=0; i<natoms; i++)
+    {
+        for(j=0; j<3; j++)
+        {
+            s[i][j]=rotated_str[i][j];
+        }
+    }
+
+    sfree(rotated_str);
+}
+
+
+/* Calculate the 3x3 correlation matrix R = Y X^t of the two structures
+ * (X = Xstr, Y = Ystr) */
+static void calc_correl_matrix(rvec* Xstr, rvec* Ystr, double** Rmat, int natoms)
+{
+    int i, j, k;
+
+
+    /* Start from a zeroed matrix */
+    for (i = 0; i < 3; i++)
+    {
+        for (j = 0; j < 3; j++)
+        {
+            Rmat[i][j] = 0.0;
+        }
+    }
+
+    /* Accumulate the contribution of every atom pair */
+    for (k = 0; k < natoms; k++)
+    {
+        for (i = 0; i < 3; i++)
+        {
+            for (j = 0; j < 3; j++)
+            {
+                Rmat[i][j] += Ystr[k][i] * Xstr[k][j];
+            }
+        }
+    }
+}
+
+
+static void weigh_coords(rvec* str, real* weight, int natoms)
+{
+ int i, j;
+
+
+ for(i=0; i<natoms; i++)
+ {
+ for(j=0; j<3; j++)
+ str[i][j] *= sqrt(weight[i]);
+ }
+}
+
+
+/* Analytically determine the angle (in degrees) of the optimal rotation of
+ * act_s onto ref_s around the given axis, using a 2D Kabsch-type fit in the
+ * plane perpendicular to the axis.
+ *
+ * ref_s/act_s  reference and actual structures (not modified)
+ * weight       per-atom fit weights, may be NULL for unweighted fit
+ * ref_com/act_com  centers of the two structures
+ * axis         rotation axis */
+static real opt_angle_analytic(
+        rvec* ref_s,
+        rvec* act_s,
+        real* weight,
+        int natoms,
+        rvec ref_com,
+        rvec act_com,
+        rvec axis)
+{
+    int i, j, k;
+    rvec *ref_s_1=NULL;
+    rvec *act_s_1=NULL;
+    rvec shift;
+    double **Rmat, **RtR, **eigvec;
+    double eigval[3];
+    double V[3][3], WS[3][3];
+    double rot_matrix[3][3];
+    double opt_angle;
+
+
+    /* Do not change the original coordinates */
+    snew(ref_s_1, natoms);
+    snew(act_s_1, natoms);
+    for(i=0; i<natoms; i++)
+    {
+        copy_rvec(ref_s[i], ref_s_1[i]);
+        copy_rvec(act_s[i], act_s_1[i]);
+    }
+
+    /* Translate the structures to the origin */
+    shift[XX] = -ref_com[XX];
+    shift[YY] = -ref_com[YY];
+    shift[ZZ] = -ref_com[ZZ];
+    translate_x(ref_s_1, natoms, shift);
+
+    shift[XX] = -act_com[XX];
+    shift[YY] = -act_com[YY];
+    shift[ZZ] = -act_com[ZZ];
+    translate_x(act_s_1, natoms, shift);
+
+    /* Align rotation axis with z */
+    align_with_z(ref_s_1, natoms, axis);
+    align_with_z(act_s_1, natoms, axis);
+
+    /* Correlation matrix */
+    Rmat = allocate_square_matrix(3);
+
+    /* Project out the z-component: the fit is done in the xy-plane only */
+    for (i=0; i<natoms; i++)
+    {
+        ref_s_1[i][2]=0.0;
+        act_s_1[i][2]=0.0;
+    }
+
+    /* Weight positions with sqrt(weight) */
+    if (NULL != weight)
+    {
+        weigh_coords(ref_s_1, weight, natoms);
+        weigh_coords(act_s_1, weight, natoms);
+    }
+
+    /* Calculate correlation matrices R=YXt (X=ref_s; Y=act_s) */
+    calc_correl_matrix(ref_s_1, act_s_1, Rmat, natoms);
+
+    /* Calculate RtR */
+    RtR = allocate_square_matrix(3);
+    for (i=0; i<3; i++)
+    {
+        for (j=0; j<3; j++)
+        {
+            for (k=0; k<3; k++)
+            {
+                RtR[i][j] += Rmat[k][i] * Rmat[k][j];
+            }
+        }
+    }
+    /* Diagonalize RtR */
+    snew(eigvec,3);
+    for (i=0; i<3; i++)
+        snew(eigvec[i],3);
+
+    diagonalize_symmetric(RtR, eigvec, eigval);
+    /* Reorder from ascending to descending eigenvalues */
+    swap_col(eigvec,0,1);
+    swap_col(eigvec,1,2);
+    swap_val(eigval,0,1);
+    swap_val(eigval,1,2);
+
+    /* Calculate V */
+    for(i=0; i<3; i++)
+    {
+        for(j=0; j<3; j++)
+        {
+            V[i][j]  = 0.0;
+            WS[i][j] = 0.0;
+        }
+    }
+
+    /* NOTE(review): divides by sqrt(eigval[j]); assumes the two largest
+     * eigenvalues are nonzero (degenerate/collinear structures would fail). */
+    for (i=0; i<2; i++)
+        for (j=0; j<2; j++)
+            WS[i][j] = eigvec[i][j] / sqrt(eigval[j]);
+
+    for (i=0; i<3; i++)
+    {
+        for (j=0; j<3; j++)
+        {
+            for (k=0; k<3; k++)
+            {
+                V[i][j] += Rmat[i][k]*WS[k][j];
+            }
+        }
+    }
+    free_square_matrix(Rmat, 3);
+
+    /* Calculate optimal rotation matrix */
+    for (i=0; i<3; i++)
+        for (j=0; j<3; j++)
+            rot_matrix[i][j] = 0.0;
+
+    for (i=0; i<3; i++)
+    {
+        for(j=0; j<3; j++)
+        {
+            for(k=0; k<3; k++){
+                rot_matrix[i][j] += eigvec[i][k]*V[j][k];
+            }
+        }
+    }
+    /* Rotation is in the xy-plane, the z-axis maps to itself */
+    rot_matrix[2][2] = 1.0;
+
+    /* In some cases abs(rot_matrix[0][0]) can be slighly larger
+     * than unity due to numerical inacurracies. To be able to calculate
+     * the acos function, we put these values back in range. */
+    if (rot_matrix[0][0] > 1.0)
+    {
+        rot_matrix[0][0] = 1.0;
+    }
+    else if (rot_matrix[0][0] < -1.0)
+    {
+        rot_matrix[0][0] = -1.0;
+    }
+
+    /* Determine the optimal rotation angle: */
+    opt_angle = (-1.0)*acos(rot_matrix[0][0])*180.0/M_PI;
+    /* The off-diagonal element determines the sign of the angle */
+    if (rot_matrix[0][1] < 0.0)
+        opt_angle = (-1.0)*opt_angle;
+
+    /* Give back some memory */
+    free_square_matrix(RtR, 3);
+    sfree(ref_s_1);
+    sfree(act_s_1);
+    for (i=0; i<3; i++)
+        sfree(eigvec[i]);
+    sfree(eigvec);
+
+    return (real) opt_angle;
+}
+
+
+/* Determine angle of the group by RMSD fit to the reference */
+/* Not parallelized, call this routine only on the master.
+ * Returns the fit angle in degrees relative to the fixed reference. */
+static real flex_fit_angle(t_rotgrp *rotg)
+{
+    int i;
+    rvec *fitcoords=NULL;
+    rvec center;         /* Center of positions passed to the fit routine */
+    real fitangle;       /* Angle of the rotation group derived by fitting */
+    rvec coord;
+    real scal;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Get the center of the rotation group.
+     * Note, again, erg->xc has been sorted in do_flexible */
+    get_center(erg->xc, erg->mc_sorted, rotg->nat, center);
+
+    /* === Determine the optimal fit angle for the rotation group === */
+    if (rotg->eFittype == erotgFitNORM)
+    {
+        /* Normalize every position to it's reference length */
+        for (i=0; i<rotg->nat; i++)
+        {
+            /* Put the center of the positions into the origin */
+            rvec_sub(erg->xc[i], center, coord);
+            /* Determine the scaling factor for the length: */
+            scal = erg->xc_ref_length[erg->xc_sortind[i]] / norm(coord);
+            /* Get position, multiply with the scaling factor and save */
+            svmul(scal, coord, erg->xc_norm[i]);
+        }
+        fitcoords = erg->xc_norm;
+    }
+    else
+    {
+        fitcoords = erg->xc;
+    }
+    /* From the point of view of the current positions, the reference has rotated
+     * backwards. Since we output the angle relative to the fixed reference,
+     * we need the minus sign. */
+    fitangle = -opt_angle_analytic(erg->xc_ref_sorted, fitcoords, erg->mc_sorted,
+                                   rotg->nat, erg->xc_ref_center, center, rotg->vec);
+
+    return fitangle;
+}
+
+
+/* Determine actual angle of each slab by RMSD fit to the reference */
+/* Not parallelized, call this routine only on the master.
+ * Writes one output line to fp with (t, group, theta_ref) followed by
+ * (slab, natoms, theta_fit) for each slab with enough atoms. */
+static void flex_fit_angle_perslab(
+        int  g,
+        t_rotgrp *rotg,
+        double t,
+        real degangle,
+        FILE *fp)
+{
+    int i,l,n,islab,ind;
+    rvec curr_x, ref_x;
+    rvec act_center;     /* Center of actual positions that are passed to the fit routine */
+    rvec ref_center;     /* Same for the reference positions */
+    real fitangle;       /* Angle of a slab derived from an RMSD fit to
+                          * the reference structure at t=0 */
+    t_gmx_slabdata *sd;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    real OOm_av;         /* 1/average_mass of a rotation group atom */
+    real m_rel;          /* Relative mass of a rotation group atom */
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Average mass of a rotation group atom: */
+    OOm_av = erg->invmass*rotg->nat;
+
+    /**********************************/
+    /* First collect the data we need */
+    /**********************************/
+
+    /* Collect the data for the individual slabs */
+    for (n = erg->slab_first; n <= erg->slab_last; n++)
+    {
+        islab = n - erg->slab_first; /* slab index */
+        sd = &(rotg->enfrotgrp->slab_data[islab]);
+        sd->nat = erg->lastatom[islab]-erg->firstatom[islab]+1;
+        ind = 0;
+
+        /* Loop over the relevant atoms in the slab */
+        for (l=erg->firstatom[islab]; l<=erg->lastatom[islab]; l++)
+        {
+            /* Current position of this atom: x[ii][XX/YY/ZZ] */
+            copy_rvec(erg->xc[l], curr_x);
+
+            /* The (unrotated) reference position of this atom is copied to ref_x.
+             * Beware, the xc coords have been sorted in do_flexible */
+            copy_rvec(erg->xc_ref_sorted[l], ref_x);
+
+            /* Save data for doing angular RMSD fit later */
+            /* Save the current atom position */
+            copy_rvec(curr_x, sd->x[ind]);
+            /* Save the corresponding reference position */
+            copy_rvec(ref_x , sd->ref[ind]);
+
+            /* Maybe also mass-weighting was requested. If yes, additionally
+             * multiply the weights with the relative mass of the atom. If not,
+             * multiply with unity. */
+            m_rel = erg->mc_sorted[l]*OOm_av;
+
+            /* Save the weight for this atom in this slab */
+            sd->weight[ind] = gaussian_weight(curr_x, rotg, n) * m_rel;
+
+            /* Next atom in this slab */
+            ind++;
+        }
+    }
+
+    /******************************/
+    /* Now do the fit calculation */
+    /******************************/
+
+    fprintf(fp, "%12.3e%6d%12.3f", t, g, degangle);
+
+    /* === Now do RMSD fitting for each slab === */
+    /* We require at least SLAB_MIN_ATOMS in a slab, such that the fit makes sense. */
+#define SLAB_MIN_ATOMS 4
+
+    for (n = erg->slab_first; n <= erg->slab_last; n++)
+    {
+        islab = n - erg->slab_first; /* slab index */
+        sd = &(rotg->enfrotgrp->slab_data[islab]);
+        if (sd->nat >= SLAB_MIN_ATOMS)
+        {
+            /* Get the center of the slabs reference and current positions */
+            get_center(sd->ref, sd->weight, sd->nat, ref_center);
+            get_center(sd->x  , sd->weight, sd->nat, act_center);
+            if (rotg->eFittype == erotgFitNORM)
+            {
+                /* Normalize every position to it's reference length
+                 * prior to performing the fit */
+                for (i=0; i<sd->nat;i++) /* Center */
+                {
+                    rvec_dec(sd->ref[i], ref_center);
+                    rvec_dec(sd->x[i]  , act_center);
+                    /* Normalize x_i such that it gets the same length as ref_i */
+                    svmul( norm(sd->ref[i])/norm(sd->x[i]), sd->x[i], sd->x[i] );
+                }
+                /* We already subtracted the centers */
+                clear_rvec(ref_center);
+                clear_rvec(act_center);
+            }
+            fitangle = -opt_angle_analytic(sd->ref, sd->x, sd->weight, sd->nat,
+                                           ref_center, act_center, rotg->vec);
+            fprintf(fp, "%6d%6d%12.3f", n, sd->nat, fitangle);
+        }
+    }
+    fprintf(fp     , "\n");
+
+#undef SLAB_MIN_ATOMS
+}
+
+
+/* Shift x with is, i.e. add the periodic images of the box vectors
+ * indicated by the integer shift vector is (as used in DD/PBC code) */
+static gmx_inline void shift_single_coord(matrix box, rvec x, const ivec is)
+{
+    int tx,ty,tz;
+
+
+    tx=is[XX];
+    ty=is[YY];
+    tz=is[ZZ];
+
+    if(TRICLINIC(box))
+    {
+        /* Triclinic box: off-diagonal box components contribute as well */
+        x[XX] += tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
+        x[YY] += ty*box[YY][YY]+tz*box[ZZ][YY];
+        x[ZZ] += tz*box[ZZ][ZZ];
+    } else
+    {
+        /* Rectangular box: diagonal components only */
+        x[XX] += tx*box[XX][XX];
+        x[YY] += ty*box[YY][YY];
+        x[ZZ] += tz*box[ZZ][ZZ];
+    }
+}
+
+
+/* Determine the 'home' slab of this atom which is the
+ * slab with the highest Gaussian weight of all */
+/* NOTE(review): macro argument 'a' is unparenthesized, and (int)(a+0.5)
+ * truncates toward zero, so for negative a this is not round-to-nearest
+ * (e.g. -0.9 -> 0, not -1) — confirm whether that asymmetry is intended. */
+#define round(a) (int)(a+0.5)
+static gmx_inline int get_homeslab(
+        rvec curr_x,   /* The position for which the home slab shall be determined */
+        rvec rotvec,   /* The rotation vector */
+        real slabdist) /* The slab distance */
+{
+    real dist;
+
+
+    /* The distance of the atom to the coordinate center (where the
+     * slab with index 0) is */
+    dist = iprod(rotvec, curr_x);
+
+    return round(dist / slabdist);
+}
+
+
+/* For a local atom determine the relevant slabs, i.e. slabs in
+ * which the gaussian is larger than min_gaussian.
+ * Fills erg->gn_slabind / erg->gn_atom (home slab first, then slabs above,
+ * then slabs below) and returns the number of relevant slabs. */
+static int get_single_atom_gaussians(
+        rvec curr_x,
+        t_rotgrp *rotg)
+{
+    int slab, homeslab;
+    real g;
+    int count = 0;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Determine the 'home' slab of this atom: */
+    homeslab = get_homeslab(curr_x, rotg->vec, rotg->slab_dist);
+
+    /* First determine the weight in the atoms home slab: */
+    g = gaussian_weight(curr_x, rotg, homeslab);
+
+    erg->gn_atom[count] = g;
+    erg->gn_slabind[count] = homeslab;
+    count++;
+
+
+    /* Determine the max slab: walk upwards until the weight drops below the cutoff */
+    slab = homeslab;
+    while (g > rotg->min_gaussian)
+    {
+        slab++;
+        g = gaussian_weight(curr_x, rotg, slab);
+        erg->gn_slabind[count]=slab;
+        erg->gn_atom[count]=g;
+        count++;
+    }
+    /* The last entry was below the cutoff; discard it */
+    count--;
+
+    /* Determine the min slab: walk downwards until the weight drops below the cutoff */
+    slab = homeslab;
+    do
+    {
+        slab--;
+        g = gaussian_weight(curr_x, rotg, slab);
+        erg->gn_slabind[count]=slab;
+        erg->gn_atom[count]=g;
+        count++;
+    }
+    while (g > rotg->min_gaussian);
+    /* Again discard the last, below-cutoff entry */
+    count--;
+
+    return count;
+}
+
+
+/* Pre-calculate, for every slab n, the inner sum over atoms i that enters
+ * both the second and the third force sum term of the flex2 potential.
+ * The result is stored per slab in erg->slab_innersumvec[] and consumed in
+ * do_flex2_lowlevel, so it does not have to be recomputed for every atom j. */
+static void flex2_precalc_inner_sum(t_rotgrp *rotg)
+{
+    int i,n,islab;
+    rvec xi;                /* positions in the i-sum */
+    rvec xcn, ycn;          /* the current and the reference slab centers */
+    real gaussian_xi;
+    rvec yi0;
+    rvec rin;               /* Helper variables */
+    real fac,fac2;
+    rvec innersumvec;
+    real OOpsii,OOpsiistar;
+    real sin_rin;           /* s_ii.r_ii */
+    rvec s_in,tmpvec,tmpvec2;
+    real mi,wi;             /* Mass-weighting of the positions */
+    real N_M;               /* N/M */
+    gmx_enfrotgrp_t erg;    /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+    N_M = rotg->nat * erg->invmass;
+
+    /* Loop over all slabs that contain something */
+    for (n=erg->slab_first; n <= erg->slab_last; n++)
+    {
+        islab = n - erg->slab_first; /* slab index */
+
+        /* The current center of this slab is saved in xcn: */
+        copy_rvec(erg->slab_center[islab], xcn);
+        /* ... and the reference center in ycn: */
+        copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
+
+        /*** D. Calculate the whole inner sum used for second and third sum */
+        /* For slab n, we need to loop over all atoms i again. Since we sorted
+         * the atoms with respect to the rotation vector, we know that it is sufficient
+         * to calculate from firstatom to lastatom only. All other contributions will
+         * be very small. */
+        clear_rvec(innersumvec);
+        for (i = erg->firstatom[islab]; i <= erg->lastatom[islab]; i++)
+        {
+            /* Coordinate xi of this atom */
+            copy_rvec(erg->xc[i],xi);
+
+            /* The i-weights */
+            gaussian_xi = gaussian_weight(xi,rotg,n);
+            mi = erg->mc_sorted[i];  /* need the sorted mass here */
+            wi = N_M*mi;
+
+            /* Calculate rin */
+            copy_rvec(erg->xc_ref_sorted[i],yi0); /* Reference position yi0 */
+            rvec_sub(yi0, ycn, tmpvec2);          /* tmpvec2 = yi0 - ycn      */
+            mvmul(erg->rotmat, tmpvec2, rin);     /* rin = Omega.(yi0 - ycn)  */
+
+            /* Calculate psi_i* and sin */
+            rvec_sub(xi, xcn, tmpvec2);           /* tmpvec2 = xi - xcn       */
+            cprod(rotg->vec, tmpvec2, tmpvec);    /* tmpvec = v x (xi - xcn)  */
+            OOpsiistar = norm2(tmpvec)+rotg->eps; /* OOpsii* = 1/psii* = |v x (xi-xcn)|^2 + eps */
+            OOpsii = norm(tmpvec);                /* OOpsii = 1 / psii = |v x (xi - xcn)| */
+
+            /*                           v x (xi - xcn)          */
+            unitv(tmpvec, s_in);    /*  sin = ----------------   */
+            /*                          |v x (xi - xcn)|         */
+
+            sin_rin=iprod(s_in,rin);  /* sin_rin = sin . rin     */
+
+            /* Now the whole sum */
+            fac = OOpsii/OOpsiistar;
+            svmul(fac, rin, tmpvec);
+            fac2 = fac*fac*OOpsii;
+            svmul(fac2*sin_rin, s_in, tmpvec2);
+            rvec_dec(tmpvec, tmpvec2);
+
+            svmul(wi*gaussian_xi*sin_rin, tmpvec, tmpvec2);
+
+            rvec_inc(innersumvec,tmpvec2);
+        } /* now we have the inner sum, used both for sum2 and sum3 */
+
+        /* Save it to be used in do_flex2_lowlevel */
+        copy_rvec(innersumvec, erg->slab_innersumvec[islab]);
+    } /* END of loop over slabs */
+}
+
+
+/* Pre-calculate, for every slab n, the inner sum vector S^n of the flex
+ * potential's second force contribution. The result is stored per slab in
+ * erg->slab_innersumvec[] and consumed in do_flex_lowlevel, so it does not
+ * have to be recomputed for every atom j. */
+static void flex_precalc_inner_sum(t_rotgrp *rotg)
+{
+    int i,n,islab;
+    rvec xi;                /* position */
+    rvec xcn, ycn;          /* the current and the reference slab centers */
+    rvec qin,rin;           /* q_i^n and r_i^n */
+    real bin;
+    rvec tmpvec;
+    rvec innersumvec;       /* Inner part of sum_n2 */
+    real gaussian_xi;       /* Gaussian weight gn(xi) */
+    real mi,wi;             /* Mass-weighting of the positions */
+    real N_M;               /* N/M */
+
+    gmx_enfrotgrp_t erg;    /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+    N_M = rotg->nat * erg->invmass;
+
+    /* Loop over all slabs that contain something */
+    for (n=erg->slab_first; n <= erg->slab_last; n++)
+    {
+        islab = n - erg->slab_first; /* slab index */
+
+        /* The current center of this slab is saved in xcn: */
+        copy_rvec(erg->slab_center[islab], xcn);
+        /* ... and the reference center in ycn: */
+        copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
+
+        /* For slab n, we need to loop over all atoms i again. Since we sorted
+         * the atoms with respect to the rotation vector, we know that it is sufficient
+         * to calculate from firstatom to lastatom only. All other contributions will
+         * be very small. */
+        clear_rvec(innersumvec);
+        for (i=erg->firstatom[islab]; i<=erg->lastatom[islab]; i++)
+        {
+            /* Coordinate xi of this atom */
+            copy_rvec(erg->xc[i],xi);
+
+            /* The i-weights */
+            gaussian_xi = gaussian_weight(xi,rotg,n);
+            mi = erg->mc_sorted[i];  /* need the sorted mass here */
+            wi = N_M*mi;
+
+            /* Calculate rin and qin */
+            rvec_sub(erg->xc_ref_sorted[i], ycn, tmpvec); /* tmpvec = yi0-ycn */
+            mvmul(erg->rotmat, tmpvec, rin);      /* rin = Omega.(yi0 - ycn)  */
+            cprod(rotg->vec, rin, tmpvec);    /* tmpvec = v x Omega*(yi0-ycn) */
+
+            /*                                 v x Omega*(yi0-ycn)    */
+            unitv(tmpvec, qin);           /* qin = --------------------- */
+            /*                                |v x Omega*(yi0-ycn)|   */
+
+            /* Calculate bin */
+            rvec_sub(xi, xcn, tmpvec);            /* tmpvec = xi-xcn          */
+            bin = iprod(qin, tmpvec);             /* bin  = qin*(xi-xcn)      */
+
+            svmul(wi*gaussian_xi*bin, qin, tmpvec);
+
+            /* Add this contribution to the inner sum: */
+            rvec_add(innersumvec, tmpvec, innersumvec);
+        } /* now we have the inner sum vector S^n for this slab */
+        /* Save it to be used in do_flex_lowlevel */
+        copy_rvec(innersumvec, erg->slab_innersumvec[islab]);
+    }
+}
+
+
+/* Compute potential and forces for the flex2 (and flex2-t) rotation
+ * potential. Returns this node's contribution V to the rotation potential
+ * energy; the per-atom forces are stored in erg->f_rot_loc[] and, when
+ * bOutstepRot is set, per-slab torques are accumulated in
+ * erg->slab_torque_v[]. Expects that the collective positions erg->xc have
+ * been sorted along the rotation vector and that the slab centers and
+ * first/last atom indices per slab are up to date. */
+static real do_flex2_lowlevel(
+    t_rotgrp *rotg,
+    real sigma,             /* The Gaussian width sigma */
+    rvec x[],
+    gmx_bool bOutstepRot,
+    gmx_bool bOutstepSlab,
+    matrix box)
+{
+    int count,ic,ii,j,m,n,islab,iigrp,ifit;
+    rvec xj;                /* position in the i-sum */
+    rvec yj0;               /* the reference position in the j-sum */
+    rvec xcn, ycn;          /* the current and the reference slab centers */
+    real V;                 /* This node's part of the rotation pot. energy */
+    real gaussian_xj;       /* Gaussian weight */
+    real beta;
+
+    real numerator,fit_numerator;
+    rvec rjn,fit_rjn;       /* Helper variables */
+    real fac,fac2;
+
+    real OOpsij,OOpsijstar;
+    real OOsigma2;          /* 1/(sigma^2) */
+    real sjn_rjn;
+    real betasigpsi;
+    rvec sjn,tmpvec,tmpvec2,yj0_ycn;
+    rvec sum1vec_part,sum1vec,sum2vec_part,sum2vec,sum3vec,sum4vec,innersumvec;
+    real sum3,sum4;
+    gmx_enfrotgrp_t erg;    /* Pointer to enforced rotation group data */
+    real mj,wj;             /* Mass-weighting of the positions */
+    real N_M;               /* N/M */
+    real Wjn;               /* g_n(x_j) m_j / Mjn */
+    gmx_bool bCalcPotFit;
+
+    /* To calculate the torque per slab */
+    rvec slab_force;        /* Single force from slab n on one atom */
+    rvec slab_sum1vec_part;
+    real slab_sum3part,slab_sum4part;
+    rvec slab_sum1vec, slab_sum2vec, slab_sum3vec, slab_sum4vec;
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Pre-calculate the inner sums, so that we do not have to calculate
+     * them again for every atom */
+    flex2_precalc_inner_sum(rotg);
+
+    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT==rotg->eFittype);
+
+    /********************************************************/
+    /* Main loop over all local atoms of the rotation group */
+    /********************************************************/
+    N_M = rotg->nat * erg->invmass;
+    V = 0.0;
+    OOsigma2 = 1.0 / (sigma*sigma);
+    for (j=0; j<erg->nat_loc; j++)
+    {
+        /* Local index of a rotation group atom  */
+        ii = erg->ind_loc[j];
+        /* Position of this atom in the collective array */
+        iigrp = erg->xc_ref_ind[j];
+        /* Mass-weighting */
+        mj = erg->mc[iigrp];  /* need the unsorted mass here */
+        wj = N_M*mj;
+
+        /* Current position of this atom: x[ii][XX/YY/ZZ]
+         * Note that erg->xc_center contains the center of mass in case the flex2-t
+         * potential was chosen. For the flex2 potential erg->xc_center must be
+         * zero. */
+        rvec_sub(x[ii], erg->xc_center, xj);
+
+        /* Shift this atom such that it is near its reference */
+        shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
+
+        /* Determine the slabs to loop over, i.e. the ones with contributions
+         * larger than min_gaussian */
+        count = get_single_atom_gaussians(xj, rotg);
+
+        clear_rvec(sum1vec_part);
+        clear_rvec(sum2vec_part);
+        sum3 = 0.0;
+        sum4 = 0.0;
+        /* Loop over the relevant slabs for this atom */
+        for (ic=0; ic < count; ic++)
+        {
+            n = erg->gn_slabind[ic];
+
+            /* Get the precomputed Gaussian value of curr_slab for curr_x */
+            gaussian_xj = erg->gn_atom[ic];
+
+            islab = n - erg->slab_first; /* slab index */
+
+            /* The (unrotated) reference position of this atom is copied to yj0: */
+            copy_rvec(rotg->x_ref[iigrp], yj0);
+
+            beta = calc_beta(xj, rotg,n);
+
+            /* The current center of this slab is saved in xcn: */
+            copy_rvec(erg->slab_center[islab], xcn);
+            /* ... and the reference center in ycn: */
+            copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
+
+            rvec_sub(yj0, ycn, yj0_ycn);          /* yj0_ycn = yj0 - ycn      */
+
+            /* Rotate: */
+            mvmul(erg->rotmat, yj0_ycn, rjn);     /* rjn = Omega.(yj0 - ycn)  */
+
+            /* Subtract the slab center from xj */
+            rvec_sub(xj, xcn, tmpvec2);           /* tmpvec2 = xj - xcn       */
+
+            /* Calculate sjn */
+            cprod(rotg->vec, tmpvec2, tmpvec);    /* tmpvec = v x (xj - xcn)  */
+
+            OOpsijstar = norm2(tmpvec)+rotg->eps; /* OOpsij* = 1/psij* = |v x (xj-xcn)|^2 + eps */
+
+            numerator = sqr(iprod(tmpvec, rjn));
+
+            /*********************************/
+            /* Add to the rotation potential */
+            /*********************************/
+            V += 0.5*rotg->k*wj*gaussian_xj*numerator/OOpsijstar;
+
+            /* If requested, also calculate the potential for a set of angles
+             * near the current reference angle */
+            if (bCalcPotFit)
+            {
+                for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
+                {
+                    mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, fit_rjn);
+                    fit_numerator = sqr(iprod(tmpvec, fit_rjn));
+                    erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*fit_numerator/OOpsijstar;
+                }
+            }
+
+            /*************************************/
+            /* Now calculate the force on atom j */
+            /*************************************/
+
+            OOpsij = norm(tmpvec);    /* OOpsij = 1 / psij = |v x (xj - xcn)| */
+
+            /*                              v x (xj - xcn)          */
+            unitv(tmpvec, sjn);      /*  sjn = ----------------     */
+            /*                             |v x (xj - xcn)|         */
+
+            sjn_rjn=iprod(sjn,rjn);  /* sjn_rjn = sjn . rjn         */
+
+
+            /*** A. Calculate the first of the four sum terms: ****************/
+            fac = OOpsij/OOpsijstar;
+            svmul(fac, rjn, tmpvec);
+            fac2 = fac*fac*OOpsij;
+            svmul(fac2*sjn_rjn, sjn, tmpvec2);
+            rvec_dec(tmpvec, tmpvec2);
+            fac2 = wj*gaussian_xj; /* also needed for sum4 */
+            svmul(fac2*sjn_rjn, tmpvec, slab_sum1vec_part);
+            /********************/
+            /*** Add to sum1: ***/
+            /********************/
+            rvec_inc(sum1vec_part, slab_sum1vec_part); /* sum1 still needs to vector multiplied with v */
+
+            /*** B. Calculate the forth of the four sum terms: ****************/
+            betasigpsi = beta*OOsigma2*OOpsij; /* this is also needed for sum3 */
+            /********************/
+            /*** Add to sum4: ***/
+            /********************/
+            slab_sum4part = fac2*betasigpsi*fac*sjn_rjn*sjn_rjn; /* Note that fac is still valid from above */
+            sum4 += slab_sum4part;
+
+            /*** C. Calculate Wjn for second and third sum */
+            /* Note that we can safely divide by slab_weights since we check in
+             * get_slab_centers that it is non-zero. */
+            Wjn = gaussian_xj*mj/erg->slab_weights[islab];
+
+            /* We already have precalculated the inner sum for slab n */
+            copy_rvec(erg->slab_innersumvec[islab], innersumvec);
+
+            /* Weigh the inner sum vector with Wjn */
+            svmul(Wjn, innersumvec, innersumvec);
+
+            /*** E. Calculate the second of the four sum terms: */
+            /********************/
+            /*** Add to sum2: ***/
+            /********************/
+            rvec_inc(sum2vec_part, innersumvec); /* sum2 still needs to be vector crossproduct'ed with v */
+
+            /*** F. Calculate the third of the four sum terms: */
+            slab_sum3part = betasigpsi * iprod(sjn, innersumvec);
+            sum3 += slab_sum3part; /* still needs to be multiplied with v */
+
+            /*** G. Calculate the torque on the local slab's axis: */
+            if (bOutstepRot)
+            {
+                /* Sum1 */
+                cprod(slab_sum1vec_part, rotg->vec, slab_sum1vec);
+                /* Sum2 */
+                cprod(innersumvec, rotg->vec, slab_sum2vec);
+                /* Sum3 */
+                svmul(slab_sum3part, rotg->vec, slab_sum3vec);
+                /* Sum4 */
+                svmul(slab_sum4part, rotg->vec, slab_sum4vec);
+
+                /* The force on atom ii from slab n only: */
+                for (m=0; m<DIM; m++)
+                    slab_force[m] = rotg->k * (-slab_sum1vec[m] + slab_sum2vec[m] - slab_sum3vec[m] + 0.5*slab_sum4vec[m]);
+
+                erg->slab_torque_v[islab] += torque(rotg->vec, slab_force, xj, xcn);
+            }
+        } /* END of loop over slabs */
+
+        /* Construct the four individual parts of the vector sum: */
+        cprod(sum1vec_part, rotg->vec, sum1vec);      /* sum1vec =   { } x v  */
+        cprod(sum2vec_part, rotg->vec, sum2vec);      /* sum2vec =   { } x v  */
+        svmul(sum3, rotg->vec, sum3vec);              /* sum3vec =   { } . v  */
+        svmul(sum4, rotg->vec, sum4vec);              /* sum4vec =   { } . v  */
+
+        /* Store the additional force so that it can be added to the force
+         * array after the normal forces have been evaluated */
+        for (m=0; m<DIM; m++)
+            erg->f_rot_loc[j][m] = rotg->k * (-sum1vec[m] + sum2vec[m] - sum3vec[m] + 0.5*sum4vec[m]);
+
+#ifdef SUM_PARTS
+        fprintf(stderr, "sum1: %15.8f %15.8f %15.8f\n",    -rotg->k*sum1vec[XX],    -rotg->k*sum1vec[YY],    -rotg->k*sum1vec[ZZ]);
+        fprintf(stderr, "sum2: %15.8f %15.8f %15.8f\n",     rotg->k*sum2vec[XX],     rotg->k*sum2vec[YY],     rotg->k*sum2vec[ZZ]);
+        fprintf(stderr, "sum3: %15.8f %15.8f %15.8f\n",    -rotg->k*sum3vec[XX],    -rotg->k*sum3vec[YY],    -rotg->k*sum3vec[ZZ]);
+        fprintf(stderr, "sum4: %15.8f %15.8f %15.8f\n", 0.5*rotg->k*sum4vec[XX], 0.5*rotg->k*sum4vec[YY], 0.5*rotg->k*sum4vec[ZZ]);
+#endif
+
+        PRINT_FORCE_J
+
+    } /* END of loop over local atoms */
+
+    return V;
+}
+
+
+/* Compute potential and forces for the flex (and flex-t) rotation
+ * potential. Returns this node's contribution V to the rotation potential
+ * energy; the per-atom forces are stored in erg->f_rot_loc[] and, when
+ * bOutstepRot is set, per-slab torques are accumulated in
+ * erg->slab_torque_v[]. Expects that the collective positions erg->xc have
+ * been sorted along the rotation vector and that the slab centers and
+ * first/last atom indices per slab are up to date. */
+static real do_flex_lowlevel(
+    t_rotgrp *rotg,
+    real sigma,             /* The Gaussian width sigma */
+    rvec x[],
+    gmx_bool bOutstepRot,
+    gmx_bool bOutstepSlab,
+    matrix box)
+{
+    int count,ic,ifit,ii,j,m,n,islab,iigrp;
+    rvec xj,yj0;            /* current and reference position */
+    rvec xcn, ycn;          /* the current and the reference slab centers */
+    rvec yj0_ycn;           /* yj0 - ycn */
+    rvec xj_xcn;            /* xj - xcn */
+    rvec qjn,fit_qjn;       /* q_i^n */
+    rvec sum_n1,sum_n2;     /* Two contributions to the rotation force */
+    rvec innersumvec;       /* Inner part of sum_n2 */
+    rvec s_n;
+    rvec force_n;           /* Single force from slab n on one atom */
+    rvec force_n1,force_n2; /* First and second part of force_n */
+    rvec tmpvec,tmpvec2,tmp_f; /* Helper variables */
+    real V;                 /* The rotation potential energy */
+    real OOsigma2;          /* 1/(sigma^2) */
+    real beta;              /* beta_n(xj) */
+    real bjn, fit_bjn;      /* b_j^n */
+    real gaussian_xj;       /* Gaussian weight gn(xj) */
+    real betan_xj_sigma2;
+    real mj,wj;             /* Mass-weighting of the positions */
+    real N_M;               /* N/M */
+    gmx_enfrotgrp_t erg;    /* Pointer to enforced rotation group data */
+    gmx_bool bCalcPotFit;
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Pre-calculate the inner sums, so that we do not have to calculate
+     * them again for every atom */
+    flex_precalc_inner_sum(rotg);
+
+    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT==rotg->eFittype);
+
+    /********************************************************/
+    /* Main loop over all local atoms of the rotation group */
+    /********************************************************/
+    OOsigma2 = 1.0/(sigma*sigma);
+    N_M = rotg->nat * erg->invmass;
+    V = 0.0;
+    for (j=0; j<erg->nat_loc; j++)
+    {
+        /* Local index of a rotation group atom  */
+        ii = erg->ind_loc[j];
+        /* Position of this atom in the collective array */
+        iigrp = erg->xc_ref_ind[j];
+        /* Mass-weighting */
+        mj = erg->mc[iigrp];  /* need the unsorted mass here */
+        wj = N_M*mj;
+
+        /* Current position of this atom: x[ii][XX/YY/ZZ]
+         * Note that erg->xc_center contains the center of mass in case the flex-t
+         * potential was chosen. For the flex potential erg->xc_center must be
+         * zero. */
+        rvec_sub(x[ii], erg->xc_center, xj);
+
+        /* Shift this atom such that it is near its reference */
+        shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
+
+        /* Determine the slabs to loop over, i.e. the ones with contributions
+         * larger than min_gaussian */
+        count = get_single_atom_gaussians(xj, rotg);
+
+        clear_rvec(sum_n1);
+        clear_rvec(sum_n2);
+
+        /* Loop over the relevant slabs for this atom */
+        for (ic=0; ic < count; ic++)
+        {
+            n = erg->gn_slabind[ic];
+
+            /* Get the precomputed Gaussian for xj in slab n */
+            gaussian_xj = erg->gn_atom[ic];
+
+            islab = n - erg->slab_first; /* slab index */
+
+            /* The (unrotated) reference position of this atom is saved in yj0: */
+            copy_rvec(rotg->x_ref[iigrp], yj0);
+
+            beta = calc_beta(xj, rotg, n);
+
+            /* The current center of this slab is saved in xcn: */
+            copy_rvec(erg->slab_center[islab], xcn);
+            /* ... and the reference center in ycn: */
+            copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn);
+
+            rvec_sub(yj0, ycn, yj0_ycn); /* yj0_ycn = yj0 - ycn */
+
+            /* Rotate: */
+            mvmul(erg->rotmat, yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */
+
+            /* Subtract the slab center from xj */
+            rvec_sub(xj, xcn, xj_xcn);           /* xj_xcn = xj - xcn         */
+
+            /* Calculate qjn */
+            cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec= v x Omega.(yj0-ycn) */
+
+            /*                         v x Omega.(yj0-ycn)    */
+            unitv(tmpvec,qjn);    /* qjn = --------------------- */
+            /*                        |v x Omega.(yj0-ycn)|   */
+
+            bjn = iprod(qjn, xj_xcn);   /* bjn = qjn * (xj - xcn) */
+
+            /*********************************/
+            /* Add to the rotation potential */
+            /*********************************/
+            V += 0.5*rotg->k*wj*gaussian_xj*sqr(bjn);
+
+            /* If requested, also calculate the potential for a set of angles
+             * near the current reference angle */
+            if (bCalcPotFit)
+            {
+                for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
+                {
+                    /* As above calculate Omega.(yj0-ycn), now for the other angles */
+                    mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */
+                    /* As above calculate qjn */
+                    cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec= v x Omega.(yj0-ycn) */
+
+                    /*                                             v x Omega.(yj0-ycn)    */
+                    unitv(tmpvec,fit_qjn);      /*  fit_qjn = --------------------- */
+                    /*                                            |v x Omega.(yj0-ycn)|   */
+
+                    fit_bjn = iprod(fit_qjn, xj_xcn);   /* fit_bjn = fit_qjn * (xj - xcn) */
+                    /* Add to the rotation potential for this angle */
+                    erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*sqr(fit_bjn);
+                }
+            }
+
+            /****************************************************************/
+            /* sum_n1 will typically be the main contribution to the force: */
+            /****************************************************************/
+            betan_xj_sigma2 = beta*OOsigma2;  /*  beta_n(xj)/sigma^2  */
+
+            /* The next lines calculate
+             *  qjn - (bjn*beta(xj)/(2sigma^2))v  */
+            svmul(bjn*0.5*betan_xj_sigma2, rotg->vec, tmpvec2);
+            rvec_sub(qjn,tmpvec2,tmpvec);
+
+            /* Multiply with gn(xj)*bjn: */
+            svmul(gaussian_xj*bjn,tmpvec,tmpvec2);
+
+            /* Sum over n: */
+            rvec_inc(sum_n1,tmpvec2);
+
+            /* We already have precalculated the Sn term for slab n */
+            copy_rvec(erg->slab_innersumvec[islab], s_n);
+            /*                                                             beta_n(xj)              */
+            svmul(betan_xj_sigma2*iprod(s_n, xj_xcn), rotg->vec, tmpvec); /* tmpvec = ---------- s_n (xj-xcn) */
+            /*                                                              sigma^2               */
+
+            rvec_sub(s_n, tmpvec, innersumvec);
+
+            /* We can safely divide by slab_weights since we check in get_slab_centers
+             * that it is non-zero. */
+            svmul(gaussian_xj/erg->slab_weights[islab], innersumvec, innersumvec);
+
+            rvec_add(sum_n2, innersumvec, sum_n2);
+
+            /* Calculate the torque: */
+            if (bOutstepRot)
+            {
+                /* The force on atom ii from slab n only: */
+                svmul(-rotg->k*wj, tmpvec2    , force_n1); /* part 1 */
+                svmul( rotg->k*mj, innersumvec, force_n2); /* part 2 */
+                rvec_add(force_n1, force_n2, force_n);
+                erg->slab_torque_v[islab] += torque(rotg->vec, force_n, xj, xcn);
+            }
+        } /* END of loop over slabs */
+
+        /* Put both contributions together: */
+        svmul(wj, sum_n1, sum_n1);
+        svmul(mj, sum_n2, sum_n2);
+        rvec_sub(sum_n2,sum_n1,tmp_f); /* F = -grad V */
+
+        /* Store the additional force so that it can be added to the force
+         * array after the normal forces have been evaluated */
+        for(m=0; m<DIM; m++)
+            erg->f_rot_loc[j][m] = rotg->k*tmp_f[m];
+
+        PRINT_FORCE_J
+
+    } /* END of loop over local atoms */
+
+    return V;
+}
+
+#ifdef PRINT_COORDS
+/* Debug helper: dump the box matrix and the collective rotation group
+ * positions to a per-node file "coords<nodeid>.txt", appending one record
+ * per time step.
+ * NOTE(review): this function references 'cr' and 'erg' which are not among
+ * its parameters; it can only compile when PRINT_COORDS is enabled in a
+ * context that provides them - confirm before enabling the define. */
+static void print_coordinates(t_rotgrp *rotg, rvec x[], matrix box, int step)
+{
+    int i;
+    static FILE *fp;
+    static char buf[STRLEN];
+    static gmx_bool bFirst=1;
+
+
+    if (bFirst)
+    {
+        sprintf(buf, "coords%d.txt", cr->nodeid);
+        fp = fopen(buf, "w");
+        bFirst = 0;
+    }
+
+    fprintf(fp, "\nStep %d\n", step);
+    /* Fixed: the last box row previously printed box[ZZ][ZZ] twice and
+     * omitted box[ZZ][YY] */
+    fprintf(fp, "box: %f %f %f %f %f %f %f %f %f\n",
+            box[XX][XX], box[XX][YY], box[XX][ZZ],
+            box[YY][XX], box[YY][YY], box[YY][ZZ],
+            box[ZZ][XX], box[ZZ][YY], box[ZZ][ZZ]);
+    for (i=0; i<rotg->nat; i++)
+    {
+        fprintf(fp, "%4d  %f %f %f\n", i,
+                erg->xc[i][XX], erg->xc[i][YY], erg->xc[i][ZZ]);
+    }
+    fflush(fp);
+
+}
+#endif
+
+
+/* qsort() comparison callback: orders two sort_along_vec_t entries by
+ * their projection onto the rotation vector, ascending. */
+static int projection_compare(const void *a, const void *b)
+{
+    sort_along_vec_t *lhs = (sort_along_vec_t *)a;
+    sort_along_vec_t *rhs = (sort_along_vec_t *)b;
+
+
+    /* Three-way comparison on the stored projection value */
+    return (lhs->xcproj < rhs->xcproj) ? -1
+         : (lhs->xcproj > rhs->xcproj) ?  1
+         :                                0;
+}
+
+
+/* Sort the collective positions erg->xc (together with masses, reference
+ * positions, and original indices) along the rotation vector. The sorted
+ * order lets the slab loops restrict themselves to a contiguous
+ * firstatom..lastatom range per slab. */
+static void sort_collective_coordinates(
+    t_rotgrp *rotg,         /* Rotation group */
+    sort_along_vec_t *data) /* Buffer for sorting the positions */
+{
+    int i;
+    gmx_enfrotgrp_t erg;    /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+
+    /* The projection of the position vector on the rotation vector is
+     * the relevant value for sorting. Fill the 'data' structure */
+    for (i=0; i<rotg->nat; i++)
+    {
+        data[i].xcproj = iprod(erg->xc[i], rotg->vec);  /* sort criterium */
+        data[i].m      = erg->mc[i];
+        data[i].ind    = i;
+        copy_rvec(erg->xc[i]    , data[i].x    );
+        copy_rvec(rotg->x_ref[i], data[i].x_ref);
+    }
+    /* Sort the 'data' structure */
+    gmx_qsort(data, rotg->nat, sizeof(sort_along_vec_t), projection_compare);
+
+    /* Copy back the sorted values */
+    for (i=0; i<rotg->nat; i++)
+    {
+        copy_rvec(data[i].x    , erg->xc[i]           );
+        copy_rvec(data[i].x_ref, erg->xc_ref_sorted[i]);
+        erg->mc_sorted[i]  = data[i].m;
+        erg->xc_sortind[i] = data[i].ind;
+    }
+}
+
+
+/* For each slab, get the first and the last index of the sorted atom
+ * indices
+ *
+ * Relies on erg->xc having been sorted along the rotation vector: for each
+ * slab n, erg->firstatom[islab]/erg->lastatom[islab] bracket the atoms whose
+ * beta value lies within +/- erg->max_beta of the slab, so inner loops can
+ * skip all atoms outside that window. */
+static void get_firstlast_atom_per_slab(t_rotgrp *rotg)
+{
+    int i,islab,n;
+    real beta;
+    gmx_enfrotgrp_t erg;     /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Find the first atom that needs to enter the calculation for each slab */
+    n = erg->slab_first;  /* slab */
+    i = 0;                /* start with the first atom */
+    do
+    {
+        /* Find the first atom that significantly contributes to this slab */
+        do /* move forward in position until a large enough beta is found */
+        {
+            beta = calc_beta(erg->xc[i], rotg, n);
+            i++;
+        } while ((beta < -erg->max_beta) && (i < rotg->nat));
+        i--;
+        islab = n - erg->slab_first;  /* slab index */
+        erg->firstatom[islab] = i;
+        /* Proceed to the next slab (i keeps its value, since slabs overlap) */
+        n++;
+    } while (n <= erg->slab_last);
+
+    /* Find the last atom for each slab */
+    n = erg->slab_last; /* start with last slab */
+    i = rotg->nat-1;    /* start with the last atom */
+    do
+    {
+        do /* move backward in position until a large enough beta is found */
+        {
+            beta = calc_beta(erg->xc[i], rotg, n);
+            i--;
+        } while ((beta > erg->max_beta) && (i > -1));
+        i++;
+        islab = n - erg->slab_first;  /* slab index */
+        erg->lastatom[islab] = i;
+        /* Proceed to the next slab (i keeps its value, since slabs overlap) */
+        n--;
+    } while (n >= erg->slab_first);
+}
+
+
+/* Determine the very first and very last slab that needs to be considered
+ * For the first slab that needs to be considered, we have to find the smallest
+ * n that obeys:
+ *
+ *  x_first * v - n*Delta_x <= beta_max
+ *
+ * slab index n, slab distance Delta_x, rotation vector v. For the last slab we
+ * have to find the largest n that obeys
+ *
+ *  x_last * v - n*Delta_x >= -beta_max
+ *
+ */
+static gmx_inline int get_first_slab(
+    t_rotgrp *rotg,     /* The rotation group (inputrec data) */
+    real max_beta,      /* The max_beta value, instead of min_gaussian */
+    rvec firstatom)     /* First atom after sorting along the rotation vector v */
+{
+    /* Project the first atom onto the rotation vector and solve the
+     * inequality above for the smallest integer n */
+    real proj = iprod(firstatom, rotg->vec);
+
+    return ceil((proj - max_beta)/rotg->slab_dist);
+}
+
+
+static gmx_inline int get_last_slab(
+    t_rotgrp *rotg,     /* The rotation group (inputrec data) */
+    real max_beta,      /* The max_beta value, instead of min_gaussian */
+    rvec lastatom)      /* Last atom along v */
+{
+    /* Project the last atom onto the rotation vector and solve the
+     * inequality in the comment above get_first_slab for the largest
+     * integer n */
+    real proj = iprod(lastatom, rotg->vec);
+
+    return floor((proj + max_beta)/rotg->slab_dist);
+}
+
+
+/* Determine the first and last relevant slab for this rotation group and
+ * store them in erg->slab_first / erg->slab_last. Aborts with a fatal error
+ * if either falls outside the range covered by the reference slab data.
+ * Note: the group number g is currently unused but kept for interface
+ * stability with the call sites. */
+static void get_firstlast_slab_check(
+    t_rotgrp *rotg,       /* The rotation group (inputrec data) */
+    t_gmx_enfrotgrp *erg, /* The rotation group (data only accessible in this file) */
+    rvec firstatom,       /* First atom after sorting along the rotation vector v */
+    rvec lastatom,        /* Last atom along v */
+    int g)                /* The rotation group number */
+{
+    erg->slab_first = get_first_slab(rotg, erg->max_beta, firstatom);
+    erg->slab_last  = get_last_slab(rotg, erg->max_beta, lastatom);
+
+    /* Check whether we have reference data to compare against */
+    if (erg->slab_first < erg->slab_first_ref)
+    {
+        gmx_fatal(FARGS, "%s No reference data for first slab (n=%d), unable to proceed.",
+                  RotStr, erg->slab_first);
+    }
+
+    /* Check whether we have reference data to compare against */
+    if (erg->slab_last > erg->slab_last_ref)
+    {
+        gmx_fatal(FARGS, "%s No reference data for last slab (n=%d), unable to proceed.",
+                  RotStr, erg->slab_last);
+    }
+}
+
+
+/* Enforced rotation with a flexible axis
+ *
+ * Driver for the flexible-axis potentials: sorts the collective positions
+ * along the rotation vector, determines the relevant slab range and per-slab
+ * atom windows and centers, dispatches to the appropriate low-level kernel
+ * (flex/flex-t or flex2/flex2-t), optionally performs the RMSD angle fit on
+ * the master, and lumps the per-slab torques into erg->torque_v. */
+static void do_flexible(
+    gmx_bool bMaster,
+    gmx_enfrot_t enfrot,    /* Other rotation data            */
+    t_rotgrp *rotg,         /* The rotation group             */
+    int g,                  /* Group number                   */
+    rvec x[],               /* The local positions            */
+    matrix box,
+    double t,               /* Time in picoseconds            */
+    gmx_large_int_t step,   /* The time step                  */
+    gmx_bool bOutstepRot,   /* Output to main rotation output file */
+    gmx_bool bOutstepSlab)  /* Output per-slab data           */
+{
+    int l,nslabs;
+    real sigma;             /* The Gaussian width sigma */
+    gmx_enfrotgrp_t erg;    /* Pointer to enforced rotation group data */
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Define the sigma value */
+    sigma = 0.7*rotg->slab_dist;
+
+    /* Sort the collective coordinates erg->xc along the rotation vector. This is
+     * an optimization for the inner loop. */
+    sort_collective_coordinates(rotg, enfrot->data);
+
+    /* Determine the first relevant slab for the first atom and the last
+     * relevant slab for the last atom */
+    get_firstlast_slab_check(rotg, erg, erg->xc[0], erg->xc[rotg->nat-1], g);
+
+    /* Determine for each slab depending on the min_gaussian cutoff criterium,
+     * a first and a last atom index inbetween stuff needs to be calculated */
+    get_firstlast_atom_per_slab(rotg);
+
+    /* Determine the gaussian-weighted center of positions for all slabs */
+    get_slab_centers(rotg,erg->xc,erg->mc_sorted,g,t,enfrot->out_slabs,bOutstepSlab,FALSE);
+
+    /* Clear the torque per slab from last time step: */
+    nslabs = erg->slab_last - erg->slab_first + 1;
+    for (l=0; l<nslabs; l++)
+        erg->slab_torque_v[l] = 0.0;
+
+    /* Call the rotational forces kernel */
+    if (rotg->eType == erotgFLEX || rotg->eType == erotgFLEXT)
+        erg->V = do_flex_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box);
+    else if (rotg->eType == erotgFLEX2 || rotg->eType == erotgFLEX2T)
+        erg->V = do_flex2_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box);
+    else
+        gmx_fatal(FARGS, "Unknown flexible rotation type");
+
+    /* Determine angle by RMSD fit to the reference - Let's hope this */
+    /* only happens once in a while, since this is not parallelized! */
+    if ( bMaster && (erotgFitPOT != rotg->eFittype) )
+    {
+        if (bOutstepRot)
+        {
+            /* Fit angle of the whole rotation group */
+            erg->angle_v = flex_fit_angle(rotg);
+        }
+        if (bOutstepSlab)
+        {
+            /* Fit angle of each slab */
+            flex_fit_angle_perslab(g, rotg, t, erg->degangle, enfrot->out_angles);
+        }
+    }
+
+    /* Lump together the torques from all slabs: */
+    erg->torque_v = 0.0;
+    for (l=0; l<nslabs; l++)
+        erg->torque_v += erg->slab_torque_v[l];
+}
+
+
+/* Calculate the angle between reference and actual rotation group atom,
+ * both projected into a plane perpendicular to the rotation vector:
+ *
+ * The angle *alpha is signed: negative when the reference vector precedes the
+ * actual one with respect to the rotation vector (right-hand rule), positive
+ * otherwise. *weight is the length of the projected actual position, so
+ * atoms near the rotation axis contribute less to a weighted average. */
+static void angle(t_rotgrp *rotg,
+    rvec x_act,
+    rvec x_ref,
+    real *alpha,
+    real *weight)  /* atoms near the rotation axis should count less than atoms far away */
+{
+    rvec xp, xrp;  /* current and reference positions projected on a plane perpendicular to pg->vec */
+    rvec dum;
+
+
+    /* Project x_ref and x into a plane through the origin perpendicular to rot_vec: */
+    /* Project x_ref: xrp = x_ref - (vec * x_ref) * vec */
+    svmul(iprod(rotg->vec, x_ref), rotg->vec, dum);
+    rvec_sub(x_ref, dum, xrp);
+    /* Project x_act: */
+    svmul(iprod(rotg->vec, x_act), rotg->vec, dum);
+    rvec_sub(x_act, dum, xp);
+
+    /* Retrieve information about which vector precedes. gmx_angle always
+     * returns a positive angle. */
+    cprod(xp, xrp, dum); /* if reference precedes, this is pointing into the same direction as vec */
+
+    if (iprod(rotg->vec, dum) >= 0)
+        *alpha = -gmx_angle(xrp, xp);
+    else
+        *alpha = +gmx_angle(xrp, xp);
+
+    /* Also return the weight */
+    *weight = norm(xp);
+}
+
+
+/* Project first vector onto a plane perpendicular to the second vector
+ * dr = dr - (dr.v)v
+ * Note that v must be of unit length.
+ */
+static gmx_inline void project_onto_plane(rvec dr, const rvec v)
+{
+    rvec parallel_part;  /* the component of dr along v */
+
+
+    /* Remove from dr its component along v, leaving the in-plane part */
+    svmul(iprod(dr,v), v, parallel_part);  /* parallel_part = (dr.v)v */
+    rvec_dec(dr, parallel_part);           /* dr = dr - (dr.v)v       */
+}
+
+
+/* Fixed rotation: The rotation reference group rotates around the v axis. */
+/* The atoms of the actual rotation group are attached with imaginary  */
+/* springs to the reference atoms.                                     */
+/* Accumulates the potential in erg->V, stores the per-atom forces in
+ * erg->f_rot_loc[], and - on rotation output steps - adds to the torque
+ * and the weighted fit angle. Handles both the plain fixed potentials and
+ * the pivot-free/parallel-motion variants (bProject). */
+static void do_fixed(
+    t_rotgrp *rotg,         /* The rotation group          */
+    rvec x[],               /* The positions               */
+    matrix box,             /* The simulation box          */
+    double t,               /* Time in picoseconds         */
+    gmx_large_int_t step,   /* The time step               */
+    gmx_bool bOutstepRot,   /* Output to main rotation output file */
+    gmx_bool bOutstepSlab)  /* Output per-slab data        */
+{
+    int ifit,j,jj,m;
+    rvec dr;
+    rvec tmp_f;             /* Force */
+    real alpha;             /* a single angle between an actual and a reference position */
+    real weight;            /* single weight for a single angle */
+    gmx_enfrotgrp_t erg;    /* Pointer to enforced rotation group data */
+    rvec xi_xc;             /* xi - xc */
+    gmx_bool bCalcPotFit;
+    rvec fit_xr_loc;
+
+    /* for mass weighting: */
+    real wi;                /* Mass-weighting of the positions */
+    real N_M;               /* N/M */
+    real k_wi;              /* k times wi */
+
+    gmx_bool bProject;
+
+
+    erg=rotg->enfrotgrp;
+    bProject = (rotg->eType==erotgPM) || (rotg->eType==erotgPMPF);
+    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT==rotg->eFittype);
+
+    N_M = rotg->nat * erg->invmass;
+
+    /* Each process calculates the forces on its local atoms */
+    for (j=0; j<erg->nat_loc; j++)
+    {
+        /* Calculate (x_i-x_c) resp. (x_i-u) */
+        rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xi_xc);
+
+        /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */
+        rvec_sub(erg->xr_loc[j], xi_xc, dr);
+
+        if (bProject)
+            project_onto_plane(dr, rotg->vec);
+
+        /* Mass-weighting */
+        wi = N_M*erg->m_loc[j];
+
+        /* Store the additional force so that it can be added to the force
+         * array after the normal forces have been evaluated */
+        k_wi = rotg->k*wi;
+        for (m=0; m<DIM; m++)
+        {
+            tmp_f[m]             = k_wi*dr[m];
+            erg->f_rot_loc[j][m] = tmp_f[m];
+            erg->V              += 0.5*k_wi*sqr(dr[m]);
+        }
+
+        /* If requested, also calculate the potential for a set of angles
+         * near the current reference angle */
+        if (bCalcPotFit)
+        {
+            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
+            {
+                /* Index of this rotation group atom with respect to the whole rotation group */
+                jj = erg->xc_ref_ind[j];
+
+                /* Rotate with the alternative angle. Like rotate_local_reference(),
+                 * just for a single local atom */
+                mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_xr_loc); /* fit_xr_loc = Omega*(y_i-y_c) */
+
+                /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */
+                rvec_sub(fit_xr_loc, xi_xc, dr);
+
+                if (bProject)
+                    project_onto_plane(dr, rotg->vec);
+
+                /* Add to the rotation potential for this angle: */
+                erg->PotAngleFit->V[ifit] += 0.5*k_wi*norm2(dr);
+            }
+        }
+
+        if (bOutstepRot)
+        {
+            /* Add to the torque of this rotation group */
+            erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center);
+
+            /* Calculate the angle between reference and actual rotation group atom. */
+            angle(rotg, xi_xc, erg->xr_loc[j], &alpha, &weight);  /* angle in rad, weighted */
+            erg->angle_v  += alpha * weight;
+            erg->weight_v += weight;
+        }
+        /* If you want enforced rotation to contribute to the virial,
+         * activate the following lines:
+            if (MASTER(cr))
+            {
+               Add the rotation contribution to the virial
+              for(j=0; j<DIM; j++)
+                for(m=0;m<DIM;m++)
+                  vir[j][m] += 0.5*f[ii][j]*dr[m];
+            }
+         */
+
+        PRINT_FORCE_J
+
+    } /* end of loop over local rotation group atoms */
+}
+
+
+/* Calculate the radial motion potential and forces */
+/* From the code below: V = k/2 * Sum_j w_j [ p_j.(x_j-u) ]^2, where p_j is
+ * the unit vector along v x Omega.(yj0-u); the per-atom force stored in
+ * f_rot_loc is -k*w_j*[p_j.(x_j-u)]*p_j. */
+static void do_radial_motion(
+        t_rotgrp *rotg, /* The rotation group */
+        rvec x[], /* The positions */
+        matrix box, /* The simulation box */
+        double t, /* Time in picoseconds */
+        gmx_large_int_t step, /* The time step */
+        gmx_bool bOutstepRot, /* Output to main rotation output file */
+        gmx_bool bOutstepSlab) /* Output per-slab data */
+{
+    int j,jj,ifit;
+    rvec tmp_f; /* Force */
+    real alpha; /* a single angle between an actual and a reference position */
+    real weight; /* single weight for a single angle */
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    rvec xj_u; /* xj - u */
+    rvec tmpvec,fit_tmpvec;
+    real fac,fac2,sum=0.0;
+    rvec pj;
+    gmx_bool bCalcPotFit;
+
+    /* For mass weighting: */
+    real wj; /* Mass-weighting of the positions */
+    real N_M; /* N/M */
+
+
+    erg=rotg->enfrotgrp;
+    /* The fit-potential scan is only evaluated on output steps */
+    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT==rotg->eFittype);
+
+    /* N/M: number of atoms over total group mass (erg->invmass = 1/M) */
+    N_M = rotg->nat * erg->invmass;
+
+    /* Each process calculates the forces on its local atoms */
+    for (j=0; j<erg->nat_loc; j++)
+    {
+        /* Calculate (xj-u) */
+        rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xj_u); /* xj_u = xj-u */
+
+        /* Calculate Omega.(yj0-u) */
+        cprod(rotg->vec, erg->xr_loc[j], tmpvec); /* tmpvec = v x Omega.(yj0-u) */
+
+        /* v x Omega.(yj0-u) */
+        unitv(tmpvec, pj); /* pj = --------------------- */
+        /* | v x Omega.(yj0-u) | */
+
+        fac = iprod(pj, xj_u); /* fac = pj.(xj-u) */
+        fac2 = fac*fac;
+
+        /* Mass-weighting */
+        wj = N_M*erg->m_loc[j];
+
+        /* Store the additional force so that it can be added to the force
+         * array after the normal forces have been evaluated */
+        svmul(-rotg->k*wj*fac, pj, tmp_f);
+        copy_rvec(tmp_f, erg->f_rot_loc[j]);
+        sum += wj*fac2;
+
+        /* If requested, also calculate the potential for a set of angles
+         * near the current reference angle */
+        if (bCalcPotFit)
+        {
+            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
+            {
+                /* Index of this rotation group atom with respect to the whole rotation group */
+                jj = erg->xc_ref_ind[j];
+
+                /* Rotate with the alternative angle. Like rotate_local_reference(),
+                 * just for a single local atom */
+                mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_tmpvec); /* fit_tmpvec = Omega*(yj0-u) */
+
+                /* Calculate Omega.(yj0-u) */
+                cprod(rotg->vec, fit_tmpvec, tmpvec); /* tmpvec = v x Omega.(yj0-u) */
+                /* v x Omega.(yj0-u) */
+                unitv(tmpvec, pj); /* pj = --------------------- */
+                /* | v x Omega.(yj0-u) | */
+
+                fac = iprod(pj, xj_u); /* fac = pj.(xj-u) */
+                fac2 = fac*fac;
+
+                /* Add to the rotation potential for this angle: */
+                erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2;
+            }
+        }
+
+        if (bOutstepRot)
+        {
+            /* Add to the torque of this rotation group */
+            erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center);
+
+            /* Calculate the angle between reference and actual rotation group atom. */
+            angle(rotg, xj_u, erg->xr_loc[j], &alpha, &weight); /* angle in rad, weighted */
+            erg->angle_v += alpha * weight;
+            erg->weight_v += weight;
+        }
+
+        PRINT_FORCE_J
+
+    } /* end of loop over local rotation group atoms */
+    /* Accumulated sum is Sum_j w_j fac^2; scale once at the end */
+    erg->V = 0.5*rotg->k*sum;
+}
+
+
+/* Calculate the radial motion pivot-free potential and forces */
+/* Pivot-free variant: instead of a fixed pivot u, the instantaneous
+ * mass-weighted center xc of the group is used (computed below via
+ * get_center). The force has two parts: the direct term -k*wj*fac*qj and a
+ * center-correction term mj * (k/M) * Sum_i [ wi qi.(xi-xc) qi ], which is
+ * precalculated once per call since it is identical for all atoms j. */
+static void do_radial_motion_pf(
+        t_rotgrp *rotg, /* The rotation group */
+        rvec x[], /* The positions */
+        matrix box, /* The simulation box */
+        double t, /* Time in picoseconds */
+        gmx_large_int_t step, /* The time step */
+        gmx_bool bOutstepRot, /* Output to main rotation output file */
+        gmx_bool bOutstepSlab) /* Output per-slab data */
+{
+    int i,ii,iigrp,ifit,j;
+    rvec xj; /* Current position */
+    rvec xj_xc; /* xj - xc */
+    rvec yj0_yc0; /* yj0 - yc0 */
+    rvec tmp_f; /* Force */
+    real alpha; /* a single angle between an actual and a reference position */
+    real weight; /* single weight for a single angle */
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    rvec tmpvec, tmpvec2;
+    rvec innersumvec; /* Precalculation of the inner sum */
+    rvec innersumveckM;
+    real fac,fac2,V=0.0;
+    rvec qi,qj;
+    gmx_bool bCalcPotFit;
+
+    /* For mass weighting: */
+    real mj,wi,wj; /* Mass-weighting of the positions */
+    real N_M; /* N/M */
+
+
+    erg=rotg->enfrotgrp;
+    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT==rotg->eFittype);
+
+    N_M = rotg->nat * erg->invmass;
+
+    /* Get the current center of the rotation group: */
+    get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center);
+
+    /* Precalculate Sum_i [ wi qi.(xi-xc) qi ] which is needed for every single j */
+    /* This loop runs over the COLLECTIVE (whole-group) positions erg->xc */
+    clear_rvec(innersumvec);
+    for (i=0; i < rotg->nat; i++)
+    {
+        /* Mass-weighting */
+        wi = N_M*erg->mc[i];
+
+        /* Calculate qi. Note that xc_ref_center has already been subtracted from
+         * x_ref in init_rot_group.*/
+        mvmul(erg->rotmat, rotg->x_ref[i], tmpvec); /* tmpvec = Omega.(yi0-yc0) */
+
+        cprod(rotg->vec, tmpvec, tmpvec2); /* tmpvec2 = v x Omega.(yi0-yc0) */
+
+        /* v x Omega.(yi0-yc0) */
+        unitv(tmpvec2, qi); /* qi = ----------------------- */
+        /* | v x Omega.(yi0-yc0) | */
+
+        rvec_sub(erg->xc[i], erg->xc_center, tmpvec); /* tmpvec = xi-xc */
+
+        svmul(wi*iprod(qi, tmpvec), qi, tmpvec2);
+
+        rvec_inc(innersumvec, tmpvec2);
+    }
+    /* Scale by k/M once, outside the loop over local atoms */
+    svmul(rotg->k*erg->invmass, innersumvec, innersumveckM);
+
+    /* Each process calculates the forces on its local atoms */
+    for (j=0; j<erg->nat_loc; j++)
+    {
+        /* Local index of a rotation group atom */
+        ii = erg->ind_loc[j];
+        /* Position of this atom in the collective array */
+        iigrp = erg->xc_ref_ind[j];
+        /* Mass-weighting */
+        mj = erg->mc[iigrp]; /* need the unsorted mass here */
+        wj = N_M*mj;
+
+        /* Current position of this atom: x[ii][XX/YY/ZZ] */
+        copy_rvec(x[ii], xj);
+
+        /* Shift this atom such that it is near its reference */
+        shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
+
+        /* The (unrotated) reference position is yj0. yc0 has already
+         * been subtracted in init_rot_group */
+        copy_rvec(rotg->x_ref[iigrp], yj0_yc0); /* yj0_yc0 = yj0 - yc0 */
+
+        /* Calculate Omega.(yj0-yc0) */
+        mvmul(erg->rotmat, yj0_yc0, tmpvec2); /* tmpvec2 = Omega.(yj0 - yc0) */
+
+        cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x Omega.(yj0-yc0) */
+
+        /* v x Omega.(yj0-yc0) */
+        unitv(tmpvec, qj); /* qj = ----------------------- */
+        /* | v x Omega.(yj0-yc0) | */
+
+        /* Calculate (xj-xc) */
+        rvec_sub(xj, erg->xc_center, xj_xc); /* xj_xc = xj-xc */
+
+        fac = iprod(qj, xj_xc); /* fac = qj.(xj-xc) */
+        fac2 = fac*fac;
+
+        /* Store the additional force so that it can be added to the force
+         * array after the normal forces have been evaluated */
+        svmul(-rotg->k*wj*fac, qj, tmp_f); /* part 1 of force */
+        svmul(mj, innersumveckM, tmpvec); /* part 2 of force */
+        rvec_inc(tmp_f, tmpvec);
+        copy_rvec(tmp_f, erg->f_rot_loc[j]);
+        V += wj*fac2;
+
+        /* If requested, also calculate the potential for a set of angles
+         * near the current reference angle */
+        if (bCalcPotFit)
+        {
+            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
+            {
+                /* Rotate with the alternative angle. Like rotate_local_reference(),
+                 * just for a single local atom */
+                mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, tmpvec2); /* tmpvec2 = Omega*(yj0-yc0) */
+
+                /* Calculate Omega.(yj0-u) */
+                cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x Omega.(yj0-yc0) */
+                /* v x Omega.(yj0-yc0) */
+                unitv(tmpvec, qj); /* qj = ----------------------- */
+                /* | v x Omega.(yj0-yc0) | */
+
+                fac = iprod(qj, xj_xc); /* fac = qj.(xj-xc) */
+                fac2 = fac*fac;
+
+                /* Add to the rotation potential for this angle: */
+                erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2;
+            }
+        }
+
+        if (bOutstepRot)
+        {
+            /* Add to the torque of this rotation group */
+            erg->torque_v += torque(rotg->vec, tmp_f, xj, erg->xc_center);
+
+            /* Calculate the angle between reference and actual rotation group atom. */
+            angle(rotg, xj_xc, yj0_yc0, &alpha, &weight); /* angle in rad, weighted */
+            erg->angle_v += alpha * weight;
+            erg->weight_v += weight;
+        }
+
+        PRINT_FORCE_J
+
+    } /* end of loop over local rotation group atoms */
+    erg->V = 0.5*rotg->k*V;
+}
+
+
+/* Precalculate the inner sum for the radial motion 2 forces */
+/* Runs over the collective positions erg->xc and returns (in innersumvec)
+ * the atom-independent second force term, already scaled by k/M
+ * (see the final svmul). Called once per step by do_radial_motion2 for the
+ * pivot-free variant only. */
+static void radial_motion2_precalc_inner_sum(t_rotgrp *rotg, rvec innersumvec)
+{
+    int i;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    rvec xi_xc; /* xi - xc */
+    rvec tmpvec,tmpvec2;
+    real fac,fac2;
+    rvec ri,si;
+    real siri;
+    rvec v_xi_xc; /* v x (xi - xc) */
+    real psii,psiistar;
+    real wi; /* Mass-weighting of the positions */
+    real N_M; /* N/M */
+    rvec sumvec;
+
+    erg=rotg->enfrotgrp;
+    N_M = rotg->nat * erg->invmass;
+
+    /* Loop over the collective set of positions */
+    clear_rvec(sumvec);
+    for (i=0; i<rotg->nat; i++)
+    {
+        /* Mass-weighting */
+        wi = N_M*erg->mc[i];
+
+        rvec_sub(erg->xc[i], erg->xc_center, xi_xc); /* xi_xc = xi-xc */
+
+        /* Calculate ri. Note that xc_ref_center has already been subtracted from
+         * x_ref in init_rot_group.*/
+        mvmul(erg->rotmat, rotg->x_ref[i], ri); /* ri = Omega.(yi0-yc0) */
+
+        cprod(rotg->vec, xi_xc, v_xi_xc); /* v_xi_xc = v x (xi-xc) */
+
+        fac = norm2(v_xi_xc);
+        /* 1 */
+        psiistar = 1.0/(fac + rotg->eps); /* psiistar = --------------------- */
+        /* |v x (xi-xc)|^2 + eps */
+
+        psii = gmx_invsqrt(fac); /* 1 */
+        /* psii = ------------- */
+        /* |v x (xi-xc)| */
+
+        svmul(psii, v_xi_xc, si); /* si = psii * (v x (xi-xc) ) */
+
+        fac = iprod(v_xi_xc, ri); /* fac = (v x (xi-xc)).ri */
+        fac2 = fac*fac;
+
+        siri = iprod(si, ri); /* siri = si.ri */
+
+        svmul(psiistar/psii, ri, tmpvec);
+        svmul(psiistar*psiistar/(psii*psii*psii) * siri, si, tmpvec2);
+        rvec_dec(tmpvec, tmpvec2);
+        cprod(tmpvec, rotg->vec, tmpvec2);
+
+        svmul(wi*siri, tmpvec2, tmpvec);
+
+        rvec_inc(sumvec, tmpvec);
+    }
+    /* Apply the common prefactor k/M here so callers can use the result directly */
+    svmul(rotg->k*erg->invmass, sumvec, innersumvec);
+}
+
+
+/* Calculate the radial motion 2 potential and forces */
+/* Handles both the fixed-pivot (RM2) and pivot-free (RM2PF) flavors, selected
+ * by rotg->eType below. The projection is regularized with rotg->eps
+ * (psijstar = 1/(|v x (xj-u)|^2 + eps)), which keeps the force finite for
+ * atoms close to the rotation axis. For RM2 (non-PF), innersumvec stays zero,
+ * so "part 2" of the force vanishes. */
+static void do_radial_motion2(
+        t_rotgrp *rotg, /* The rotation group */
+        rvec x[], /* The positions */
+        matrix box, /* The simulation box */
+        double t, /* Time in picoseconds */
+        gmx_large_int_t step, /* The time step */
+        gmx_bool bOutstepRot, /* Output to main rotation output file */
+        gmx_bool bOutstepSlab) /* Output per-slab data */
+{
+    int ii,iigrp,ifit,j;
+    rvec xj; /* Position */
+    real alpha; /* a single angle between an actual and a reference position */
+    real weight; /* single weight for a single angle */
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    rvec xj_u; /* xj - u */
+    rvec yj0_yc0; /* yj0 -yc0 */
+    rvec tmpvec,tmpvec2;
+    real fac,fit_fac,fac2,Vpart=0.0;
+    rvec rj,fit_rj,sj;
+    real sjrj;
+    rvec v_xj_u; /* v x (xj - u) */
+    real psij,psijstar;
+    real mj,wj; /* For mass-weighting of the positions */
+    real N_M; /* N/M */
+    gmx_bool bPF;
+    rvec innersumvec;
+    gmx_bool bCalcPotFit;
+
+
+    erg=rotg->enfrotgrp;
+
+    bPF = rotg->eType==erotgRM2PF;
+    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT==rotg->eFittype);
+
+
+    clear_rvec(yj0_yc0); /* Make the compiler happy */
+
+    clear_rvec(innersumvec);
+    if (bPF)
+    {
+        /* For the pivot-free variant we have to use the current center of
+         * mass of the rotation group instead of the pivot u */
+        get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center);
+
+        /* Also, we precalculate the second term of the forces that is identical
+         * (up to the weight factor mj) for all forces */
+        radial_motion2_precalc_inner_sum(rotg,innersumvec);
+    }
+
+    N_M = rotg->nat * erg->invmass;
+
+    /* Each process calculates the forces on its local atoms */
+    for (j=0; j<erg->nat_loc; j++)
+    {
+        if (bPF)
+        {
+            /* Pivot-free: work with collective positions + PBC shifts */
+            /* Local index of a rotation group atom */
+            ii = erg->ind_loc[j];
+            /* Position of this atom in the collective array */
+            iigrp = erg->xc_ref_ind[j];
+            /* Mass-weighting */
+            mj = erg->mc[iigrp];
+
+            /* Current position of this atom: x[ii] */
+            copy_rvec(x[ii], xj);
+
+            /* Shift this atom such that it is near its reference */
+            shift_single_coord(box, xj, erg->xc_shifts[iigrp]);
+
+            /* The (unrotated) reference position is yj0. yc0 has already
+             * been subtracted in init_rot_group */
+            copy_rvec(rotg->x_ref[iigrp], yj0_yc0); /* yj0_yc0 = yj0 - yc0 */
+
+            /* Calculate Omega.(yj0-yc0) */
+            mvmul(erg->rotmat, yj0_yc0, rj); /* rj = Omega.(yj0-yc0) */
+        }
+        else
+        {
+            /* Fixed pivot: local PBC-corrected positions and pre-rotated references */
+            mj = erg->m_loc[j];
+            copy_rvec(erg->x_loc_pbc[j], xj);
+            copy_rvec(erg->xr_loc[j], rj); /* rj = Omega.(yj0-u) */
+        }
+        /* Mass-weighting */
+        wj = N_M*mj;
+
+        /* Calculate (xj-u) resp. (xj-xc) */
+        rvec_sub(xj, erg->xc_center, xj_u); /* xj_u = xj-u */
+
+        cprod(rotg->vec, xj_u, v_xj_u); /* v_xj_u = v x (xj-u) */
+
+        fac = norm2(v_xj_u);
+        /* 1 */
+        psijstar = 1.0/(fac + rotg->eps); /* psistar = -------------------- */
+        /* |v x (xj-u)|^2 + eps */
+
+        psij = gmx_invsqrt(fac); /* 1 */
+        /* psij = ------------ */
+        /* |v x (xj-u)| */
+
+        svmul(psij, v_xj_u, sj); /* sj = psij * (v x (xj-u) ) */
+
+        fac = iprod(v_xj_u, rj); /* fac = (v x (xj-u)).rj */
+        fac2 = fac*fac;
+
+        sjrj = iprod(sj, rj); /* sjrj = sj.rj */
+
+        svmul(psijstar/psij, rj, tmpvec);
+        svmul(psijstar*psijstar/(psij*psij*psij) * sjrj, sj, tmpvec2);
+        rvec_dec(tmpvec, tmpvec2);
+        cprod(tmpvec, rotg->vec, tmpvec2);
+
+        /* Store the additional force so that it can be added to the force
+         * array after the normal forces have been evaluated */
+        svmul(-rotg->k*wj*sjrj, tmpvec2, tmpvec);
+        svmul(mj, innersumvec, tmpvec2); /* This is != 0 only for the pivot-free variant */
+
+        rvec_add(tmpvec2, tmpvec, erg->f_rot_loc[j]);
+        Vpart += wj*psijstar*fac2;
+
+        /* If requested, also calculate the potential for a set of angles
+         * near the current reference angle */
+        if (bCalcPotFit)
+        {
+            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
+            {
+                if (bPF)
+                {
+                    mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, fit_rj); /* fit_rj = Omega.(yj0-yc0) */
+                }
+                else
+                {
+                    /* Position of this atom in the collective array */
+                    iigrp = erg->xc_ref_ind[j];
+                    /* Rotate with the alternative angle. Like rotate_local_reference(),
+                     * just for a single local atom */
+                    mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[iigrp], fit_rj); /* fit_rj = Omega*(yj0-u) */
+                }
+                fit_fac = iprod(v_xj_u, fit_rj); /* fac = (v x (xj-u)).fit_rj */
+                /* Add to the rotation potential for this angle: */
+                erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*psijstar*fit_fac*fit_fac;
+            }
+        }
+
+        if (bOutstepRot)
+        {
+            /* Add to the torque of this rotation group */
+            erg->torque_v += torque(rotg->vec, erg->f_rot_loc[j], xj, erg->xc_center);
+
+            /* Calculate the angle between reference and actual rotation group atom. */
+            angle(rotg, xj_u, rj, &alpha, &weight); /* angle in rad, weighted */
+            erg->angle_v += alpha * weight;
+            erg->weight_v += weight;
+        }
+
+        PRINT_FORCE_J
+
+    } /* end of loop over local rotation group atoms */
+    erg->V = 0.5*rotg->k*Vpart;
+}
+
+
+/* Determine the smallest and largest position vector (with respect to the
+ * rotation vector) for the reference group */
+/* Outputs, via firstindex/lastindex, the indices of the reference atoms with
+ * the minimum resp. maximum projection onto rotg->vec. If all projections are
+ * equal (flat group), the fallback indices 0 and nat-1 are returned. */
+static void get_firstlast_atom_ref(
+        t_rotgrp *rotg,
+        int *firstindex,
+        int *lastindex)
+{
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    int i;
+    real xcproj; /* The projection of a reference position on the
+                    rotation vector */
+    real minproj, maxproj; /* Smallest and largest projection on v */
+
+
+
+    erg=rotg->enfrotgrp;
+
+    /* Start with some value */
+    minproj = iprod(rotg->x_ref[0], rotg->vec);
+    maxproj = minproj;
+
+    /* This is just to ensure that it still works if all the atoms of the
+     * reference structure are situated in a plane perpendicular to the rotation
+     * vector */
+    *firstindex = 0;
+    *lastindex = rotg->nat-1;
+
+    /* Loop over all atoms of the reference group,
+     * project them on the rotation vector to find the extremes */
+    for (i=0; i<rotg->nat; i++)
+    {
+        xcproj = iprod(rotg->x_ref[i], rotg->vec);
+        if (xcproj < minproj)
+        {
+            minproj = xcproj;
+            *firstindex = i;
+        }
+        if (xcproj > maxproj)
+        {
+            maxproj = xcproj;
+            *lastindex = i;
+        }
+    }
+}
+
+
+/* Allocate memory for the slabs */
+/* Sizes all per-slab arrays of the group from the reference slab range
+ * (slab_first_ref..slab_last_ref), which must have been determined before
+ * (see get_firstlast_slab_ref). The runtime slab count can never exceed it. */
+static void allocate_slabs(
+        t_rotgrp *rotg,
+        FILE *fplog,
+        int g,
+        gmx_bool bVerbose)
+{
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    int i, nslabs;
+
+
+    erg=rotg->enfrotgrp;
+
+    /* More slabs than are defined for the reference are never needed */
+    nslabs = erg->slab_last_ref - erg->slab_first_ref + 1;
+
+    /* Remember how many we allocated */
+    erg->nslabs_alloc = nslabs;
+
+    if ( (NULL != fplog) && bVerbose )
+        fprintf(fplog, "%s allocating memory to store data for %d slabs (rotation group %d).\n",
+                RotStr, nslabs,g);
+    snew(erg->slab_center , nslabs);
+    snew(erg->slab_center_ref , nslabs);
+    snew(erg->slab_weights , nslabs);
+    snew(erg->slab_torque_v , nslabs);
+    snew(erg->slab_data , nslabs);
+    snew(erg->gn_atom , nslabs);
+    snew(erg->gn_slabind , nslabs);
+    snew(erg->slab_innersumvec, nslabs);
+    for (i=0; i<nslabs; i++)
+    {
+        /* Each slab can in principle hold every atom of the group */
+        snew(erg->slab_data[i].x , rotg->nat);
+        snew(erg->slab_data[i].ref , rotg->nat);
+        snew(erg->slab_data[i].weight, rotg->nat);
+    }
+    snew(erg->xc_ref_sorted, rotg->nat);
+    snew(erg->xc_sortind , rotg->nat);
+    snew(erg->firstatom , nslabs);
+    snew(erg->lastatom , nslabs);
+}
+
+
+/* From the extreme coordinates of the reference group, determine the first
+ * and last slab of the reference. We can never have more slabs in the real
+ * simulation than calculated here for the reference.
+ */
+/* Starting from the slabs of the outermost reference atoms, the range is
+ * widened outward while the Gaussian slab weight stays above WEIGHT_MIN,
+ * then narrowed back by one. slab_buffer records how far the start moved. */
+static void get_firstlast_slab_ref(t_rotgrp *rotg, real mc[], int ref_firstindex, int ref_lastindex)
+{
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    int first,last,firststart;
+    rvec dummy;
+
+
+    erg=rotg->enfrotgrp;
+    first = get_first_slab(rotg, erg->max_beta, rotg->x_ref[ref_firstindex]);
+    last = get_last_slab( rotg, erg->max_beta, rotg->x_ref[ref_lastindex ]);
+    firststart = first;
+
+    /* Extend downward while the slab still carries significant weight */
+    while (get_slab_weight(first, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN)
+    {
+        first--;
+    }
+    erg->slab_first_ref = first+1;
+    /* Extend upward while the slab still carries significant weight */
+    while (get_slab_weight(last, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN)
+    {
+        last++;
+    }
+    erg->slab_last_ref = last-1;
+
+    erg->slab_buffer = firststart - erg->slab_first_ref;
+}
+
+
+/* Special version of copy_rvec:
+ * During the copy procedure of xcurr to b, the correct PBC image is chosen
+ * such that the copied vector ends up near its reference position xref */
+/* npbcdim limits the periodic dimensions considered (3 = fully periodic).
+ * The m-loop runs from high to low dimension so that triclinic box-vector
+ * components of higher dimensions are already accounted for when lower
+ * dimensions are shifted. */
+static inline void copy_correct_pbc_image(
+        const rvec xcurr, /* copy vector xcurr ... */
+        rvec b, /* ... to b ... */
+        const rvec xref, /* choosing the PBC image such that b ends up near xref */
+        matrix box,
+        int npbcdim)
+{
+    rvec dx;
+    int d,m;
+    ivec shift;
+
+
+    /* Shortest PBC distance between the atom and its reference */
+    rvec_sub(xcurr, xref, dx);
+
+    /* Determine the shift for this atom */
+    clear_ivec(shift);
+    for(m=npbcdim-1; m>=0; m--)
+    {
+        while (dx[m] < -0.5*box[m][m])
+        {
+            for(d=0; d<DIM; d++)
+                dx[d] += box[m][d];
+            shift[m]++;
+        }
+        while (dx[m] >= 0.5*box[m][m])
+        {
+            for(d=0; d<DIM; d++)
+                dx[d] -= box[m][d];
+            shift[m]--;
+        }
+    }
+
+    /* Apply the shift to the position */
+    copy_rvec(xcurr, b);
+    shift_single_coord(box, b, shift);
+}
+
+
+/* One-time setup for a single rotation group: allocates the collective or
+ * local position/force arrays, fills the mass array mc (all 1.0 when mass
+ * weighting is off), determines the pivot/center, shifts the reference
+ * positions to the origin for the non-flexible types, and prepares the slab
+ * data for flexible-axis potentials. */
+static void init_rot_group(FILE *fplog,t_commrec *cr,int g,t_rotgrp *rotg,
+        rvec *x,gmx_mtop_t *mtop,gmx_bool bVerbose,FILE *out_slabs, matrix box,
+        gmx_bool bOutputCenters)
+{
+    int i,ii;
+    rvec coord,*xdum;
+    gmx_bool bFlex,bColl;
+    t_atom *atom;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    int ref_firstindex, ref_lastindex;
++    gmx_mtop_atomlookup_t alook=NULL;
+    real mass,totalmass;
+    real start=0.0;
+
+
+    /* Do we have a flexible axis? */
+    bFlex = ISFLEX(rotg);
+    /* Do we use a global set of coordinates? */
+    bColl = ISCOLL(rotg);
+
+    erg=rotg->enfrotgrp;
+
+    /* Allocate space for collective coordinates if needed */
+    if (bColl)
+    {
+        snew(erg->xc , rotg->nat);
+        snew(erg->xc_shifts , rotg->nat);
+        snew(erg->xc_eshifts, rotg->nat);
+
+        /* Save the original (whole) set of positions such that later the
+         * molecule can always be made whole again */
+        snew(erg->xc_old , rotg->nat);
+        if (MASTER(cr))
+        {
+            /* Choose the PBC image of each atom nearest to its reference */
+            for (i=0; i<rotg->nat; i++)
+            {
+                ii = rotg->ind[i];
+                copy_correct_pbc_image(x[ii], erg->xc_old[i],rotg->x_ref[i],box,3);
+            }
+        }
+#ifdef GMX_MPI
+        if (PAR(cr))
+            gmx_bcast(rotg->nat*sizeof(erg->xc_old[0]),erg->xc_old, cr);
+#endif
+
+        if (rotg->eFittype == erotgFitNORM)
+        {
+            snew(erg->xc_ref_length, rotg->nat); /* in case fit type NORM is chosen */
+            snew(erg->xc_norm , rotg->nat);
+        }
+    }
+    else
+    {
+        snew(erg->xr_loc , rotg->nat);
+        snew(erg->x_loc_pbc, rotg->nat);
+    }
+
+    snew(erg->f_rot_loc , rotg->nat);
+    snew(erg->xc_ref_ind, rotg->nat);
+
+    /* Make space for the calculation of the potential at other angles (used
+     * for fitting only) */
+    if (erotgFitPOT == rotg->eFittype)
+    {
+        snew(erg->PotAngleFit, 1);
+        snew(erg->PotAngleFit->degangle, rotg->PotAngle_nstep);
+        snew(erg->PotAngleFit->V , rotg->PotAngle_nstep);
+        snew(erg->PotAngleFit->rotmat , rotg->PotAngle_nstep);
+
+        /* Get the set of angles around the reference angle */
+        start = -0.5 * (rotg->PotAngle_nstep - 1)*rotg->PotAngle_step;
+        for (i = 0; i < rotg->PotAngle_nstep; i++)
+            erg->PotAngleFit->degangle[i] = start + i*rotg->PotAngle_step;
+    }
+    else
+    {
+        erg->PotAngleFit = NULL;
+    }
+
+    /* xc_ref_ind needs to be set to identity in the serial case */
+    if (!PAR(cr))
+        for (i=0; i<rotg->nat; i++)
+            erg->xc_ref_ind[i] = i;
+
+    /* Copy the masses so that the center can be determined. For all types of
+     * enforced rotation, we store the masses in the erg->mc array. */
++    if (rotg->bMassW)
++    {
++        alook = gmx_mtop_atomlookup_init(mtop);
++    }
+    snew(erg->mc, rotg->nat);
+    if (bFlex)
+        snew(erg->mc_sorted, rotg->nat);
+    if (!bColl)
+        snew(erg->m_loc, rotg->nat);
+    totalmass=0.0;
+    for (i=0; i<rotg->nat; i++)
+    {
+        if (rotg->bMassW)
+        {
++            gmx_mtop_atomnr_to_atom(alook,rotg->ind[i],&atom);
+            mass=atom->m;
+        }
+        else
+        {
+            /* No mass weighting: every atom counts the same */
+            mass=1.0;
+        }
+        erg->mc[i] = mass;
+        totalmass += mass;
+    }
+    erg->invmass = 1.0/totalmass;
+
++    if (rotg->bMassW)
++    {
++        gmx_mtop_atomlookup_destroy(alook);
++    }
++
+    /* Set xc_ref_center for any rotation potential */
+    if ((rotg->eType==erotgISO) || (rotg->eType==erotgPM) || (rotg->eType==erotgRM) || (rotg->eType==erotgRM2))
+    {
+        /* Set the pivot point for the fixed, stationary-axis potentials. This
+         * won't change during the simulation */
+        copy_rvec(rotg->pivot, erg->xc_ref_center);
+        copy_rvec(rotg->pivot, erg->xc_center );
+    }
+    else
+    {
+        /* Center of the reference positions */
+        get_center(rotg->x_ref, erg->mc, rotg->nat, erg->xc_ref_center);
+
+        /* Center of the actual positions */
+        if (MASTER(cr))
+        {
+            snew(xdum, rotg->nat);
+            for (i=0; i<rotg->nat; i++)
+            {
+                ii = rotg->ind[i];
+                copy_rvec(x[ii], xdum[i]);
+            }
+            get_center(xdum, erg->mc, rotg->nat, erg->xc_center);
+            sfree(xdum);
+        }
+#ifdef GMX_MPI
+        if (PAR(cr))
+            gmx_bcast(sizeof(erg->xc_center), erg->xc_center, cr);
+#endif
+    }
+
+    if ( (rotg->eType != erotgFLEX) && (rotg->eType != erotgFLEX2) )
+    {
+        /* Put the reference positions into origin: */
+        for (i=0; i<rotg->nat; i++)
+            rvec_dec(rotg->x_ref[i], erg->xc_ref_center);
+    }
+
+    /* Enforced rotation with flexible axis */
+    if (bFlex)
+    {
+        /* Calculate maximum beta value from minimum gaussian (performance opt.) */
+        erg->max_beta = calc_beta_max(rotg->min_gaussian, rotg->slab_dist);
+
+        /* Determine the smallest and largest coordinate with respect to the rotation vector */
+        get_firstlast_atom_ref(rotg, &ref_firstindex, &ref_lastindex);
+
+        /* From the extreme coordinates of the reference group, determine the first
+         * and last slab of the reference. */
+        get_firstlast_slab_ref(rotg, erg->mc, ref_firstindex, ref_lastindex);
+
+        /* Allocate memory for the slabs */
+        allocate_slabs(rotg, fplog, g, bVerbose);
+
+        /* Flexible rotation: determine the reference centers for the rest of the simulation */
+        erg->slab_first = erg->slab_first_ref;
+        erg->slab_last = erg->slab_last_ref;
+        get_slab_centers(rotg,rotg->x_ref,erg->mc,g,-1,out_slabs,bOutputCenters,TRUE);
+
+        /* Length of each x_rotref vector from center (needed if fit routine NORM is chosen): */
+        if (rotg->eFittype == erotgFitNORM)
+        {
+            for (i=0; i<rotg->nat; i++)
+            {
+                rvec_sub(rotg->x_ref[i], erg->xc_ref_center, coord);
+                erg->xc_ref_length[i] = norm(coord);
+            }
+        }
+    }
+}
+
+
+/* Rebuild the local atom index lists of all rotation groups after a domain
+ * decomposition repartitioning, using the global-to-local atom lookup. */
+extern void dd_make_local_rotation_groups(gmx_domdec_t *dd,t_rot *rot)
+{
+    gmx_ga2la_t ga2la;
+    int g;
+    t_rotgrp *rotg;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+
+    ga2la = dd->ga2la;
+
+    for(g=0; g<rot->ngrp; g++)
+    {
+        rotg = &rot->grp[g];
+        erg = rotg->enfrotgrp;
+
+
+        /* Updates erg->nat_loc/ind_loc (growing ind_loc as needed) and the
+         * mapping xc_ref_ind from local to group-global indices */
+        dd_make_local_group_indices(ga2la,rotg->nat,rotg->ind,
+                &erg->nat_loc,&erg->ind_loc,&erg->nalloc_loc,erg->xc_ref_ind);
+    }
+}
+
+
+/* Calculate the size of the MPI buffer needed in reduce_output() */
+/* Returns the total number of values to reduce, summed over all rotation
+ * groups: 4 scalars per group, plus one torque per reference slab for
+ * flexible groups, plus one potential per fit angle for fit type POT. */
+static int calc_mpi_bufsize(t_rot *rot)
+{
+    int g;
+    int count_group, count_total;
+    t_rotgrp *rotg;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+
+
+    count_total = 0;
+    for (g=0; g<rot->ngrp; g++)
+    {
+        rotg = &rot->grp[g];
+        erg = rotg->enfrotgrp;
+
+        /* Count the items that are transferred for this group: */
+        count_group = 4; /* V, torque, angle, weight */
+
+        /* Add the maximum number of slabs for flexible groups */
+        if (ISFLEX(rotg))
+            count_group += erg->slab_last_ref - erg->slab_first_ref + 1;
+
+        /* Add space for the potentials at different angles: */
+        if (erotgFitPOT == rotg->eFittype)
+            count_group += rotg->PotAngle_nstep;
+
+        /* Add to the total number: */
+        count_total += count_group;
+    }
+
+    return count_total;
+}
+
+
+/* Top-level initialization of the enforced rotation module: allocates the
+ * shared enforced-rotation buffers, opens the output files (master only),
+ * makes the molecules whole (master only, broadcast later per group), and
+ * calls init_rot_group() for every rotation group in the input record. */
+extern void init_rot(FILE *fplog,t_inputrec *ir,int nfile,const t_filenm fnm[],
+        t_commrec *cr, rvec *x, matrix box, gmx_mtop_t *mtop, const output_env_t oenv,
+        gmx_bool bVerbose, unsigned long Flags)
+{
+    t_rot *rot;
+    t_rotgrp *rotg;
+    int g;
+    int nat_max=0; /* Size of biggest rotation group */
+    gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    rvec *x_pbc=NULL; /* Space for the pbc-correct atom positions */
+
+
+    if ( (PAR(cr)) && !DOMAINDECOMP(cr) )
+        gmx_fatal(FARGS, "Enforced rotation is only implemented for domain decomposition!");
+
+    if ( MASTER(cr) && bVerbose)
+        fprintf(stdout, "%s Initializing ...\n", RotStr);
+
+    rot = ir->rot;
+    snew(rot->enfrot, 1);
+    er = rot->enfrot;
+    er->Flags = Flags;
+
+    /* When appending, skip first output to avoid duplicate entries in the data files */
+    if (er->Flags & MD_APPENDFILES)
+        er->bOut = FALSE;
+    else
+        er->bOut = TRUE;
+
+    if ( MASTER(cr) && er->bOut )
+        please_cite(fplog, "Kutzner2011");
+
+    /* Output every step for reruns */
+    if (er->Flags & MD_RERUN)
+    {
+        if (NULL != fplog)
+            fprintf(fplog, "%s rerun - will write rotation output every available step.\n", RotStr);
+        rot->nstrout = 1;
+        rot->nstsout = 1;
+    }
+
+    er->out_slabs = NULL;
+    if ( MASTER(cr) && HaveFlexibleGroups(rot) )
+        er->out_slabs = open_slab_out(opt2fn("-rs",nfile,fnm), rot, oenv);
+
+    if (MASTER(cr))
+    {
+        /* Remove pbc, make molecule whole.
+         * When ir->bContinuation=TRUE this has already been done, but ok. */
+        snew(x_pbc,mtop->natoms);
+        m_rveccopy(mtop->natoms,x,x_pbc);
+        do_pbc_first_mtop(NULL,ir->ePBC,box,mtop,x_pbc);
+        /* All molecules will be whole now, but not necessarily in the home box.
+         * Additionally, if a rotation group consists of more than one molecule
+         * (e.g. two strands of DNA), each one of them can end up in a different
+         * periodic box. This is taken care of in init_rot_group. */
+    }
+
+    for (g=0; g<rot->ngrp; g++)
+    {
+        rotg = &rot->grp[g];
+
+        if (NULL != fplog)
+            fprintf(fplog,"%s group %d type '%s'\n", RotStr, g, erotg_names[rotg->eType]);
+
+        if (rotg->nat > 0)
+        {
+            /* Allocate space for the rotation group's data: */
+            snew(rotg->enfrotgrp, 1);
+            erg = rotg->enfrotgrp;
+
+            nat_max=max(nat_max, rotg->nat);
+
+            if (PAR(cr))
+            {
+                /* Local lists are built later by dd_make_local_rotation_groups() */
+                erg->nat_loc = 0;
+                erg->nalloc_loc = 0;
+                erg->ind_loc = NULL;
+            }
+            else
+            {
+                /* Serial: all group atoms are local */
+                erg->nat_loc = rotg->nat;
+                erg->ind_loc = rotg->ind;
+            }
+            init_rot_group(fplog,cr,g,rotg,x_pbc,mtop,bVerbose,er->out_slabs,box,
+                    !(er->Flags & MD_APPENDFILES) ); /* Do not output the reference centers
+                                                      * again if we are appending */
+        }
+    }
+
+    /* Allocate space for enforced rotation buffer variables */
+    er->bufsize = nat_max;
+    snew(er->data, nat_max);
+    snew(er->xbuf, nat_max);
+    snew(er->mbuf, nat_max);
+
+    /* Buffers for MPI reducing torques, angles, weights (for each group), and V */
+    if (PAR(cr))
+    {
+        er->mpi_bufsize = calc_mpi_bufsize(rot) + 100; /* larger to catch errors */
+        snew(er->mpi_inbuf , er->mpi_bufsize);
+        snew(er->mpi_outbuf, er->mpi_bufsize);
+    }
+    else
+    {
+        er->mpi_bufsize = 0;
+        er->mpi_inbuf = NULL;
+        er->mpi_outbuf = NULL;
+    }
+
+    /* Only do I/O on the MASTER */
+    er->out_angles = NULL;
+    er->out_rot = NULL;
+    er->out_torque = NULL;
+    if (MASTER(cr))
+    {
+        er->out_rot = open_rot_out(opt2fn("-ro",nfile,fnm), rot, oenv);
+
+        if (rot->nstsout > 0)
+        {
+            if ( HaveFlexibleGroups(rot) || HavePotFitGroups(rot) )
+                er->out_angles = open_angles_out(opt2fn("-ra",nfile,fnm), rot, oenv);
+            if ( HaveFlexibleGroups(rot) )
+                er->out_torque = open_torque_out(opt2fn("-rt",nfile,fnm), rot, oenv);
+        }
+
+        /* x_pbc was only allocated on the master (see above) */
+        sfree(x_pbc);
+    }
+}
+
+
+/* Close all enforced-rotation output files that were opened in init_rot()
+ * (each pointer is NULL on non-master ranks or when the file was not used). */
+extern void finish_rot(FILE *fplog,t_rot *rot)
+{
+    gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */
+
+
+    er=rot->enfrot;
+    if (er->out_rot)
+        gmx_fio_fclose(er->out_rot);
+    if (er->out_slabs)
+        gmx_fio_fclose(er->out_slabs);
+    if (er->out_angles)
+        gmx_fio_fclose(er->out_angles);
+    if (er->out_torque)
+        gmx_fio_fclose(er->out_torque);
+}
+
+
+/* Rotate the local reference positions and store them in
+ * erg->xr_loc[0...(nat_loc-1)]
+ *
+ * Note that we already subtracted u or y_c from the reference positions
+ * in init_rot_group().
+ */
+static void rotate_local_reference(t_rotgrp *rotg)
+{
+    gmx_enfrotgrp_t erg;
+    int i,ii;
+
+
+    erg=rotg->enfrotgrp;
+
+    for (i=0; i<erg->nat_loc; i++)
+    {
+        /* Index of this rotation group atom with respect to the whole rotation group */
+        ii = erg->xc_ref_ind[i];
+        /* Rotate: xr_loc[i] = Omega * x_ref[ii], with Omega the rotation
+         * matrix for the current angle (erg->rotmat) */
+        mvmul(erg->rotmat, rotg->x_ref[ii], erg->xr_loc[i]);
+    }
+}
+
+
+/* Select the PBC representation for each local x position and store that
+ * for later usage. We assume the right PBC image of an x is the one nearest to
+ * its rotated reference */
+/* Fills erg->x_loc_pbc for all local atoms; must run after
+ * rotate_local_reference() so that erg->xr_loc holds the rotated references. */
+static void choose_pbc_image(rvec x[], t_rotgrp *rotg, matrix box, int npbcdim)
+{
+    int i,ii;
+    gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+    rvec xref;
+
+
+    erg=rotg->enfrotgrp;
+
+    for (i=0; i<erg->nat_loc; i++)
+    {
+        /* Index of a rotation group atom */
+        ii = erg->ind_loc[i];
+
+        /* Get the reference position. The pivot was already
+         * subtracted in init_rot_group() from the reference positions. Also,
+         * the reference positions have already been rotated in
+         * rotate_local_reference() */
+        copy_rvec(erg->xr_loc[i], xref);
+
+        copy_correct_pbc_image(x[ii],erg->x_loc_pbc[i], xref, box, npbcdim);
+    }
+}
+
+
+/* Driver for the enforced rotation potentials, called once per MD step.
+ * First performs ALL communication / position preparation for every
+ * rotation group, then (in a second loop) evaluates each group's rotation
+ * potential according to its type and writes the requested output.
+ * cr: communication record, ir: input record, box: simulation box,
+ * x: position array, t: time, step: MD step number, wcycle: wall cycle
+ * counters, bNS: TRUE on neighbor-searching / DD repartitioning steps. */
+extern void do_rotation(
+ t_commrec *cr,
+ t_inputrec *ir,
+ matrix box,
+ rvec x[],
+ real t,
+ gmx_large_int_t step,
+ gmx_wallcycle_t wcycle,
+ gmx_bool bNS)
+{
+ int g,i,ii;
+ t_rot *rot;
+ t_rotgrp *rotg;
+ gmx_bool outstep_slab, outstep_rot;
+ gmx_bool bFlex,bColl;
+ gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */
+ gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */
+ rvec transvec;
+ t_gmx_potfit *fit=NULL; /* For fit type 'potential' determine the fit
+ angle via the potential minimum */
+
+ /* Enforced rotation cycle counting: */
+ gmx_cycles_t cycles_comp; /* Cycles for the enf. rotation computation
+ only, does not count communication. This
+ counter is used for load-balancing */
+
+#ifdef TAKETIME
+ double t0;
+#endif
+
+ rot=ir->rot;
+ er=rot->enfrot;
+
+ /* When to output in main rotation output file */
+ outstep_rot = do_per_step(step, rot->nstrout) && er->bOut;
+ /* When to output per-slab data */
+ outstep_slab = do_per_step(step, rot->nstsout) && er->bOut;
+
+ /* Output time into rotation output file */
+ if (outstep_rot && MASTER(cr))
+ fprintf(er->out_rot, "%12.3e",t);
+
+ /**************************************************************************/
+ /* First do ALL the communication! */
+ for(g=0; g<rot->ngrp; g++)
+ {
+ rotg = &rot->grp[g];
+ erg=rotg->enfrotgrp;
+
+ /* Do we have a flexible axis? */
+ bFlex = ISFLEX(rotg);
+ /* Do we use a collective (global) set of coordinates? */
+ bColl = ISCOLL(rotg);
+
+ /* Calculate the rotation matrix for this angle: */
+ erg->degangle = rotg->rate * t;
+ calc_rotmat(rotg->vec,erg->degangle,erg->rotmat);
+
+ if (bColl)
+ {
+ /* Transfer the rotation group's positions such that every node has
+ * all of them. Every node contributes its local positions x and stores
+ * it in the collective erg->xc array. */
+ communicate_group_positions(cr,erg->xc, erg->xc_shifts, erg->xc_eshifts, bNS,
+ x, rotg->nat, erg->nat_loc, erg->ind_loc, erg->xc_ref_ind, erg->xc_old, box);
+ }
+ else
+ {
+ /* Fill the local masses array;
+ * this array changes in DD/neighborsearching steps */
+ if (bNS)
+ {
+ for (i=0; i<erg->nat_loc; i++)
+ {
+ /* Index of local atom w.r.t. the collective rotation group */
+ ii = erg->xc_ref_ind[i];
+ erg->m_loc[i] = erg->mc[ii];
+ }
+ }
+
+ /* Calculate Omega*(y_i-y_c) for the local positions */
+ rotate_local_reference(rotg);
+
+ /* Choose the nearest PBC images of the group atoms with respect
+ * to the rotated reference positions */
+ choose_pbc_image(x, rotg, box, 3);
+
+ /* Get the center of the rotation group */
+ if ( (rotg->eType==erotgISOPF) || (rotg->eType==erotgPMPF) )
+ get_center_comm(cr, erg->x_loc_pbc, erg->m_loc, erg->nat_loc, rotg->nat, erg->xc_center);
+ }
+
+ } /* End of loop over rotation groups */
+
+ /**************************************************************************/
+ /* Done communicating, we can start to count cycles for the load balancing now ... */
+ cycles_comp = gmx_cycles_read();
+
+
+#ifdef TAKETIME
+ t0 = MPI_Wtime();
+#endif
+
+ /* Second loop: evaluate the potential for each rotation group */
+ for(g=0; g<rot->ngrp; g++)
+ {
+ rotg = &rot->grp[g];
+ erg=rotg->enfrotgrp;
+
+ bFlex = ISFLEX(rotg);
+ bColl = ISCOLL(rotg);
+
+ if (outstep_rot && MASTER(cr))
+ fprintf(er->out_rot, "%12.4f", erg->degangle);
+
+ /* Calculate angles and rotation matrices for potential fitting: */
+ if ( (outstep_rot || outstep_slab) && (erotgFitPOT == rotg->eFittype) )
+ {
+ fit = erg->PotAngleFit;
+ for (i = 0; i < rotg->PotAngle_nstep; i++)
+ {
+ calc_rotmat(rotg->vec, erg->degangle + fit->degangle[i], fit->rotmat[i]);
+
+ /* Clear value from last step */
+ erg->PotAngleFit->V[i] = 0.0;
+ }
+ }
+
+ /* Clear values from last time step */
+ erg->V = 0.0;
+ erg->torque_v = 0.0;
+ erg->angle_v = 0.0;
+ erg->weight_v = 0.0;
+
+ /* Dispatch on the rotation potential type of this group */
+ switch(rotg->eType)
+ {
+ case erotgISO:
+ case erotgISOPF:
+ case erotgPM:
+ case erotgPMPF:
+ do_fixed(rotg,x,box,t,step,outstep_rot,outstep_slab);
+ break;
+ case erotgRM:
+ do_radial_motion(rotg,x,box,t,step,outstep_rot,outstep_slab);
+ break;
+ case erotgRMPF:
+ do_radial_motion_pf(rotg,x,box,t,step,outstep_rot,outstep_slab);
+ break;
+ case erotgRM2:
+ case erotgRM2PF:
+ do_radial_motion2(rotg,x,box,t,step,outstep_rot,outstep_slab);
+ break;
+ case erotgFLEXT:
+ case erotgFLEX2T:
+ /* Subtract the center of the rotation group from the collective positions array
+ * Also store the center in erg->xc_center since it needs to be subtracted
+ * in the low level routines from the local coordinates as well */
+ get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center);
+ svmul(-1.0, erg->xc_center, transvec);
+ translate_x(erg->xc, rotg->nat, transvec);
+ do_flexible(MASTER(cr),er,rotg,g,x,box,t,step,outstep_rot,outstep_slab);
+ break;
+ case erotgFLEX:
+ case erotgFLEX2:
+ /* Do NOT subtract the center of mass in the low level routines! */
+ clear_rvec(erg->xc_center);
+ do_flexible(MASTER(cr),er,rotg,g,x,box,t,step,outstep_rot,outstep_slab);
+ break;
+ default:
+ gmx_fatal(FARGS, "No such rotation potential.");
+ break;
+ }
+ }
+
+#ifdef TAKETIME
+ if (MASTER(cr))
+ fprintf(stderr, "%s calculation (step %d) took %g seconds.\n", RotStr, step, MPI_Wtime()-t0);
+#endif
+
+ /* Stop the enforced rotation cycle counter and add the computation-only
+ * cycles to the force cycles for load balancing */
+ cycles_comp = gmx_cycles_read() - cycles_comp;
+
+ if (DOMAINDECOMP(cr) && wcycle)
+ dd_cycles_add(cr->dd,cycles_comp,ddCyclF);
+}
--- /dev/null
- gmx_mtop_atomnr_to_atom(mtop,qm->indexQM[i],&atom);
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "sysstuff.h"
+#include "typedefs.h"
+#include "macros.h"
+#include "smalloc.h"
+#include "physics.h"
+#include "macros.h"
+#include "vec.h"
+#include "force.h"
+#include "invblock.h"
+#include "confio.h"
+#include "names.h"
+#include "network.h"
+#include "pbc.h"
+#include "ns.h"
+#include "nrnb.h"
+#include "bondf.h"
+#include "mshift.h"
+#include "txtdump.h"
+#include "copyrite.h"
+#include "qmmm.h"
+#include <stdio.h>
+#include <string.h>
+#include "gmx_fatal.h"
+#include "typedefs.h"
+#include <stdlib.h>
+#include "mtop_util.h"
+
+
+/* declarations of the interfaces to the QM packages. The _SH indicate
+ * the QM interfaces can be used for Surface Hopping simulations
+ */
+#ifdef GMX_QMMM_GAMESS
+/* GAMESS interface */
+
+void
+init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
+
+real
+call_gamess(t_commrec *cr,t_forcerec *fr,
+ t_QMrec *qm, t_MMrec *mm,rvec f[], rvec fshift[]);
+
+#elif defined GMX_QMMM_MOPAC
+/* MOPAC interface */
+
+void
+init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
+
+real
+call_mopac(t_commrec *cr,t_forcerec *fr, t_QMrec *qm,
+ t_MMrec *mm,rvec f[], rvec fshift[]);
+
+real
+call_mopac_SH(t_commrec *cr,t_forcerec *fr,t_QMrec *qm,
+ t_MMrec *mm,rvec f[], rvec fshift[]);
+
+#elif defined GMX_QMMM_GAUSSIAN
+/* GAUSSIAN interface */
+
+void
+init_gaussian(t_commrec *cr ,t_QMrec *qm, t_MMrec *mm);
+
+real
+call_gaussian_SH(t_commrec *cr,t_forcerec *fr,t_QMrec *qm,
+ t_MMrec *mm,rvec f[], rvec fshift[]);
+
+real
+call_gaussian(t_commrec *cr,t_forcerec *fr, t_QMrec *qm,
+ t_MMrec *mm,rvec f[], rvec fshift[]);
+
+#elif defined GMX_QMMM_ORCA
+/* ORCA interface */
+
+void
+init_orca(t_commrec *cr ,t_QMrec *qm, t_MMrec *mm);
+
+real
+call_orca(t_commrec *cr,t_forcerec *fr, t_QMrec *qm,
+ t_MMrec *mm,rvec f[], rvec fshift[]);
+
+#endif
+
+
+
+
+/* this struct and these comparison functions are needed for creating
+ * a QMMM input for the QM routines from the QMMM neighbor list.
+ */
+
+typedef struct {
+ int j; /* atom index of this j-particle */
+ int shift; /* PBC shift index belonging to this particle */
+} t_j_particle;
+
+/* qsort comparator: orders t_j_particle entries by ascending atom index j */
+static int struct_comp(const void *a, const void *b){
+
+ return (int)(((t_j_particle *)a)->j)-(int)(((t_j_particle *)b)->j);
+
+} /* struct_comp */
+
+/* qsort comparator for plain ints, ascending */
+static int int_comp(const void *a,const void *b){
+
+ return (*(int *)a) - (*(int *)b);
+
+} /* int_comp */
+
+/* qsort comparator: orders t_QMrec layers by ascending number of QM atoms */
+static int QMlayer_comp(const void *a, const void *b){
+
+ return (int)(((t_QMrec *)a)->nrQMatoms)-(int)(((t_QMrec *)b)->nrQMatoms);
+
+} /* QMlayer_comp */
+
+real call_QMroutine(t_commrec *cr, t_forcerec *fr, t_QMrec *qm,
+ t_MMrec *mm, rvec f[], rvec fshift[])
+{
+ /* makes a call to the requested QM routine (qm->QMmethod) and
+ * returns the QM energy it computes.
+ * Note that f is actually the gradient, i.e. -f
+ */
+ real
+ QMener=0.0;
+
+ /* do a semi-empirical calculation */
+
+ if (qm->QMmethod<eQMmethodRHF && !(mm->nrMMatoms))
+ {
+#ifdef GMX_QMMM_MOPAC
+ if (qm->bSH)
+ QMener = call_mopac_SH(cr,fr,qm,mm,f,fshift);
+ else
+ QMener = call_mopac(cr,fr,qm,mm,f,fshift);
+#else
+ gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
+#endif
+ }
+ else
+ {
+ /* do an ab-initio calculation */
+ if (qm->bSH && qm->QMmethod==eQMmethodCASSCF)
+ {
+#ifdef GMX_QMMM_GAUSSIAN
+ QMener = call_gaussian_SH(cr,fr,qm,mm,f,fshift);
+#else
+ gmx_fatal(FARGS,"Ab-initio Surface-hopping only supported with Gaussian.");
+#endif
+ }
+ else
+ {
+#ifdef GMX_QMMM_GAMESS
+ QMener = call_gamess(cr,fr,qm,mm,f,fshift);
+#elif defined GMX_QMMM_GAUSSIAN
+ QMener = call_gaussian(cr,fr,qm,mm,f,fshift);
+#elif defined GMX_QMMM_ORCA
+ QMener = call_orca(cr,fr,qm,mm,f,fshift);
+#else
+ gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
+#endif
+ }
+ }
+ return (QMener);
+}
+
+void init_QMroutine(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
+{
+ /* makes a call to the initialization routine of the requested QM
+ * package (selected via qm->QMmethod and compile-time defines)
+ */
+ if (qm->QMmethod<eQMmethodRHF){
+#ifdef GMX_QMMM_MOPAC
+ /* do a semi-empirical calculation */
+ init_mopac(cr,qm,mm);
+#else
+ gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
+#endif
+ }
+ else
+ {
+ /* do an ab-initio calculation */
+#ifdef GMX_QMMM_GAMESS
+ init_gamess(cr,qm,mm);
+#elif defined GMX_QMMM_GAUSSIAN
+ init_gaussian(cr,qm,mm);
+#elif defined GMX_QMMM_ORCA
+ init_orca(cr,qm,mm);
+#else
+ gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
+#endif
+ }
+} /* init_QMroutine */
+
+void update_QMMM_coord(rvec x[],t_forcerec *fr, t_QMrec *qm, t_MMrec *mm)
+{
+ /* shifts the QM and MM particles into the central box and stores
+ * these shifted coordinates in the coordinate arrays of the
+ * QMMMrec. These coordinates are passed on to the QM subroutines.
+ */
+ int
+ i;
+
+ /* shift the QM atoms into the central box
+ */
+ for(i=0;i<qm->nrQMatoms;i++){
+ rvec_sub(x[qm->indexQM[i]],fr->shift_vec[qm->shiftQM[i]],qm->xQM[i]);
+ }
+ /* also shift the MM atoms into the central box, if any
+ */
+ for(i=0;i<mm->nrMMatoms;i++){
+ rvec_sub(x[mm->indexMM[i]],fr->shift_vec[mm->shiftMM[i]],mm->xMM[i]);
+ }
+} /* update_QMMM_coord */
+
+static void punch_QMMM_excl(t_QMrec *qm,t_MMrec *mm,t_blocka *excls)
+{
+ /* punch a file ("QMMMexcl.dat") containing the bonded interactions
+ * of each QM atom with MM atoms. These need to be excluded in the
+ * QM routines. Only needed in case of QM/MM optimizations
+ */
+ FILE
+ *out=NULL;
+ int
+ i,j,k,nrexcl=0,*excluded=NULL,max=0;
+
+
+ /* NOTE(review): fopen result is not checked; a failure here would
+ * crash in the fprintf below - confirm whether that is acceptable */
+ out = fopen("QMMMexcl.dat","w");
+
+ /* this can be done more efficiently I think
+ */
+ for(i=0;i<qm->nrQMatoms;i++){
+ nrexcl = 0;
+ for(j=excls->index[qm->indexQM[i]];
+ j<excls->index[qm->indexQM[i]+1];
+ j++){
+ for(k=0;k<mm->nrMMatoms;k++){
+ if(mm->indexMM[k]==excls->a[j]){/* the excluded MM atom */
+ if(nrexcl >= max){
+ max += 1000;
+ srenew(excluded,max);
+ }
+ excluded[nrexcl++]=k;
+ /* NOTE(review): this 'continue' has no effect as the last
+ * statement of the loop body; 'break' was presumably
+ * intended (each excls->a[j] matches at most one MM index) */
+ continue;
+ }
+ }
+ }
+ /* write to file: */
+ fprintf(out,"%5d %5d\n",i+1,nrexcl);
+ for(j=0;j<nrexcl;j++){
+ fprintf(out,"%5d ",excluded[j]);
+ }
+ fprintf(out,"\n");
+ }
+ free(excluded);
+ fclose(out);
+} /* punch_QMMM_excl */
+
+
+/* end of QMMM subroutines */
+
+/* QMMM core routines */
+
+/* allocates and returns a zero-initialized t_QMrec (snew zero-fills) */
+t_QMrec *mk_QMrec(void){
+ t_QMrec *qm;
+ snew(qm,1);
+ return qm;
+} /* mk_QMrec */
+
+/* allocates and returns a zero-initialized t_MMrec (snew zero-fills) */
+t_MMrec *mk_MMrec(void){
+ t_MMrec *mm;
+ snew(mm,1);
+ return mm;
+} /* mk_MMrec */
+
+static void init_QMrec(int grpnr, t_QMrec *qm,int nr, int *atomarray,
+ gmx_mtop_t *mtop, t_inputrec *ir)
+{
+ /* fills the t_QMrec struct of QM group grpnr:
+ * copies the nr atom indices from atomarray, looks up atomic numbers
+ * and electron count from the topology, and takes the QM settings
+ * (method, basis, charge, SH/CAS parameters) from ir->opts[grpnr]
+ */
+ int i;
++ gmx_mtop_atomlookup_t alook;
+ t_atom *atom;
+
+
+ qm->nrQMatoms = nr;
+ snew(qm->xQM,nr);
+ snew(qm->indexQM,nr);
+ snew(qm->shiftQM,nr); /* the shifts */
+ for(i=0;i<nr;i++){
+ qm->indexQM[i]=atomarray[i];
+ }
+
++ alook = gmx_mtop_atomlookup_init(mtop);
++
+ snew(qm->atomicnumberQM,nr);
+ for (i=0;i<qm->nrQMatoms;i++){
- gmx_mtop_atomnr_to_atom(mtop,qm_arr[k],&atom);
++ gmx_mtop_atomnr_to_atom(alook,qm->indexQM[i],&atom);
+ qm->nelectrons += mtop->atomtypes.atomnumber[atom->type];
+ qm->atomicnumberQM[i] = mtop->atomtypes.atomnumber[atom->type];
+ }
++
++ gmx_mtop_atomlookup_destroy(alook);
++
+ qm->QMcharge = ir->opts.QMcharge[grpnr];
+ qm->multiplicity = ir->opts.QMmult[grpnr];
+ qm->nelectrons -= ir->opts.QMcharge[grpnr];
+
+ qm->QMmethod = ir->opts.QMmethod[grpnr];
+ qm->QMbasis = ir->opts.QMbasis[grpnr];
+ /* trajectory surface hopping setup (Gaussian only) */
+ qm->bSH = ir->opts.bSH[grpnr];
+ qm->CASorbitals = ir->opts.CASorbitals[grpnr];
+ qm->CASelectrons = ir->opts.CASelectrons[grpnr];
+ qm->SAsteps = ir->opts.SAsteps[grpnr];
+ qm->SAon = ir->opts.SAon[grpnr];
+ qm->SAoff = ir->opts.SAoff[grpnr];
+ /* hack to prevent gaussian from reinitializing all the time */
+ qm->nQMcpus = 0; /* number of CPU's to be used by g01, is set
+ * upon initializing gaussian
+ * (init_gaussian()
+ */
+ /* print the current layer to allow users to check their input */
+ fprintf(stderr,"Layer %d\nnr of QM atoms %d\n",grpnr,nr);
+ fprintf(stderr,"QMlevel: %s/%s\n\n",
+ eQMmethod_names[qm->QMmethod],eQMbasis_names[qm->QMbasis]);
+
+ /* frontier atoms */
+ snew(qm->frontatoms,nr);
+ /* Lennard-Jones coefficients */
+ snew(qm->c6,nr);
+ snew(qm->c12,nr);
+ /* do we optimize the QM separately using the algorithms of the QM program??
+ */
+ qm->bTS = ir->opts.bTS[grpnr];
+ qm->bOPT = ir->opts.bOPT[grpnr];
+
+} /* init_QMrec */
+
+t_QMrec *copy_QMrec(t_QMrec *qm)
+{
+ /* copies the contents of qm into a newly allocated t_QMrec struct
+ * and returns it (caller owns the new record) */
+ t_QMrec
+ *qmcopy;
+ int
+ i;
+
+ qmcopy = mk_QMrec();
+ qmcopy->nrQMatoms = qm->nrQMatoms;
+ snew(qmcopy->xQM,qmcopy->nrQMatoms);
+ snew(qmcopy->indexQM,qmcopy->nrQMatoms);
+ snew(qmcopy->atomicnumberQM,qm->nrQMatoms);
+ snew(qmcopy->shiftQM,qmcopy->nrQMatoms); /* the shifts */
+ for (i=0;i<qmcopy->nrQMatoms;i++){
+ qmcopy->shiftQM[i] = qm->shiftQM[i];
+ qmcopy->indexQM[i] = qm->indexQM[i];
+ qmcopy->atomicnumberQM[i] = qm->atomicnumberQM[i];
+ }
+ qmcopy->nelectrons = qm->nelectrons;
+ qmcopy->multiplicity = qm->multiplicity;
+ qmcopy->QMcharge = qm->QMcharge;
+ /* NOTE(review): nelectrons is copied twice (also three lines above) */
+ qmcopy->nelectrons = qm->nelectrons;
+ qmcopy->QMmethod = qm->QMmethod;
+ qmcopy->QMbasis = qm->QMbasis;
+ /* trajectory surface hopping setup (Gaussian only) */
+ qmcopy->bSH = qm->bSH;
+ qmcopy->CASorbitals = qm->CASorbitals;
+ qmcopy->CASelectrons = qm->CASelectrons;
+ qmcopy->SAsteps = qm->SAsteps;
+ qmcopy->SAon = qm->SAon;
+ qmcopy->SAoff = qm->SAoff;
+ /* NOTE(review): qm->bTS is never copied here, yet qmcopy->bTS is
+ * tested below - presumably it stays at its mk_QMrec default; confirm */
+ qmcopy->bOPT = qm->bOPT;
+
+ /* Gaussian init. variables */
+ qmcopy->nQMcpus = qm->nQMcpus;
+ for(i=0;i<DIM;i++)
+ qmcopy->SHbasis[i] = qm->SHbasis[i];
+ qmcopy->QMmem = qm->QMmem;
+ qmcopy->accuracy = qm->accuracy;
+ qmcopy->cpmcscf = qm->cpmcscf;
+ qmcopy->SAstep = qm->SAstep;
+ snew(qmcopy->frontatoms,qm->nrQMatoms);
+ snew(qmcopy->c12,qmcopy->nrQMatoms);
+ snew(qmcopy->c6,qmcopy->nrQMatoms);
+ if(qmcopy->bTS||qmcopy->bOPT){
+ /* NOTE(review): this loop starts at i=1, so element 0 of
+ * frontatoms/c12/c6 is never copied - looks like an off-by-one */
+ for(i=1;i<qmcopy->nrQMatoms;i++){
+ qmcopy->frontatoms[i] = qm->frontatoms[i];
+ qmcopy->c12[i] = qm->c12[i];
+ qmcopy->c6[i] = qm->c6[i];
+ }
+ }
+
+ return(qmcopy);
+
+} /*copy_QMrec */
+
+/* allocates and returns a zero-initialized t_QMMMrec (snew zero-fills) */
+t_QMMMrec *mk_QMMMrec(void)
+{
+
+ t_QMMMrec *qr;
+
+ snew(qr,1);
+
+ return qr;
+
+} /* mk_QMMMrec */
+
+void init_QMMMrec(t_commrec *cr,
+ matrix box,
+ gmx_mtop_t *mtop,
+ t_inputrec *ir,
+ t_forcerec *fr)
+{
+ /* we put the atom numbers of atoms that belong to the QMMM group in
+ * an array that will be copied later to QMMMrec->indexQM[..]. Also
+ * it will be used to create an QMMMrec->bQMMM index array that
+ * simply contains true/false for QM and MM (the other) atoms.
+ */
+
+ gmx_groups_t *groups;
+ atom_id *qm_arr=NULL,vsite,ai,aj;
+ int qm_max=0,qm_nr=0,i,j,jmax,k,l,nrvsite2=0;
+ t_QMMMrec *qr;
+ t_MMrec *mm;
+ t_iatom *iatoms;
+ real c12au,c6au;
+ gmx_mtop_atomloop_all_t aloop;
+ t_atom *atom;
+ gmx_mtop_ilistloop_all_t iloop;
+ int a_offset;
+ t_ilist *ilist_mol;
++ gmx_mtop_atomlookup_t alook;
+
+ /* conversion factors from atomic units to GROMACS units for C6/C12 */
+ c6au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6));
+ c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12));
+ /* issue a fatal if the user wants to run with more than one node */
+ if ( PAR(cr)) gmx_fatal(FARGS,"QM/MM does not work in parallel, use a single node instead\n");
+
+ /* Make a local copy of the QMMMrec */
+ qr = fr->qr;
+
+ /* bQMMM[..] is an array containing TRUE/FALSE for atoms that are
+ * QM/not QM. We first set all elements at false. Afterwards we use
+ * the qm_arr (=MMrec->indexQM) to change the elements
+ * corresponding to the QM atoms to TRUE. */
+
+ qr->QMMMscheme = ir->QMMMscheme;
+
+ /* we take the possibility into account that a user has
+ * defined more than one QM group:
+ */
+ /* an ugly work-around in case there is only one group In this case
+ * the whole system is treated as QM. Otherwise the second group is
+ * always the rest of the total system and is treated as MM.
+ */
+
+ /* small problem if there is only QM.... so no MM */
+
+ jmax = ir->opts.ngQM;
+
+ if(qr->QMMMscheme==eQMMMschemeoniom)
+ qr->nrQMlayers = jmax;
+ else
+ qr->nrQMlayers = 1;
+
+ groups = &mtop->groups;
+
+ /* there are jmax groups of QM atoms. In case of multiple QM groups
+ * I assume that the users wants to do ONIOM. However, maybe it
+ * should also be possible to define more than one QM subsystem with
+ * independent neighbourlists. I have to think about
+ * that.. 11-11-2003
+ */
+ snew(qr->qm,jmax);
+ for(j=0;j<jmax;j++){
+ /* new layer: collect all atoms belonging to QM/MM group j */
+ aloop = gmx_mtop_atomloop_all_init(mtop);
+ while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+ if(qm_nr >= qm_max){
+ qm_max += 1000;
+ srenew(qm_arr,qm_max);
+ }
+ if (ggrpnr(groups,egcQMMM ,i) == j) {
+ /* hack for tip4p */
+ qm_arr[qm_nr++] = i;
+ }
+ }
+ if(qr->QMMMscheme==eQMMMschemeoniom){
+ /* add the atoms to the bQMMM array
+ */
+
+ /* I assume that users specify the QM groups from small to
+ * big(ger) in the mdp file
+ */
+ qr->qm[j] = mk_QMrec();
+ /* we need to throw out link atoms that in the previous layer
+ * existed to separate this QMlayer from the previous
+ * QMlayer. We use the iatoms array in the idef for that
+ * purpose. If all atoms defining the current Link Atom (Dummy2)
+ * are part of the current QM layer it needs to be removed from
+ * qm_arr[]. */
+
+ iloop = gmx_mtop_ilistloop_all_init(mtop);
+ while (gmx_mtop_ilistloop_all_next(iloop,&ilist_mol,&a_offset)) {
+ nrvsite2 = ilist_mol[F_VSITE2].nr;
+ iatoms = ilist_mol[F_VSITE2].iatoms;
+
+ for(k=0; k<nrvsite2; k+=4) {
+ vsite = a_offset + iatoms[k+1]; /* the vsite */
+ ai = a_offset + iatoms[k+2]; /* constructing atom */
+ aj = a_offset + iatoms[k+3]; /* constructing atom */
+ if (ggrpnr(groups, egcQMMM, vsite) == ggrpnr(groups, egcQMMM, ai)
+ &&
+ ggrpnr(groups, egcQMMM, vsite) == ggrpnr(groups, egcQMMM, aj)) {
+ /* this dummy link atom needs to be removed from the qm_arr
+ * before making the QMrec of this layer!
+ */
+ for(i=0;i<qm_nr;i++){
+ if(qm_arr[i]==vsite){
+ /* drop the element */
+ for(l=i;l<qm_nr;l++){
+ qm_arr[l]=qm_arr[l+1];
+ }
+ qm_nr--;
+ }
+ }
+ }
+ }
+ }
+
+ /* store QM atoms in this layer in the QMrec and initialise layer
+ */
+ init_QMrec(j,qr->qm[j],qm_nr,qm_arr,mtop,ir);
+
+ /* we now store the LJ C6 and C12 parameters in QM rec in case
+ * we need to do an optimization
+ */
+ if(qr->qm[j]->bOPT || qr->qm[j]->bTS){
+ for(i=0;i<qm_nr;i++){
+ /* NOTE(review): 'atom' here still points at the last atom
+ * visited by the atom loop above, not at qm_arr[i] - verify */
+ qr->qm[j]->c6[i] = C6(fr->nbfp,mtop->ffparams.atnr,
+ atom->type,atom->type)/c6au;
+ qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
+ atom->type,atom->type)/c12au;
+ }
+ }
+ /* now we check for frontier QM atoms. These occur in pairs that
+ * construct the vsite
+ */
+ iloop = gmx_mtop_ilistloop_all_init(mtop);
+ while (gmx_mtop_ilistloop_all_next(iloop,&ilist_mol,&a_offset)) {
+ nrvsite2 = ilist_mol[F_VSITE2].nr;
+ iatoms = ilist_mol[F_VSITE2].iatoms;
+
+ for(k=0; k<nrvsite2; k+=4){
+ vsite = a_offset + iatoms[k+1]; /* the vsite */
+ ai = a_offset + iatoms[k+2]; /* constructing atom */
+ aj = a_offset + iatoms[k+3]; /* constructing atom */
+ if(ggrpnr(groups,egcQMMM,ai) < (groups->grps[egcQMMM].nr-1) &&
+ (ggrpnr(groups,egcQMMM,aj) >= (groups->grps[egcQMMM].nr-1))){
+ /* mark ai as frontier atom */
+ for(i=0;i<qm_nr;i++){
+ if( (qm_arr[i]==ai) || (qm_arr[i]==vsite) ){
+ qr->qm[j]->frontatoms[i]=TRUE;
+ }
+ }
+ }
+ else if(ggrpnr(groups,egcQMMM,aj) < (groups->grps[egcQMMM].nr-1) &&
+ (ggrpnr(groups,egcQMMM,ai) >= (groups->grps[egcQMMM].nr-1))){
+ /* mark aj as frontier atom */
+ for(i=0;i<qm_nr;i++){
+ if( (qm_arr[i]==aj) || (qm_arr[i]==vsite)){
+ qr->qm[j]->frontatoms[i]=TRUE;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ if(qr->QMMMscheme!=eQMMMschemeoniom){
+
+ /* standard QMMM, all layers are merged together so there is one QM
+ * subsystem and one MM subsystem.
+ * Also we set the charges to zero in the md->charge arrays to prevent
+ * the innerloops from doubly counting the electrostatic QM MM interaction
+ */
++
++ alook = gmx_mtop_atomlookup_init(mtop);
++
+ for (k=0;k<qm_nr;k++){
- gmx_mtop_atomnr_to_atom(mtop,qm_arr[i],&atom);
++ gmx_mtop_atomnr_to_atom(alook,qm_arr[k],&atom);
+ atom->q = 0.0;
+ atom->qB = 0.0;
+ }
+ qr->qm[0] = mk_QMrec();
+ /* store QM atoms in the QMrec and initialise
+ */
+ init_QMrec(0,qr->qm[0],qm_nr,qm_arr,mtop,ir);
+ if(qr->qm[0]->bOPT || qr->qm[0]->bTS){
+ for(i=0;i<qm_nr;i++){
- gmx_mtop_atomnr_to_ilist(mtop,qm_arr[i],&ilist_mol,&a_offset);
++ gmx_mtop_atomnr_to_atom(alook,qm_arr[i],&atom);
+ qr->qm[0]->c6[i] = C6(fr->nbfp,mtop->ffparams.atnr,
+ atom->type,atom->type)/c6au;
+ qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
+ atom->type,atom->type)/c12au;
+ }
+
+ }
+
+
+
+ /* find frontier atoms and mark them true in the frontieratoms array.
+ */
+ for(i=0;i<qm_nr;i++) {
-
++ gmx_mtop_atomnr_to_ilist(alook,qm_arr[i],&ilist_mol,&a_offset);
+ nrvsite2 = ilist_mol[F_VSITE2].nr;
+ iatoms = ilist_mol[F_VSITE2].iatoms;
+
+ for(k=0;k<nrvsite2;k+=4){
+ vsite = a_offset + iatoms[k+1]; /* the vsite */
+ ai = a_offset + iatoms[k+2]; /* constructing atom */
+ aj = a_offset + iatoms[k+3]; /* constructing atom */
+ if(ggrpnr(groups,egcQMMM,ai) < (groups->grps[egcQMMM].nr-1) &&
+ (ggrpnr(groups,egcQMMM,aj) >= (groups->grps[egcQMMM].nr-1))){
+ /* mark ai as frontier atom */
+ if ( (qm_arr[i]==ai) || (qm_arr[i]==vsite) ){
+ qr->qm[0]->frontatoms[i]=TRUE;
+ }
+ }
+ else if (ggrpnr(groups,egcQMMM,aj) < (groups->grps[egcQMMM].nr-1) &&
+ (ggrpnr(groups,egcQMMM,ai) >=(groups->grps[egcQMMM].nr-1))) {
+ /* mark aj as frontier atom */
+ if ( (qm_arr[i]==aj) || (qm_arr[i]==vsite) ){
+ qr->qm[0]->frontatoms[i]=TRUE;
+ }
+ }
+ }
+ }
++
++ gmx_mtop_atomlookup_destroy(alook);
++
+ /* MM rec creation */
+ mm = mk_MMrec();
+ mm->scalefactor = ir->scalefactor;
+ mm->nrMMatoms = (mtop->natoms)-(qr->qm[0]->nrQMatoms); /* rest of the atoms */
+ qr->mm = mm;
+ } else {/* ONIOM */
+ /* MM rec creation */
+ mm = mk_MMrec();
+ mm->scalefactor = ir->scalefactor;
+ mm->nrMMatoms = 0;
+ qr->mm = mm;
+ }
+
+ /* these variables get updated in the update QMMMrec */
+
+ if(qr->nrQMlayers==1){
+ /* with only one layer there is only one initialisation
+ * needed. Multilayer is a bit more complicated as it requires
+ * re-initialisation at every step of the simulation. This is due
+ * to the use of COMMON blocks in the fortran QM subroutines.
+ */
+ if (qr->qm[0]->QMmethod<eQMmethodRHF)
+ {
+#ifdef GMX_QMMM_MOPAC
+ /* semi-empirical 1-layer ONIOM calculation requested (mopac93) */
+ init_mopac(cr,qr->qm[0],qr->mm);
+#else
+ gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
+#endif
+ }
+ else
+ {
+ /* ab initio calculation requested (gamess/gaussian/ORCA) */
+#ifdef GMX_QMMM_GAMESS
+ init_gamess(cr,qr->qm[0],qr->mm);
+#elif defined GMX_QMMM_GAUSSIAN
+ init_gaussian(cr,qr->qm[0],qr->mm);
+#elif defined GMX_QMMM_ORCA
+ init_orca(cr,qr->qm[0],qr->mm);
+#else
+ gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
+#endif
+ }
+ }
+} /* init_QMMMrec */
+
+void update_QMMMrec(t_commrec *cr,
+ t_forcerec *fr,
+ rvec x[],
+ t_mdatoms *md,
+ matrix box,
+ gmx_localtop_t *top)
+{
+ /* updates the coordinates of both QM atoms and MM atoms and stores
+ * them in the QMMMrec.
+ *
+ * NOTE: is NOT yet working if there are no PBC. Also in ns.c, simple
+ * ns needs to be fixed!
+ */
+ int
+ mm_max=0,mm_nr=0,mm_nr_new,i,j,is,k,shift;
+ t_j_particle
+ *mm_j_particles=NULL,*qm_i_particles=NULL;
+ t_QMMMrec
+ *qr;
+ t_nblist
+ QMMMlist;
+ rvec
+ dx,crd;
+ int
+ *MMatoms;
+ t_QMrec
+ *qm;
+ t_MMrec
+ *mm;
+ t_pbc
+ pbc;
+ int
+ *parallelMMarray=NULL;
+ real
+ c12au,c6au;
+
+ /* conversion factors from atomic units to GROMACS units for C6/C12 */
+ c6au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6));
+ c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12));
+
+ /* every cpu has this array. On every processor we fill this array
+ * with 1's and 0's. 1's indicate the atoms is a QM atom on the
+ * current cpu in a later stage these arrays are all summed. indexes
+ * > 0 indicate the atom is a QM atom. Every node therefore knows
+ * which atoms are part of the QM subsystem.
+ */
+ /* copy some pointers */
+ qr = fr->qr;
+ mm = qr->mm;
+ QMMMlist = fr->QMMMlist;
+
+
+
+ /* init_pbc(box); needs to be called first, see pbc.h */
+ set_pbc_dd(&pbc,fr->ePBC,DOMAINDECOMP(cr) ? cr->dd : NULL,FALSE,box);
+ /* only in standard (normal) QMMM we need the neighbouring MM
+ * particles to provide an electric field of point charges for the QM
+ * atoms.
+ */
+ if(qr->QMMMscheme==eQMMMschemenormal){ /* also implies 1 QM-layer */
+ /* we NOW create/update a number of QMMMrec entries:
+ *
+ * 1) the shiftQM, containing the shifts of the QM atoms
+ *
+ * 2) the indexMM array, containing the index of the MM atoms
+ *
+ * 3) the shiftMM, containing the shifts of the MM atoms
+ *
+ * 4) the shifted coordinates of the MM atoms
+ *
+ * the shifts are used for computing virial of the QM/MM particles.
+ */
+ qm = qr->qm[0]; /* in case of normal QMMM, there is only one group */
+ snew(qm_i_particles,QMMMlist.nri);
+ if(QMMMlist.nri){
+ qm_i_particles[0].shift = XYZ2IS(0,0,0);
+ for(i=0;i<QMMMlist.nri;i++){
+ qm_i_particles[i].j = QMMMlist.iinr[i];
+
+ if(i){
+ qm_i_particles[i].shift = pbc_dx_aiuc(&pbc,x[QMMMlist.iinr[0]],
+ x[QMMMlist.iinr[i]],dx);
+
+ }
+ /* However, since nri >= nrQMatoms, we do a quicksort, and throw
+ * out double, triple, etc. entries later, as we do for the MM
+ * list too.
+ */
+
+ /* compute the shift for the MM j-particles with respect to
+ * the QM i-particle and store them.
+ */
+
+ crd[0] = IS2X(QMMMlist.shift[i]) + IS2X(qm_i_particles[i].shift);
+ crd[1] = IS2Y(QMMMlist.shift[i]) + IS2Y(qm_i_particles[i].shift);
+ crd[2] = IS2Z(QMMMlist.shift[i]) + IS2Z(qm_i_particles[i].shift);
+ is = XYZ2IS(crd[0],crd[1],crd[2]);
+ for(j=QMMMlist.jindex[i];
+ j<QMMMlist.jindex[i+1];
+ j++){
+ if(mm_nr >= mm_max){
+ mm_max += 1000;
+ srenew(mm_j_particles,mm_max);
+ }
+
+ mm_j_particles[mm_nr].j = QMMMlist.jjnr[j];
+ mm_j_particles[mm_nr].shift = is;
+ mm_nr++;
+ }
+ }
+
+ /* quicksort QM and MM shift arrays and throw away multiple entries */
+
+
+
+ qsort(qm_i_particles,QMMMlist.nri,
+ (size_t)sizeof(qm_i_particles[0]),
+ struct_comp);
+ qsort(mm_j_particles,mm_nr,
+ (size_t)sizeof(mm_j_particles[0]),
+ struct_comp);
+ /* remove multiples in the QM shift array, since in init_QMMM() we
+ * went through the atom numbers from 0 to md.nr, the order sorted
+ * here matches the one of QMindex already.
+ */
+ j=0;
+ for(i=0;i<QMMMlist.nri;i++){
+ if (i==0 || qm_i_particles[i].j!=qm_i_particles[i-1].j){
+ qm_i_particles[j++] = qm_i_particles[i];
+ }
+ }
+ mm_nr_new = 0;
+ if(qm->bTS||qm->bOPT){
+ /* only remove double entries for the MM array */
+ for(i=0;i<mm_nr;i++){
+ if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
+ && !md->bQM[mm_j_particles[i].j]){
+ mm_j_particles[mm_nr_new++] = mm_j_particles[i];
+ }
+ }
+ }
+ /* we also remove mm atoms that have no charges!
+ * actually this is already done in the ns.c
+ */
+ else{
+ for(i=0;i<mm_nr;i++){
+ if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
+ && !md->bQM[mm_j_particles[i].j]
+ && (md->chargeA[mm_j_particles[i].j]
+ || (md->chargeB && md->chargeB[mm_j_particles[i].j]))) {
+ mm_j_particles[mm_nr_new++] = mm_j_particles[i];
+ }
+ }
+ }
+ mm_nr = mm_nr_new;
+ /* store the data retrieved above into the QMMMrec
+ */
+ k=0;
+ /* Keep the compiler happy,
+ * shift will always be set in the loop for i=0
+ */
+ shift = 0;
+ for(i=0;i<qm->nrQMatoms;i++){
+ /* not all qm particles might have appeared as i
+ * particles. They might have been part of the same charge
+ * group for instance.
+ */
+ if (qm->indexQM[i] == qm_i_particles[k].j) {
+ shift = qm_i_particles[k++].shift;
+ }
+ /* use previous shift, assuming they belong the same charge
+ * group anyway,
+ */
+
+ qm->shiftQM[i] = shift;
+ }
+ }
+ /* parallel execution */
+ if(PAR(cr)){
+ snew(parallelMMarray,2*(md->nr));
+ /* only MM particles have a 1 at their atomnumber. The second part
+ * of the array contains the shifts. Thus:
+ * p[i]=1/0 depending on whether atomnumber i is a MM particle in the QM
+ * step or not. p[i+md->nr] is the shift of atomnumber i.
+ */
+ for(i=0;i<2*(md->nr);i++){
+ parallelMMarray[i]=0;
+ }
+
+ for(i=0;i<mm_nr;i++){
+ parallelMMarray[mm_j_particles[i].j]=1;
+ parallelMMarray[mm_j_particles[i].j+(md->nr)]=mm_j_particles[i].shift;
+ }
+ gmx_sumi(md->nr,parallelMMarray,cr);
+ mm_nr=0;
+
+ mm_max = 0;
+ for(i=0;i<md->nr;i++){
+ if(parallelMMarray[i]){
+ if(mm_nr >= mm_max){
+ mm_max += 1000;
+ srenew(mm->indexMM,mm_max);
+ srenew(mm->shiftMM,mm_max);
+ }
+ mm->indexMM[mm_nr] = i;
+ mm->shiftMM[mm_nr++]= parallelMMarray[i+md->nr]/parallelMMarray[i];
+ }
+ }
+ mm->nrMMatoms=mm_nr;
+ free(parallelMMarray);
+ }
+ /* serial execution */
+ else{
+ mm->nrMMatoms = mm_nr;
+ srenew(mm->shiftMM,mm_nr);
+ srenew(mm->indexMM,mm_nr);
+ for(i=0;i<mm_nr;i++){
+ mm->indexMM[i]=mm_j_particles[i].j;
+ mm->shiftMM[i]=mm_j_particles[i].shift;
+ }
+
+ }
+ /* (re) allocate memory for the MM coordinate array. The QM
+ * coordinate array was already allocated in init_QMMM, and is
+ * only (re)filled in the update_QMMM_coordinates routine
+ */
+ srenew(mm->xMM,mm->nrMMatoms);
+ /* now we (re) fill the array that contains the MM charges with
+ * the forcefield charges. If requested, these charges will be
+ * scaled by a factor
+ */
+ srenew(mm->MMcharges,mm->nrMMatoms);
+ for(i=0;i<mm->nrMMatoms;i++){/* no free energy yet */
+ mm->MMcharges[i]=md->chargeA[mm->indexMM[i]]*mm->scalefactor;
+ }
+ if(qm->bTS||qm->bOPT){
+ /* store (copy) the c6 and c12 parameters into the MMrec struct
+ */
+ srenew(mm->c6,mm->nrMMatoms);
+ srenew(mm->c12,mm->nrMMatoms);
+ for (i=0;i<mm->nrMMatoms;i++){
+ mm->c6[i] = C6(fr->nbfp,top->idef.atnr,
+ md->typeA[mm->indexMM[i]],
+ md->typeA[mm->indexMM[i]])/c6au;
+ mm->c12[i] =C12(fr->nbfp,top->idef.atnr,
+ md->typeA[mm->indexMM[i]],
+ md->typeA[mm->indexMM[i]])/c12au;
+ }
+ punch_QMMM_excl(qr->qm[0],mm,&(top->excls));
+ }
+ /* the next routine fills the coordinate fields in the QMMM rec of
+ * both the quantum atoms and the MM atoms, using the shifts
+ * calculated above.
+ */
+
+ update_QMMM_coord(x,fr,qr->qm[0],qr->mm);
+ free(qm_i_particles);
+ free(mm_j_particles);
+ }
+ else { /* ONIOM */ /* ????? */
+ mm->nrMMatoms=0;
+ /* do for each layer */
+ for (j=0;j<qr->nrQMlayers;j++){
+ qm = qr->qm[j];
+ qm->shiftQM[0]=XYZ2IS(0,0,0);
+ for(i=1;i<qm->nrQMatoms;i++){
+ qm->shiftQM[i] = pbc_dx_aiuc(&pbc,x[qm->indexQM[0]],x[qm->indexQM[i]],
+ dx);
+ }
+ update_QMMM_coord(x,fr,qm,mm);
+ }
+ }
+} /* update_QMMM_rec */
+
+
+real calculate_QMMM(t_commrec *cr,
+ rvec x[],rvec f[],
+ t_forcerec *fr,
+ t_mdatoms *md)
+{
+ real
+ QMener=0.0;
+ /* a selection for the QM package depending on which is requested
+ * (Gaussian, GAMESS-UK, MOPAC or ORCA) needs to be implemented here. Now
+ * it works through defines.... Not so nice yet
+ */
+ t_QMMMrec
+ *qr;
+ t_QMrec
+ *qm,*qm2;
+ t_MMrec
+ *mm=NULL;
+ rvec
+ *forces=NULL,*fshift=NULL,
+ *forces2=NULL, *fshift2=NULL; /* needed for multilayer ONIOM */
+ int
+ i,j,k;
+ /* make a local copy the QMMMrec pointer
+ */
+ qr = fr->qr;
+ mm = qr->mm;
+
+ /* now different procedures are carried out for one layer ONION and
+ * normal QMMM on one hand and multilayer oniom on the other
+ */
+ if(qr->QMMMscheme==eQMMMschemenormal || qr->nrQMlayers==1){
+ qm = qr->qm[0];
+ snew(forces,(qm->nrQMatoms+mm->nrMMatoms));
+ snew(fshift,(qm->nrQMatoms+mm->nrMMatoms));
+ QMener = call_QMroutine(cr,fr,qm,mm,forces,fshift);
+ for(i=0;i<qm->nrQMatoms;i++){
+ for(j=0;j<DIM;j++){
+ f[qm->indexQM[i]][j] -= forces[i][j];
+ fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
+ }
+ }
+ for(i=0;i<mm->nrMMatoms;i++){
+ for(j=0;j<DIM;j++){
+ f[mm->indexMM[i]][j] -= forces[qm->nrQMatoms+i][j];
+ fr->fshift[mm->shiftMM[i]][j] += fshift[qm->nrQMatoms+i][j];
+ }
+
+ }
+ free(forces);
+ free(fshift);
+ }
+ else{ /* Multi-layer ONIOM */
+ for(i=0;i<qr->nrQMlayers-1;i++){ /* last layer is special */
+ qm = qr->qm[i];
+ qm2 = copy_QMrec(qr->qm[i+1]);
+
+ qm2->nrQMatoms = qm->nrQMatoms;
+
+ for(j=0;j<qm2->nrQMatoms;j++){
+ for(k=0;k<DIM;k++)
+ qm2->xQM[j][k] = qm->xQM[j][k];
+ qm2->indexQM[j] = qm->indexQM[j];
+ qm2->atomicnumberQM[j] = qm->atomicnumberQM[j];
+ qm2->shiftQM[j] = qm->shiftQM[j];
+ }
+
+ qm2->QMcharge = qm->QMcharge;
+ /* this layer at the higher level of theory */
+ srenew(forces,qm->nrQMatoms);
+ srenew(fshift,qm->nrQMatoms);
+ /* we need to re-initialize the QMroutine every step... */
+ init_QMroutine(cr,qm,mm);
+ QMener += call_QMroutine(cr,fr,qm,mm,forces,fshift);
+
+ /* this layer at the lower level of theory */
+ srenew(forces2,qm->nrQMatoms);
+ srenew(fshift2,qm->nrQMatoms);
+ init_QMroutine(cr,qm2,mm);
+ QMener -= call_QMroutine(cr,fr,qm2,mm,forces2,fshift2);
+ /* E = E1high-E1low The next layer includes the current layer at
+ * the lower level of theory, which provides + E2low
+ * this is similar for gradients
+ */
+ for(i=0;i<qm->nrQMatoms;i++){
+ for(j=0;j<DIM;j++){
+ f[qm->indexQM[i]][j] -= (forces[i][j]-forces2[i][j]);
+ fr->fshift[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]);
+ }
+ }
+ free(qm2);
+ }
+ /* now the last layer still needs to be done: */
+ qm = qr->qm[qr->nrQMlayers-1]; /* C counts from 0 */
+ init_QMroutine(cr,qm,mm);
+ srenew(forces,qm->nrQMatoms);
+ srenew(fshift,qm->nrQMatoms);
+ QMener += call_QMroutine(cr,fr,qm,mm,forces,fshift);
+ for(i=0;i<qm->nrQMatoms;i++){
+ for(j=0;j<DIM;j++){
+ f[qm->indexQM[i]][j] -= forces[i][j];
+ fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
+ }
+ }
+ free(forces);
+ free(fshift);
+ free(forces2);
+ free(fshift2);
+ }
+ if(qm->bTS||qm->bOPT){
+ /* qm[0] still contains the largest ONIOM QM subsystem
+ * we take the optimized coordiates and put the in x[]
+ */
+ for(i=0;i<qm->nrQMatoms;i++){
+ for(j=0;j<DIM;j++){
+ x[qm->indexQM[i]][j] = qm->xQM[i][j];
+ }
+ }
+ }
+ return(QMener);
+} /* calculate_QMMM */
+
+/* end of QMMM core routines */
--- /dev/null
- gmx_mtop_atomnr_to_atom(mtop,n1,&atom);
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2008, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include "typedefs.h"
+#include "smalloc.h"
+#include "gmx_fatal.h"
+#include "vec.h"
+#include "txtdump.h"
+#include "mdrun.h"
+#include "partdec.h"
+#include "mdatoms.h"
+#include "vsite.h"
+#include "network.h"
+#include "names.h"
+#include "constr.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "physics.h"
+#include "copyrite.h"
+#include "shellfc.h"
+#include "mtop_util.h"
+#include "chargegroup.h"
+#include "macros.h"
+
+
+typedef struct {
+ int nnucl;
+ atom_id shell; /* The shell id */
+ atom_id nucl1,nucl2,nucl3; /* The nuclei connected to the shell */
+ /* gmx_bool bInterCG; */ /* Coupled to nuclei outside cg? */
+ real k; /* force constant */
+ real k_1; /* 1 over force constant */
+ rvec xold;
+ rvec fold;
+ rvec step;
+} t_shell;
+
+typedef struct gmx_shellfc {
+ int nshell_gl; /* The number of shells in the system */
+ t_shell *shell_gl; /* All the shells (for DD only) */
+ int *shell_index_gl; /* Global shell index (for DD only) */
+ gmx_bool bInterCG; /* Are there inter charge-group shells? */
+ int nshell; /* The number of local shells */
+ t_shell *shell; /* The local shells */
+ int shell_nalloc; /* The allocation size of shell */
+ gmx_bool bPredict; /* Predict shell positions */
+ gmx_bool bRequireInit; /* Require initialization of shell positions */
+ int nflexcon; /* The number of flexible constraints */
+ rvec *x[2]; /* Array for iterative minimization */
+ rvec *f[2]; /* Array for iterative minimization */
+ int x_nalloc; /* The allocation size of x and f */
+ rvec *acc_dir; /* Acceleration direction for flexcon */
+ rvec *x_old; /* Old coordinates for flexcon */
+ int flex_nalloc; /* The allocation size of acc_dir and x_old */
+ rvec *adir_xnold; /* Work space for init_adir */
+ rvec *adir_xnew; /* Work space for init_adir */
+ int adir_nalloc; /* Work space for init_adir */
+} t_gmx_shellfc;
+
+
+static void pr_shell(FILE *fplog,int ns,t_shell s[])
+{
+ int i;
+
+ fprintf(fplog,"SHELL DATA\n");
+ fprintf(fplog,"%5s %8s %5s %5s %5s\n",
+ "Shell","Force k","Nucl1","Nucl2","Nucl3");
+ for(i=0; (i<ns); i++) {
+ fprintf(fplog,"%5d %8.3f %5d",s[i].shell,1.0/s[i].k_1,s[i].nucl1);
+ if (s[i].nnucl == 2)
+ fprintf(fplog," %5d\n",s[i].nucl2);
+ else if (s[i].nnucl == 3)
+ fprintf(fplog," %5d %5d\n",s[i].nucl2,s[i].nucl3);
+ else
+ fprintf(fplog,"\n");
+ }
+}
+
+static void predict_shells(FILE *fplog,rvec x[],rvec v[],real dt,
+ int ns,t_shell s[],
+ real mass[],gmx_mtop_t *mtop,gmx_bool bInit)
+{
+ int i,m,s1,n1,n2,n3;
+ real dt_1,dt_2,dt_3,fudge,tm,m1,m2,m3;
+ rvec *ptr;
++ gmx_mtop_atomlookup_t alook=NULL;
+ t_atom *atom;
++
++ if (mass == NULL) {
++ alook = gmx_mtop_atomlookup_init(mtop);
++ }
+
+ /* We introduce a fudge factor for performance reasons: with this choice
+ * the initial force on the shells is about a factor of two lower than
+ * without
+ */
+ fudge = 1.0;
+
+ if (bInit) {
+ if (fplog)
+ fprintf(fplog,"RELAX: Using prediction for initial shell placement\n");
+ ptr = x;
+ dt_1 = 1;
+ }
+ else {
+ ptr = v;
+ dt_1 = fudge*dt;
+ }
+
+ for(i=0; (i<ns); i++) {
+ s1 = s[i].shell;
+ if (bInit)
+ clear_rvec(x[s1]);
+ switch (s[i].nnucl) {
+ case 1:
+ n1 = s[i].nucl1;
+ for(m=0; (m<DIM); m++)
+ x[s1][m]+=ptr[n1][m]*dt_1;
+ break;
+ case 2:
+ n1 = s[i].nucl1;
+ n2 = s[i].nucl2;
+ if (mass) {
+ m1 = mass[n1];
+ m2 = mass[n2];
+ } else {
+ /* Not the correct masses with FE, but it is just a prediction... */
+ m1 = atom[n1].m;
+ m2 = atom[n2].m;
+ }
+ tm = dt_1/(m1+m2);
+ for(m=0; (m<DIM); m++)
+ x[s1][m]+=(m1*ptr[n1][m]+m2*ptr[n2][m])*tm;
+ break;
+ case 3:
+ n1 = s[i].nucl1;
+ n2 = s[i].nucl2;
+ n3 = s[i].nucl3;
+ if (mass) {
+ m1 = mass[n1];
+ m2 = mass[n2];
+ m3 = mass[n3];
+ } else {
+ /* Not the correct masses with FE, but it is just a prediction... */
- gmx_mtop_atomnr_to_atom(mtop,n2,&atom);
++ gmx_mtop_atomnr_to_atom(alook,n1,&atom);
+ m1 = atom->m;
- gmx_mtop_atomnr_to_atom(mtop,n3,&atom);
++ gmx_mtop_atomnr_to_atom(alook,n2,&atom);
+ m2 = atom->m;
- rvec *f,rvec *acc_dir,matrix box,
++ gmx_mtop_atomnr_to_atom(alook,n3,&atom);
+ m3 = atom->m;
+ }
+ tm = dt_1/(m1+m2+m3);
+ for(m=0; (m<DIM); m++)
+ x[s1][m]+=(m1*ptr[n1][m]+m2*ptr[n2][m]+m3*ptr[n3][m])*tm;
+ break;
+ default:
+ gmx_fatal(FARGS,"Shell %d has %d nuclei!",i,s[i].nnucl);
+ }
+ }
++
++ if (mass == NULL) {
++ gmx_mtop_atomlookup_destroy(alook);
++ }
+}
+
+gmx_shellfc_t init_shell_flexcon(FILE *fplog,
+ gmx_mtop_t *mtop,int nflexcon,
+ rvec *x)
+{
+ struct gmx_shellfc *shfc;
+ t_shell *shell;
+ int *shell_index=NULL,*at2cg;
+ t_atom *atom;
+ int n[eptNR],ns,nshell,nsi;
+ int i,j,nmol,type,mb,mt,a_offset,cg,mol,ftype,nra;
+ real qS,alpha;
+ int aS,aN=0; /* Shell and nucleus */
+ int bondtypes[] = { F_BONDS, F_HARMONIC, F_CUBICBONDS, F_POLARIZATION, F_ANHARM_POL, F_WATER_POL };
+#define NBT asize(bondtypes)
+ t_iatom *ia;
+ gmx_mtop_atomloop_block_t aloopb;
+ gmx_mtop_atomloop_all_t aloop;
+ gmx_ffparams_t *ffparams;
+ gmx_molblock_t *molb;
+ gmx_moltype_t *molt;
+ t_block *cgs;
+
+ /* Count number of shells, and find their indices */
+ for(i=0; (i<eptNR); i++) {
+ n[i] = 0;
+ }
+
+ aloopb = gmx_mtop_atomloop_block_init(mtop);
+ while (gmx_mtop_atomloop_block_next(aloopb,&atom,&nmol)) {
+ n[atom->ptype] += nmol;
+ }
+
+ if (fplog) {
+ /* Print the number of each particle type */
+ for(i=0; (i<eptNR); i++) {
+ if (n[i] != 0) {
+ fprintf(fplog,"There are: %d %ss\n",n[i],ptype_str[i]);
+ }
+ }
+ }
+
+ nshell = n[eptShell];
+
+ if (nshell == 0 && nflexcon == 0) {
+ return NULL;
+ }
+
+ snew(shfc,1);
+ shfc->nflexcon = nflexcon;
+
+ if (nshell == 0) {
+ return shfc;
+ }
+
+ /* We have shells: fill the shell data structure */
+
+ /* Global system sized array, this should be avoided */
+ snew(shell_index,mtop->natoms);
+
+ aloop = gmx_mtop_atomloop_all_init(mtop);
+ nshell = 0;
+ while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+ if (atom->ptype == eptShell) {
+ shell_index[i] = nshell++;
+ }
+ }
+
+ snew(shell,nshell);
+
+ /* Initiate the shell structures */
+ for(i=0; (i<nshell); i++) {
+ shell[i].shell = NO_ATID;
+ shell[i].nnucl = 0;
+ shell[i].nucl1 = NO_ATID;
+ shell[i].nucl2 = NO_ATID;
+ shell[i].nucl3 = NO_ATID;
+ /* shell[i].bInterCG=FALSE; */
+ shell[i].k_1 = 0;
+ shell[i].k = 0;
+ }
+
+ ffparams = &mtop->ffparams;
+
+ /* Now fill the structures */
+ shfc->bInterCG = FALSE;
+ ns = 0;
+ a_offset = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ molb = &mtop->molblock[mb];
+ molt = &mtop->moltype[molb->type];
+
+ cgs = &molt->cgs;
+ snew(at2cg,molt->atoms.nr);
+ for(cg=0; cg<cgs->nr; cg++) {
+ for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++) {
+ at2cg[i] = cg;
+ }
+ }
+
+ atom = molt->atoms.atom;
+ for(mol=0; mol<molb->nmol; mol++) {
+ for(j=0; (j<NBT); j++) {
+ ia = molt->ilist[bondtypes[j]].iatoms;
+ for(i=0; (i<molt->ilist[bondtypes[j]].nr); ) {
+ type = ia[0];
+ ftype = ffparams->functype[type];
+ nra = interaction_function[ftype].nratoms;
+
+ /* Check whether we have a bond with a shell */
+ aS = NO_ATID;
+
+ switch (bondtypes[j]) {
+ case F_BONDS:
+ case F_HARMONIC:
+ case F_CUBICBONDS:
+ case F_POLARIZATION:
+ case F_ANHARM_POL:
+ if (atom[ia[1]].ptype == eptShell) {
+ aS = ia[1];
+ aN = ia[2];
+ }
+ else if (atom[ia[2]].ptype == eptShell) {
+ aS = ia[2];
+ aN = ia[1];
+ }
+ break;
+ case F_WATER_POL:
+ aN = ia[4]; /* Dummy */
+ aS = ia[5]; /* Shell */
+ break;
+ default:
+ gmx_fatal(FARGS,"Death Horror: %s, %d",__FILE__,__LINE__);
+ }
+
+ if (aS != NO_ATID) {
+ qS = atom[aS].q;
+
+ /* Check whether one of the particles is a shell... */
+ nsi = shell_index[a_offset+aS];
+ if ((nsi < 0) || (nsi >= nshell))
+ gmx_fatal(FARGS,"nsi is %d should be within 0 - %d. aS = %d",
+ nsi,nshell,aS);
+ if (shell[nsi].shell == NO_ATID) {
+ shell[nsi].shell = a_offset + aS;
+ ns ++;
+ }
+ else if (shell[nsi].shell != a_offset+aS)
+ gmx_fatal(FARGS,"Weird stuff in %s, %d",__FILE__,__LINE__);
+
+ if (shell[nsi].nucl1 == NO_ATID) {
+ shell[nsi].nucl1 = a_offset + aN;
+ } else if (shell[nsi].nucl2 == NO_ATID) {
+ shell[nsi].nucl2 = a_offset + aN;
+ } else if (shell[nsi].nucl3 == NO_ATID) {
+ shell[nsi].nucl3 = a_offset + aN;
+ } else {
+ if (fplog)
+ pr_shell(fplog,ns,shell);
+ gmx_fatal(FARGS,"Can not handle more than three bonds per shell\n");
+ }
+ if (at2cg[aS] != at2cg[aN]) {
+ /* shell[nsi].bInterCG = TRUE; */
+ shfc->bInterCG = TRUE;
+ }
+
+ switch (bondtypes[j]) {
+ case F_BONDS:
+ case F_HARMONIC:
+ shell[nsi].k += ffparams->iparams[type].harmonic.krA;
+ break;
+ case F_CUBICBONDS:
+ shell[nsi].k += ffparams->iparams[type].cubic.kb;
+ break;
+ case F_POLARIZATION:
+ case F_ANHARM_POL:
+ if (!gmx_within_tol(qS, atom[aS].qB, GMX_REAL_EPS*10))
+ gmx_fatal(FARGS,"polarize can not be used with qA(%e) != qB(%e) for atom %d of molecule block %d", qS, atom[aS].qB, aS+1, mb+1);
+ shell[nsi].k += sqr(qS)*ONE_4PI_EPS0/
+ ffparams->iparams[type].polarize.alpha;
+ break;
+ case F_WATER_POL:
+ if (!gmx_within_tol(qS, atom[aS].qB, GMX_REAL_EPS*10))
+ gmx_fatal(FARGS,"water_pol can not be used with qA(%e) != qB(%e) for atom %d of molecule block %d", qS, atom[aS].qB, aS+1, mb+1);
+ alpha = (ffparams->iparams[type].wpol.al_x+
+ ffparams->iparams[type].wpol.al_y+
+ ffparams->iparams[type].wpol.al_z)/3.0;
+ shell[nsi].k += sqr(qS)*ONE_4PI_EPS0/alpha;
+ break;
+ default:
+ gmx_fatal(FARGS,"Death Horror: %s, %d",__FILE__,__LINE__);
+ }
+ shell[nsi].nnucl++;
+ }
+ ia += nra+1;
+ i += nra+1;
+ }
+ }
+ a_offset += molt->atoms.nr;
+ }
+ /* Done with this molecule type */
+ sfree(at2cg);
+ }
+
+ /* Verify whether it's all correct */
+ if (ns != nshell)
+ gmx_fatal(FARGS,"Something weird with shells. They may not be bonded to something");
+
+ for(i=0; (i<ns); i++)
+ shell[i].k_1 = 1.0/shell[i].k;
+
+ if (debug)
+ pr_shell(debug,ns,shell);
+
+
+ shfc->nshell_gl = ns;
+ shfc->shell_gl = shell;
+ shfc->shell_index_gl = shell_index;
+
+ shfc->bPredict = (getenv("GMX_NOPREDICT") == NULL);
+ shfc->bRequireInit = FALSE;
+ if (!shfc->bPredict) {
+ if (fplog)
+ fprintf(fplog,"\nWill never predict shell positions\n");
+ } else {
+ shfc->bRequireInit = (getenv("GMX_REQUIRE_SHELL_INIT") != NULL);
+ if (shfc->bRequireInit && fplog)
+ fprintf(fplog,"\nWill always initiate shell positions\n");
+ }
+
+ if (shfc->bPredict) {
+ if (x) {
+ predict_shells(fplog,x,NULL,0,shfc->nshell_gl,shfc->shell_gl,
+ NULL,mtop,TRUE);
+ }
+
+ if (shfc->bInterCG) {
+ if (fplog)
+ fprintf(fplog,"\nNOTE: there all shells that are connected to particles outside thier own charge group, will not predict shells positions during the run\n\n");
+ shfc->bPredict = FALSE;
+ }
+ }
+
+ return shfc;
+}
+
+void make_local_shells(t_commrec *cr,t_mdatoms *md,
+ struct gmx_shellfc *shfc)
+{
+ t_shell *shell;
+ int a0,a1,*ind,nshell,i;
+ gmx_domdec_t *dd=NULL;
+
+ if (PAR(cr)) {
+ if (DOMAINDECOMP(cr)) {
+ dd = cr->dd;
+ a0 = 0;
+ a1 = dd->nat_home;
+ } else {
+ pd_at_range(cr,&a0,&a1);
+ }
+ } else {
+ /* Single node: we need all shells, just copy the pointer */
+ shfc->nshell = shfc->nshell_gl;
+ shfc->shell = shfc->shell_gl;
+
+ return;
+ }
+
+ ind = shfc->shell_index_gl;
+
+ nshell = 0;
+ shell = shfc->shell;
+ for(i=a0; i<a1; i++) {
+ if (md->ptype[i] == eptShell) {
+ if (nshell+1 > shfc->shell_nalloc) {
+ shfc->shell_nalloc = over_alloc_dd(nshell+1);
+ srenew(shell,shfc->shell_nalloc);
+ }
+ if (dd) {
+ shell[nshell] = shfc->shell_gl[ind[dd->gatindex[i]]];
+ } else {
+ shell[nshell] = shfc->shell_gl[ind[i]];
+ }
+ /* With inter-cg shells we can no do shell prediction,
+ * so we do not need the nuclei numbers.
+ */
+ if (!shfc->bInterCG) {
+ shell[nshell].nucl1 = i + shell[nshell].nucl1 - shell[nshell].shell;
+ if (shell[nshell].nnucl > 1)
+ shell[nshell].nucl2 = i + shell[nshell].nucl2 - shell[nshell].shell;
+ if (shell[nshell].nnucl > 2)
+ shell[nshell].nucl3 = i + shell[nshell].nucl3 - shell[nshell].shell;
+ }
+ shell[nshell].shell = i;
+ nshell++;
+ }
+ }
+
+ shfc->nshell = nshell;
+ shfc->shell = shell;
+}
+
+static void do_1pos(rvec xnew,rvec xold,rvec f,real step)
+{
+ real xo,yo,zo;
+ real dx,dy,dz;
+
+ xo=xold[XX];
+ yo=xold[YY];
+ zo=xold[ZZ];
+
+ dx=f[XX]*step;
+ dy=f[YY]*step;
+ dz=f[ZZ]*step;
+
+ xnew[XX]=xo+dx;
+ xnew[YY]=yo+dy;
+ xnew[ZZ]=zo+dz;
+}
+
+static void do_1pos3(rvec xnew,rvec xold,rvec f,rvec step)
+{
+ real xo,yo,zo;
+ real dx,dy,dz;
+
+ xo=xold[XX];
+ yo=xold[YY];
+ zo=xold[ZZ];
+
+ dx=f[XX]*step[XX];
+ dy=f[YY]*step[YY];
+ dz=f[ZZ]*step[ZZ];
+
+ xnew[XX]=xo+dx;
+ xnew[YY]=yo+dy;
+ xnew[ZZ]=zo+dz;
+}
+
+static void directional_sd(FILE *log,rvec xold[],rvec xnew[],rvec acc_dir[],
+ int start,int homenr,real step)
+{
+ int i;
+
+ for(i=start; i<homenr; i++)
+ do_1pos(xnew[i],xold[i],acc_dir[i],step);
+}
+
+static void shell_pos_sd(FILE *log,rvec xcur[],rvec xnew[],rvec f[],
+ int ns,t_shell s[],int count)
+{
+ const real step_scale_min = 0.8,
+ step_scale_increment = 0.2,
+ step_scale_max = 1.2,
+ step_scale_multiple = (step_scale_max - step_scale_min) / step_scale_increment;
+ int i,shell,d;
+ real dx,df,k_est;
+#ifdef PRINT_STEP
+ real step_min,step_max;
+
+ step_min = 1e30;
+ step_max = 0;
+#endif
+ for(i=0; (i<ns); i++) {
+ shell = s[i].shell;
+ if (count == 1) {
+ for(d=0; d<DIM; d++) {
+ s[i].step[d] = s[i].k_1;
+#ifdef PRINT_STEP
+ step_min = min(step_min,s[i].step[d]);
+ step_max = max(step_max,s[i].step[d]);
+#endif
+ }
+ } else {
+ for(d=0; d<DIM; d++) {
+ dx = xcur[shell][d] - s[i].xold[d];
+ df = f[shell][d] - s[i].fold[d];
+ /* -dx/df gets used to generate an interpolated value, but would
+ * cause a NaN if df were binary-equal to zero. Values close to
+ * zero won't cause problems (because of the min() and max()), so
+ * just testing for binary inequality is OK. */
+ if (0.0 != df)
+ {
+ k_est = -dx/df;
+ /* Scale the step size by a factor interpolated from
+ * step_scale_min to step_scale_max, as k_est goes from 0 to
+ * step_scale_multiple * s[i].step[d] */
+ s[i].step[d] =
+ step_scale_min * s[i].step[d] +
+ step_scale_increment * min(step_scale_multiple * s[i].step[d], max(k_est, 0));
+ }
+ else
+ {
+ /* Here 0 == df */
+ if (gmx_numzero(dx)) /* 0 == dx */
+ {
+ /* Likely this will never happen, but if it does just
+ * don't scale the step. */
+ }
+ else /* 0 != dx */
+ {
+ s[i].step[d] *= step_scale_max;
+ }
+ }
+#ifdef PRINT_STEP
+ step_min = min(step_min,s[i].step[d]);
+ step_max = max(step_max,s[i].step[d]);
+#endif
+ }
+ }
+ copy_rvec(xcur[shell],s[i].xold);
+ copy_rvec(f[shell], s[i].fold);
+
+ do_1pos3(xnew[shell],xcur[shell],f[shell],s[i].step);
+
+ if (gmx_debug_at) {
+ fprintf(debug,"shell[%d] = %d\n",i,shell);
+ pr_rvec(debug,0,"fshell",f[shell],DIM,TRUE);
+ pr_rvec(debug,0,"xold",xcur[shell],DIM,TRUE);
+ pr_rvec(debug,0,"step",s[i].step,DIM,TRUE);
+ pr_rvec(debug,0,"xnew",xnew[shell],DIM,TRUE);
+ }
+ }
+#ifdef PRINT_STEP
+ printf("step %.3e %.3e\n",step_min,step_max);
+#endif
+}
+
+static void decrease_step_size(int nshell,t_shell s[])
+{
+ int i;
+
+ for(i=0; i<nshell; i++)
+ svmul(0.8,s[i].step,s[i].step);
+}
+
+static void print_epot(FILE *fp,gmx_large_int_t mdstep,int count,real epot,real df,
+ int ndir,real sf_dir)
+{
+ char buf[22];
+
+ fprintf(fp,"MDStep=%5s/%2d EPot: %12.8e, rmsF: %6.2e",
+ gmx_step_str(mdstep,buf),count,epot,df);
+ if (ndir)
+ fprintf(fp,", dir. rmsF: %6.2e\n",sqrt(sf_dir/ndir));
+ else
+ fprintf(fp,"\n");
+}
+
+
+static real rms_force(t_commrec *cr,rvec f[],int ns,t_shell s[],
+ int ndir,real *sf_dir,real *Epot)
+{
+ int i,shell,ntot;
+ double buf[4];
+
+ buf[0] = *sf_dir;
+ for(i=0; i<ns; i++) {
+ shell = s[i].shell;
+ buf[0] += norm2(f[shell]);
+ }
+ ntot = ns;
+
+ if (PAR(cr)) {
+ buf[1] = ntot;
+ buf[2] = *sf_dir;
+ buf[3] = *Epot;
+ gmx_sumd(4,buf,cr);
+ ntot = (int)(buf[1] + 0.5);
+ *sf_dir = buf[2];
+ *Epot = buf[3];
+ }
+ ntot += ndir;
+
+ return (ntot ? sqrt(buf[0]/ntot) : 0);
+}
+
+static void check_pbc(FILE *fp,rvec x[],int shell)
+{
+ int m,now;
+
+ now = shell-4;
+ for(m=0; (m<DIM); m++)
+ if (fabs(x[shell][m]-x[now][m]) > 0.3) {
+ pr_rvecs(fp,0,"SHELL-X",x+now,5);
+ break;
+ }
+}
+
+static void dump_shells(FILE *fp,rvec x[],rvec f[],real ftol,int ns,t_shell s[])
+{
+ int i,shell;
+ real ft2,ff2;
+
+ ft2 = sqr(ftol);
+
+ for(i=0; (i<ns); i++) {
+ shell = s[i].shell;
+ ff2 = iprod(f[shell],f[shell]);
+ if (ff2 > ft2)
+ fprintf(fp,"SHELL %5d, force %10.5f %10.5f %10.5f, |f| %10.5f\n",
+ shell,f[shell][XX],f[shell][YY],f[shell][ZZ],sqrt(ff2));
+ check_pbc(fp,x,shell);
+ }
+}
+
+static void init_adir(FILE *log,gmx_shellfc_t shfc,
+ gmx_constr_t constr,t_idef *idef,t_inputrec *ir,
+ t_commrec *cr,int dd_ac1,
+ gmx_large_int_t step,t_mdatoms *md,int start,int end,
+ rvec *x_old,rvec *x_init,rvec *x,
- x,xnold-start,NULL,box,
- lambda[efptBONDED],&(dvdlambda[efptBONDED]),NULL,NULL,nrnb,econqCoord,FALSE,0,0);
++ rvec *f,rvec *acc_dir,
++ gmx_bool bMolPBC,matrix box,
+ real *lambda,real *dvdlambda,t_nrnb *nrnb)
+{
+ rvec *xnold,*xnew;
+ double w_dt;
+ int gf,ga,gt;
+ real dt,scale;
+ int n,d;
+ unsigned short *ptype;
+ rvec p,dx;
+
+ if (DOMAINDECOMP(cr))
+ n = dd_ac1;
+ else
+ n = end - start;
+ if (n > shfc->adir_nalloc) {
+ shfc->adir_nalloc = over_alloc_dd(n);
+ srenew(shfc->adir_xnold,shfc->adir_nalloc);
+ srenew(shfc->adir_xnew ,shfc->adir_nalloc);
+ }
+ xnold = shfc->adir_xnold;
+ xnew = shfc->adir_xnew;
+
+ ptype = md->ptype;
+
+ dt = ir->delta_t;
+
+ /* Does NOT work with freeze or acceleration groups (yet) */
+ for (n=start; n<end; n++) {
+ w_dt = md->invmass[n]*dt;
+
+ for (d=0; d<DIM; d++) {
+ if ((ptype[n] != eptVSite) && (ptype[n] != eptShell)) {
+ xnold[n-start][d] = x[n][d] - (x_init[n][d] - x_old[n][d]);
+ xnew[n-start][d] = 2*x[n][d] - x_old[n][d] + f[n][d]*w_dt*dt;
+ } else {
+ xnold[n-start][d] = x[n][d];
+ xnew[n-start][d] = x[n][d];
+ }
+ }
+ }
+ constrain(log,FALSE,FALSE,constr,idef,ir,NULL,cr,step,0,md,
- x,xnew-start,NULL,box,
- lambda[efptBONDED],&(dvdlambda[efptBONDED]),NULL,NULL,nrnb,econqCoord,FALSE,0,0);
++ x,xnold-start,NULL,bMolPBC,box,
++ lambda[efptBONDED],&(dvdlambda[efptBONDED]),
++ NULL,NULL,nrnb,econqCoord,FALSE,0,0);
+ constrain(log,FALSE,FALSE,constr,idef,ir,NULL,cr,step,0,md,
- /* Set xnew to minus the acceleration */
++ x,xnew-start,NULL,bMolPBC,box,
++ lambda[efptBONDED],&(dvdlambda[efptBONDED]),
++ NULL,NULL,nrnb,econqCoord,FALSE,0,0);
+
- x_old,xnew-start,acc_dir,box,
- lambda[efptBONDED],&(dvdlambda[efptBONDED]),NULL,NULL,nrnb,econqDeriv_FlexCon,FALSE,0,0);
+ for (n=start; n<end; n++) {
+ for(d=0; d<DIM; d++)
+ xnew[n-start][d] =
+ -(2*x[n][d]-xnold[n-start][d]-xnew[n-start][d])/sqr(dt)
+ - f[n][d]*md->invmass[n];
+ clear_rvec(acc_dir[n]);
+ }
+
+ /* Project the acceleration on the old bond directions */
+ constrain(log,FALSE,FALSE,constr,idef,ir,NULL,cr,step,0,md,
- shfc->acc_dir-start,state->box,state->lambda,&dum,nrnb);
++ x_old,xnew-start,acc_dir,bMolPBC,box,
++ lambda[efptBONDED],&(dvdlambda[efptBONDED]),
++ NULL,NULL,nrnb,econqDeriv_FlexCon,FALSE,0,0);
+}
+
+int relax_shell_flexcon(FILE *fplog,t_commrec *cr,gmx_bool bVerbose,
+ gmx_large_int_t mdstep,t_inputrec *inputrec,
+ gmx_bool bDoNS,int force_flags,
+ gmx_bool bStopCM,
+ gmx_localtop_t *top,
+ gmx_mtop_t* mtop,
+ gmx_constr_t constr,
+ gmx_enerdata_t *enerd,t_fcdata *fcd,
+ t_state *state,rvec f[],
+ tensor force_vir,
+ t_mdatoms *md,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ t_graph *graph,
+ gmx_groups_t *groups,
+ struct gmx_shellfc *shfc,
+ t_forcerec *fr,
+ gmx_bool bBornRadii,
+ double t,rvec mu_tot,
+ int natoms,gmx_bool *bConverged,
+ gmx_vsite_t *vsite,
+ FILE *fp_field)
+{
+ int nshell;
+ t_shell *shell;
+ t_idef *idef;
+ rvec *pos[2],*force[2],*acc_dir=NULL,*x_old=NULL;
+ real Epot[2],df[2];
+ rvec dx;
+ real sf_dir,invdt;
+ real ftol,xiH,xiS,dum=0;
+ char sbuf[22];
+ gmx_bool bCont,bInit;
+ int nat,dd_ac0,dd_ac1=0,i;
+ int start=md->start,homenr=md->homenr,end=start+homenr,cg0,cg1;
+ int nflexcon,g,number_steps,d,Min=0,count=0;
+#define Try (1-Min) /* At start Try = 1 */
+
+ bCont = (mdstep == inputrec->init_step) && inputrec->bContinuation;
+ bInit = (mdstep == inputrec->init_step) || shfc->bRequireInit;
+ ftol = inputrec->em_tol;
+ number_steps = inputrec->niter;
+ nshell = shfc->nshell;
+ shell = shfc->shell;
+ nflexcon = shfc->nflexcon;
+
+ idef = &top->idef;
+
+ if (DOMAINDECOMP(cr)) {
+ nat = dd_natoms_vsite(cr->dd);
+ if (nflexcon > 0) {
+ dd_get_constraint_range(cr->dd,&dd_ac0,&dd_ac1);
+ nat = max(nat,dd_ac1);
+ }
+ } else {
+ nat = state->natoms;
+ }
+
+ if (nat > shfc->x_nalloc) {
+ /* Allocate local arrays */
+ shfc->x_nalloc = over_alloc_dd(nat);
+ for(i=0; (i<2); i++) {
+ srenew(shfc->x[i],shfc->x_nalloc);
+ srenew(shfc->f[i],shfc->x_nalloc);
+ }
+ }
+ for(i=0; (i<2); i++) {
+ pos[i] = shfc->x[i];
+ force[i] = shfc->f[i];
+ }
+
+ /* With particle decomposition this code only works
+ * when all particles involved with each shell are in the same cg.
+ */
+
+ if (bDoNS && inputrec->ePBC != epbcNONE && !DOMAINDECOMP(cr)) {
+ /* This is the only time where the coordinates are used
+ * before do_force is called, which normally puts all
+ * charge groups in the box.
+ */
+ if (PARTDECOMP(cr)) {
+ pd_cg_range(cr,&cg0,&cg1);
+ } else {
+ cg0 = 0;
+ cg1 = top->cgs.nr;
+ }
+ put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,state->box,
+ &(top->cgs),state->x,fr->cg_cm);
+ if (graph)
+ mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
+ }
+
+ /* After this all coordinate arrays will contain whole molecules */
+ if (graph)
+ shift_self(graph,state->box,state->x);
+
+ if (nflexcon) {
+ if (nat > shfc->flex_nalloc) {
+ shfc->flex_nalloc = over_alloc_dd(nat);
+ srenew(shfc->acc_dir,shfc->flex_nalloc);
+ srenew(shfc->x_old,shfc->flex_nalloc);
+ }
+ acc_dir = shfc->acc_dir;
+ x_old = shfc->x_old;
+ for(i=0; i<homenr; i++) {
+ for(d=0; d<DIM; d++)
+ shfc->x_old[i][d] =
+ state->x[start+i][d] - state->v[start+i][d]*inputrec->delta_t;
+ }
+ }
+
+ /* Do a prediction of the shell positions */
+ if (shfc->bPredict && !bCont) {
+ predict_shells(fplog,state->x,state->v,inputrec->delta_t,nshell,shell,
+ md->massT,NULL,bInit);
+ }
+
+ /* do_force expected the charge groups to be in the box */
+ if (graph)
+ unshift_self(graph,state->box,state->x);
+
+ /* Calculate the forces first time around */
+ if (gmx_debug_at) {
+ pr_rvecs(debug,0,"x b4 do_force",state->x + start,homenr);
+ }
+ do_force(fplog,cr,inputrec,mdstep,nrnb,wcycle,top,mtop,groups,
+ state->box,state->x,&state->hist,
+ force[Min],force_vir,md,enerd,fcd,
+ state->lambda,graph,
+ fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
+ (bDoNS ? GMX_FORCE_NS : 0) | force_flags);
+
+ sf_dir = 0;
+ if (nflexcon) {
+ init_adir(fplog,shfc,
+ constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
+ shfc->x_old-start,state->x,state->x,force[Min],
- state->box,state->lambda,&dum,nrnb);
++ shfc->acc_dir-start,
++ fr->bMolPBC,state->box,state->lambda,&dum,nrnb);
+
+ for(i=start; i<end; i++)
+ sf_dir += md->massT[i]*norm2(shfc->acc_dir[i-start]);
+ }
+
+ Epot[Min] = enerd->term[F_EPOT];
+
+ df[Min]=rms_force(cr,shfc->f[Min],nshell,shell,nflexcon,&sf_dir,&Epot[Min]);
+ df[Try]=0;
+ if (debug) {
+ fprintf(debug,"df = %g %g\n",df[Min],df[Try]);
+ }
+
+ if (gmx_debug_at) {
+ pr_rvecs(debug,0,"force0",force[Min],md->nr);
+ }
+
+ if (nshell+nflexcon > 0) {
+ /* Copy x to pos[Min] & pos[Try]: during minimization only the
+ * shell positions are updated, therefore the other particles must
+ * be set here.
+ */
+ memcpy(pos[Min],state->x,nat*sizeof(state->x[0]));
+ memcpy(pos[Try],state->x,nat*sizeof(state->x[0]));
+ }
+
+ if (bVerbose && MASTER(cr))
+ print_epot(stdout,mdstep,0,Epot[Min],df[Min],nflexcon,sf_dir);
+
+ if (debug) {
+ fprintf(debug,"%17s: %14.10e\n",
+ interaction_function[F_EKIN].longname,enerd->term[F_EKIN]);
+ fprintf(debug,"%17s: %14.10e\n",
+ interaction_function[F_EPOT].longname,enerd->term[F_EPOT]);
+ fprintf(debug,"%17s: %14.10e\n",
+ interaction_function[F_ETOT].longname,enerd->term[F_ETOT]);
+ fprintf(debug,"SHELLSTEP %s\n",gmx_step_str(mdstep,sbuf));
+ }
+
+ /* First check whether we should do shells, or whether the force is
+ * low enough even without minimization.
+ */
+ *bConverged = (df[Min] < ftol);
+
+ for(count=1; (!(*bConverged) && (count < number_steps)); count++) {
+ if (vsite)
+ construct_vsites(fplog,vsite,pos[Min],nrnb,inputrec->delta_t,state->v,
+ idef->iparams,idef->il,
+ fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+
+ if (nflexcon) {
+ init_adir(fplog,shfc,
+ constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
+ x_old-start,state->x,pos[Min],force[Min],acc_dir-start,
- state->box,state->lambda,&dum,nrnb);
++ fr->bMolPBC,state->box,state->lambda,&dum,nrnb);
+
+ directional_sd(fplog,pos[Min],pos[Try],acc_dir-start,start,end,
+ fr->fc_stepsize);
+ }
+
+ /* New positions, Steepest descent */
+ shell_pos_sd(fplog,pos[Min],pos[Try],force[Min],nshell,shell,count);
+
+ /* do_force expected the charge groups to be in the box */
+ if (graph)
+ unshift_self(graph,state->box,pos[Try]);
+
+ if (gmx_debug_at) {
+ pr_rvecs(debug,0,"RELAX: pos[Min] ",pos[Min] + start,homenr);
+ pr_rvecs(debug,0,"RELAX: pos[Try] ",pos[Try] + start,homenr);
+ }
+ /* Try the new positions */
+ do_force(fplog,cr,inputrec,1,nrnb,wcycle,
+ top,mtop,groups,state->box,pos[Try],&state->hist,
+ force[Try],force_vir,
+ md,enerd,fcd,state->lambda,graph,
+ fr,vsite,mu_tot,t,fp_field,NULL,bBornRadii,
+ force_flags);
+
+ if (gmx_debug_at) {
+ pr_rvecs(debug,0,"RELAX: force[Min]",force[Min] + start,homenr);
+ pr_rvecs(debug,0,"RELAX: force[Try]",force[Try] + start,homenr);
+ }
+ sf_dir = 0;
+ if (nflexcon) {
+ init_adir(fplog,shfc,
+ constr,idef,inputrec,cr,dd_ac1,mdstep,md,start,end,
+ x_old-start,state->x,pos[Try],force[Try],acc_dir-start,
++ fr->bMolPBC,state->box,state->lambda,&dum,nrnb);
+
+ for(i=start; i<end; i++)
+ sf_dir += md->massT[i]*norm2(acc_dir[i-start]);
+ }
+
+ Epot[Try] = enerd->term[F_EPOT];
+
+ df[Try]=rms_force(cr,force[Try],nshell,shell,nflexcon,&sf_dir,&Epot[Try]);
+
+ if (debug)
+ fprintf(debug,"df = %g %g\n",df[Min],df[Try]);
+
+ if (debug) {
+ if (gmx_debug_at)
+ pr_rvecs(debug,0,"F na do_force",force[Try] + start,homenr);
+ if (gmx_debug_at) {
+ fprintf(debug,"SHELL ITER %d\n",count);
+ dump_shells(debug,pos[Try],force[Try],ftol,nshell,shell);
+ }
+ }
+
+ if (bVerbose && MASTER(cr))
+ print_epot(stdout,mdstep,count,Epot[Try],df[Try],nflexcon,sf_dir);
+
+ *bConverged = (df[Try] < ftol);
+
+ if ((df[Try] < df[Min])) {
+ if (debug)
+ fprintf(debug,"Swapping Min and Try\n");
+ if (nflexcon) {
+ /* Correct the velocities for the flexible constraints */
+ invdt = 1/inputrec->delta_t;
+ for(i=start; i<end; i++) {
+ for(d=0; d<DIM; d++)
+ state->v[i][d] += (pos[Try][i][d] - pos[Min][i][d])*invdt;
+ }
+ }
+ Min = Try;
+ } else {
+ decrease_step_size(nshell,shell);
+ }
+ }
+ if (MASTER(cr) && !(*bConverged)) {
+ /* Note that the energies and virial are incorrect when not converged */
+ if (fplog)
+ fprintf(fplog,
+ "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
+ gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
+ fprintf(stderr,
+ "step %s: EM did not converge in %d iterations, RMS force %.3f\n",
+ gmx_step_str(mdstep,sbuf),number_steps,df[Min]);
+ }
+
+ /* Copy back the coordinates and the forces */
+ memcpy(state->x,pos[Min],nat*sizeof(state->x[0]));
+ memcpy(f,force[Min],nat*sizeof(f[0]));
+
+ return count;
+}
+
--- /dev/null
- if ((ir->nstlist == 0) || ((step % ir->nstlist) == 0))
- {
- /* We have done a full cycle let's update time_per_step */
- runtime->last = gmx_gettime();
- dt = difftime(runtime->last,runtime->real);
- runtime->time_per_step = dt/(step - ir->init_step + 1);
- }
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef GMX_CRAY_XT3
+#include<catamount/dclock.h>
+#endif
+
+
+#include <stdio.h>
+#include <time.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <math.h>
+#include "typedefs.h"
+#include "string2.h"
+#include "gmxfio.h"
+#include "smalloc.h"
+#include "names.h"
+#include "confio.h"
+#include "mvdata.h"
+#include "txtdump.h"
+#include "pbc.h"
+#include "chargegroup.h"
+#include "vec.h"
+#include <time.h>
+#include "nrnb.h"
+#include "mshift.h"
+#include "mdrun.h"
++#include "sim_util.h"
+#include "update.h"
+#include "physics.h"
+#include "main.h"
+#include "mdatoms.h"
+#include "force.h"
+#include "bondf.h"
+#include "pme.h"
+#include "disre.h"
+#include "orires.h"
+#include "network.h"
+#include "calcmu.h"
+#include "constr.h"
+#include "xvgr.h"
+#include "trnio.h"
+#include "xtcio.h"
+#include "copyrite.h"
+#include "pull_rotation.h"
+#include "gmx_random.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "gmx_wallcycle.h"
+#include "genborn.h"
++#include "nbnxn_search.h"
++#include "nbnxn_kernels/nbnxn_kernel_ref.h"
++#include "nbnxn_kernels/nbnxn_kernel_x86_simd128.h"
++#include "nbnxn_kernels/nbnxn_kernel_x86_simd256.h"
++#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#include "adress.h"
+#include "qmmm.h"
+
++#include "nbnxn_cuda_data_mgmt.h"
++#include "nbnxn_cuda/nbnxn_cuda.h"
++
+#if 0
+typedef struct gmx_timeprint {
+
+} t_gmx_timeprint;
+#endif
+
+/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
+char *
+gmx_ctime_r(const time_t *clock,char *buf, int n);
+
+
+double
+gmx_gettime()
+{
+#ifdef HAVE_GETTIMEOFDAY
+ struct timeval t;
+ double seconds;
+
+ gettimeofday(&t,NULL);
+
+ seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
+
+ return seconds;
+#else
+ double seconds;
+
+ seconds = time(NULL);
+
+ return seconds;
+#endif
+}
+
+
+#define difftime(end,start) ((double)(end)-(double)(start))
+
+void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,
+ t_inputrec *ir, t_commrec *cr)
+{
+ time_t finish;
+ char timebuf[STRLEN];
+ double dt;
+ char buf[48];
+
+#ifndef GMX_THREAD_MPI
+ if (!PAR(cr))
+#endif
+ {
+ fprintf(out,"\r");
+ }
+ fprintf(out,"step %s",gmx_step_str(step,buf));
+ if ((step >= ir->nstlist))
+ {
- Ext[m] = 0;
++ runtime->last = gmx_gettime();
++ dt = difftime(runtime->last,runtime->real);
++ runtime->time_per_step = dt/(step - ir->init_step + 1);
++
+ dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
+
+ if (ir->nsteps >= 0)
+ {
+ if (dt >= 300)
+ {
+ finish = (time_t) (runtime->last + dt);
+ gmx_ctime_r(&finish,timebuf,STRLEN);
+ sprintf(buf,"%s",timebuf);
+ buf[strlen(buf)-1]='\0';
+ fprintf(out,", will finish %s",buf);
+ }
+ else
+ fprintf(out,", remaining runtime: %5d s ",(int)dt);
+ }
+ else
+ {
+ fprintf(out," performance: %.1f ns/day ",
+ ir->delta_t/1000*24*60*60/runtime->time_per_step);
+ }
+ }
+#ifndef GMX_THREAD_MPI
+ if (PAR(cr))
+ {
+ fprintf(out,"\n");
+ }
+#endif
+
+ fflush(out);
+}
+
+#ifdef NO_CLOCK
+#define clock() -1
+#endif
+
+static double set_proctime(gmx_runtime_t *runtime)
+{
+ double diff;
+#ifdef GMX_CRAY_XT3
+ double prev;
+
+ prev = runtime->proc;
+ runtime->proc = dclock();
+
+ diff = runtime->proc - prev;
+#else
+ clock_t prev;
+
+ prev = runtime->proc;
+ runtime->proc = clock();
+
+ diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
+#endif
+ if (diff < 0)
+ {
+ /* The counter has probably looped, ignore this data */
+ diff = 0;
+ }
+
+ return diff;
+}
+
+void runtime_start(gmx_runtime_t *runtime)
+{
+ runtime->real = gmx_gettime();
+ runtime->proc = 0;
+ set_proctime(runtime);
+ runtime->realtime = 0;
+ runtime->proctime = 0;
+ runtime->last = 0;
+ runtime->time_per_step = 0;
+}
+
+void runtime_end(gmx_runtime_t *runtime)
+{
+ double now;
+
+ now = gmx_gettime();
+
+ runtime->proctime += set_proctime(runtime);
+ runtime->realtime = now - runtime->real;
+ runtime->real = now;
+}
+
+void runtime_upd_proc(gmx_runtime_t *runtime)
+{
+ runtime->proctime += set_proctime(runtime);
+}
+
+void print_date_and_time(FILE *fplog,int nodeid,const char *title,
+ const gmx_runtime_t *runtime)
+{
+ int i;
+ char timebuf[STRLEN];
+ char time_string[STRLEN];
+ time_t tmptime;
+
+ if (fplog)
+ {
+ if (runtime != NULL)
+ {
+ tmptime = (time_t) runtime->real;
+ gmx_ctime_r(&tmptime,timebuf,STRLEN);
+ }
+ else
+ {
+ tmptime = (time_t) gmx_gettime();
+ gmx_ctime_r(&tmptime,timebuf,STRLEN);
+ }
+ for(i=0; timebuf[i]>=' '; i++)
+ {
+ time_string[i]=timebuf[i];
+ }
+ time_string[i]='\0';
+
+ fprintf(fplog,"%s on node %d %s\n",title,nodeid,time_string);
+ }
+}
+
+static void sum_forces(int start,int end,rvec f[],rvec flr[])
+{
+ int i;
+
+ if (gmx_debug_at) {
+ pr_rvecs(debug,0,"fsr",f+start,end-start);
+ pr_rvecs(debug,0,"flr",flr+start,end-start);
+ }
+ for(i=start; (i<end); i++)
+ rvec_inc(f[i],flr[i]);
+}
+
+/*
+ * calc_f_el calculates forces due to an electric field.
+ *
+ * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
+ *
+ * Et[] contains the parameters for the time dependent
+ * part of the field (not yet used).
+ * Ex[] contains the parameters for
+ * the spatial dependent part of the field. You can have cool periodic
+ * fields in principle, but only a constant field is supported
+ * now.
+ * The function should return the energy due to the electric field
+ * (if any) but for now returns 0.
+ *
+ * WARNING:
+ * There can be problems with the virial.
+ * Since the field is not self-consistent this is unavoidable.
+ * For neutral molecules the virial is correct within this approximation.
+ * For neutral systems with many charged molecules the error is small.
+ * But for systems with a net charge or a few charged molecules
+ * the error can be significant when the field is high.
+ * Solution: implement a self-consitent electric field into PME.
+ */
+static void calc_f_el(FILE *fp,int start,int homenr,
+ real charge[],rvec x[],rvec f[],
+ t_cosines Ex[],t_cosines Et[],double t)
+{
+ rvec Ext;
+ real t0;
+ int i,m;
+
+ for(m=0; (m<DIM); m++)
+ {
+ if (Et[m].n > 0)
+ {
+ if (Et[m].n == 3)
+ {
+ t0 = Et[m].a[1];
+ Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
+ }
+ else
+ {
+ Ext[m] = cos(Et[m].a[0]*t);
+ }
+ }
+ else
+ {
+ Ext[m] = 1.0;
+ }
+ if (Ex[m].n > 0)
+ {
+ /* Convert the field strength from V/nm to MD-units */
+ Ext[m] *= Ex[m].a[0]*FIELDFAC;
+ for(i=start; (i<start+homenr); i++)
+ f[i][m] += charge[i]*Ext[m];
+ }
+ else
+ {
- if (fp != NULL)
++ Ext[m] = 0;
++ }
++ }
++ if (fp != NULL)
++ {
++ fprintf(fp,"%10g %10g %10g %10g #FIELD\n",t,
++ Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
++ }
++}
++
++static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
++ tensor vir_part,t_graph *graph,matrix box,
++ t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
++{
++ int i,j;
++ tensor virtest;
++
++ /* The short-range virial from surrounding boxes */
++ clear_mat(vir_part);
++ calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
++ inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
++
++ /* Calculate partial virial, for local atoms only, based on short range.
++ * Total virial is computed in global_stat, called from do_md
++ */
++ f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
++ inc_nrnb(nrnb,eNR_VIRIAL,homenr);
++
++ /* Add position restraint contribution */
++ for(i=0; i<DIM; i++) {
++ vir_part[i][i] += fr->vir_diag_posres[i];
++ }
++
++ /* Add wall contribution */
++ for(i=0; i<DIM; i++) {
++ vir_part[i][ZZ] += fr->vir_wall_z[i];
++ }
++
++ if (debug)
++ pr_rvecs(debug,0,"vir_part",vir_part,DIM);
++}
++
++static void posres_wrapper(FILE *fplog,
++ int flags,
++ gmx_bool bSepDVDL,
++ t_inputrec *ir,
++ t_nrnb *nrnb,
++ gmx_localtop_t *top,
++ matrix box,rvec x[],
++ rvec f[],
++ gmx_enerdata_t *enerd,
++ real *lambda,
++ t_forcerec *fr)
++{
++ t_pbc pbc;
++ real v,dvdl;
++ int i;
++
++ /* Position restraints always require full pbc */
++ set_pbc(&pbc,ir->ePBC,box);
++ dvdl = 0;
++ v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
++ top->idef.iparams_posres,
++ (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
++ ir->ePBC==epbcNONE ? NULL : &pbc,
++ lambda[efptRESTRAINT],&dvdl,
++ fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
++ if (bSepDVDL)
++ {
++ fprintf(fplog,sepdvdlformat,
++ interaction_function[F_POSRES].longname,v,dvdl);
++ }
++ enerd->term[F_POSRES] += v;
++ /* If just the force constant changes, the FEP term is linear,
++ * but if k changes, it is not.
++ */
++ enerd->dvdl_nonlin[efptRESTRAINT] += dvdl;
++ inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
++
++ if ((ir->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
++ {
++ for(i=0; i<enerd->n_lambda; i++)
++ {
++ real dvdl_dum,lambda_dum;
++
++ lambda_dum = (i==0 ? lambda[efptRESTRAINT] : ir->fepvals->all_lambda[efptRESTRAINT][i-1]);
++ v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
++ top->idef.iparams_posres,
++ (const rvec*)x,NULL,NULL,
++ ir->ePBC==epbcNONE ? NULL : &pbc,lambda_dum,&dvdl,
++ fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
++ enerd->enerpart_lambda[i] += v;
++ }
++ }
++}
++
++static void pull_potential_wrapper(FILE *fplog,
++ gmx_bool bSepDVDL,
++ t_commrec *cr,
++ t_inputrec *ir,
++ matrix box,rvec x[],
++ rvec f[],
++ tensor vir_force,
++ t_mdatoms *mdatoms,
++ gmx_enerdata_t *enerd,
++ real *lambda,
++ double t)
++{
++ t_pbc pbc;
++ real dvdl;
++
++ /* Calculate the center of mass forces, this requires communication,
++ * which is why pull_potential is called close to other communication.
++ * The virial contribution is calculated directly,
++ * which is why we call pull_potential after calc_virial.
++ */
++ set_pbc(&pbc,ir->ePBC,box);
++ dvdl = 0;
++ enerd->term[F_COM_PULL] +=
++ pull_potential(ir->ePull,ir->pull,mdatoms,&pbc,
++ cr,t,lambda[efptRESTRAINT],x,f,vir_force,&dvdl);
++ if (bSepDVDL)
++ {
++ fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
++ }
++ enerd->dvdl_lin[efptRESTRAINT] += dvdl;
++}
++
++static void pme_receive_force_ener(FILE *fplog,
++ gmx_bool bSepDVDL,
++ t_commrec *cr,
++ gmx_wallcycle_t wcycle,
++ gmx_enerdata_t *enerd,
++ t_forcerec *fr)
++{
++ real e,v,dvdl;
++ float cycles_ppdpme,cycles_seppme;
++
++ cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
++ dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
++
++ /* In case of node-splitting, the PP nodes receive the long-range
++ * forces, virial and energy from the PME nodes here.
++ */
++ wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
++ dvdl = 0;
++ gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
++ &cycles_seppme);
++ if (bSepDVDL)
++ {
++ fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
++ }
++ enerd->term[F_COUL_RECIP] += e;
++ enerd->dvdl_lin[efptCOUL] += dvdl;
++ if (wcycle)
++ {
++ dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
++ }
++ wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
++}
++
++static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
++ gmx_large_int_t step,real pforce,rvec *x,rvec *f)
++{
++ int i;
++ real pf2,fn2;
++ char buf[STEPSTRSIZE];
++
++ pf2 = sqr(pforce);
++ for(i=md->start; i<md->start+md->homenr; i++) {
++ fn2 = norm2(f[i]);
++ /* We also catch NAN, if the compiler does not optimize this away. */
++ if (fn2 >= pf2 || fn2 != fn2) {
++ fprintf(fp,"step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
++ gmx_step_str(step,buf),
++ ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
++ }
++ }
++}
++
++static void post_process_forces(FILE *fplog,
++ t_commrec *cr,
++ gmx_large_int_t step,
++ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
++ gmx_localtop_t *top,
++ matrix box,rvec x[],
++ rvec f[],
++ tensor vir_force,
++ t_mdatoms *mdatoms,
++ t_graph *graph,
++ t_forcerec *fr,gmx_vsite_t *vsite,
++ int flags)
++{
++ if (fr->bF_NoVirSum)
++ {
++ if (vsite)
++ {
++ /* Spread the mesh force on virtual sites to the other particles...
++ * This is parallellized. MPI communication is performed
++ * if the constructing atoms aren't local.
++ */
++ wallcycle_start(wcycle,ewcVSITESPREAD);
++ spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,
++ (flags & GMX_FORCE_VIRIAL),fr->vir_el_recip,
++ nrnb,
++ &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
++ wallcycle_stop(wcycle,ewcVSITESPREAD);
++ }
++ if (flags & GMX_FORCE_VIRIAL)
++ {
++ /* Now add the forces, this is local */
++ if (fr->bDomDec)
++ {
++ sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
++ }
++ else
++ {
++ sum_forces(mdatoms->start,mdatoms->start+mdatoms->homenr,
++ f,fr->f_novirsum);
++ }
++ if (EEL_FULL(fr->eeltype))
++ {
++ /* Add the mesh contribution to the virial */
++ m_add(vir_force,fr->vir_el_recip,vir_force);
++ }
++ if (debug)
++ {
++ pr_rvecs(debug,0,"vir_force",vir_force,DIM);
++ }
++ }
++ }
++
++ if (fr->print_force >= 0)
++ {
++ print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
++ }
++}
++
++static void do_nb_verlet(t_forcerec *fr,
++ interaction_const_t *ic,
++ gmx_enerdata_t *enerd,
++ int flags, int ilocality,
++ int clearF,
++ t_nrnb *nrnb,
++ gmx_wallcycle_t wcycle)
++{
++ int nnbl, kernel_type, sh_e;
++ char *env;
++ nonbonded_verlet_group_t *nbvg;
++
++ if (!(flags & GMX_FORCE_NONBONDED))
++ {
++ /* skip non-bonded calculation */
++ return;
++ }
++
++ nbvg = &fr->nbv->grp[ilocality];
++
++ /* CUDA kernel launch overhead is already timed separately */
++ if (fr->cutoff_scheme != ecutsVERLET)
++ {
++ gmx_incons("Invalid cut-off scheme passed!");
++ }
++
++ if (nbvg->kernel_type != nbk8x8x8_CUDA)
++ {
++ wallcycle_sub_start(wcycle, ewcsNONBONDED);
++ }
++ switch (nbvg->kernel_type)
++ {
++ case nbk4x4_PlainC:
++ nbnxn_kernel_ref(&nbvg->nbl_lists,
++ nbvg->nbat, ic,
++ fr->shift_vec,
++ flags,
++ clearF,
++ fr->fshift[0],
++ enerd->grpp.ener[egCOULSR],
++ fr->bBHAM ?
++ enerd->grpp.ener[egBHAMSR] :
++ enerd->grpp.ener[egLJSR]);
++ break;
++
++ case nbk4xN_X86_SIMD128:
++ nbnxn_kernel_x86_simd128(&nbvg->nbl_lists,
++ nbvg->nbat, ic,
++ fr->shift_vec,
++ flags,
++ clearF,
++ fr->fshift[0],
++ enerd->grpp.ener[egCOULSR],
++ fr->bBHAM ?
++ enerd->grpp.ener[egBHAMSR] :
++ enerd->grpp.ener[egLJSR]);
++ break;
++ case nbk4xN_X86_SIMD256:
++ nbnxn_kernel_x86_simd256(&nbvg->nbl_lists,
++ nbvg->nbat, ic,
++ fr->shift_vec,
++ flags,
++ clearF,
++ fr->fshift[0],
++ enerd->grpp.ener[egCOULSR],
++ fr->bBHAM ?
++ enerd->grpp.ener[egBHAMSR] :
++ enerd->grpp.ener[egLJSR]);
++ break;
++
++ case nbk8x8x8_CUDA:
++ nbnxn_cuda_launch_kernel(fr->nbv->cu_nbv, nbvg->nbat, flags, ilocality);
++ break;
++
++ case nbk8x8x8_PlainC:
++ nbnxn_kernel_gpu_ref(nbvg->nbl_lists.nbl[0],
++ nbvg->nbat, ic,
++ fr->shift_vec,
++ flags,
++ clearF,
++ nbvg->nbat->out[0].f,
++ fr->fshift[0],
++ enerd->grpp.ener[egCOULSR],
++ fr->bBHAM ?
++ enerd->grpp.ener[egBHAMSR] :
++ enerd->grpp.ener[egLJSR]);
++ break;
++
++ default:
++ gmx_incons("Invalid nonbonded kernel type passed!");
++
++ }
++ if (nbvg->kernel_type != nbk8x8x8_CUDA)
++ {
++ wallcycle_sub_stop(wcycle, ewcsNONBONDED);
++ }
++
++ /* In eNR_??? the nbnxn F+E kernels are always the F kernel + 1 */
++ sh_e = ((flags & GMX_FORCE_ENERGY) ? 1 : 0);
++ inc_nrnb(nrnb,
++ ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
++ eNR_NBNXN_LJ_RF : eNR_NBNXN_LJ_TAB) + sh_e,
++ nbvg->nbl_lists.natpair_ljq);
++ inc_nrnb(nrnb,eNR_NBNXN_LJ+sh_e,nbvg->nbl_lists.natpair_lj);
++ inc_nrnb(nrnb,
++ ((EEL_RF(ic->eeltype) || ic->eeltype == eelCUT) ?
++ eNR_NBNXN_RF : eNR_NBNXN_TAB)+sh_e,
++ nbvg->nbl_lists.natpair_q);
++}
++
++void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
++ t_inputrec *inputrec,
++ gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
++ gmx_localtop_t *top,
++ gmx_mtop_t *mtop,
++ gmx_groups_t *groups,
++ matrix box,rvec x[],history_t *hist,
++ rvec f[],
++ tensor vir_force,
++ t_mdatoms *mdatoms,
++ gmx_enerdata_t *enerd,t_fcdata *fcd,
++ real *lambda,t_graph *graph,
++ t_forcerec *fr, interaction_const_t *ic,
++ gmx_vsite_t *vsite,rvec mu_tot,
++ double t,FILE *field,gmx_edsam_t ed,
++ gmx_bool bBornRadii,
++ int flags)
++{
++ int cg0,cg1,i,j;
++ int start,homenr;
++ int nb_kernel_type;
++ double mu[2*DIM];
++ gmx_bool bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
++ gmx_bool bDoLongRange,bDoForces,bSepLRF,bUseGPU,bUseOrEmulGPU;
++ gmx_bool bDiffKernels=FALSE;
++ matrix boxs;
++ rvec vzero,box_diag;
++ real e,v,dvdl;
++ float cycles_pme,cycles_force;
++ nonbonded_verlet_t *nbv;
++
++ cycles_force = 0;
++ nbv = fr->nbv;
++ nb_kernel_type = fr->nbv->grp[0].kernel_type;
++
++ start = mdatoms->start;
++ homenr = mdatoms->homenr;
++
++ bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
++
++ clear_mat(vir_force);
++
++ cg0 = 0;
++ if (DOMAINDECOMP(cr))
++ {
++ cg1 = cr->dd->ncg_tot;
++ }
++ else
++ {
++ cg1 = top->cgs.nr;
++ }
++ if (fr->n_tpi > 0)
++ {
++ cg1--;
++ }
++
++ bStateChanged = (flags & GMX_FORCE_STATECHANGED);
++ bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
++ bFillGrid = (bNS && bStateChanged);
++ bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
++ bDoLongRange = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
++ bDoForces = (flags & GMX_FORCE_FORCES);
++ bSepLRF = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
++ bUseGPU = fr->nbv->bUseGPU;
++ bUseOrEmulGPU = bUseGPU || (nbv->grp[0].kernel_type == nbk8x8x8_PlainC);
++
++ if (bStateChanged)
++ {
++ update_forcerec(fplog,fr,box);
++
++ if (NEED_MUTOT(*inputrec))
++ {
++ /* Calculate total (local) dipole moment in a temporary common array.
++ * This makes it possible to sum them over nodes faster.
++ */
++ calc_mu(start,homenr,
++ x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
++ mu,mu+DIM);
++ }
++ }
++
++ if (fr->ePBC != epbcNONE) {
++ /* Compute shift vectors every step,
++ * because of pressure coupling or box deformation!
++ */
++ if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
++ calc_shifts(box,fr->shift_vec);
++
++ if (bCalcCGCM) {
++ put_atoms_in_box_omp(fr->ePBC,box,homenr,x);
++ inc_nrnb(nrnb,eNR_SHIFTX,homenr);
++ }
++ else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
++ unshift_self(graph,box,x);
++ }
++ }
++
++ nbnxn_atomdata_copy_shiftvec(flags & GMX_FORCE_DYNAMICBOX,
++ fr->shift_vec,nbv->grp[0].nbat);
++
++#ifdef GMX_MPI
++ if (!(cr->duty & DUTY_PME)) {
++ /* Send particle coordinates to the pme nodes.
++ * Since this is only implemented for domain decomposition
++ * and domain decomposition does not use the graph,
++ * we do not need to worry about shifting.
++ */
++
++ wallcycle_start(wcycle,ewcPP_PMESENDX);
++
++ bBS = (inputrec->nwall == 2);
++ if (bBS) {
++ copy_mat(box,boxs);
++ svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
++ }
++
++ gmx_pme_send_x(cr,bBS ? boxs : box,x,
++ mdatoms->nChargePerturbed,lambda[efptCOUL],
++ (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),step);
++
++ wallcycle_stop(wcycle,ewcPP_PMESENDX);
++ }
++#endif /* GMX_MPI */
++
++ /* do gridding for pair search */
++ if (bNS)
++ {
++ if (graph && bStateChanged)
++ {
++ /* Calculate intramolecular shift vectors to make molecules whole */
++ mk_mshift(fplog,graph,fr->ePBC,box,x);
++ }
++
++ clear_rvec(vzero);
++ box_diag[XX] = box[XX][XX];
++ box_diag[YY] = box[YY][YY];
++ box_diag[ZZ] = box[ZZ][ZZ];
++
++ wallcycle_start(wcycle,ewcNS);
++ if (!fr->bDomDec)
++ {
++ wallcycle_sub_start(wcycle,ewcsNBS_GRID_LOCAL);
++ nbnxn_put_on_grid(nbv->nbs,fr->ePBC,box,
++ 0,vzero,box_diag,
++ 0,mdatoms->homenr,-1,fr->cginfo,x,
++ 0,NULL,
++ nbv->grp[eintLocal].kernel_type,
++ nbv->grp[eintLocal].nbat);
++ wallcycle_sub_stop(wcycle,ewcsNBS_GRID_LOCAL);
++ }
++ else
++ {
++ wallcycle_sub_start(wcycle,ewcsNBS_GRID_NONLOCAL);
++ nbnxn_put_on_grid_nonlocal(nbv->nbs,domdec_zones(cr->dd),
++ fr->cginfo,x,
++ nbv->grp[eintNonlocal].kernel_type,
++ nbv->grp[eintNonlocal].nbat);
++ wallcycle_sub_stop(wcycle,ewcsNBS_GRID_NONLOCAL);
++ }
++
++ if (nbv->ngrp == 1 ||
++ nbv->grp[eintNonlocal].nbat == nbv->grp[eintLocal].nbat)
++ {
++ nbnxn_atomdata_set(nbv->grp[eintLocal].nbat,eatAll,
++ nbv->nbs,mdatoms,fr->cginfo);
++ }
++ else
++ {
++ nbnxn_atomdata_set(nbv->grp[eintLocal].nbat,eatLocal,
++ nbv->nbs,mdatoms,fr->cginfo);
++ nbnxn_atomdata_set(nbv->grp[eintNonlocal].nbat,eatAll,
++ nbv->nbs,mdatoms,fr->cginfo);
++ }
++ wallcycle_stop(wcycle, ewcNS);
++ }
++
++ /* initialize the GPU atom data and copy shift vector */
++ if (bUseGPU)
++ {
++ if (bNS)
++ {
++ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
++ nbnxn_cuda_init_atomdata(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
++ wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
++ }
++
++ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
++ nbnxn_cuda_upload_shiftvec(nbv->cu_nbv, nbv->grp[eintLocal].nbat);
++ wallcycle_stop(wcycle, ewcLAUNCH_GPU_NB);
++ }
++
++ /* do local pair search */
++ if (bNS)
++ {
++ wallcycle_start_nocount(wcycle,ewcNS);
++ wallcycle_sub_start(wcycle,ewcsNBS_SEARCH_LOCAL);
++ nbnxn_make_pairlist(nbv->nbs,nbv->grp[eintLocal].nbat,
++ &top->excls,
++ ic->rlist,
++ nbv->min_ci_balanced,
++ &nbv->grp[eintLocal].nbl_lists,
++ eintLocal,
++ nbv->grp[eintLocal].kernel_type,
++ nrnb);
++ wallcycle_sub_stop(wcycle,ewcsNBS_SEARCH_LOCAL);
++
++ if (bUseGPU)
++ {
++ /* initialize local pair-list on the GPU */
++ nbnxn_cuda_init_pairlist(nbv->cu_nbv,
++ nbv->grp[eintLocal].nbl_lists.nbl[0],
++ eintLocal);
++ }
++ wallcycle_stop(wcycle, ewcNS);
++ }
++ else
++ {
++ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
++ wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
++ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatLocal,FALSE,x,
++ nbv->grp[eintLocal].nbat);
++ wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
++ wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
++ }
++
++ if (bUseGPU)
++ {
++ wallcycle_start(wcycle,ewcLAUNCH_GPU_NB);
++ /* launch local nonbonded F on GPU */
++ do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFNo,
++ nrnb, wcycle);
++ wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
++ }
++
++ /* Communicate coordinates and sum dipole if necessary +
++ do non-local pair search */
++ if (DOMAINDECOMP(cr))
++ {
++ bDiffKernels = (nbv->grp[eintNonlocal].kernel_type !=
++ nbv->grp[eintLocal].kernel_type);
++
++ if (bDiffKernels)
++ {
++ /* With GPU+CPU non-bonded calculations we need to copy
++ * the local coordinates to the non-local nbat struct
++ * (in CPU format) as the non-local kernel call also
++ * calculates the local - non-local interactions.
++ */
++ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
++ wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
++ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatLocal,TRUE,x,
++ nbv->grp[eintNonlocal].nbat);
++ wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
++ wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
++ }
++
++ if (bNS)
++ {
++ wallcycle_start_nocount(wcycle,ewcNS);
++ wallcycle_sub_start(wcycle,ewcsNBS_SEARCH_NONLOCAL);
++
++ if (bDiffKernels)
++ {
++ nbnxn_grid_add_simple(nbv->nbs,nbv->grp[eintNonlocal].nbat);
++ }
++
++ nbnxn_make_pairlist(nbv->nbs,nbv->grp[eintNonlocal].nbat,
++ &top->excls,
++ ic->rlist,
++ nbv->min_ci_balanced,
++ &nbv->grp[eintNonlocal].nbl_lists,
++ eintNonlocal,
++ nbv->grp[eintNonlocal].kernel_type,
++ nrnb);
++
++ wallcycle_sub_stop(wcycle,ewcsNBS_SEARCH_NONLOCAL);
++
++ if (nbv->grp[eintNonlocal].kernel_type == nbk8x8x8_CUDA)
++ {
++ /* initialize non-local pair-list on the GPU */
++ nbnxn_cuda_init_pairlist(nbv->cu_nbv,
++ nbv->grp[eintNonlocal].nbl_lists.nbl[0],
++ eintNonlocal);
++ }
++ wallcycle_stop(wcycle,ewcNS);
++ }
++ else
++ {
++ wallcycle_start(wcycle,ewcMOVEX);
++ dd_move_x(cr->dd,box,x);
++
++ /* When we don't need the total dipole we sum it in global_stat */
++ if (bStateChanged && NEED_MUTOT(*inputrec))
++ {
++ gmx_sumd(2*DIM,mu,cr);
++ }
++ wallcycle_stop(wcycle,ewcMOVEX);
++
++ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
++ wallcycle_sub_start(wcycle, ewcsNB_X_BUF_OPS);
++ nbnxn_atomdata_copy_x_to_nbat_x(nbv->nbs,eatNonlocal,FALSE,x,
++ nbv->grp[eintNonlocal].nbat);
++ wallcycle_sub_stop(wcycle, ewcsNB_X_BUF_OPS);
++ cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
++ }
++
++ if (bUseGPU && !bDiffKernels)
++ {
++ wallcycle_start(wcycle,ewcLAUNCH_GPU_NB);
++ /* launch non-local nonbonded F on GPU */
++ do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFNo,
++ nrnb, wcycle);
++ cycles_force += wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
++ }
++ }
++
++ if (bUseGPU)
++ {
++ /* launch D2H copy-back F */
++ wallcycle_start_nocount(wcycle, ewcLAUNCH_GPU_NB);
++ if (DOMAINDECOMP(cr) && !bDiffKernels)
++ {
++ nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintNonlocal].nbat,
++ flags, eatNonlocal);
++ }
++ nbnxn_cuda_launch_cpyback(nbv->cu_nbv, nbv->grp[eintLocal].nbat,
++ flags, eatLocal);
++ cycles_force += wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
++ }
++
++ if (bStateChanged && NEED_MUTOT(*inputrec))
++ {
++ if (PAR(cr))
++ {
++ gmx_sumd(2*DIM,mu,cr);
++ }
++
++ for(i=0; i<2; i++)
++ {
++ for(j=0;j<DIM;j++)
++ {
++ fr->mu_tot[i][j] = mu[i*DIM + j];
++ }
++ }
++ }
++ if (fr->efep == efepNO)
++ {
++ copy_rvec(fr->mu_tot[0],mu_tot);
++ }
++ else
++ {
++ for(j=0; j<DIM; j++)
++ {
++ mu_tot[j] =
++ (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] +
++ lambda[efptCOUL]*fr->mu_tot[1][j];
++ }
++ }
++
++ /* Reset energies */
++ reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
++ clear_rvecs(SHIFTS,fr->fshift);
++
++ if (DOMAINDECOMP(cr))
++ {
++ if (!(cr->duty & DUTY_PME))
++ {
++ wallcycle_start(wcycle,ewcPPDURINGPME);
++ dd_force_flop_start(cr->dd,nrnb);
++ }
++ }
++
++ /* Start the force cycle counter.
++ * This counter is stopped in do_forcelow_level.
++ * No parallel communication should occur while this counter is running,
++ * since that will interfere with the dynamic load balancing.
++ */
++ wallcycle_start(wcycle,ewcFORCE);
++ if (bDoForces)
++ {
++ /* Reset forces for which the virial is calculated separately:
++ * PME/Ewald forces if necessary */
++ if (fr->bF_NoVirSum)
++ {
++ if (flags & GMX_FORCE_VIRIAL)
++ {
++ fr->f_novirsum = fr->f_novirsum_alloc;
++ if (fr->bDomDec)
++ {
++ clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
++ }
++ else
++ {
++ clear_rvecs(homenr,fr->f_novirsum+start);
++ }
++ }
++ else
++ {
++ /* We are not calculating the pressure so we do not need
++ * a separate array for forces that do not contribute
++ * to the pressure.
++ */
++ fr->f_novirsum = f;
++ }
++ }
++
++ if (bSepLRF)
++ {
++ /* Add the long range forces to the short range forces */
++ for(i=0; i<fr->natoms_force_constr; i++)
++ {
++ copy_rvec(fr->f_twin[i],f[i]);
++ }
++ }
++ else if (!(fr->bTwinRange && bNS))
++ {
++ /* Clear the short-range forces */
++ clear_rvecs(fr->natoms_force_constr,f);
++ }
++
++ clear_rvec(fr->vir_diag_posres);
++ }
++ if (inputrec->ePull == epullCONSTRAINT)
++ {
++ clear_pull_forces(inputrec->pull);
++ }
++
++ /* update QMMMrec, if necessary */
++ if(fr->bQMMM)
++ {
++ update_QMMMrec(cr,fr,x,mdatoms,box,top);
++ }
++
++ if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
++ {
++ posres_wrapper(fplog,flags,bSepDVDL,inputrec,nrnb,top,box,x,
++ f,enerd,lambda,fr);
++ }
++
++ /* Compute the bonded and non-bonded energies and optionally forces */
++ /* if we use the GPU turn off the nonbonded */
++ do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
++ cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
++ x,hist,f,enerd,fcd,mtop,top,fr->born,
++ &(top->atomtypes),bBornRadii,box,
++ inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
++ ((nb_kernel_type == nbk8x8x8_CUDA || nb_kernel_type == nbk8x8x8_PlainC)
++ ? flags&~GMX_FORCE_NONBONDED : flags),
++ &cycles_pme);
++
++ if (!bUseOrEmulGPU)
++ {
++ /* Maybe we should move this into do_force_lowlevel */
++ do_nb_verlet(fr, ic, enerd, flags, eintLocal, enbvClearFYes,
++ nrnb, wcycle);
++ }
++
++
++ if (!bUseOrEmulGPU || bDiffKernels)
++ {
++ int aloc;
++
++ if (DOMAINDECOMP(cr))
++ {
++ do_nb_verlet(fr, ic, enerd, flags, eintNonlocal,
++ bDiffKernels ? enbvClearFYes : enbvClearFNo,
++ nrnb, wcycle);
++ }
++
++ if (!bUseOrEmulGPU)
++ {
++ aloc = eintLocal;
++ }
++ else
++ {
++ aloc = eintNonlocal;
++ }
++
++ /* Add all the non-bonded force to the normal force array.
++ * This can be split into a local a non-local part when overlapping
++ * communication with calculation with domain decomposition.
++ */
++ cycles_force += wallcycle_stop(wcycle,ewcFORCE);
++ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
++ wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
++ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatAll,nbv->grp[aloc].nbat,f);
++ wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
++ cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
++ wallcycle_start_nocount(wcycle,ewcFORCE);
++
++ /* if there are multiple fshift output buffers reduce them */
++ if ((flags & GMX_FORCE_VIRIAL) &&
++ nbv->grp[aloc].nbl_lists.nnbl > 1)
++ {
++ nbnxn_atomdata_add_nbat_fshift_to_fshift(nbv->grp[aloc].nbat,
++ fr->fshift);
++ }
++ }
++
++ cycles_force += wallcycle_stop(wcycle,ewcFORCE);
++
++ if (ed)
++ {
++ do_flood(fplog,cr,x,f,ed,box,step,bNS);
++ }
++
++ if (bUseOrEmulGPU && !bDiffKernels)
++ {
++ /* wait for non-local forces (or calculate in emulation mode) */
++ if (DOMAINDECOMP(cr))
++ {
++ if (bUseGPU)
++ {
++ wallcycle_start(wcycle,ewcWAIT_GPU_NB_NL);
++ nbnxn_cuda_wait_gpu(nbv->cu_nbv,
++ nbv->grp[eintNonlocal].nbat,
++ flags, eatNonlocal,
++ enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
++ fr->fshift);
++ cycles_force += wallcycle_stop(wcycle,ewcWAIT_GPU_NB_NL);
++ }
++ else
++ {
++ wallcycle_start_nocount(wcycle,ewcFORCE);
++ do_nb_verlet(fr, ic, enerd, flags, eintNonlocal, enbvClearFYes,
++ nrnb, wcycle);
++ cycles_force += wallcycle_stop(wcycle,ewcFORCE);
++ }
++ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
++ wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
++ /* skip the reduction if there was no non-local work to do */
++ if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
++ {
++ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatNonlocal,
++ nbv->grp[eintNonlocal].nbat,f);
++ }
++ wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
++ cycles_force += wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
++ }
++ }
++
++ if (bDoForces)
++ {
++ /* Communicate the forces */
++ if (PAR(cr))
++ {
++ wallcycle_start(wcycle,ewcMOVEF);
++ if (DOMAINDECOMP(cr))
++ {
++ dd_move_f(cr->dd,f,fr->fshift);
++ /* Do we need to communicate the separate force array
++ * for terms that do not contribute to the single sum virial?
++ * Position restraints and electric fields do not introduce
++ * inter-cg forces, only full electrostatics methods do.
++ * When we do not calculate the virial, fr->f_novirsum = f,
++ * so we have already communicated these forces.
++ */
++ if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
++ (flags & GMX_FORCE_VIRIAL))
++ {
++ dd_move_f(cr->dd,fr->f_novirsum,NULL);
++ }
++ if (bSepLRF)
++ {
++ /* We should not update the shift forces here,
++ * since f_twin is already included in f.
++ */
++ dd_move_f(cr->dd,fr->f_twin,NULL);
++ }
++ }
++ wallcycle_stop(wcycle,ewcMOVEF);
++ }
++ }
++
++ if (bUseOrEmulGPU)
++ {
++ /* wait for local forces (or calculate in emulation mode) */
++ if (bUseGPU)
++ {
++ wallcycle_start(wcycle,ewcWAIT_GPU_NB_L);
++ nbnxn_cuda_wait_gpu(nbv->cu_nbv,
++ nbv->grp[eintLocal].nbat,
++ flags, eatLocal,
++ enerd->grpp.ener[egLJSR], enerd->grpp.ener[egCOULSR],
++ fr->fshift);
++ wallcycle_stop(wcycle,ewcWAIT_GPU_NB_L);
++
++ /* now clear the GPU outputs while we finish the step on the CPU */
++ nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags);
++ }
++ else
++ {
++ wallcycle_start_nocount(wcycle,ewcFORCE);
++ do_nb_verlet(fr, ic, enerd, flags, eintLocal,
++ DOMAINDECOMP(cr) ? enbvClearFNo : enbvClearFYes,
++ nrnb, wcycle);
++ wallcycle_stop(wcycle,ewcFORCE);
++ }
++ wallcycle_start(wcycle, ewcNB_XF_BUF_OPS);
++ wallcycle_sub_start(wcycle, ewcsNB_F_BUF_OPS);
++ if (nbv->grp[eintLocal].nbl_lists.nbl[0]->nsci > 0)
++ {
++ /* skip the reduction if there was no non-local work to do */
++ nbnxn_atomdata_add_nbat_f_to_f(nbv->nbs,eatLocal,
++ nbv->grp[eintLocal].nbat,f);
++ }
++ wallcycle_sub_stop(wcycle, ewcsNB_F_BUF_OPS);
++ wallcycle_stop(wcycle, ewcNB_XF_BUF_OPS);
++ }
++
++ if (DOMAINDECOMP(cr))
++ {
++ dd_force_flop_stop(cr->dd,nrnb);
++ if (wcycle)
++ {
++ dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
++ }
++ }
++
++ if (bDoForces)
++ {
++ if (IR_ELEC_FIELD(*inputrec))
++ {
++ /* Compute forces due to electric field */
++ calc_f_el(MASTER(cr) ? field : NULL,
++ start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
++ inputrec->ex,inputrec->et,t);
++ }
++
++ /* If we have NoVirSum forces, but we do not calculate the virial,
++ * we sum fr->f_novirum=f later.
++ */
++ if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
++ {
++ wallcycle_start(wcycle,ewcVSITESPREAD);
++ spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
++ &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
++ wallcycle_stop(wcycle,ewcVSITESPREAD);
++
++ if (bSepLRF)
++ {
++ wallcycle_start(wcycle,ewcVSITESPREAD);
++ spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
++ nrnb,
++ &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
++ wallcycle_stop(wcycle,ewcVSITESPREAD);
++ }
++ }
++
++ if (flags & GMX_FORCE_VIRIAL)
++ {
++ /* Calculation of the virial must be done after vsites! */
++ calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
++ vir_force,graph,box,nrnb,fr,inputrec->ePBC);
+ }
+ }
- fprintf(fp,"%10g %10g %10g %10g #FIELD\n",t,
- Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
++
++ if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
+ {
- }
-
- static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
- tensor vir_part,t_graph *graph,matrix box,
- t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
- {
- int i,j;
- tensor virtest;
-
- /* The short-range virial from surrounding boxes */
- clear_mat(vir_part);
- calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
- inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
-
- /* Calculate partial virial, for local atoms only, based on short range.
- * Total virial is computed in global_stat, called from do_md
- */
- f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
- inc_nrnb(nrnb,eNR_VIRIAL,homenr);
-
- /* Add position restraint contribution */
- for(i=0; i<DIM; i++) {
- vir_part[i][i] += fr->vir_diag_posres[i];
- }
-
- /* Add wall contribution */
- for(i=0; i<DIM; i++) {
- vir_part[i][ZZ] += fr->vir_wall_z[i];
- }
-
- if (debug)
- pr_rvecs(debug,0,"vir_part",vir_part,DIM);
- }
++ pull_potential_wrapper(fplog,bSepDVDL,cr,inputrec,box,x,
++ f,vir_force,mdatoms,enerd,lambda,t);
+ }
- static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
- gmx_large_int_t step,real pforce,rvec *x,rvec *f)
- {
- int i;
- real pf2,fn2;
- char buf[STEPSTRSIZE];
+
- pf2 = sqr(pforce);
- for(i=md->start; i<md->start+md->homenr; i++) {
- fn2 = norm2(f[i]);
- /* We also catch NAN, if the compiler does not optimize this away. */
- if (fn2 >= pf2 || fn2 != fn2) {
- fprintf(fp,"step %s atom %6d x %8.3f %8.3f %8.3f force %12.5e\n",
- gmx_step_str(step,buf),
- ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
++ if (PAR(cr) && !(cr->duty & DUTY_PME))
++ {
++ /* In case of node-splitting, the PP nodes receive the long-range
++ * forces, virial and energy from the PME nodes here.
++ */
++ pme_receive_force_ener(fplog,bSepDVDL,cr,wcycle,enerd,fr);
++ }
+
- }
++ if (bDoForces)
++ {
++ post_process_forces(fplog,cr,step,nrnb,wcycle,
++ top,box,x,f,vir_force,mdatoms,graph,fr,vsite,
++ flags);
+ }
- void do_force(FILE *fplog,t_commrec *cr,
++
++ /* Sum the potential energy terms from group contributions */
++ sum_epot(&(inputrec->opts),enerd);
+}
+
- real dvdl_dum,lambda_dum;
++void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
+ t_inputrec *inputrec,
+ gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_localtop_t *top,
+ gmx_mtop_t *mtop,
+ gmx_groups_t *groups,
+ matrix box,rvec x[],history_t *hist,
+ rvec f[],
+ tensor vir_force,
+ t_mdatoms *mdatoms,
+ gmx_enerdata_t *enerd,t_fcdata *fcd,
+ real *lambda,t_graph *graph,
+ t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
+ double t,FILE *field,gmx_edsam_t ed,
+ gmx_bool bBornRadii,
+ int flags)
+{
+ int cg0,cg1,i,j;
+ int start,homenr;
+ double mu[2*DIM];
+ gmx_bool bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
+ gmx_bool bDoLongRange,bDoForces,bSepLRF;
+ gmx_bool bDoAdressWF;
+ matrix boxs;
++ rvec vzero,box_diag;
+ real e,v,dvdlambda[efptNR];
- float cycles_ppdpme,cycles_pme,cycles_seppme,cycles_force;
+ t_pbc pbc;
- /* Calculate total (local) dipole moment in a temporary common array.
- * This makes it possible to sum them over nodes faster.
- */
- calc_mu(start,homenr,
- x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
- mu,mu+DIM);
++ float cycles_pme,cycles_force;
+
+ start = mdatoms->start;
+ homenr = mdatoms->homenr;
+
+ bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
+
+ clear_mat(vir_force);
+
+ if (PARTDECOMP(cr))
+ {
+ pd_cg_range(cr,&cg0,&cg1);
+ }
+ else
+ {
+ cg0 = 0;
+ if (DOMAINDECOMP(cr))
+ {
+ cg1 = cr->dd->ncg_tot;
+ }
+ else
+ {
+ cg1 = top->cgs.nr;
+ }
+ if (fr->n_tpi > 0)
+ {
+ cg1--;
+ }
+ }
+
+ bStateChanged = (flags & GMX_FORCE_STATECHANGED);
+ bNS = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
+ bFillGrid = (bNS && bStateChanged);
+ bCalcCGCM = (bFillGrid && !DOMAINDECOMP(cr));
+ bDoLongRange = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
+ bDoForces = (flags & GMX_FORCE_FORCES);
+ bSepLRF = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
+ /* should probably move this to the forcerec since it doesn't change */
+ bDoAdressWF = ((fr->adress_type!=eAdressOff));
+
+ if (bStateChanged)
+ {
+ update_forcerec(fplog,fr,box);
+
- if (fr->ePBC != epbcNONE) {
- /* Compute shift vectors every step,
- * because of pressure coupling or box deformation!
- */
- if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
- calc_shifts(box,fr->shift_vec);
-
- if (bCalcCGCM) {
- put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
- &(top->cgs),x,fr->cg_cm);
- inc_nrnb(nrnb,eNR_CGCM,homenr);
- inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
- }
- else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
- unshift_self(graph,box,x);
++ if (NEED_MUTOT(*inputrec))
++ {
++ /* Calculate total (local) dipole moment in a temporary common array.
++ * This makes it possible to sum them over nodes faster.
++ */
++ calc_mu(start,homenr,
++ x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
++ mu,mu+DIM);
++ }
+ }
+
- }
- else if (bCalcCGCM) {
- calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
- inc_nrnb(nrnb,eNR_CGCM,homenr);
- }
++ if (fr->ePBC != epbcNONE) {
++ /* Compute shift vectors every step,
++ * because of pressure coupling or box deformation!
++ */
++ if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
++ calc_shifts(box,fr->shift_vec);
++
++ if (bCalcCGCM) {
++ put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
++ &(top->cgs),x,fr->cg_cm);
++ inc_nrnb(nrnb,eNR_CGCM,homenr);
++ inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
++ }
++ else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
++ unshift_self(graph,box,x);
++ }
++ }
++ else if (bCalcCGCM) {
++ calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
++ inc_nrnb(nrnb,eNR_CGCM,homenr);
+ }
- if (bCalcCGCM) {
- if (PAR(cr)) {
- move_cgcm(fplog,cr,fr->cg_cm);
+
- if (gmx_debug_at)
- pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
- }
++ if (bCalcCGCM) {
++ if (PAR(cr)) {
++ move_cgcm(fplog,cr,fr->cg_cm);
++ }
++ if (gmx_debug_at)
++ pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
+ }
- if (!(cr->duty & DUTY_PME)) {
- /* Send particle coordinates to the pme nodes.
- * Since this is only implemented for domain decomposition
- * and domain decomposition does not use the graph,
- * we do not need to worry about shifting.
- */
+
+#ifdef GMX_MPI
- wallcycle_start(wcycle,ewcPP_PMESENDX);
++ if (!(cr->duty & DUTY_PME)) {
++ /* Send particle coordinates to the pme nodes.
++ * Since this is only implemented for domain decomposition
++ * and domain decomposition does not use the graph,
++ * we do not need to worry about shifting.
++ */
+
- bBS = (inputrec->nwall == 2);
- if (bBS) {
- copy_mat(box,boxs);
- svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
- }
++ wallcycle_start(wcycle,ewcPP_PMESENDX);
+
- gmx_pme_send_x(cr,bBS ? boxs : box,x,
- mdatoms->nChargePerturbed,lambda[efptCOUL],
- ( flags & GMX_FORCE_VIRIAL),step);
++ bBS = (inputrec->nwall == 2);
++ if (bBS) {
++ copy_mat(box,boxs);
++ svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
++ }
+
- wallcycle_stop(wcycle,ewcPP_PMESENDX);
- }
++ gmx_pme_send_x(cr,bBS ? boxs : box,x,
++ mdatoms->nChargePerturbed,lambda[efptCOUL],
++ (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)),step);
+
- /* When we don't need the total dipole we sum it in global_stat */
- if (bStateChanged && NEED_MUTOT(*inputrec))
++ wallcycle_stop(wcycle,ewcPP_PMESENDX);
++ }
+#endif /* GMX_MPI */
+
+ /* Communicate coordinates and sum dipole if necessary */
+ if (PAR(cr))
+ {
+ wallcycle_start(wcycle,ewcMOVEX);
+ if (DOMAINDECOMP(cr))
+ {
+ dd_move_x(cr->dd,box,x);
+ }
+ else
+ {
+ move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
+ }
- gmx_sumd(2*DIM,mu,cr);
++ wallcycle_stop(wcycle,ewcMOVEX);
++ }
++
++ /* update adress weight beforehand */
++ if(bStateChanged && bDoAdressWF)
++ {
++ /* need pbc for adress weight calculation with pbc_dx */
++ set_pbc(&pbc,inputrec->ePBC,box);
++ if(fr->adress_site == eAdressSITEcog)
++ {
++ update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
++ inputrec->ePBC==epbcNONE ? NULL : &pbc);
++ }
++ else if (fr->adress_site == eAdressSITEcom)
+ {
- wallcycle_stop(wcycle,ewcMOVEX);
++ update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
++ inputrec->ePBC==epbcNONE ? NULL : &pbc);
++ }
++ else if (fr->adress_site == eAdressSITEatomatom){
++ update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
++ inputrec->ePBC==epbcNONE ? NULL : &pbc);
++ }
++ else
++ {
++ update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
++ inputrec->ePBC==epbcNONE ? NULL : &pbc);
+ }
- if (bStateChanged)
+ }
- /* update adress weight beforehand */
- if(bDoAdressWF)
++
++ if (NEED_MUTOT(*inputrec))
+ {
+
- /* need pbc for adress weight calculation with pbc_dx */
- set_pbc(&pbc,inputrec->ePBC,box);
- if(fr->adress_site == eAdressSITEcog)
- {
- update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
- inputrec->ePBC==epbcNONE ? NULL : &pbc);
- }
- else if (fr->adress_site == eAdressSITEcom)
++ if (bStateChanged)
+ {
- update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
- inputrec->ePBC==epbcNONE ? NULL : &pbc);
- }
- else if (fr->adress_site == eAdressSITEatomatom){
- update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
- inputrec->ePBC==epbcNONE ? NULL : &pbc);
++ if (PAR(cr))
+ {
- else
++ gmx_sumd(2*DIM,mu,cr);
+ }
- update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
- inputrec->ePBC==epbcNONE ? NULL : &pbc);
++ for(i=0; i<2; i++)
+ {
-
- for(i=0; i<2; i++)
++ for(j=0;j<DIM;j++)
++ {
++ fr->mu_tot[i][j] = mu[i*DIM + j];
++ }
+ }
+ }
- for(j=0;j<DIM;j++)
- {
- fr->mu_tot[i][j] = mu[i*DIM + j];
- }
++ if (fr->efep == efepNO)
+ {
- }
- if (fr->efep == efepNO)
- {
- copy_rvec(fr->mu_tot[0],mu_tot);
- }
- else
- {
- for(j=0; j<DIM; j++)
++ copy_rvec(fr->mu_tot[0],mu_tot);
+ }
- mu_tot[j] =
- (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
++ else
+ {
-
++ for(j=0; j<DIM; j++)
++ {
++ mu_tot[j] =
++ (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
++ }
+ }
+ }
+
+ /* Reset energies */
+ reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
+ clear_rvecs(SHIFTS,fr->fshift);
+
+ if (bNS)
+ {
+ wallcycle_start(wcycle,ewcNS);
+
+ if (graph && bStateChanged)
+ {
+ /* Calculate intramolecular shift vectors to make molecules whole */
+ mk_mshift(fplog,graph,fr->ePBC,box,x);
+ }
+
+ /* Reset long range forces if necessary */
+ if (fr->bTwinRange)
+ {
+ /* Reset the (long-range) forces if necessary */
+ clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
+ }
+
+ /* Do the actual neighbour searching and if twin range electrostatics
+ * also do the calculation of long range forces and energies.
+ */
+ for (i=0;i<efptNR;i++) {dvdlambda[i] = 0;}
+ ns(fplog,fr,x,box,
+ groups,&(inputrec->opts),top,mdatoms,
+ cr,nrnb,lambda,dvdlambda,&enerd->grpp,bFillGrid,
+ bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
+ if (bSepDVDL)
+ {
+ fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdlambda);
+ }
+ enerd->dvdl_lin[efptVDW] += dvdlambda[efptVDW];
+ enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
+
+ wallcycle_stop(wcycle,ewcNS);
+ }
+
+ if (inputrec->implicit_solvent && bNS)
+ {
+ make_gb_nblist(cr,inputrec->gb_algorithm,inputrec->rlist,
+ x,box,fr,&top->idef,graph,fr->born);
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ if (!(cr->duty & DUTY_PME))
+ {
+ wallcycle_start(wcycle,ewcPPDURINGPME);
+ dd_force_flop_start(cr->dd,nrnb);
+ }
+ }
+
+ if (inputrec->bRot)
+ {
+ /* Enforced rotation has its own cycle counter that starts after the collective
+ * coordinates have been communicated. It is added to ddCyclF to allow
+ * for proper load-balancing */
+ wallcycle_start(wcycle,ewcROT);
+ do_rotation(cr,inputrec,box,x,t,step,wcycle,bNS);
+ wallcycle_stop(wcycle,ewcROT);
+ }
+
+ /* Start the force cycle counter.
+ * This counter is stopped in do_forcelow_level.
+ * No parallel communication should occur while this counter is running,
+ * since that will interfere with the dynamic load balancing.
+ */
+ wallcycle_start(wcycle,ewcFORCE);
- /* Position restraints always require full pbc. Check if we already did it for Adress */
- if(!(bStateChanged && bDoAdressWF))
- {
- set_pbc(&pbc,inputrec->ePBC,box);
- }
- v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
- top->idef.iparams_posres,
- (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
- inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda[efptRESTRAINT],&(dvdlambda[efptRESTRAINT]),
- fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
- if (bSepDVDL)
- {
- fprintf(fplog,sepdvdlformat,
- interaction_function[F_POSRES].longname,v,dvdlambda);
- }
- enerd->term[F_POSRES] += v;
- /* This linear lambda dependence assumption is only correct
- * when only k depends on lambda,
- * not when the reference position depends on lambda.
- * grompp checks for this. (verify this is still the case?)
- */
- enerd->dvdl_nonlin[efptRESTRAINT] += dvdlambda[efptRESTRAINT]; /* if just the force constant changes, this is linear,
- but we can't be sure w/o additional checking that is
- hard to do at this level of code. Otherwise,
- the dvdl is not differentiable */
- inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
- if ((inputrec->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
- {
- for(i=0; i<enerd->n_lambda; i++)
- {
- lambda_dum = (i==0 ? lambda[efptRESTRAINT] : inputrec->fepvals->all_lambda[efptRESTRAINT][i-1]);
- v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
- top->idef.iparams_posres,
- (const rvec*)x,NULL,NULL,
- inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda_dum,&dvdl_dum,
- fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
- enerd->enerpart_lambda[i] += v;
- }
- }
- }
++
+ if (bDoForces)
+ {
+ /* Reset forces for which the virial is calculated separately:
+ * PME/Ewald forces if necessary */
+ if (fr->bF_NoVirSum)
+ {
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ fr->f_novirsum = fr->f_novirsum_alloc;
+ if (fr->bDomDec)
+ {
+ clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
+ }
+ else
+ {
+ clear_rvecs(homenr,fr->f_novirsum+start);
+ }
+ }
+ else
+ {
+ /* We are not calculating the pressure so we do not need
+ * a separate array for forces that do not contribute
+ * to the pressure.
+ */
+ fr->f_novirsum = f;
+ }
+ }
+
+ if (bSepLRF)
+ {
+ /* Add the long range forces to the short range forces */
+ for(i=0; i<fr->natoms_force_constr; i++)
+ {
+ copy_rvec(fr->f_twin[i],f[i]);
+ }
+ }
+ else if (!(fr->bTwinRange && bNS))
+ {
+ /* Clear the short-range forces */
+ clear_rvecs(fr->natoms_force_constr,f);
+ }
+
+ clear_rvec(fr->vir_diag_posres);
+ }
+ if (inputrec->ePull == epullCONSTRAINT)
+ {
+ clear_pull_forces(inputrec->pull);
+ }
+
+ /* update QMMMrec, if necessary */
+ if(fr->bQMMM)
+ {
+ update_QMMMrec(cr,fr,x,mdatoms,box,top);
+ }
+
+ if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
+ {
- inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
- flags,&cycles_pme);
++ posres_wrapper(fplog,flags,bSepDVDL,inputrec,nrnb,top,box,x,
++ f,enerd,lambda,fr);
++ }
+
+ if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
+ {
+ /* Flat-bottomed position restraints always require full pbc */
+ if(!(bStateChanged && bDoAdressWF))
+ {
+ set_pbc(&pbc,inputrec->ePBC,box);
+ }
+ v = fbposres(top->idef.il[F_FBPOSRES].nr,top->idef.il[F_FBPOSRES].iatoms,
+ top->idef.iparams_fbposres,
+ (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
+ inputrec->ePBC==epbcNONE ? NULL : &pbc,
+ fr->rc_scaling,fr->ePBC,fr->posres_com);
+ enerd->term[F_FBPOSRES] += v;
+ inc_nrnb(nrnb,eNR_FBPOSRES,top->idef.il[F_FBPOSRES].nr/2);
+ }
+
+ /* Compute the bonded and non-bonded energies and optionally forces */
+ do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
+ cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
+ x,hist,f,enerd,fcd,mtop,top,fr->born,
+ &(top->atomtypes),bBornRadii,box,
- enerd->term[F_COM_PULL] = 0;
++ inputrec->fepvals,lambda,
++ graph,&(top->excls),fr->mu_tot,
++ flags,
++ &cycles_pme);
+
+ cycles_force = wallcycle_stop(wcycle,ewcFORCE);
+
+ if (ed)
+ {
+ do_flood(fplog,cr,x,f,ed,box,step,bNS);
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ dd_force_flop_stop(cr->dd,nrnb);
+ if (wcycle)
+ {
+ dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
+ }
+ }
+
+ if (bDoForces)
+ {
+ if (IR_ELEC_FIELD(*inputrec))
+ {
+ /* Compute forces due to electric field */
+ calc_f_el(MASTER(cr) ? field : NULL,
+ start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
+ inputrec->ex,inputrec->et,t);
+ }
+
+ if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
+ {
+ /* Compute thermodynamic force in hybrid AdResS region */
+ adress_thermo_force(start,homenr,&(top->cgs),x,fr->f_novirsum,fr,mdatoms,
+ inputrec->ePBC==epbcNONE ? NULL : &pbc);
+ }
+
+ /* Communicate the forces */
+ if (PAR(cr))
+ {
+ wallcycle_start(wcycle,ewcMOVEF);
+ if (DOMAINDECOMP(cr))
+ {
+ dd_move_f(cr->dd,f,fr->fshift);
+ /* Do we need to communicate the separate force array
+ * for terms that do not contribute to the single sum virial?
+ * Position restraints and electric fields do not introduce
+ * inter-cg forces, only full electrostatics methods do.
+ * When we do not calculate the virial, fr->f_novirsum = f,
+ * so we have already communicated these forces.
+ */
+ if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
+ (flags & GMX_FORCE_VIRIAL))
+ {
+ dd_move_f(cr->dd,fr->f_novirsum,NULL);
+ }
+ if (bSepLRF)
+ {
+ /* We should not update the shift forces here,
+ * since f_twin is already included in f.
+ */
+ dd_move_f(cr->dd,fr->f_twin,NULL);
+ }
+ }
+ else
+ {
+ pd_move_f(cr,f,nrnb);
+ if (bSepLRF)
+ {
+ pd_move_f(cr,fr->f_twin,nrnb);
+ }
+ }
+ wallcycle_stop(wcycle,ewcMOVEF);
+ }
+
+ /* If we have NoVirSum forces, but we do not calculate the virial,
+ * we sum fr->f_novirum=f later.
+ */
+ if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
+ {
+ wallcycle_start(wcycle,ewcVSITESPREAD);
+ spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
+ &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
+ wallcycle_stop(wcycle,ewcVSITESPREAD);
+
+ if (bSepLRF)
+ {
+ wallcycle_start(wcycle,ewcVSITESPREAD);
+ spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
+ nrnb,
+ &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
+ wallcycle_stop(wcycle,ewcVSITESPREAD);
+ }
+ }
+
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ /* Calculation of the virial must be done after vsites! */
+ calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
+ vir_force,graph,box,nrnb,fr,inputrec->ePBC);
+ }
+ }
+
- /* Calculate the center of mass forces, this requires communication,
- * which is why pull_potential is called close to other communication.
- * The virial contribution is calculated directly,
- * which is why we call pull_potential after calc_virial.
- */
- set_pbc(&pbc,inputrec->ePBC,box);
- dvdlambda[efptRESTRAINT] = 0;
- enerd->term[F_COM_PULL] +=
- pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
- cr,t,lambda[efptRESTRAINT],x,f,vir_force,&(dvdlambda[efptRESTRAINT]));
- if (bSepDVDL)
- {
- fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdlambda[efptRESTRAINT]);
- }
- enerd->dvdl_lin[efptRESTRAINT] += dvdlambda[efptRESTRAINT];
+ if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
+ {
- cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
- dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
-
- /* In case of node-splitting, the PP nodes receive the long-range
++ pull_potential_wrapper(fplog,bSepDVDL,cr,inputrec,box,x,
++ f,vir_force,mdatoms,enerd,lambda,t);
+ }
+
+ /* Add the forces from enforced rotation potentials (if any) */
+ if (inputrec->bRot)
+ {
+ wallcycle_start(wcycle,ewcROTadd);
+ enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr,step,t);
+ wallcycle_stop(wcycle,ewcROTadd);
+ }
+
+ if (PAR(cr) && !(cr->duty & DUTY_PME))
+ {
- wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
- dvdlambda[efptCOUL] = 0;
- gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdlambda[efptCOUL],
- &cycles_seppme);
- if (bSepDVDL)
- {
- fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdlambda[efptCOUL]);
- }
- enerd->term[F_COUL_RECIP] += e;
- enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
- if (wcycle)
- {
- dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
- }
- wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
++ /* In case of node-splitting, the PP nodes receive the long-range
+ * forces, virial and energy from the PME nodes here.
+ */
- if (bDoForces && fr->bF_NoVirSum)
++ pme_receive_force_ener(fplog,bSepDVDL,cr,wcycle,enerd,fr);
+ }
+
- if (vsite)
- {
- /* Spread the mesh force on virtual sites to the other particles...
- * This is parallellized. MPI communication is performed
- * if the constructing atoms aren't local.
- */
- wallcycle_start(wcycle,ewcVSITESPREAD);
- spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,
- (flags & GMX_FORCE_VIRIAL),fr->vir_el_recip,
- nrnb,
- &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
- wallcycle_stop(wcycle,ewcVSITESPREAD);
- }
- if (flags & GMX_FORCE_VIRIAL)
- {
- /* Now add the forces, this is local */
- if (fr->bDomDec)
- {
- sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
- }
- else
- {
- sum_forces(start,start+homenr,f,fr->f_novirsum);
- }
- if (EEL_FULL(fr->eeltype))
- {
- /* Add the mesh contribution to the virial */
- m_add(vir_force,fr->vir_el_recip,vir_force);
- }
- if (debug)
- {
- pr_rvecs(debug,0,"vir_force",vir_force,DIM);
- }
- }
++ if (bDoForces)
+ {
- if (fr->print_force >= 0 && bDoForces)
++ post_process_forces(fplog,cr,step,nrnb,wcycle,
++ top,box,x,f,vir_force,mdatoms,graph,fr,vsite,
++ flags);
+ }
+
+ /* Sum the potential energy terms from group contributions */
+ sum_epot(&(inputrec->opts),enerd);
++}
++
++void do_force(FILE *fplog,t_commrec *cr,
++ t_inputrec *inputrec,
++ gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
++ gmx_localtop_t *top,
++ gmx_mtop_t *mtop,
++ gmx_groups_t *groups,
++ matrix box,rvec x[],history_t *hist,
++ rvec f[],
++ tensor vir_force,
++ t_mdatoms *mdatoms,
++ gmx_enerdata_t *enerd,t_fcdata *fcd,
++ real *lambda,t_graph *graph,
++ t_forcerec *fr,
++ gmx_vsite_t *vsite,rvec mu_tot,
++ double t,FILE *field,gmx_edsam_t ed,
++ gmx_bool bBornRadii,
++ int flags)
++{
++ /* modify force flag if not doing nonbonded */
++ if (!fr->bNonbonded)
++ {
++ flags &= ~GMX_FORCE_NONBONDED;
++ }
+
- print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
++ switch (inputrec->cutoff_scheme)
+ {
- state->box,state->lambda[efptBONDED],&dvdl_dum,
- NULL,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
++ case ecutsVERLET:
++ do_force_cutsVERLET(fplog, cr, inputrec,
++ step, nrnb, wcycle,
++ top, mtop,
++ groups,
++ box, x, hist,
++ f, vir_force,
++ mdatoms,
++ enerd, fcd,
++ lambda, graph,
++ fr, fr->ic,
++ vsite, mu_tot,
++ t, field, ed,
++ bBornRadii,
++ flags);
++ break;
++ case ecutsGROUP:
++ do_force_cutsGROUP(fplog, cr, inputrec,
++ step, nrnb, wcycle,
++ top, mtop,
++ groups,
++ box, x, hist,
++ f, vir_force,
++ mdatoms,
++ enerd, fcd,
++ lambda, graph,
++ fr, vsite, mu_tot,
++ t, field, ed,
++ bBornRadii,
++ flags);
++ break;
++ default:
++ gmx_incons("Invalid cut-off scheme passed!");
+ }
+}
+
++
+void do_constrain_first(FILE *fplog,gmx_constr_t constr,
+ t_inputrec *ir,t_mdatoms *md,
+ t_state *state,rvec *f,
+ t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
+ t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir)
+{
+ int i,m,start,end;
+ gmx_large_int_t step;
+ real dt=ir->delta_t;
+ real dvdl_dum;
+ rvec *savex;
+
+ snew(savex,state->natoms);
+
+ start = md->start;
+ end = md->homenr + start;
+
+ if (debug)
+ fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
+ start,md->homenr,end);
+ /* Do a first constrain to reset particles... */
+ step = ir->init_step;
+ if (fplog)
+ {
+ char buf[STEPSTRSIZE];
+ fprintf(fplog,"\nConstraining the starting coordinates (step %s)\n",
+ gmx_step_str(step,buf));
+ }
+ dvdl_dum = 0;
+
+ /* constrain the current position */
+ constrain(NULL,TRUE,FALSE,constr,&(top->idef),
+ ir,NULL,cr,step,0,md,
+ state->x,state->x,NULL,
- state->box,state->lambda[efptBONDED],&dvdl_dum,
- NULL,NULL,nrnb,econqVeloc,ir->epc==epcMTTK,state->veta,state->veta);
++ fr->bMolPBC,state->box,
++ state->lambda[efptBONDED],&dvdl_dum,
++ NULL,NULL,nrnb,econqCoord,
++ ir->epc==epcMTTK,state->veta,state->veta);
+ if (EI_VV(ir->eI))
+ {
+ /* constrain the inital velocity, and save it */
+ /* also may be useful if we need the ekin from the halfstep for velocity verlet */
+ /* might not yet treat veta correctly */
+ constrain(NULL,TRUE,FALSE,constr,&(top->idef),
+ ir,NULL,cr,step,0,md,
+ state->x,state->v,state->v,
- state->box,state->lambda[efptBONDED],&dvdl_dum,
- state->v,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
-
++ fr->bMolPBC,state->box,
++ state->lambda[efptBONDED],&dvdl_dum,
++ NULL,NULL,nrnb,econqVeloc,
++ ir->epc==epcMTTK,state->veta,state->veta);
+ }
+ /* constrain the inital velocities at t-dt/2 */
+ if (EI_STATE_VELOCITY(ir->eI) && ir->eI!=eiVV)
+ {
+ for(i=start; (i<end); i++)
+ {
+ for(m=0; (m<DIM); m++)
+ {
+ /* Reverse the velocity */
+ state->v[i][m] = -state->v[i][m];
+ /* Store the position at t-dt in buf */
+ savex[i][m] = state->x[i][m] + dt*state->v[i][m];
+ }
+ }
+ /* Shake the positions at t=-dt with the positions at t=0
+ * as reference coordinates.
+ */
+ if (fplog)
+ {
+ char buf[STEPSTRSIZE];
+ fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %s)\n",
+ gmx_step_str(step,buf));
+ }
+ dvdl_dum = 0;
+ constrain(NULL,TRUE,FALSE,constr,&(top->idef),
+ ir,NULL,cr,step,-1,md,
+ state->x,savex,NULL,
- int i,j;
- t_nrnb *nrnb_tot=NULL;
- real delta_t;
- double nbfs,mflop;
- double cycles[ewcNR];
++ fr->bMolPBC,state->box,
++ state->lambda[efptBONDED],&dvdl_dum,
++ state->v,NULL,nrnb,econqCoord,
++ ir->epc==epcMTTK,state->veta,state->veta);
++
+ for(i=start; i<end; i++) {
+ for(m=0; m<DIM; m++) {
+ /* Re-reverse the velocities */
+ state->v[i][m] = -state->v[i][m];
+ }
+ }
+ }
+ sfree(savex);
+}
+
+void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
+{
+ double eners[2],virs[2],enersum,virsum,y0,f,g,h;
+ double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
+ double invscale,invscale2,invscale3;
+ int ri0,ri1,ri,i,offstart,offset;
+ real scale,*vdwtab;
+
+ fr->enershiftsix = 0;
+ fr->enershifttwelve = 0;
+ fr->enerdiffsix = 0;
+ fr->enerdifftwelve = 0;
+ fr->virdiffsix = 0;
+ fr->virdifftwelve = 0;
+
+ if (eDispCorr != edispcNO) {
+ for(i=0; i<2; i++) {
+ eners[i] = 0;
+ virs[i] = 0;
+ }
+ if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT)) {
+ if (fr->rvdw_switch == 0)
+ gmx_fatal(FARGS,
+ "With dispersion correction rvdw-switch can not be zero "
+ "for vdw-type = %s",evdw_names[fr->vdwtype]);
+
+ scale = fr->nblists[0].tab.scale;
+ vdwtab = fr->nblists[0].vdwtab;
+
+ /* Round the cut-offs to exact table values for precision */
+ ri0 = floor(fr->rvdw_switch*scale);
+ ri1 = ceil(fr->rvdw*scale);
+ r0 = ri0/scale;
+ r1 = ri1/scale;
+ rc3 = r0*r0*r0;
+ rc9 = rc3*rc3*rc3;
+
+ if (fr->vdwtype == evdwSHIFT) {
+ /* Determine the constant energy shift below rvdw_switch */
+ fr->enershiftsix = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
+ fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
+ }
+ /* Add the constant part from 0 to rvdw_switch.
+ * This integration from 0 to rvdw_switch overcounts the number
+ * of interactions by 1, as it also counts the self interaction.
+ * We will correct for this later.
+ */
+ eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
+ eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
+
+ invscale = 1.0/(scale);
+ invscale2 = invscale*invscale;
+ invscale3 = invscale*invscale2;
+
+ /* following summation derived from cubic spline definition,
+ Numerical Recipies in C, second edition, p. 113-116. Exact
+ for the cubic spline. We first calculate the negative of
+ the energy from rvdw to rvdw_switch, assuming that g(r)=1,
+ and then add the more standard, abrupt cutoff correction to
+ that result, yielding the long-range correction for a
+ switched function. We perform both the pressure and energy
+ loops at the same time for simplicity, as the computational
+ cost is low. */
+
+ for (i=0;i<2;i++) {
+ enersum = 0.0; virsum = 0.0;
+ if (i==0)
+ offstart = 0;
+ else
+ offstart = 4;
+ for (ri=ri0; ri<ri1; ri++) {
+ r = ri*invscale;
+ ea = invscale3;
+ eb = 2.0*invscale2*r;
+ ec = invscale*r*r;
+
+ pa = invscale3;
+ pb = 3.0*invscale2*r;
+ pc = 3.0*invscale*r*r;
+ pd = r*r*r;
+
+ /* this "8" is from the packing in the vdwtab array - perhaps
+ should be #define'ed? */
+ offset = 8*ri + offstart;
+ y0 = vdwtab[offset];
+ f = vdwtab[offset+1];
+ g = vdwtab[offset+2];
+ h = vdwtab[offset+3];
+
+ enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
+ g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
+ virsum += f*(pa/4 + pb/3 + pc/2 + pd) +
+ 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
+
+ }
+ enersum *= 4.0*M_PI;
+ virsum *= 4.0*M_PI;
+ eners[i] -= enersum;
+ virs[i] -= virsum;
+ }
+
+ /* now add the correction for rvdw_switch to infinity */
+ eners[0] += -4.0*M_PI/(3.0*rc3);
+ eners[1] += 4.0*M_PI/(9.0*rc9);
+ virs[0] += 8.0*M_PI/rc3;
+ virs[1] += -16.0*M_PI/(3.0*rc9);
+ }
+ else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER)) {
+ if (fr->vdwtype == evdwUSER && fplog)
+ fprintf(fplog,
+ "WARNING: using dispersion correction with user tables\n");
+ rc3 = fr->rvdw*fr->rvdw*fr->rvdw;
+ rc9 = rc3*rc3*rc3;
++ /* Contribution beyond the cut-off */
+ eners[0] += -4.0*M_PI/(3.0*rc3);
+ eners[1] += 4.0*M_PI/(9.0*rc9);
++ if (fr->vdw_pot_shift) {
++ /* Contribution within the cut-off */
++ eners[0] += -4.0*M_PI/(3.0*rc3);
++ eners[1] += 4.0*M_PI/(3.0*rc9);
++ }
++ /* Contribution beyond the cut-off */
+ virs[0] += 8.0*M_PI/rc3;
+ virs[1] += -16.0*M_PI/(3.0*rc9);
+ } else {
+ gmx_fatal(FARGS,
+ "Dispersion correction is not implemented for vdw-type = %s",
+ evdw_names[fr->vdwtype]);
+ }
+ fr->enerdiffsix = eners[0];
+ fr->enerdifftwelve = eners[1];
+ /* The 0.5 is due to the Gromacs definition of the virial */
+ fr->virdiffsix = 0.5*virs[0];
+ fr->virdifftwelve = 0.5*virs[1];
+ }
+}
+
+void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
+ gmx_large_int_t step,int natoms,
+ matrix box,real lambda,tensor pres,tensor virial,
+ real *prescorr, real *enercorr, real *dvdlcorr)
+{
+ gmx_bool bCorrAll,bCorrPres;
+ real dvdlambda,invvol,dens,ninter,avcsix,avctwelve,enerdiff,svir=0,spres=0;
+ int m;
+
+ *prescorr = 0;
+ *enercorr = 0;
+ *dvdlcorr = 0;
+
+ clear_mat(virial);
+ clear_mat(pres);
+
+ if (ir->eDispCorr != edispcNO) {
+ bCorrAll = (ir->eDispCorr == edispcAllEner ||
+ ir->eDispCorr == edispcAllEnerPres);
+ bCorrPres = (ir->eDispCorr == edispcEnerPres ||
+ ir->eDispCorr == edispcAllEnerPres);
+
+ invvol = 1/det(box);
+ if (fr->n_tpi)
+ {
+ /* Only correct for the interactions with the inserted molecule */
+ dens = (natoms - fr->n_tpi)*invvol;
+ ninter = fr->n_tpi;
+ }
+ else
+ {
+ dens = natoms*invvol;
+ ninter = 0.5*natoms;
+ }
+
+ if (ir->efep == efepNO)
+ {
+ avcsix = fr->avcsix[0];
+ avctwelve = fr->avctwelve[0];
+ }
+ else
+ {
+ avcsix = (1 - lambda)*fr->avcsix[0] + lambda*fr->avcsix[1];
+ avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
+ }
+
+ enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
+ *enercorr += avcsix*enerdiff;
+ dvdlambda = 0.0;
+ if (ir->efep != efepNO)
+ {
+ dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
+ }
+ if (bCorrAll)
+ {
+ enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
+ *enercorr += avctwelve*enerdiff;
+ if (fr->efep != efepNO)
+ {
+ dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
+ }
+ }
+
+ if (bCorrPres)
+ {
+ svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
+ if (ir->eDispCorr == edispcAllEnerPres)
+ {
+ svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
+ }
+ /* The factor 2 is because of the Gromacs virial definition */
+ spres = -2.0*invvol*svir*PRESFAC;
+
+ for(m=0; m<DIM; m++) {
+ virial[m][m] += svir;
+ pres[m][m] += spres;
+ }
+ *prescorr += spres;
+ }
+
+    /* Can't currently control when it prints, for now, just print when debugging */
+ if (debug)
+ {
+ if (bCorrAll) {
+ fprintf(debug,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
+ avcsix,avctwelve);
+ }
+ if (bCorrPres)
+ {
+ fprintf(debug,
+ "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
+ *enercorr,spres,svir);
+ }
+ else
+ {
+ fprintf(debug,"Long Range LJ corr.: Epot %10g\n",*enercorr);
+ }
+ }
+
+ if (fr->bSepDVDL && do_per_step(step,ir->nstlog))
+ {
+ fprintf(fplog,sepdvdlformat,"Dispersion correction",
+ *enercorr,dvdlambda);
+ }
+ if (fr->efep != efepNO)
+ {
+ *dvdlcorr += dvdlambda;
+ }
+ }
+}
+
+void do_pbc_first(FILE *fplog,matrix box,t_forcerec *fr,
+ t_graph *graph,rvec x[])
+{
+ if (fplog)
+ fprintf(fplog,"Removing pbc first time\n");
+ calc_shifts(box,fr->shift_vec);
+ if (graph) {
+ mk_mshift(fplog,graph,fr->ePBC,box,x);
+ if (gmx_debug_at)
+ p_graph(debug,"do_pbc_first 1",graph);
+ shift_self(graph,box,x);
+ /* By doing an extra mk_mshift the molecules that are broken
+ * because they were e.g. imported from another software
+ * will be made whole again. Such are the healing powers
+ * of GROMACS.
+ */
+ mk_mshift(fplog,graph,fr->ePBC,box,x);
+ if (gmx_debug_at)
+ p_graph(debug,"do_pbc_first 2",graph);
+ }
+ if (fplog)
+ fprintf(fplog,"Done rmpbc\n");
+}
+
+static void low_do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
+ gmx_mtop_t *mtop,rvec x[],
+ gmx_bool bFirst)
+{
+ t_graph *graph;
+ int mb,as,mol;
+ gmx_molblock_t *molb;
+
+ if (bFirst && fplog)
+ fprintf(fplog,"Removing pbc first time\n");
+
+ snew(graph,1);
+ as = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ molb = &mtop->molblock[mb];
+ if (molb->natoms_mol == 1 ||
+ (!bFirst && mtop->moltype[molb->type].cgs.nr == 1)) {
+ /* Just one atom or charge group in the molecule, no PBC required */
+ as += molb->nmol*molb->natoms_mol;
+ } else {
+      /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
+ mk_graph_ilist(NULL,mtop->moltype[molb->type].ilist,
+ 0,molb->natoms_mol,FALSE,FALSE,graph);
+
+ for(mol=0; mol<molb->nmol; mol++) {
+ mk_mshift(fplog,graph,ePBC,box,x+as);
+
+ shift_self(graph,box,x+as);
+ /* The molecule is whole now.
+ * We don't need the second mk_mshift call as in do_pbc_first,
+ * since we no longer need this graph.
+ */
+
+ as += molb->natoms_mol;
+ }
+ done_graph(graph);
+ }
+ }
+ sfree(graph);
+}
+
+void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
+ gmx_mtop_t *mtop,rvec x[])
+{
+ low_do_pbc_mtop(fplog,ePBC,box,mtop,x,TRUE);
+}
+
+void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
+ gmx_mtop_t *mtop,rvec x[])
+{
+ low_do_pbc_mtop(fplog,ePBC,box,mtop,x,FALSE);
+}
+
+void finish_run(FILE *fplog,t_commrec *cr,const char *confout,
+ t_inputrec *inputrec,
+ t_nrnb nrnb[],gmx_wallcycle_t wcycle,
+ gmx_runtime_t *runtime,
++ wallclock_gpu_t *gputimes,
++ int omp_nth_pp,
+ gmx_bool bWriteStat)
+{
- wallcycle_sum(cr,wcycle,cycles);
++ int i,j;
++ t_nrnb *nrnb_tot=NULL;
++ real delta_t;
++ double nbfs,mflop;
+
- if (cr->nnodes > 1) {
- if (SIMMASTER(cr))
- snew(nrnb_tot,1);
++ wallcycle_sum(cr,wcycle);
+
- MPI_Reduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
- MASTERRANK(cr),cr->mpi_comm_mysim);
++ if (cr->nnodes > 1)
++ {
++ snew(nrnb_tot,1);
+#ifdef GMX_MPI
- } else {
- nrnb_tot = nrnb;
- }
++ MPI_Allreduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
++ cr->mpi_comm_mysim);
+#endif
- if (SIMMASTER(cr)) {
- print_flop(fplog,nrnb_tot,&nbfs,&mflop);
- if (cr->nnodes > 1) {
- sfree(nrnb_tot);
++ }
++ else
++ {
++ nrnb_tot = nrnb;
++ }
+
- }
++#if defined(GMX_MPI) && !defined(GMX_THREAD_MPI)
++ if (cr->nnodes > 1)
++ {
++ /* reduce nodetime over all MPI processes in the current simulation */
++ double sum;
++ MPI_Allreduce(&runtime->proctime,&sum,1,MPI_DOUBLE,MPI_SUM,
++ cr->mpi_comm_mysim);
++ runtime->proctime = sum;
+ }
- if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
- print_dd_statistics(cr,inputrec,fplog);
- }
++#endif
+
- if (SIMMASTER(cr)) {
- wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
- wcycle,cycles);
-
- if (EI_DYNAMICS(inputrec->eI)) {
- delta_t = inputrec->delta_t;
- } else {
- delta_t = 0;
- }
++ if (SIMMASTER(cr))
++ {
++ print_flop(fplog,nrnb_tot,&nbfs,&mflop);
++ }
++ if (cr->nnodes > 1)
++ {
++ sfree(nrnb_tot);
++ }
++
++ if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr))
++ {
++ print_dd_statistics(cr,inputrec,fplog);
++ }
+
+#ifdef GMX_MPI
+ if (PARTDECOMP(cr))
+ {
+ if (MASTER(cr))
+ {
+ t_nrnb *nrnb_all;
+ int s;
+ MPI_Status stat;
+
+ snew(nrnb_all,cr->nnodes);
+ nrnb_all[0] = *nrnb;
+ for(s=1; s<cr->nnodes; s++)
+ {
+ MPI_Recv(nrnb_all[s].n,eNRNB,MPI_DOUBLE,s,0,
+ cr->mpi_comm_mysim,&stat);
+ }
+ pr_load(fplog,cr,nrnb_all);
+ sfree(nrnb_all);
+ }
+ else
+ {
+ MPI_Send(nrnb->n,eNRNB,MPI_DOUBLE,MASTERRANK(cr),0,
+ cr->mpi_comm_mysim);
+ }
+ }
+#endif
+
- if (fplog) {
- print_perf(fplog,runtime->proctime,runtime->realtime,
- cr->nnodes-cr->npmenodes,
- runtime->nsteps_done,delta_t,nbfs,mflop);
- }
- if (bWriteStat) {
- print_perf(stderr,runtime->proctime,runtime->realtime,
- cr->nnodes-cr->npmenodes,
- runtime->nsteps_done,delta_t,nbfs,mflop);
- }
++ if (SIMMASTER(cr))
++ {
++ wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
++ wcycle,gputimes);
+
- /*
- runtime=inputrec->nsteps*inputrec->delta_t;
- if (bWriteStat) {
- if (cr->nnodes == 1)
- fprintf(stderr,"\n\n");
- print_perf(stderr,nodetime,realtime,runtime,&ntot,
- cr->nnodes-cr->npmenodes,FALSE);
++ if (EI_DYNAMICS(inputrec->eI))
++ {
++ delta_t = inputrec->delta_t;
++ }
++ else
++ {
++ delta_t = 0;
++ }
+
- wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);
- print_perf(fplog,nodetime,realtime,runtime,&ntot,cr->nnodes-cr->npmenodes,
- TRUE);
- if (PARTDECOMP(cr))
- pr_load(fplog,cr,nrnb_all);
- if (cr->nnodes > 1)
- sfree(nrnb_all);
- */
- }
++ if (fplog)
++ {
++ print_perf(fplog,runtime->proctime,runtime->realtime,
++ cr->nnodes-cr->npmenodes,
++ runtime->nsteps_done,delta_t,nbfs,mflop,
++ omp_nth_pp);
++ }
++ if (bWriteStat)
++ {
++ print_perf(stderr,runtime->proctime,runtime->realtime,
++ cr->nnodes-cr->npmenodes,
++ runtime->nsteps_done,delta_t,nbfs,mflop,
++ omp_nth_pp);
++ }
+ }
-
-
-
+}
+
+extern void initialize_lambdas(FILE *fplog,t_inputrec *ir,int *fep_state,real *lambda,double *lam0)
+{
+ /* this function works, but could probably use a logic rewrite to keep all the different
+ types of efep straight. */
+
+ int i;
+ t_lambda *fep = ir->fepvals;
+
+ if ((ir->efep==efepNO) && (ir->bSimTemp == FALSE)) {
+ for (i=0;i<efptNR;i++) {
+ lambda[i] = 0.0;
+ if (lam0)
+ {
+ lam0[i] = 0.0;
+ }
+ }
+ return;
+ } else {
+ *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
+ if checkpoint is set -- a kludge is in for now
+ to prevent this.*/
+ for (i=0;i<efptNR;i++)
+ {
+ /* overwrite lambda state with init_lambda for now for backwards compatibility */
+            if (fep->init_lambda>=0) /* if it's -1, it was never initialized */
+ {
+ lambda[i] = fep->init_lambda;
+ if (lam0) {
+ lam0[i] = lambda[i];
+ }
+ }
+ else
+ {
+ lambda[i] = fep->all_lambda[i][*fep_state];
+ if (lam0) {
+ lam0[i] = lambda[i];
+ }
+ }
+ }
+ if (ir->bSimTemp) {
+ /* need to rescale control temperatures to match current state */
+ for (i=0;i<ir->opts.ngtc;i++) {
+ if (ir->opts.ref_t[i] > 0) {
+ ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
+ }
+ }
+ }
+ }
+
+ /* Send to the log the information on the current lambdas */
+ if (fplog != NULL)
+ {
+ fprintf(fplog,"Initial vector of lambda components:[ ");
+ for (i=0;i<efptNR;i++)
+ {
+ fprintf(fplog,"%10.4f ",lambda[i]);
+ }
+ fprintf(fplog,"]\n");
+ }
+ return;
+}
+
+
+void init_md(FILE *fplog,
+ t_commrec *cr,t_inputrec *ir,const output_env_t oenv,
+ double *t,double *t0,
+ real *lambda, int *fep_state, double *lam0,
+ t_nrnb *nrnb,gmx_mtop_t *mtop,
+ gmx_update_t *upd,
+ int nfile,const t_filenm fnm[],
+ gmx_mdoutf_t **outf,t_mdebin **mdebin,
+ tensor force_vir,tensor shake_vir,rvec mu_tot,
+ gmx_bool *bSimAnn,t_vcm **vcm, t_state *state, unsigned long Flags)
+{
+ int i,j,n;
+ real tmpt,mod;
+
+ /* Initial values */
+ *t = *t0 = ir->init_t;
+
+ *bSimAnn=FALSE;
+ for(i=0;i<ir->opts.ngtc;i++)
+ {
+ /* set bSimAnn if any group is being annealed */
+ if(ir->opts.annealing[i]!=eannNO)
+ {
+ *bSimAnn = TRUE;
+ }
+ }
+ if (*bSimAnn)
+ {
+ update_annealing_target_temp(&(ir->opts),ir->init_t);
+ }
+
+ /* Initialize lambda variables */
+ initialize_lambdas(fplog,ir,fep_state,lambda,lam0);
+
+ if (upd)
+ {
+ *upd = init_update(fplog,ir);
+ }
+
+
+ if (vcm != NULL)
+ {
+ *vcm = init_vcm(fplog,&mtop->groups,ir);
+ }
+
+ if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
+ {
+ if (ir->etc == etcBERENDSEN)
+ {
+ please_cite(fplog,"Berendsen84a");
+ }
+ if (ir->etc == etcVRESCALE)
+ {
+ please_cite(fplog,"Bussi2007a");
+ }
+ }
+
+ init_nrnb(nrnb);
+
+ if (nfile != -1)
+ {
+ *outf = init_mdoutf(nfile,fnm,Flags,cr,ir,oenv);
+
+ *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : (*outf)->fp_ene,
+ mtop,ir, (*outf)->fp_dhdl);
+ }
+
+ if (ir->bAdress)
+ {
+ please_cite(fplog,"Fritsch12");
+ please_cite(fplog,"Junghans10");
+ }
+ /* Initiate variables */
+ clear_mat(force_vir);
+ clear_mat(shake_vir);
+ clear_rvec(mu_tot);
+
+ debug_gmx();
+}
+
--- /dev/null
- /*
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROwing Monsters And Cloning Shrimps
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "maths.h"
+#include "typedefs.h"
+#include "names.h"
+#include "smalloc.h"
+#include "gmx_fatal.h"
+#include "futil.h"
+#include "xvgr.h"
+#include "vec.h"
+#include "main.h"
+#include "network.h"
+#include "physics.h"
+#include "force.h"
+#include "gmxfio.h"
++#include "macros.h"
++#include "tables.h"
+
+/* All the possible (implemented) table functions */
+enum {
+ etabLJ6,
+ etabLJ12,
+ etabLJ6Shift,
+ etabLJ12Shift,
+ etabShift,
+ etabRF,
+ etabRF_ZERO,
+ etabCOUL,
+ etabEwald,
+ etabEwaldSwitch,
+ etabEwaldUser,
+ etabEwaldUserSwitch,
+ etabLJ6Switch,
+ etabLJ12Switch,
+ etabCOULSwitch,
+ etabLJ6Encad,
+ etabLJ12Encad,
+ etabCOULEncad,
+ etabEXPMIN,
+ etabUSER,
+ etabNR
+};
+
+/** Evaluates to true if the table type contains user data. */
+#define ETAB_USER(e) ((e) == etabUSER || \
+ (e) == etabEwaldUser || (e) == etabEwaldUserSwitch)
+
+typedef struct {
+ const char *name;
+ gmx_bool bCoulomb;
+} t_tab_props;
+
+/* This structure holds name and a flag that tells whether
+   this is a Coulomb type function */
+static const t_tab_props tprops[etabNR] = {
+ { "LJ6", FALSE },
+ { "LJ12", FALSE },
+ { "LJ6Shift", FALSE },
+ { "LJ12Shift", FALSE },
+ { "Shift", TRUE },
+ { "RF", TRUE },
+ { "RF-zero", TRUE },
+ { "COUL", TRUE },
+ { "Ewald", TRUE },
+ { "Ewald-Switch", TRUE },
+ { "Ewald-User", TRUE },
+ { "Ewald-User-Switch", TRUE },
+ { "LJ6Switch", FALSE },
+ { "LJ12Switch", FALSE },
+ { "COULSwitch", TRUE },
+ { "LJ6-Encad shift", FALSE },
+ { "LJ12-Encad shift", FALSE },
+ { "COUL-Encad shift", TRUE },
+ { "EXPMIN", FALSE },
+ { "USER", FALSE }
+};
+
+/* Index in the table that says which function to use */
+enum { etiCOUL, etiLJ6, etiLJ12, etiNR };
+
+typedef struct {
+ int nx,nx0;
+ double tabscale;
+ double *x,*v,*f;
+} t_tabledata;
+
+#define pow2(x) ((x)*(x))
+#define pow3(x) ((x)*(x)*(x))
+#define pow4(x) ((x)*(x)*(x)*(x))
+#define pow5(x) ((x)*(x)*(x)*(x)*(x))
+
++
++static double v_ewald_lr(double beta,double r)
++{
++ if (r == 0)
++ {
++ return beta*2/sqrt(M_PI);
++ }
++ else
++ {
++ return gmx_erfd(beta*r)/r;
++ }
++}
++
++void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
++ int ntab,int tableformat,
++ real dx,real beta)
++{
++ real tab_max;
++ int stride=0;
++ int i,i_inrange;
++ double dc,dc_new;
++ gmx_bool bOutOfRange;
++ double v_r0,v_r1,v_inrange,vi,a0,a1,a2dx;
++ double x_r0;
++
++ if (ntab < 2)
++ {
++ gmx_fatal(FARGS,"Can not make a spline table with less than 2 points");
++ }
++
++ /* We need some margin to be able to divide table values by r
++ * in the kernel and also to do the integration arithmetics
++     * without going out of range. Furthermore, we divide by dx below.
++ */
++ tab_max = GMX_REAL_MAX*0.0001;
++
++ /* This function produces a table with:
++ * maximum energy error: V'''/(6*12*sqrt(3))*dx^3
++ * maximum force error: V'''/(6*4)*dx^2
++ * The rms force error is the max error times 1/sqrt(5)=0.45.
++ */
++
++ switch (tableformat)
++ {
++ case tableformatF: stride = 1; break;
++ case tableformatFDV0: stride = 4; break;
++ default: gmx_incons("Unknown table format");
++ }
++
++ bOutOfRange = FALSE;
++ i_inrange = ntab;
++ v_inrange = 0;
++ dc = 0;
++ for(i=ntab-1; i>=0; i--)
++ {
++ x_r0 = i*dx;
++
++ v_r0 = v_ewald_lr(beta,x_r0);
++
++ if (!bOutOfRange)
++ {
++ i_inrange = i;
++ v_inrange = v_r0;
++
++ vi = v_r0;
++ }
++ else
++ {
++ /* Linear continuation for the last point in range */
++ vi = v_inrange - dc*(i - i_inrange)*dx;
++ }
++
++ switch (tableformat)
++ {
++ case tableformatF:
++ if (tabv != NULL)
++ {
++ tabv[i] = vi;
++ }
++ break;
++ case tableformatFDV0:
++ tabf[i*stride+2] = vi;
++ tabf[i*stride+3] = 0;
++ break;
++ default:
++ gmx_incons("Unknown table format");
++ }
++
++ if (i == 0)
++ {
++ continue;
++ }
++
++ /* Get the potential at table point i-1 */
++ v_r1 = v_ewald_lr(beta,(i-1)*dx);
++
++ if (v_r1 != v_r1 || v_r1 < -tab_max || v_r1 > tab_max)
++ {
++ bOutOfRange = TRUE;
++ }
++
++ if (!bOutOfRange)
++ {
++ /* Calculate the average second derivative times dx over interval i-1 to i.
++ * Using the function values at the end points and in the middle.
++ */
++ a2dx = (v_r0 + v_r1 - 2*v_ewald_lr(beta,x_r0-0.5*dx))/(0.25*dx);
++ /* Set the derivative of the spline to match the difference in potential
++ * over the interval plus the average effect of the quadratic term.
++ * This is the essential step for minimizing the error in the force.
++ */
++ dc = (v_r0 - v_r1)/dx + 0.5*a2dx;
++ }
++
++ if (i == ntab - 1)
++ {
++ /* Fill the table with the force, minus the derivative of the spline */
++ tabf[i*stride] = -dc;
++ }
++ else
++ {
++ /* tab[i] will contain the average of the splines over the two intervals */
++ tabf[i*stride] += -0.5*dc;
++ }
++
++ if (!bOutOfRange)
++ {
++ /* Make spline s(x) = a0 + a1*(x - xr) + 0.5*a2*(x - xr)^2
++ * matching the potential at the two end points
++ * and the derivative dc at the end point xr.
++ */
++ a0 = v_r0;
++ a1 = dc;
++ a2dx = (a1*dx + v_r1 - a0)*2/dx;
++
++ /* Set dc to the derivative at the next point */
++ dc_new = a1 - a2dx;
++
++ if (dc_new != dc_new || dc_new < -tab_max || dc_new > tab_max)
++ {
++ bOutOfRange = TRUE;
++ }
++ else
++ {
++ dc = dc_new;
++ }
++ }
++
++ tabf[(i-1)*stride] = -0.5*dc;
++ }
++ /* Currently the last value only contains half the force: double it */
++ tabf[0] *= 2;
++
++ if (tableformat == tableformatFDV0)
++ {
++ /* Store the force difference in the second entry */
++ for(i=0; i<ntab-1; i++)
++ {
++ tabf[i*stride+1] = tabf[(i+1)*stride] - tabf[i*stride];
++ }
++ tabf[(ntab-1)*stride+1] = -tabf[i*stride];
++ }
++}
++
++/* The scale (1/spacing) for third order spline interpolation
++ * of the Ewald mesh contribution which needs to be subtracted
++ * from the non-bonded interactions.
++ */
++real ewald_spline3_table_scale(real ewaldcoeff,real rc)
++{
++ double erf_x_d3=1.0522; /* max of (erf(x)/x)''' */
++ double ftol,etol;
++ double sc_f,sc_e;
++
++ /* Force tolerance: single precision accuracy */
++ ftol = GMX_FLOAT_EPS;
++ sc_f = sqrt(erf_x_d3/(6*4*ftol*ewaldcoeff))*ewaldcoeff;
++
++ /* Energy tolerance: 10x more accurate than the cut-off jump */
++ etol = 0.1*gmx_erfc(ewaldcoeff*rc);
++ etol = max(etol,GMX_REAL_EPS);
++ sc_e = pow(erf_x_d3/(6*12*sqrt(3)*etol),1.0/3.0)*ewaldcoeff;
++
++ return max(sc_f,sc_e);
++}
++
+/* Calculate the potential and force for an r value
+ * in exactly the same way it is done in the inner loop.
+ * VFtab is a pointer to the table data, offset is
+ * the point where we should begin and stride is
+ * 4 if we have a buckingham table, 3 otherwise.
+ * If you want to evaluate table no N, set offset to 4*N.
+ *
+ * We use normal precision here, since that is what we
+ * will use in the inner loops.
+ */
+static void evaluate_table(real VFtab[], int offset, int stride,
+ real tabscale, real r, real *y, real *yp)
+{
+ int n;
+ real rt,eps,eps2;
+ real Y,F,Geps,Heps2,Fp;
+
+ rt = r*tabscale;
+ n = (int)rt;
+ eps = rt - n;
+ eps2 = eps*eps;
+ n = offset+stride*n;
+ Y = VFtab[n];
+ F = VFtab[n+1];
+ Geps = eps*VFtab[n+2];
+ Heps2 = eps2*VFtab[n+3];
+ Fp = F+Geps+Heps2;
+ *y = Y+eps*Fp;
+ *yp = (Fp+Geps+2.0*Heps2)*tabscale;
+}
+
+static void copy2table(int n,int offset,int stride,
+ double x[],double Vtab[],double Ftab[],
+ real dest[])
+{
+/* Use double prec. for the intermediary variables
+ * and temporary x/vtab/vtab2 data to avoid unnecessary
+ * loss of precision.
+ */
+ int i,nn0;
+ double F,G,H,h;
+
+ h = 0;
+ for(i=0; (i<n); i++) {
+ if (i < n-1) {
+ h = x[i+1] - x[i];
+ F = -Ftab[i]*h;
+ G = 3*(Vtab[i+1] - Vtab[i]) + (Ftab[i+1] + 2*Ftab[i])*h;
+ H = -2*(Vtab[i+1] - Vtab[i]) - (Ftab[i+1] + Ftab[i])*h;
+ } else {
+ /* Fill the last entry with a linear potential,
+ * this is mainly for rounding issues with angle and dihedral potentials.
+ */
+ F = -Ftab[i]*h;
+ G = 0;
+ H = 0;
+ }
+ nn0 = offset + i*stride;
+ dest[nn0] = Vtab[i];
+ dest[nn0+1] = F;
+ dest[nn0+2] = G;
+ dest[nn0+3] = H;
+ }
+}
+
+static void init_table(FILE *fp,int n,int nx0,
+ double tabscale,t_tabledata *td,gmx_bool bAlloc)
+{
+ int i;
+
+ td->nx = n;
+ td->nx0 = nx0;
+ td->tabscale = tabscale;
+ if (bAlloc) {
+ snew(td->x,td->nx);
+ snew(td->v,td->nx);
+ snew(td->f,td->nx);
+ }
+ for(i=0; (i<td->nx); i++)
+ td->x[i] = i/tabscale;
+}
+
+static void spline_forces(int nx,double h,double v[],gmx_bool bS3,gmx_bool bE3,
+ double f[])
+{
+ int start,end,i;
+ double v3,b_s,b_e,b;
+ double beta,*gamma;
+
+ /* Formulas can be found in:
+ * H.J.C. Berendsen, Simulating the Physical World, Cambridge 2007
+ */
+
+ if (nx < 4 && (bS3 || bE3))
+ gmx_fatal(FARGS,"Can not generate splines with third derivative boundary conditions with less than 4 (%d) points",nx);
+
+ /* To make life easy we initially set the spacing to 1
+ * and correct for this at the end.
+ */
+ beta = 2;
+ if (bS3) {
+ /* Fit V''' at the start */
+ v3 = v[3] - 3*v[2] + 3*v[1] - v[0];
+ if (debug)
+ fprintf(debug,"The left third derivative is %g\n",v3/(h*h*h));
+ b_s = 2*(v[1] - v[0]) + v3/6;
+ start = 0;
+
+ if (FALSE) {
+ /* Fit V'' at the start */
+ real v2;
+
+ v2 = -v[3] + 4*v[2] - 5*v[1] + 2*v[0];
+ /* v2 = v[2] - 2*v[1] + v[0]; */
+ if (debug)
+ fprintf(debug,"The left second derivative is %g\n",v2/(h*h));
+ b_s = 3*(v[1] - v[0]) - v2/2;
+ start = 0;
+ }
+ } else {
+ b_s = 3*(v[2] - v[0]) + f[0]*h;
+ start = 1;
+ }
+ if (bE3) {
+ /* Fit V''' at the end */
+ v3 = v[nx-1] - 3*v[nx-2] + 3*v[nx-3] - v[nx-4];
+ if (debug)
+ fprintf(debug,"The right third derivative is %g\n",v3/(h*h*h));
+ b_e = 2*(v[nx-1] - v[nx-2]) + v3/6;
+ end = nx;
+ } else {
+ /* V'=0 at the end */
+ b_e = 3*(v[nx-1] - v[nx-3]) + f[nx-1]*h;
+ end = nx - 1;
+ }
+
+ snew(gamma,nx);
+ beta = (bS3 ? 1 : 4);
+
+ /* For V'' fitting */
+ /* beta = (bS3 ? 2 : 4); */
+
+ f[start] = b_s/beta;
+ for(i=start+1; i<end; i++) {
+ gamma[i] = 1/beta;
+ beta = 4 - gamma[i];
+ b = 3*(v[i+1] - v[i-1]);
+ f[i] = (b - f[i-1])/beta;
+ }
+ gamma[end-1] = 1/beta;
+ beta = (bE3 ? 1 : 4) - gamma[end-1];
+ f[end-1] = (b_e - f[end-2])/beta;
+
+ for(i=end-2; i>=start; i--)
+ f[i] -= gamma[i+1]*f[i+1];
+ sfree(gamma);
+
+ /* Correct for the minus sign and the spacing */
+ for(i=start; i<end; i++)
+ f[i] = -f[i]/h;
+}
+
+static void set_forces(FILE *fp,int angle,
+ int nx,double h,double v[],double f[],
+ int table)
+{
+ int start,end;
+
+ if (angle == 2)
+ gmx_fatal(FARGS,
+ "Force generation for dihedral tables is not (yet) implemented");
+
+ start = 0;
+ while (v[start] == 0)
+ start++;
+
+ end = nx;
+ while(v[end-1] == 0)
+ end--;
+ if (end > nx - 2)
+ end = nx;
+ else
+ end++;
+
+ if (fp)
+ fprintf(fp,"Generating forces for table %d, boundary conditions: V''' at %g, %s at %g\n",
+ table+1,start*h,end==nx ? "V'''" : "V'=0",(end-1)*h);
+ spline_forces(end-start,h,v+start,TRUE,end==nx,f+start);
+}
+
+static void read_tables(FILE *fp,const char *fn,
+ int ntab,int angle,t_tabledata td[])
+{
+ char *libfn;
+ char buf[STRLEN];
+ double **yy=NULL,start,end,dx0,dx1,ssd,vm,vp,f,numf;
+ int k,i,nx,nx0=0,ny,nny,ns;
+ gmx_bool bAllZero,bZeroV,bZeroF;
+ double tabscale;
+
+ nny = 2*ntab+1;
+ libfn = gmxlibfn(fn);
+ nx = read_xvg(libfn,&yy,&ny);
+ if (ny != nny)
+ gmx_fatal(FARGS,"Trying to read file %s, but nr columns = %d, should be %d",
+ libfn,ny,nny);
+ if (angle == 0) {
+ if (yy[0][0] != 0.0)
+ gmx_fatal(FARGS,
+ "The first distance in file %s is %f nm instead of %f nm",
+ libfn,yy[0][0],0.0);
+ } else {
+ if (angle == 1)
+ start = 0.0;
+ else
+ start = -180.0;
+ end = 180.0;
+ if (yy[0][0] != start || yy[0][nx-1] != end)
+ gmx_fatal(FARGS,"The angles in file %s should go from %f to %f instead of %f to %f\n",
+ libfn,start,end,yy[0][0],yy[0][nx-1]);
+ }
+
+ tabscale = (nx-1)/(yy[0][nx-1] - yy[0][0]);
+
+ if (fp) {
+ fprintf(fp,"Read user tables from %s with %d data points.\n",libfn,nx);
+ if (angle == 0)
+ fprintf(fp,"Tabscale = %g points/nm\n",tabscale);
+ }
+
+ bAllZero = TRUE;
+ for(k=0; k<ntab; k++) {
+ bZeroV = TRUE;
+ bZeroF = TRUE;
+ for(i=0; (i < nx); i++) {
+ if (i >= 2) {
+ dx0 = yy[0][i-1] - yy[0][i-2];
+ dx1 = yy[0][i] - yy[0][i-1];
+ /* Check for 1% deviation in spacing */
+ if (fabs(dx1 - dx0) >= 0.005*(fabs(dx0) + fabs(dx1))) {
+ gmx_fatal(FARGS,"In table file '%s' the x values are not equally spaced: %f %f %f",fn,yy[0][i-2],yy[0][i-1],yy[0][i]);
+ }
+ }
+ if (yy[1+k*2][i] != 0) {
+ bZeroV = FALSE;
+ if (bAllZero) {
+ bAllZero = FALSE;
+ nx0 = i;
+ }
+ if (yy[1+k*2][i] > 0.01*GMX_REAL_MAX ||
+ yy[1+k*2][i] < -0.01*GMX_REAL_MAX) {
+ gmx_fatal(FARGS,"Out of range potential value %g in file '%s'",
+ yy[1+k*2][i],fn);
+ }
+ }
+ if (yy[1+k*2+1][i] != 0) {
+ bZeroF = FALSE;
+ if (bAllZero) {
+ bAllZero = FALSE;
+ nx0 = i;
+ }
+ if (yy[1+k*2+1][i] > 0.01*GMX_REAL_MAX ||
+ yy[1+k*2+1][i] < -0.01*GMX_REAL_MAX) {
+ gmx_fatal(FARGS,"Out of range force value %g in file '%s'",
+ yy[1+k*2+1][i],fn);
+ }
+ }
+ }
+
+ if (!bZeroV && bZeroF) {
+ set_forces(fp,angle,nx,1/tabscale,yy[1+k*2],yy[1+k*2+1],k);
+ } else {
+ /* Check if the second column is close to minus the numerical
+ * derivative of the first column.
+ */
+ ssd = 0;
+ ns = 0;
+ for(i=1; (i < nx-1); i++) {
+ vm = yy[1+2*k][i-1];
+ vp = yy[1+2*k][i+1];
+ f = yy[1+2*k+1][i];
+ if (vm != 0 && vp != 0 && f != 0) {
+ /* Take the centered difference */
+ numf = -(vp - vm)*0.5*tabscale;
+ ssd += fabs(2*(f - numf)/(f + numf));
+ ns++;
+ }
+ }
+ if (ns > 0) {
+ ssd /= ns;
+ sprintf(buf,"For the %d non-zero entries for table %d in %s the forces deviate on average %d%% from minus the numerical derivative of the potential\n",ns,k,libfn,(int)(100*ssd+0.5));
+ if (debug)
+ fprintf(debug,"%s",buf);
+ if (ssd > 0.2) {
+ if (fp)
+ fprintf(fp,"\nWARNING: %s\n",buf);
+ fprintf(stderr,"\nWARNING: %s\n",buf);
+ }
+ }
+ }
+ }
+ if (bAllZero && fp) {
+ fprintf(fp,"\nNOTE: All elements in table %s are zero\n\n",libfn);
+ }
+
+ for(k=0; (k<ntab); k++) {
+ init_table(fp,nx,nx0,tabscale,&(td[k]),TRUE);
+ for(i=0; (i<nx); i++) {
+ td[k].x[i] = yy[0][i];
+ td[k].v[i] = yy[2*k+1][i];
+ td[k].f[i] = yy[2*k+2][i];
+ }
+ }
+ for(i=0; (i<ny); i++)
+ sfree(yy[i]);
+ sfree(yy);
+ sfree(libfn);
+}
+
+static void done_tabledata(t_tabledata *td)
+{
+ int i;
+
+ if (!td)
+ return;
+
+ sfree(td->x);
+ sfree(td->v);
+ sfree(td->f);
+}
+
+static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
+{
+ /* Fill the table according to the formulas in the manual.
+ * In principle, we only need the potential and the second
+ * derivative, but then we would have to do lots of calculations
+ * in the inner loop. By precalculating some terms (see manual)
+ * we get better eventual performance, despite a larger table.
+ *
+ * Since some of these higher-order terms are very small,
+ * we always use double precision to calculate them here, in order
+ * to avoid unnecessary loss of precision.
+ */
+#ifdef DEBUG_SWITCH
+ FILE *fp;
+#endif
+ int i;
+ double reppow,p;
+ double r1,rc,r12,r13;
+ double r,r2,r6,rc6;
+ double expr,Vtab,Ftab;
+ /* Parameters for David's function */
+ double A=0,B=0,C=0,A_3=0,B_4=0;
+ /* Parameters for the switching function */
+ double ksw,swi,swi1;
+ /* Temporary parameters */
+ gmx_bool bSwitch,bShift;
+ double ewc=fr->ewaldcoeff;
+ double isp= 0.564189583547756;
+
+ bSwitch = ((tp == etabLJ6Switch) || (tp == etabLJ12Switch) ||
+ (tp == etabCOULSwitch) ||
+ (tp == etabEwaldSwitch) || (tp == etabEwaldUserSwitch));
+ bShift = ((tp == etabLJ6Shift) || (tp == etabLJ12Shift) ||
+ (tp == etabShift));
+
+ reppow = fr->reppow;
+
+ if (tprops[tp].bCoulomb) {
+ r1 = fr->rcoulomb_switch;
+ rc = fr->rcoulomb;
+ }
+ else {
+ r1 = fr->rvdw_switch;
+ rc = fr->rvdw;
+ }
+ if (bSwitch)
+ ksw = 1.0/(pow5(rc-r1));
+ else
+ ksw = 0.0;
+ if (bShift) {
+ if (tp == etabShift)
+ p = 1;
+ else if (tp == etabLJ6Shift)
+ p = 6;
+ else
+ p = reppow;
+
+ A = p * ((p+1)*r1-(p+4)*rc)/(pow(rc,p+2)*pow2(rc-r1));
+ B = -p * ((p+1)*r1-(p+3)*rc)/(pow(rc,p+2)*pow3(rc-r1));
+ C = 1.0/pow(rc,p)-A/3.0*pow3(rc-r1)-B/4.0*pow4(rc-r1);
+ if (tp == etabLJ6Shift) {
+ A=-A;
+ B=-B;
+ C=-C;
+ }
+ A_3=A/3.0;
+ B_4=B/4.0;
+ }
+ if (debug) { fprintf(debug,"Setting up tables\n"); fflush(debug); }
+
+#ifdef DEBUG_SWITCH
+ fp=xvgropen("switch.xvg","switch","r","s");
+#endif
+
+ for(i=td->nx0; (i<td->nx); i++) {
+ r = td->x[i];
+ r2 = r*r;
+ r6 = 1.0/(r2*r2*r2);
+ if (gmx_within_tol(reppow,12.0,10*GMX_DOUBLE_EPS)) {
+ r12 = r6*r6;
+ } else {
+ r12 = pow(r,-reppow);
+ }
+ Vtab = 0.0;
+ Ftab = 0.0;
+ if (bSwitch) {
+ /* swi is function, swi1 1st derivative and swi2 2nd derivative */
+ /* The switch function is 1 for r<r1, 0 for r>rc, and smooth for
+ * r1<=r<=rc. The 1st and 2nd derivatives are both zero at
+ * r1 and rc.
+ * ksw is just the constant 1/(rc-r1)^5, to save some calculations...
+ */
+ if(r<=r1) {
+ swi = 1.0;
+ swi1 = 0.0;
+ } else if (r>=rc) {
+ swi = 0.0;
+ swi1 = 0.0;
+ } else {
+ swi = 1 - 10*pow3(r-r1)*ksw*pow2(rc-r1)
+ + 15*pow4(r-r1)*ksw*(rc-r1) - 6*pow5(r-r1)*ksw;
+ swi1 = -30*pow2(r-r1)*ksw*pow2(rc-r1)
+ + 60*pow3(r-r1)*ksw*(rc-r1) - 30*pow4(r-r1)*ksw;
+ }
+ }
+ else { /* not really needed, but avoids compiler warnings... */
+ swi = 1.0;
+ swi1 = 0.0;
+ }
+#ifdef DEBUG_SWITCH
+ fprintf(fp,"%10g %10g %10g %10g\n",r,swi,swi1,swi2);
+#endif
+
+ rc6 = rc*rc*rc;
+ rc6 = 1.0/(rc6*rc6);
+
+ switch (tp) {
+ case etabLJ6:
+ /* Dispersion */
+ Vtab = -r6;
+ Ftab = 6.0*Vtab/r;
+ break;
+ case etabLJ6Switch:
+ case etabLJ6Shift:
+ /* Dispersion */
+ if (r < rc) {
+ Vtab = -r6;
+ Ftab = 6.0*Vtab/r;
+ }
+ break;
+ case etabLJ12:
+ /* Repulsion */
+ Vtab = r12;
+ Ftab = reppow*Vtab/r;
+ break;
+ case etabLJ12Switch:
+ case etabLJ12Shift:
+ /* Repulsion */
+ if (r < rc) {
+ Vtab = r12;
+ Ftab = reppow*Vtab/r;
+ }
+ break;
+ case etabLJ6Encad:
+ if(r < rc) {
+ Vtab = -(r6-6.0*(rc-r)*rc6/rc-rc6);
+ Ftab = -(6.0*r6/r-6.0*rc6/rc);
+ } else { /* r>rc */
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ case etabLJ12Encad:
+ if(r < rc) {
+ Vtab = r12-12.0*(rc-r)*rc6*rc6/rc-1.0*rc6*rc6;
+ Ftab = 12.0*r12/r-12.0*rc6*rc6/rc;
+ } else { /* r>rc */
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ case etabCOUL:
+ Vtab = 1.0/r;
+ Ftab = 1.0/r2;
+ break;
+ case etabCOULSwitch:
+ case etabShift:
+ if (r < rc) {
+ Vtab = 1.0/r;
+ Ftab = 1.0/r2;
+ }
+ break;
+ case etabEwald:
+ case etabEwaldSwitch:
+ Vtab = gmx_erfc(ewc*r)/r;
+ Ftab = gmx_erfc(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+ break;
+ case etabEwaldUser:
+ case etabEwaldUserSwitch:
+ /* Only calculate minus the reciprocal space contribution */
+ Vtab = -gmx_erf(ewc*r)/r;
+ Ftab = -gmx_erf(ewc*r)/r2+2*exp(-(ewc*ewc*r2))*ewc*isp/r;
+ break;
+ case etabRF:
+ case etabRF_ZERO:
+ Vtab = 1.0/r + fr->k_rf*r2 - fr->c_rf;
+ Ftab = 1.0/r2 - 2*fr->k_rf*r;
+ if (tp == etabRF_ZERO && r >= rc) {
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ case etabEXPMIN:
+ expr = exp(-r);
+ Vtab = expr;
+ Ftab = expr;
+ break;
+ case etabCOULEncad:
+ if(r < rc) {
+ Vtab = 1.0/r-(rc-r)/(rc*rc)-1.0/rc;
+ Ftab = 1.0/r2-1.0/(rc*rc);
+ } else { /* r>rc */
+ Vtab = 0;
+ Ftab = 0;
+ }
+ break;
+ default:
+ gmx_fatal(FARGS,"Table type %d not implemented yet. (%s,%d)",
+ tp,__FILE__,__LINE__);
+ }
+ if (bShift) {
+ /* Normal coulomb with cut-off correction for potential */
+ if (r < rc) {
+ Vtab -= C;
+ /* If in Shifting range add something to it */
+ if (r > r1) {
+ r12 = (r-r1)*(r-r1);
+ r13 = (r-r1)*r12;
+ Vtab += - A_3*r13 - B_4*r12*r12;
+ Ftab += A*r12 + B*r13;
+ }
+ }
+ }
+
+ if (ETAB_USER(tp)) {
+ Vtab += td->v[i];
+ Ftab += td->f[i];
+ }
+
+ if ((r > r1) && bSwitch) {
+ Ftab = Ftab*swi - Vtab*swi1;
+ Vtab = Vtab*swi;
+ }
+
+ /* Convert to single precision when we store to mem */
+ td->v[i] = Vtab;
+ td->f[i] = Ftab;
+ }
+
+ /* Continue the table linearly from nx0 to 0.
+ * These values are only required for energy minimization with overlap or TPI.
+ */
+ for(i=td->nx0-1; i>=0; i--) {
+ td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]);
+ td->f[i] = td->f[i+1];
+ }
+
+#ifdef DEBUG_SWITCH
+ gmx_fio_fclose(fp);
+#endif
+}
+
+/* Choose the tabulated functional form for each of the three table slots
+ * (etiCOUL, etiLJ6, etiLJ12) from the electrostatics and vdw settings.
+ *   tabsel[] - output, one etab* value per eti* slot
+ *   fr       - force record supplying eeltype, vdwtype and switch radii
+ *   b14only  - TRUE when generating 1-4 pair tables; 1-4 interactions only
+ *              support plain cut-off or user-supplied tables
+ */
+static void set_table_type(int tabsel[],const t_forcerec *fr,gmx_bool b14only)
+{
+    int eltype,vdwtype;
+
+    /* Set the different table indices.
+     * Coulomb first.
+     */
+
+
+    if (b14only) {
+        /* 1-4 pairs cannot use the full modifier set; map the requested
+         * electrostatics to the closest supported type (user tables stay
+         * user tables, everything else becomes plain cut-off). */
+        switch (fr->eeltype) {
+        case eelRF_NEC:
+            eltype = eelRF;
+            break;
+        case eelUSER:
+        case eelPMEUSER:
+        case eelPMEUSERSWITCH:
+            eltype = eelUSER;
+            break;
+        default:
+            eltype = eelCUT;
+        }
+    } else {
+        eltype = fr->eeltype;
+    }
+
+    switch (eltype) {
+    case eelCUT:
+        tabsel[etiCOUL] = etabCOUL;
+        break;
+    case eelPOISSON:
+        tabsel[etiCOUL] = etabShift;
+        break;
+    case eelSHIFT:
+        /* Only shift when there actually is a switching region */
+        if (fr->rcoulomb > fr->rcoulomb_switch)
+            tabsel[etiCOUL] = etabShift;
+        else
+            tabsel[etiCOUL] = etabCOUL;
+        break;
+    case eelEWALD:
+    case eelPME:
+    case eelP3M_AD:
+        tabsel[etiCOUL] = etabEwald;
+        break;
+    case eelPMESWITCH:
+        tabsel[etiCOUL] = etabEwaldSwitch;
+        break;
+    case eelPMEUSER:
+        tabsel[etiCOUL] = etabEwaldUser;
+        break;
+    case eelPMEUSERSWITCH:
+        tabsel[etiCOUL] = etabEwaldUserSwitch;
+        break;
+    case eelRF:
+    case eelGRF:
+    case eelRF_NEC:
+        tabsel[etiCOUL] = etabRF;
+        break;
+    case eelRF_ZERO:
+        tabsel[etiCOUL] = etabRF_ZERO;
+        break;
+    case eelSWITCH:
+        tabsel[etiCOUL] = etabCOULSwitch;
+        break;
+    case eelUSER:
+        tabsel[etiCOUL] = etabUSER;
+        break;
+    case eelENCADSHIFT:
+        tabsel[etiCOUL] = etabCOULEncad;
+        break;
+    default:
+        gmx_fatal(FARGS,"Invalid eeltype %d",eltype);
+    }
+
+    /* Van der Waals time */
+    if (fr->bBHAM && !b14only) {
+        /* Buckingham: dispersion table plus an exponential repulsion table */
+        tabsel[etiLJ6]  = etabLJ6;
+        tabsel[etiLJ12] = etabEXPMIN;
+    } else {
+        /* 1-4 pairs use plain cut-off LJ unless user tables were requested */
+        if (b14only && fr->vdwtype != evdwUSER)
+            vdwtype = evdwCUT;
+        else
+            vdwtype = fr->vdwtype;
+
+        switch (vdwtype) {
+        case evdwSWITCH:
+            tabsel[etiLJ6]  = etabLJ6Switch;
+            tabsel[etiLJ12] = etabLJ12Switch;
+            break;
+        case evdwSHIFT:
+            tabsel[etiLJ6]  = etabLJ6Shift;
+            tabsel[etiLJ12] = etabLJ12Shift;
+            break;
+        case evdwUSER:
+            tabsel[etiLJ6]  = etabUSER;
+            tabsel[etiLJ12] = etabUSER;
+            break;
+        case evdwCUT:
+            tabsel[etiLJ6]  = etabLJ6;
+            tabsel[etiLJ12] = etabLJ12;
+            break;
+        case evdwENCADSHIFT:
+            tabsel[etiLJ6]  = etabLJ6Encad;
+            tabsel[etiLJ12] = etabLJ12Encad;
+            break;
+        default:
+            gmx_fatal(FARGS,"Invalid vdwtype %d in %s line %d",vdwtype,
+                      __FILE__,__LINE__);
+        }
+    }
+}
+
+/* Build (or read from file) the non-bonded interaction tables.
+ *   out      - log file (may be NULL)
+ *   oenv     - output environment, used for the debug xvg dumps
+ *   fr       - force record; selects which functional forms to tabulate
+ *   fn       - file name for user-supplied tables (only read when needed)
+ *   rtab     - requested table length in nm (0: take the length from the file)
+ *   flags    - GMX_MAKETABLES_* bits (force user tables / 1-4 only)
+ * Returns a t_forcetable whose tab array interleaves the three table types
+ * (coulomb, dispersion, repulsion) with 4 reals per point each (stride 12).
+ */
+t_forcetable make_tables(FILE *out,const output_env_t oenv,
+                         const t_forcerec *fr,
+                         gmx_bool bVerbose,const char *fn,
+                         real rtab,int flags)
+{
+    const char *fns[3] = { "ctab.xvg", "dtab.xvg", "rtab.xvg" };
+    const char *fns14[3] = { "ctab14.xvg", "dtab14.xvg", "rtab14.xvg" };
+    FILE *fp;
+    t_tabledata *td;
+    gmx_bool b14only,bReadTab,bGenTab;
+    real x0,y0,yp;
+    int i,j,k,nx,nx0,tabsel[etiNR];
+
+    t_forcetable table;
+
+    b14only = (flags & GMX_MAKETABLES_14ONLY);
+
+    if (flags & GMX_MAKETABLES_FORCEUSER) {
+        tabsel[etiCOUL] = etabUSER;
+        tabsel[etiLJ6] = etabUSER;
+        tabsel[etiLJ12] = etabUSER;
+    } else {
+        set_table_type(tabsel,fr,b14only);
+    }
+    snew(td,etiNR);
+    table.r = rtab;
+    table.scale = 0;
+    table.n = 0;
+    table.scale_exp = 0;
+    nx0 = 10;
+    nx = 0;
+
+    /* Check whether we have to read or generate */
+    bReadTab = FALSE;
+    bGenTab = FALSE;
+    for(i=0; (i<etiNR); i++) {
+        if (ETAB_USER(tabsel[i]))
+            bReadTab = TRUE;
+        if (tabsel[i] != etabUSER)
+            bGenTab = TRUE;
+    }
+    if (bReadTab) {
+        read_tables(out,fn,etiNR,0,td);
+        if (rtab == 0 || (flags & GMX_MAKETABLES_14ONLY)) {
+            /* Take the table length from the file */
+            rtab = td[0].x[td[0].nx-1];
+            table.n = td[0].nx;
+            nx = table.n;
+        } else {
+            if (td[0].x[td[0].nx-1] < rtab)
+                gmx_fatal(FARGS,"Tables in file %s not long enough for cut-off:\n"
+                          "\tshould be at least %f nm\n",fn,rtab);
+            nx = table.n = (int)(rtab*td[0].tabscale + 0.5);
+        }
+        table.scale = td[0].tabscale;
+        nx0 = td[0].nx0;
+    }
+    if (bGenTab) {
+        if (!bReadTab) {
+            /* Default table density in points/nm */
+#ifdef GMX_DOUBLE
+            table.scale = 2000.0;
+#else
+            table.scale = 500.0;
+#endif
+            nx = table.n = rtab*table.scale;
+        }
+    }
+    if (fr->bBHAM) {
+        /* The Buckingham repulsion table is indexed by b*r, so rescale */
+        if(fr->bham_b_max!=0)
+            table.scale_exp = table.scale/fr->bham_b_max;
+        else
+            table.scale_exp = table.scale;
+    }
+
+    /* Each table type (e.g. coul,lj6,lj12) requires four
+     * numbers per nx+1 data points. For performance reasons we want
+     * the table data to be aligned to 16-byte.
+     */
+    /* NOTE(review): the element count passed here contains a sizeof(real)
+     * factor; confirm snew_aligned takes a byte count and not an element
+     * count, otherwise this over-allocates -- TODO confirm */
+    snew_aligned(table.tab, 12*(nx+1)*sizeof(real),16);
+
+    for(k=0; (k<etiNR); k++) {
+        if (tabsel[k] != etabUSER) {
+            init_table(out,nx,nx0,
+                       (tabsel[k] == etabEXPMIN) ? table.scale_exp : table.scale,
+                       &(td[k]),!bReadTab);
+            fill_table(&(td[k]),tabsel[k],fr);
+            if (out)
+                fprintf(out,"%s table with %d data points for %s%s.\n"
+                        "Tabscale = %g points/nm\n",
+                        ETAB_USER(tabsel[k]) ? "Modified" : "Generated",
+                        td[k].nx,b14only?"1-4 ":"",tprops[tabsel[k]].name,
+                        td[k].tabscale);
+        }
+        copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,table.tab);
+
+        if (bDebugMode() && bVerbose) {
+            if (b14only)
+                fp=xvgropen(fns14[k],fns14[k],"r","V",oenv);
+            else
+                fp=xvgropen(fns[k],fns[k],"r","V",oenv);
+            /* plot the output 5 times denser than the table data */
+            for(i=5*((nx0+1)/2); i<5*table.n; i++) {
+                x0 = i*table.r/(5*(table.n-1));
+                evaluate_table(table.tab,4*k,12,table.scale,x0,&y0,&yp);
+                fprintf(fp,"%15.10e %15.10e %15.10e\n",x0,y0,yp);
+            }
+            gmx_fio_fclose(fp);
+        }
+        done_tabledata(&(td[k]));
+    }
+    sfree(td);
+
+    return table;
+}
+
+/* Build the Generalized Born interaction table.
+ * Tabulates V(r) = 1/sqrt(r^2 + exp(-r^2/4)) and its force F = -dV/dr
+ * over [0, fr->gbtabr] at fr->gbtabscale points/nm.
+ *   out  - log file (may be NULL)
+ *   oenv - output environment for the debug xvg dump
+ *   fn   - unused here: GB tables are always generated, never read
+ *   rtab - unused; the range comes from fr->gbtabr
+ * Returns the filled single-entry t_forcetable (4 reals per point).
+ */
+t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
+                           const t_forcerec *fr,
+                           const char *fn,
+                           real rtab)
+{
+    /* NOTE(review): fns14, tabsel and the error-analysis locals below are
+     * unused in the live code path; candidates for removal. */
+    const char *fns[3] = { "gbctab.xvg", "gbdtab.xvg", "gbrtab.xvg" };
+    const char *fns14[3] = { "gbctab14.xvg", "gbdtab14.xvg", "gbrtab14.xvg" };
+    FILE *fp;
+    t_tabledata *td;
+    gmx_bool bReadTab,bGenTab;
+    real x0,y0,yp;
+    int i,j,k,nx,nx0,tabsel[etiNR];
+    double r,r2,Vtab,Ftab,expterm;
+
+    t_forcetable table;
+
+    double abs_error_r, abs_error_r2;
+    double rel_error_r, rel_error_r2;
+    double rel_error_r_old=0, rel_error_r2_old=0;
+    double x0_r_error, x0_r2_error;
+
+
+    /* Only set a Coulomb table for GB */
+    /*
+     tabsel[0]=etabGB;
+     tabsel[1]=-1;
+     tabsel[2]=-1;
+    */
+
+    /* Set the table dimensions for GB, not really necessary to
+     * use etiNR (since we only have one table, but ...)
+     */
+    snew(td,1);
+    table.r = fr->gbtabr;
+    table.scale = fr->gbtabscale;
+    table.scale_exp = 0;
+    table.n = table.scale*table.r;
+    nx0 = 0;
+    nx = table.scale*table.r;
+
+    /* Check whether we have to read or generate
+     * We will always generate a table, so remove the read code
+     * (Compare with original make_table function
+     */
+    bReadTab = FALSE;
+    bGenTab = TRUE;
+
+    /* Each table type (e.g. coul,lj6,lj12) requires four
+     * numbers per datapoint. For performance reasons we want
+     * the table data to be aligned to 16-byte. This is accomplished
+     * by allocating 16 bytes extra to a temporary pointer, and then
+     * calculating an aligned pointer. This new pointer must not be
+     * used in a free() call, but thankfully we're sloppy enough not
+     * to do this :-)
+     */
+
+    snew_aligned(table.tab,4*nx,16);
+
+    init_table(out,nx,nx0,table.scale,&(td[0]),!bReadTab);
+
+    /* Local implementation so we don't have to use the etabGB
+     * enum above, which will cause problems later when
+     * making the other tables (right now even though we are using
+     * GB, the normal Coulomb tables will be created, but this
+     * will cause a problem since fr->eeltype==etabGB which will not
+     * be defined in fill_table and set_table_type
+     */
+
+    for(i=nx0;i<nx;i++)
+    {
+        Vtab = 0.0;
+        Ftab = 0.0;
+        r = td->x[i];
+        r2 = r*r;
+        expterm = exp(-0.25*r2);
+
+        /* V = (r^2 + e^{-r^2/4})^{-1/2}; Ftab below equals -dV/dr */
+        Vtab = 1/sqrt(r2+expterm);
+        Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
+
+        /* Convert to single precision when we store to mem */
+        td->v[i] = Vtab;
+        td->f[i] = Ftab;
+
+    }
+
+    copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
+
+    if(bDebugMode())
+    {
+        fp=xvgropen(fns[0],fns[0],"r","V",oenv);
+        /* plot the output 5 times denser than the table data */
+        /* for(i=5*nx0;i<5*table.n;i++) */
+        for(i=nx0;i<table.n;i++)
+        {
+            /* x0=i*table.r/(5*table.n); */
+            x0=i*table.r/table.n;
+            evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
+            fprintf(fp,"%15.10e %15.10e %15.10e\n",x0,y0,yp);
+
+        }
+        gmx_fio_fclose(fp);
+    }
+
+    /* Commented-out table accuracy analysis kept for reference;
+     * it terminates the run with exit(1) when enabled. */
+    /*
+    for(i=100*nx0;i<99.81*table.n;i++)
+    {
+      r = i*table.r/(100*table.n);
+      r2 = r*r;
+      expterm = exp(-0.25*r2);
+
+      Vtab = 1/sqrt(r2+expterm);
+      Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
+
+
+      evaluate_table(table.tab,0,4,table.scale,r,&y0,&yp);
+      printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
+
+      abs_error_r=fabs(y0-Vtab);
+      abs_error_r2=fabs(yp-(-1)*Ftab);
+
+      rel_error_r=abs_error_r/y0;
+      rel_error_r2=fabs(abs_error_r2/yp);
+
+
+      if(rel_error_r>rel_error_r_old)
+      {
+        rel_error_r_old=rel_error_r;
+        x0_r_error=x0;
+      }
+
+      if(rel_error_r2>rel_error_r2_old)
+      {
+        rel_error_r2_old=rel_error_r2;
+        x0_r2_error=x0;
+      }
+    }
+
+    printf("gb: MAX REL ERROR IN R=%15.15f, MAX REL ERROR IN R2=%15.15f\n",rel_error_r_old, rel_error_r2_old);
+    printf("gb: XO_R=%g, X0_R2=%g\n",x0_r_error, x0_r2_error);
+
+    exit(1); */
+    done_tabledata(&(td[0]));
+    sfree(td);
+
+    return table;
+
+
+}
+
+/* Read the AdResS thermodynamic-force table from file fn and check that it
+ * covers the required range: half the box x-length for xsplit, or half the
+ * box diagonal for spherical AdResS.
+ *   out - log file (may be NULL)
+ *   fn  - therm-force table file name
+ *   box - simulation box, used to derive the required table range
+ * Returns the filled single-entry t_forcetable (4 reals per point).
+ */
+t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
+                            const t_forcerec *fr,
+                            const char *fn,
+                            matrix box)
+{
+    const char *fns[3] = { "tf_tab.xvg", "atfdtab.xvg", "atfrtab.xvg" };
+    FILE *fp;
+    t_tabledata *td;
+    real x0,y0,yp,rtab;
+    int i,nx,nx0;
+    real rx, ry, rz, box_r;
+
+    t_forcetable table;
+
+
+    /* Set the table dimensions for ATF, not really necessary to
+     * use etiNR (since we only have one table, but ...)
+     */
+    snew(td,1);
+
+    if (fr->adress_type == eAdressSphere){
+        /* take half box diagonal direction as tab range */
+        rx = 0.5*box[0][0]+0.5*box[1][0]+0.5*box[2][0];
+        ry = 0.5*box[0][1]+0.5*box[1][1]+0.5*box[2][1];
+        rz = 0.5*box[0][2]+0.5*box[1][2]+0.5*box[2][2];
+        box_r = sqrt(rx*rx+ry*ry+rz*rz);
+
+    }else{
+        /* xsplit: take half box x direction as tab range */
+        box_r = box[0][0]/2;
+    }
+    table.r = box_r;
+    table.scale = 0;
+    table.n = 0;
+    table.scale_exp = 0;
+    nx0 = 10;
+    nx = 0;
+
+    read_tables(out,fn,1,0,td);
+    rtab = td[0].x[td[0].nx-1];
+
+    if (fr->adress_type == eAdressXSplit && (rtab < box[0][0]/2)){
+        gmx_fatal(FARGS,"AdResS full box therm force table in file %s extends to %f:\n"
+                  "\tshould extend to at least half the length of the box in x-direction"
+                  "%f\n",fn,rtab, box[0][0]/2);
+    }
+    /* NOTE(review): this check runs for both AdResS types although the
+     * message talks about spherical AdResS only; presumably it should be
+     * guarded by fr->adress_type == eAdressSphere -- TODO confirm */
+    if (rtab < box_r){
+        gmx_fatal(FARGS,"AdResS full box therm force table in file %s extends to %f:\n"
+                  "\tshould extend to at least for spherical adress"
+                  "%f (=distance from center to furthermost point in box \n",fn,rtab, box_r);
+    }
+
+
+    table.n = td[0].nx;
+    nx = table.n;
+    table.scale = td[0].tabscale;
+    nx0 = td[0].nx0;
+
+    /* Each table type (e.g. coul,lj6,lj12) requires four
+     * numbers per datapoint. For performance reasons we want
+     * the table data to be aligned to 16-byte. This is accomplished
+     * by allocating 16 bytes extra to a temporary pointer, and then
+     * calculating an aligned pointer. This new pointer must not be
+     * used in a free() call, but thankfully we're sloppy enough not
+     * to do this :-)
+     */
+
+    snew_aligned(table.tab,4*nx,16);
+
+    copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
+
+    if(bDebugMode())
+    {
+        fp=xvgropen(fns[0],fns[0],"r","V",oenv);
+        /* plot the output 5 times denser than the table data */
+        /* for(i=5*nx0;i<5*table.n;i++) */
+
+        for(i=5*((nx0+1)/2); i<5*table.n; i++)
+        {
+            /* x0=i*table.r/(5*table.n); */
+            x0 = i*table.r/(5*(table.n-1));
+            evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
+            fprintf(fp,"%15.10e %15.10e %15.10e\n",x0,y0,yp);
+
+        }
+        /* NOTE(review): other dumps in this file close with gmx_fio_fclose;
+         * ffclose here is inconsistent -- confirm which is intended */
+        ffclose(fp);
+    }
+
+    done_tabledata(&(td[0]));
+    sfree(td);
+
+    return table;
+}
+
+/* Read a bonded interaction table from file fn.
+ *   angle - 0 for a distance table (starts at 0),
+ *           1 for an angle table (degrees, starts at 0),
+ *           >=2 for a dihedral table (degrees, starts at -180)
+ * For angle tables the x axis is converted from degrees to radians and the
+ * force column and table scale are rescaled accordingly.
+ * Returns the filled bondedtable_t (4 reals per point).
+ */
+bondedtable_t make_bonded_table(FILE *fplog,char *fn,int angle)
+{
+    t_tabledata td;
+    double start;
+    int i;
+    bondedtable_t tab;
+
+    if (angle < 2)
+        start = 0;
+    else
+        start = -180.0;
+    read_tables(fplog,fn,1,angle,&td);
+    if (angle > 0) {
+        /* Convert the table from degrees to radians */
+        for(i=0; i<td.nx; i++) {
+            td.x[i] *= DEG2RAD;
+            /* dV/dx picks up the inverse factor */
+            td.f[i] *= RAD2DEG;
+        }
+        td.tabscale *= RAD2DEG;
+    }
+    tab.n = td.nx;
+    tab.scale = td.tabscale;
+    snew(tab.tab,tab.n*4);
+    copy2table(tab.n,0,4,td.x,td.v,td.f,tab.tab);
+    done_tabledata(&td);
+
+    return tab;
+}
+
+
--- /dev/null
- static void check_settle(gmx_mtop_t *sys)
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.03
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <math.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+
+#include "sysstuff.h"
+#include "smalloc.h"
+#include "macros.h"
+#include "string2.h"
+#include "readir.h"
+#include "toputil.h"
+#include "topio.h"
+#include "confio.h"
+#include "copyrite.h"
+#include "readir.h"
+#include "symtab.h"
+#include "names.h"
+#include "grompp.h"
+#include "random.h"
+#include "vec.h"
+#include "futil.h"
+#include "statutil.h"
+#include "splitter.h"
+#include "sortwater.h"
+#include "convparm.h"
+#include "gmx_fatal.h"
+#include "warninp.h"
+#include "index.h"
+#include "gmxfio.h"
+#include "trnio.h"
+#include "tpxio.h"
+#include "vsite_parm.h"
+#include "txtdump.h"
+#include "calcgrid.h"
+#include "add_par.h"
+#include "enxio.h"
+#include "perf_est.h"
+#include "compute_io.h"
+#include "gpp_atomtype.h"
+#include "gpp_tomorse.h"
+#include "mtop_util.h"
+#include "genborn.h"
+#include "calc_verletbuf.h"
+
+/* Remove all interactions of function type ifunc from every molecule type
+ * by zeroing the parameter-list counts.
+ * Returns the total number of interactions removed (summed over types,
+ * not weighted by molecule counts).
+ */
+static int rm_interactions(int ifunc,int nrmols,t_molinfo mols[])
+{
+    int i,n;
+
+    n=0;
+    /* For all the molecule types */
+    for(i=0; i<nrmols; i++) {
+        n += mols[i].plist[ifunc].nr;
+        mols[i].plist[ifunc].nr=0;
+    }
+    return n;
+}
+
+/* Compare atom names between the topology (fn1/mtop) and the coordinate
+ * file (fn2/at), warning on stderr for up to MAXMISMATCH differences.
+ * Returns the total number of mismatching names.
+ */
+static int check_atom_names(const char *fn1, const char *fn2,
+                            gmx_mtop_t *mtop, t_atoms *at)
+{
+    int mb,m,i,j,nmismatch;
+    t_atoms *tat;
+#define MAXMISMATCH 20
+
+    if (mtop->natoms != at->nr)
+        gmx_incons("comparing atom names");
+
+    nmismatch=0;
+    i = 0;
+    /* Walk every copy of every molecule block in global atom order */
+    for(mb=0; mb<mtop->nmolblock; mb++) {
+        tat = &mtop->moltype[mtop->molblock[mb].type].atoms;
+        for(m=0; m<mtop->molblock[mb].nmol; m++) {
+            for(j=0; j < tat->nr; j++) {
+                if (strcmp( *(tat->atomname[j]) , *(at->atomname[i]) ) != 0) {
+                    if (nmismatch < MAXMISMATCH) {
+                        fprintf(stderr,
+                                "Warning: atom name %d in %s and %s does not match (%s - %s)\n",
+                                i+1, fn1, fn2, *(tat->atomname[j]), *(at->atomname[i]));
+                    } else if (nmismatch == MAXMISMATCH) {
+                        fprintf(stderr,"(more than %d non-matching atom names)\n",MAXMISMATCH);
+                    }
+                    nmismatch++;
+                }
+                i++;
+            }
+        }
+    }
+
+    return nmismatch;
+}
+
+/* Verify that every charge group has all of its atoms in the same energy
+ * group; aborts with gmx_fatal on the first violation.
+ */
+static void check_eg_vs_cg(gmx_mtop_t *mtop)
+{
+    int astart,mb,m,cg,j,firstj;
+    unsigned char firsteg,eg;
+    gmx_moltype_t *molt;
+
+    /* Go through all the charge groups and make sure all their
+     * atoms are in the same energy group.
+     */
+
+    astart = 0;
+    for(mb=0; mb<mtop->nmolblock; mb++) {
+        molt = &mtop->moltype[mtop->molblock[mb].type];
+        for(m=0; m<mtop->molblock[mb].nmol; m++) {
+            for(cg=0; cg<molt->cgs.nr;cg++) {
+                /* Get the energy group of the first atom in this charge group */
+                firstj = astart + molt->cgs.index[cg];
+                firsteg = ggrpnr(&mtop->groups,egcENER,firstj);
+                /* Every remaining atom of the charge group must match it */
+                for(j=molt->cgs.index[cg]+1;j<molt->cgs.index[cg+1];j++) {
+                    eg = ggrpnr(&mtop->groups,egcENER,astart+j);
+                    if(eg != firsteg) {
+                        gmx_fatal(FARGS,"atoms %d and %d in charge group %d of molecule type '%s' are in different energy groups",
+                                  firstj+1,astart+j+1,cg+1,*molt->name);
+                    }
+                }
+            }
+            astart += molt->atoms.nr;
+        }
+    }
+}
+
+/* Check the largest charge group size: fatal above MAX_CHARGEGROUP_SIZE,
+ * a cut-off-artifact note above 10 atoms.
+ *   topfn - topology file name, used for the warning location
+ *   cgs   - charge group block to scan
+ *   wi    - warning handler
+ */
+static void check_cg_sizes(const char *topfn,t_block *cgs,warninp_t wi)
+{
+    int maxsize,cg;
+    char warn_buf[STRLEN];
+
+    maxsize = 0;
+    for(cg=0; cg<cgs->nr; cg++)
+    {
+        maxsize = max(maxsize,cgs->index[cg+1]-cgs->index[cg]);
+    }
+
+    if (maxsize > MAX_CHARGEGROUP_SIZE)
+    {
+        gmx_fatal(FARGS,"The largest charge group contains %d atoms. The maximum is %d.",maxsize,MAX_CHARGEGROUP_SIZE);
+    }
+    else if (maxsize > 10)
+    {
+        set_warning_line(wi,topfn,-1);
+        sprintf(warn_buf,
+                "The largest charge group contains %d atoms.\n"
+                "Since atoms only see each other when the centers of geometry of the charge groups they belong to are within the cut-off distance, too large charge groups can lead to serious cut-off artifacts.\n"
+                "For efficiency and accuracy, charge group should consist of a few atoms.\n"
+                "For all-atom force fields use: CH3, CH2, CH, NH2, NH, OH, CO2, CO, etc.",
+                maxsize);
+        warning_note(wi,warn_buf);
+    }
+}
+
+/* Warn (or note) when an unconstrained harmonic bond oscillates fast
+ * relative to the integration time step dt.  Bonds covered by an explicit
+ * constraint (F_CONSTR) or by a SETTLE water are skipped; only the bond
+ * with the shortest period is reported.
+ *   mtop - full topology to scan
+ *   dt   - integration time step in ps
+ *   wi   - warning handler
+ */
+static void check_bonds_timestep(gmx_mtop_t *mtop,double dt,warninp_t wi)
+{
+    /* This check is not intended to ensure accurate integration,
+     * rather it is to signal mistakes in the mdp settings.
+     * A common mistake is to forget to turn on constraints
+     * for MD after energy minimization with flexible bonds.
+     * This check can also detect too large time steps for flexible water
+     * models, but such errors will often be masked by the constraints
+     * mdp options, which turns flexible water into water with bond constraints,
+     * but without an angle constraint. Unfortunately such incorrect use
+     * of water models can not easily be detected without checking
+     * for specific model names.
+     *
+     * The stability limit of leap-frog or velocity verlet is 4.44 steps
+     * per oscillational period.
+     * But accurate bonds distributions are lost far before that limit.
+     * To allow relatively common schemes (although not common with Gromacs)
+     * of dt=1 fs without constraints and dt=2 fs with only H-bond constraints
+     * we set the note limit to 10.
+     */
+    int min_steps_warn=5;
+    int min_steps_note=10;
+    t_iparams *ip;
+    int molt;
+    gmx_moltype_t *moltype,*w_moltype;
+    t_atom *atom;
+    t_ilist *ilist,*ilb,*ilc,*ils;
+    int ftype;
+    int i,a1,a2,w_a1,w_a2,j;
+    real twopi2,limit2,fc,re,m1,m2,period2,w_period2;
+    gmx_bool bFound,bWater,bWarn;
+    char warn_buf[STRLEN];
+
+    ip = mtop->ffparams.iparams;
+
+    twopi2 = sqr(2*M_PI);
+
+    limit2 = sqr(min_steps_note*dt);
+
+    w_a1 = w_a2 = -1;
+    w_period2 = -1.0;
+
+    w_moltype = NULL;
+    for(molt=0; molt<mtop->nmoltype; molt++)
+    {
+        moltype = &mtop->moltype[molt];
+        atom = moltype->atoms.atom;
+        ilist = moltype->ilist;
+        ilc = &ilist[F_CONSTR];
+        ils = &ilist[F_SETTLE];
+        for(ftype=0; ftype<F_NRE; ftype++)
+        {
+            if (!(ftype == F_BONDS || ftype == F_G96BONDS || ftype == F_HARMONIC))
+            {
+                continue;
+            }
+
+            ilb = &ilist[ftype];
+            for(i=0; i<ilb->nr; i+=3)
+            {
+                fc = ip[ilb->iatoms[i]].harmonic.krA;
+                re = ip[ilb->iatoms[i]].harmonic.rA;
+                if (ftype == F_G96BONDS)
+                {
+                    /* Convert the quartic (squared-square) fc to a harmonic fc */
+                    fc = 2*fc*re;
+                }
+                a1 = ilb->iatoms[i+1];
+                a2 = ilb->iatoms[i+2];
+                m1 = atom[a1].m;
+                m2 = atom[a2].m;
+                if (fc > 0 && m1 > 0 && m2 > 0)
+                {
+                    /* Harmonic oscillator: T^2 = (2 pi)^2 mu / fc */
+                    period2 = twopi2*m1*m2/((m1 + m2)*fc);
+                }
+                else
+                {
+                    period2 = GMX_FLOAT_MAX;
+                }
+                if (debug)
+                {
+                    fprintf(debug,"fc %g m1 %g m2 %g period %g\n",
+                            fc,m1,m2,sqrt(period2));
+                }
+                if (period2 < limit2)
+                {
+                    /* Skip bonds that are handled by a constraint */
+                    bFound = FALSE;
+                    for(j=0; j<ilc->nr; j+=3)
+                    {
+                        if ((ilc->iatoms[j+1] == a1 && ilc->iatoms[j+2] == a2) ||
+                            (ilc->iatoms[j+1] == a2 && ilc->iatoms[j+2] == a1))
+                        {
+                            bFound = TRUE;
+                        }
+                    }
+                    /* ... or by a SETTLE water */
+                    for(j=0; j<ils->nr; j+=4)
+                    {
+                        if ((a1 == ils->iatoms[j+1] || a1 == ils->iatoms[j+2] || a1 == ils->iatoms[j+3]) &&
+                            (a2 == ils->iatoms[j+1] || a2 == ils->iatoms[j+2] || a2 == ils->iatoms[j+3]))
+                        {
+                            bFound = TRUE;
+                        }
+                    }
+                    /* Keep only the worst (shortest period) offender */
+                    if (!bFound &&
+                        (w_moltype == NULL || period2 < w_period2))
+                    {
+                        w_moltype = moltype;
+                        w_a1 = a1;
+                        w_a2 = a2;
+                        w_period2 = period2;
+                    }
+                }
+            }
+        }
+    }
+
+    if (w_moltype != NULL)
+    {
+        bWarn = (w_period2 < sqr(min_steps_warn*dt));
+        /* A check that would recognize most water models */
+        bWater = ((*w_moltype->atoms.atomname[0])[0] == 'O' &&
+                  w_moltype->atoms.nr <= 5);
+        sprintf(warn_buf,"The bond in molecule-type %s between atoms %d %s and %d %s has an estimated oscillational period of %.1e ps, which is less than %d times the time step of %.1e ps.\n"
+                "%s",
+                *w_moltype->name,
+                w_a1+1,*w_moltype->atoms.atomname[w_a1],
+                w_a2+1,*w_moltype->atoms.atomname[w_a2],
+                sqrt(w_period2),bWarn ? min_steps_warn : min_steps_note,dt,
+                bWater ?
+                "Maybe you asked for flexible water." :
+                "Maybe you forgot to change the constraints mdp option.");
+        if (bWarn)
+        {
+            warning(wi,warn_buf);
+        }
+        else
+        {
+            warning_note(wi,warn_buf);
+        }
+    }
+}
+
+/* Zero the velocities of particles that carry no kinetic degrees of
+ * freedom: shells, bond particles and virtual sites.
+ */
+static void check_vel(gmx_mtop_t *mtop,rvec v[])
+{
+    gmx_mtop_atomloop_all_t aloop;
+    t_atom *atom;
+    int a;
+
+    aloop = gmx_mtop_atomloop_all_init(mtop);
+    while (gmx_mtop_atomloop_all_next(aloop,&a,&atom)) {
+        if (atom->ptype == eptShell ||
+            atom->ptype == eptBond ||
+            atom->ptype == eptVSite) {
+            clear_rvec(v[a]);
+        }
+    }
+}
+
+/* Count the total number of interactions of function type ftype over the
+ * whole system (per-moltype count times number of molecules).
+ * Returns the total interaction count.
+ * Fix: the return type was declared gmx_bool although a count is returned
+ * and consumed (e.g. passed to double_check); declare it int.
+ */
+static int nint_ftype(gmx_mtop_t *mtop,t_molinfo *mi,int ftype)
+{
+    int nint,mb;
+
+    nint = 0;
+    for(mb=0; mb<mtop->nmolblock; mb++) {
+        nint += mtop->molblock[mb].nmol*mi[mtop->molblock[mb].type].plist[ftype].nr;
+    }
+
+    return nint;
+}
+
+/* This routine reorders the molecule type array
+ * in the order of use in the molblocks,
+ * unused molecule types are deleted.
+ *   sys      - topology whose molblock type indices are rewritten in place
+ *   nmolinfo - in/out: number of molinfo entries (shrinks to used count)
+ *   molinfo  - in/out: reallocated, reordered molinfo array
+ */
+static void renumber_moltypes(gmx_mtop_t *sys,
+                              int *nmolinfo,t_molinfo **molinfo)
+{
+    int *order,norder,i;
+    int mb,mi;
+    t_molinfo *minew;
+
+    snew(order,*nmolinfo);
+    norder = 0;
+    for(mb=0; mb<sys->nmolblock; mb++) {
+        /* Look up this block's type in the order-of-first-use list */
+        for(i=0; i<norder; i++) {
+            if (order[i] == sys->molblock[mb].type) {
+                break;
+            }
+        }
+        if (i == norder) {
+            /* This type did not occur yet, add it */
+            order[norder] = sys->molblock[mb].type;
+            /* Renumber the moltype in the topology */
+            norder++;
+        }
+        /* i is the new index of this type, whether found or just appended */
+        sys->molblock[mb].type = i;
+    }
+
+    /* We still need to reorder the molinfo structs */
+    snew(minew,norder);
+    for(mi=0; mi<*nmolinfo; mi++) {
+        for(i=0; i<norder; i++) {
+            if (order[i] == mi) {
+                break;
+            }
+        }
+        if (i == norder) {
+            /* Type mi is unused: free it */
+            done_mi(&(*molinfo)[mi]);
+        } else {
+            minew[i] = (*molinfo)[mi];
+        }
+    }
+    sfree(*molinfo);
+
+    *nmolinfo = norder;
+    *molinfo = minew;
+}
+
+/* Copy the per-molecule-type data (name, atoms, charge groups, exclusions)
+ * from the molinfo array into freshly allocated mtop->moltype entries.
+ * Interaction lists are filled in later elsewhere.
+ */
+static void molinfo2mtop(int nmi,t_molinfo *mi,gmx_mtop_t *mtop)
+{
+    int m;
+    gmx_moltype_t *molt;
+
+    mtop->nmoltype = nmi;
+    snew(mtop->moltype,nmi);
+    for(m=0; m<nmi; m++) {
+        molt = &mtop->moltype[m];
+        molt->name = mi[m].name;
+        molt->atoms = mi[m].atoms;
+        /* ilists are copied later */
+        molt->cgs = mi[m].cgs;
+        molt->excls = mi[m].excls;
+    }
+}
+
+/* Process the topology and coordinate input into the system topology (sys),
+ * atom types and starting state.
+ *   topfile/topppfile - topology input and preprocessed-topology output
+ *   confin            - coordinate file name
+ *   opts,ir           - grompp options and input record
+ *   bZero             - accept zero interaction parameters
+ *   bGenVel           - generate Maxwell-distributed velocities
+ *   state             - output state (coordinates, velocities, box)
+ *   nmi,mi            - output: molecule-type count and array
+ *   plist             - output parameter lists
+ *   comb,reppow,fudgeQQ - combination rule info from the topology
+ *   bMorse            - convert harmonic bonds to Morse potentials
+ *   wi                - warning handler
+ */
+static void
+new_status(const char *topfile,const char *topppfile,const char *confin,
+           t_gromppopts *opts,t_inputrec *ir,gmx_bool bZero,
+           gmx_bool bGenVel,gmx_bool bVerbose,t_state *state,
+           gpp_atomtype_t atype,gmx_mtop_t *sys,
+           int *nmi,t_molinfo **mi,t_params plist[],
+           int *comb,double *reppow,real *fudgeQQ,
+           gmx_bool bMorse,
+           warninp_t wi)
+{
+    t_molinfo *molinfo=NULL;
+    int nmolblock;
+    gmx_molblock_t *molblock,*molbs;
+    t_atoms *confat;
+    int mb,i,nrmols,nmismatch;
+    char buf[STRLEN];
+    gmx_bool bGB=FALSE;
+    char warn_buf[STRLEN];
+
+    init_mtop(sys);
+
+    /* Set gmx_boolean for GB */
+    if(ir->implicit_solvent)
+        bGB=TRUE;
+
+    /* TOPOLOGY processing */
+    sys->name = do_top(bVerbose,topfile,topppfile,opts,bZero,&(sys->symtab),
+                       plist,comb,reppow,fudgeQQ,
+                       atype,&nrmols,&molinfo,ir,
+                       &nmolblock,&molblock,bGB,
+                       wi);
+
+    sys->nmolblock = 0;
+    snew(sys->molblock,nmolblock);
+
+    sys->natoms = 0;
+    for(mb=0; mb<nmolblock; mb++) {
+        if (sys->nmolblock > 0 &&
+            molblock[mb].type == sys->molblock[sys->nmolblock-1].type) {
+            /* Merge consecutive blocks with the same molecule type */
+            sys->molblock[sys->nmolblock-1].nmol += molblock[mb].nmol;
+            sys->natoms += molblock[mb].nmol*sys->molblock[sys->nmolblock-1].natoms_mol;
+        } else if (molblock[mb].nmol > 0) {
+            /* Add a new molblock to the topology */
+            molbs = &sys->molblock[sys->nmolblock];
+            *molbs = molblock[mb];
+            molbs->natoms_mol = molinfo[molbs->type].atoms.nr;
+            molbs->nposres_xA = 0;
+            molbs->nposres_xB = 0;
+            sys->natoms += molbs->nmol*molbs->natoms_mol;
+            sys->nmolblock++;
+        }
+    }
+    if (sys->nmolblock == 0) {
+        gmx_fatal(FARGS,"No molecules were defined in the system");
+    }
+
+    renumber_moltypes(sys,&nrmols,&molinfo);
+
+    if (bMorse)
+        convert_harmonics(nrmols,molinfo,atype);
+
+    /* Drop restraint interactions that are disabled in the mdp */
+    if (ir->eDisre == edrNone) {
+        i = rm_interactions(F_DISRES,nrmols,molinfo);
+        if (i > 0) {
+            set_warning_line(wi,"unknown",-1);
+            sprintf(warn_buf,"disre = no, removed %d distance restraints",i);
+            warning_note(wi,warn_buf);
+        }
+    }
+    if (opts->bOrire == FALSE) {
+        i = rm_interactions(F_ORIRES,nrmols,molinfo);
+        if (i > 0) {
+            set_warning_line(wi,"unknown",-1);
+            sprintf(warn_buf,"orire = no, removed %d orientation restraints",i);
+            warning_note(wi,warn_buf);
+        }
+    }
+
+    /* Copy structures from msys to sys */
+    molinfo2mtop(nrmols,molinfo,sys);
+
+    gmx_mtop_finalize(sys);
+
+    /* COORDINATE file processing */
+    if (bVerbose)
+        fprintf(stderr,"processing coordinates...\n");
+
+    get_stx_coordnum(confin,&state->natoms);
+    if (state->natoms != sys->natoms)
+        gmx_fatal(FARGS,"number of coordinates in coordinate file (%s, %d)\n"
+                  "             does not match topology (%s, %d)",
+                  confin,state->natoms,topfile,sys->natoms);
+    else {
+        /* make space for coordinates and velocities */
+        char title[STRLEN];
+        snew(confat,1);
+        init_t_atoms(confat,state->natoms,FALSE);
+        init_state(state,state->natoms,0,0,0,0);
+        read_stx_conf(confin,title,confat,state->x,state->v,NULL,state->box);
+        /* This call fixes the box shape for runs with pressure scaling */
+        set_box_rel(ir,state);
+
+        nmismatch = check_atom_names(topfile, confin, sys, confat);
+        free_t_atoms(confat,TRUE);
+        sfree(confat);
+
+        if (nmismatch) {
+            sprintf(buf,"%d non-matching atom name%s\n"
+                    "atom names from %s will be used\n"
+                    "atom names from %s will be ignored\n",
+                    nmismatch,(nmismatch == 1) ? "" : "s",topfile,confin);
+            warning(wi,buf);
+        }
+        if (bVerbose)
+            fprintf(stderr,"double-checking input for internal consistency...\n");
+        double_check(ir,state->box,nint_ftype(sys,molinfo,F_CONSTR),wi);
+    }
+
+    if (bGenVel) {
+        real *mass;
+        gmx_mtop_atomloop_all_t aloop;
+        t_atom *atom;
+
+        snew(mass,state->natoms);
+        aloop = gmx_mtop_atomloop_all_init(sys);
+        while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+            mass[i] = atom->m;
+        }
+
+        if (opts->seed == -1) {
+            opts->seed = make_seed();
+            fprintf(stderr,"Setting gen_seed to %d\n",opts->seed);
+        }
+        maxwell_speed(opts->tempi,opts->seed,sys,state->v);
+
+        /* Remove the center-of-mass motion of the generated velocities */
+        stop_cm(stdout,state->natoms,mass,state->x,state->v);
+        sfree(mass);
+    }
+
+    *nmi = nrmols;
+    *mi = molinfo;
+}
+
+/* Copy coordinates (and, when bReadVel, velocities) and the box from a
+ * trajectory frame into the state, and report the frame time.
+ *   slog     - trajectory file name, used in error messages
+ *   fr       - frame to copy from; must be complete and contain coordinates
+ *   use_time - output: the time stamp of the copied frame
+ * Fix: corrected the typo "Trajecory" in the internal-consistency message.
+ */
+static void copy_state(const char *slog,t_trxframe *fr,
+                       gmx_bool bReadVel,t_state *state,
+                       double *use_time)
+{
+    int i;
+
+    if (fr->not_ok & FRAME_NOT_OK)
+    {
+        gmx_fatal(FARGS,"Can not start from an incomplete frame");
+    }
+    if (!fr->bX)
+    {
+        gmx_fatal(FARGS,"Did not find a frame with coordinates in file %s",
+                  slog);
+    }
+
+    for(i=0; i<state->natoms; i++)
+    {
+        copy_rvec(fr->x[i],state->x[i]);
+    }
+    if (bReadVel)
+    {
+        /* The caller is responsible for only requesting velocities when
+         * the frame was read with TRX_NEED_V */
+        if (!fr->bV)
+        {
+            gmx_incons("Trajectory frame unexpectedly does not contain velocities");
+        }
+        for(i=0; i<state->natoms; i++)
+        {
+            copy_rvec(fr->v[i],state->v[i]);
+        }
+    }
+    if (fr->bBox)
+    {
+        copy_mat(fr->box,state->box);
+    }
+
+    *use_time = fr->time;
+}
+
+/* Continue a run from an old trajectory: read frames from slog until
+ * fr_time (or the last complete frame when fr_time == -1) and copy the
+ * final one into state.  Optionally restores pressure/thermostat coupling
+ * state from the energy file ener.
+ *   bNeedVel,bGenVel - velocities are read only when needed and not generated
+ */
+static void cont_status(const char *slog,const char *ener,
+                        gmx_bool bNeedVel,gmx_bool bGenVel, real fr_time,
+                        t_inputrec *ir,t_state *state,
+                        gmx_mtop_t *sys,
+                        const output_env_t oenv)
+    /* If fr_time == -1 read the last frame available which is complete */
+{
+    gmx_bool bReadVel;
+    t_trxframe fr;
+    t_trxstatus *fp;
+    int i;
+    double use_time;
+
+    bReadVel = (bNeedVel && !bGenVel);
+
+    fprintf(stderr,
+            "Reading Coordinates%s and Box size from old trajectory\n",
+            bReadVel ? ", Velocities" : "");
+    if (fr_time == -1)
+    {
+        fprintf(stderr,"Will read whole trajectory\n");
+    }
+    else
+    {
+        fprintf(stderr,"Will read till time %g\n",fr_time);
+    }
+    if (!bReadVel)
+    {
+        if (bGenVel)
+        {
+            fprintf(stderr,"Velocities generated: "
+                    "ignoring velocities in input trajectory\n");
+        }
+        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
+    }
+    else
+    {
+        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X | TRX_NEED_V);
+
+        if (!fr.bV)
+        {
+            /* Fall back to coordinates-only reading with zeroed velocities */
+            fprintf(stderr,
+                    "\n"
+                    "WARNING: Did not find a frame with velocities in file %s,\n"
+                    "         all velocities will be set to zero!\n\n",slog);
+            for(i=0; i<sys->natoms; i++)
+            {
+                clear_rvec(state->v[i]);
+            }
+            close_trj(fp);
+            /* Search for a frame without velocities */
+            bReadVel = FALSE;
+            read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
+        }
+    }
+
+    state->natoms = fr.natoms;
+
+    if (sys->natoms != state->natoms)
+    {
+        gmx_fatal(FARGS,"Number of atoms in Topology "
+                  "is not the same as in Trajectory");
+    }
+    copy_state(slog,&fr,bReadVel,state,&use_time);
+
+    /* Find the appropriate frame */
+    while ((fr_time == -1 || fr.time < fr_time) &&
+           read_next_frame(oenv,fp,&fr))
+    {
+        copy_state(slog,&fr,bReadVel,state,&use_time);
+    }
+
+    close_trj(fp);
+
+    /* Set the relative box lengths for preserving the box shape.
+     * Note that this call can lead to differences in the last bit
+     * with respect to using tpbconv to create a [TT].tpx[tt] file.
+     */
+    set_box_rel(ir,state);
+
+    fprintf(stderr,"Using frame at t = %g ps\n",use_time);
+    fprintf(stderr,"Starting time for run is %g ps\n",ir->init_t);
+
+    if ((ir->epc != epcNO || ir->etc ==etcNOSEHOOVER) && ener)
+    {
+        /* Restore barostat/thermostat state from the energy file */
+        get_enx_state(ener,use_time,&sys->groups,ir,state);
+        preserve_box_shape(ir,state->box_rel,state->boxv);
+    }
+}
+
+static void read_posres(gmx_mtop_t *mtop,t_molinfo *molinfo,gmx_bool bTopB,
+ char *fn,
+ int rc_scaling, int ePBC,
+ rvec com,
+ warninp_t wi)
+{
+ gmx_bool bFirst = TRUE, *hadAtom;
+ rvec *x,*v,*xp;
+ dvec sum;
+ double totmass;
+ t_atoms dumat;
+ matrix box,invbox;
+ int natoms,npbcdim=0;
+ char warn_buf[STRLEN],title[STRLEN];
+ int a,i,ai,j,k,mb,nat_molb;
+ gmx_molblock_t *molb;
+ t_params *pr,*prfb;
+ t_atom *atom;
+
+ get_stx_coordnum(fn,&natoms);
+ if (natoms != mtop->natoms) {
+ sprintf(warn_buf,"The number of atoms in %s (%d) does not match the number of atoms in the topology (%d). Will assume that the first %d atoms in the topology and %s match.",fn,natoms,mtop->natoms,min(mtop->natoms,natoms),fn);
+ warning(wi,warn_buf);
+ }
+ snew(x,natoms);
+ snew(v,natoms);
+ init_t_atoms(&dumat,natoms,FALSE);
+ read_stx_conf(fn,title,&dumat,x,v,NULL,box);
+
+ npbcdim = ePBC2npbcdim(ePBC);
+ clear_rvec(com);
+ if (rc_scaling != erscNO) {
+ copy_mat(box,invbox);
+ for(j=npbcdim; j<DIM; j++) {
+ clear_rvec(invbox[j]);
+ invbox[j][j] = 1;
+ }
+ m_inv_ur0(invbox,invbox);
+ }
+
+ /* Copy the reference coordinates to mtop */
+ clear_dvec(sum);
+ totmass = 0;
+ a = 0;
+ snew(hadAtom,natoms);
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ molb = &mtop->molblock[mb];
+ nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
+ pr = &(molinfo[molb->type].plist[F_POSRES]);
+ prfb = &(molinfo[molb->type].plist[F_FBPOSRES]);
+ if (pr->nr > 0 || prfb->nr > 0) {
+ atom = mtop->moltype[molb->type].atoms.atom;
+ for(i=0; (i<pr->nr); i++) {
+ ai=pr->param[i].AI;
+ if (ai >= natoms) {
+ gmx_fatal(FARGS,"Position restraint atom index (%d) in moltype '%s' is larger than number of atoms in %s (%d).\n",
+ ai+1,*molinfo[molb->type].name,fn,natoms);
+ }
+ hadAtom[ai]=TRUE;
+ if (rc_scaling == erscCOM) {
+ /* Determine the center of mass of the posres reference coordinates */
+ for(j=0; j<npbcdim; j++) {
+ sum[j] += atom[ai].m*x[a+ai][j];
+ }
+ totmass += atom[ai].m;
+ }
+ }
+ /* Same for flat-bottomed posres, but do not count an atom twice for COM */
+ for(i=0; (i<prfb->nr); i++) {
+ ai=prfb->param[i].AI;
+ if (ai >= natoms) {
+ gmx_fatal(FARGS,"Position restraint atom index (%d) in moltype '%s' is larger than number of atoms in %s (%d).\n",
+ ai+1,*molinfo[molb->type].name,fn,natoms);
+ }
+ if (rc_scaling == erscCOM && hadAtom[ai] == FALSE) {
+ /* Determine the center of mass of the posres reference coordinates */
+ for(j=0; j<npbcdim; j++) {
+ sum[j] += atom[ai].m*x[a+ai][j];
+ }
+ totmass += atom[ai].m;
+ }
+ }
+ if (!bTopB) {
+ molb->nposres_xA = nat_molb;
+ snew(molb->posres_xA,molb->nposres_xA);
+ for(i=0; i<nat_molb; i++) {
+ copy_rvec(x[a+i],molb->posres_xA[i]);
+ }
+ } else {
+ molb->nposres_xB = nat_molb;
+ snew(molb->posres_xB,molb->nposres_xB);
+ for(i=0; i<nat_molb; i++) {
+ copy_rvec(x[a+i],molb->posres_xB[i]);
+ }
+ }
+ }
+ a += nat_molb;
+ }
+ if (rc_scaling == erscCOM) {
+ if (totmass == 0)
+ gmx_fatal(FARGS,"The total mass of the position restraint atoms is 0");
+ for(j=0; j<npbcdim; j++)
+ com[j] = sum[j]/totmass;
+ fprintf(stderr,"The center of mass of the position restraint coord's is %6.3f %6.3f %6.3f\n",com[XX],com[YY],com[ZZ]);
+ }
+
+ if (rc_scaling != erscNO) {
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ molb = &mtop->molblock[mb];
+ nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
+ if (molb->nposres_xA > 0 || molb->nposres_xB > 0) {
+ xp = (!bTopB ? molb->posres_xA : molb->posres_xB);
+ for(i=0; i<nat_molb; i++) {
+ for(j=0; j<npbcdim; j++) {
+ if (rc_scaling == erscALL) {
+ /* Convert from Cartesian to crystal coordinates */
+ xp[i][j] *= invbox[j][j];
+ for(k=j+1; k<npbcdim; k++) {
+ xp[i][j] += invbox[k][j]*xp[i][k];
+ }
+ } else if (rc_scaling == erscCOM) {
+ /* Subtract the center of mass */
+ xp[i][j] -= com[j];
+ }
+ }
+ }
+ }
+ }
+
+ if (rc_scaling == erscCOM) {
+ /* Convert the COM from Cartesian to crystal coordinates */
+ for(j=0; j<npbcdim; j++) {
+ com[j] *= invbox[j][j];
+ for(k=j+1; k<npbcdim; k++) {
+ com[j] += invbox[k][j]*com[k];
+ }
+ }
+ }
+ }
+
+ free_t_atoms(&dumat,TRUE);
+ sfree(x);
+ sfree(v);
+ sfree(hadAtom);
+}
+
+static void gen_posres(gmx_mtop_t *mtop,t_molinfo *mi,
+ char *fnA, char *fnB,
+ int rc_scaling, int ePBC,
+ rvec com, rvec comB,
+ warninp_t wi)
+{
+ int i,j;
+
+ read_posres (mtop,mi,FALSE,fnA,rc_scaling,ePBC,com,wi);
+ if (strcmp(fnA,fnB) != 0) {
+ read_posres(mtop,mi,TRUE ,fnB,rc_scaling,ePBC,comB,wi);
+ }
+}
+
+static void set_wall_atomtype(gpp_atomtype_t at,t_gromppopts *opts,
+ t_inputrec *ir,warninp_t wi)
+{
+ int i;
+ char warn_buf[STRLEN];
+
+ if (ir->nwall > 0)
+ {
+ fprintf(stderr,"Searching the wall atom type(s)\n");
+ }
+ for(i=0; i<ir->nwall; i++)
+ {
+ ir->wall_atomtype[i] = get_atomtype_type(opts->wall_atomtype[i],at);
+ if (ir->wall_atomtype[i] == NOTSET)
+ {
+ sprintf(warn_buf,"Specified wall atom type %s is not defined",opts->wall_atomtype[i]);
+ warning_error(wi,warn_buf);
+ }
+ }
+}
+
+static int nrdf_internal(t_atoms *atoms)
+{
+ int i,nmass,nrdf;
+
+ nmass = 0;
+ for(i=0; i<atoms->nr; i++) {
+ /* Vsite ptype might not be set here yet, so also check the mass */
+ if ((atoms->atom[i].ptype == eptAtom ||
+ atoms->atom[i].ptype == eptNucleus)
+ && atoms->atom[i].m > 0) {
+ nmass++;
+ }
+ }
+ switch (nmass) {
+ case 0: nrdf = 0; break;
+ case 1: nrdf = 0; break;
+ case 2: nrdf = 1; break;
+ default: nrdf = nmass*3 - 6; break;
+ }
+
+ return nrdf;
+}
+
+void
+spline1d( double dx,
+ double * y,
+ int n,
+ double * u,
+ double * y2 )
+{
+ int i;
+ double p,q;
+
+ y2[0] = 0.0;
+ u[0] = 0.0;
+
+ for(i=1;i<n-1;i++)
+ {
+ p = 0.5*y2[i-1]+2.0;
+ y2[i] = -0.5/p;
+ q = (y[i+1]-2.0*y[i]+y[i-1])/dx;
+ u[i] = (3.0*q/dx-0.5*u[i-1])/p;
+ }
+
+ y2[n-1] = 0.0;
+
+ for(i=n-2;i>=0;i--)
+ {
+ y2[i] = y2[i]*y2[i+1]+u[i];
+ }
+}
+
+
+void
+interpolate1d( double xmin,
+ double dx,
+ double * ya,
+ double * y2a,
+ double x,
+ double * y,
+ double * y1)
+{
+ int ix;
+ double a,b;
+
+ ix = (x-xmin)/dx;
+
+ a = (xmin+(ix+1)*dx-x)/dx;
+ b = (x-xmin-ix*dx)/dx;
+
+ *y = a*ya[ix]+b*ya[ix+1]+((a*a*a-a)*y2a[ix]+(b*b*b-b)*y2a[ix+1])*(dx*dx)/6.0;
+ *y1 = (ya[ix+1]-ya[ix])/dx-(3.0*a*a-1.0)/6.0*dx*y2a[ix]+(3.0*b*b-1.0)/6.0*dx*y2a[ix+1];
+}
+
+
+void
+setup_cmap (int grid_spacing,
+ int nc,
+ real * grid ,
+ gmx_cmap_t * cmap_grid)
+{
+ double *tmp_u,*tmp_u2,*tmp_yy,*tmp_y1,*tmp_t2,*tmp_grid;
+
+ int i,j,k,ii,jj,kk,idx;
+ int offset;
+ double dx,xmin,v,v1,v2,v12;
+ double phi,psi;
+
+ snew(tmp_u,2*grid_spacing);
+ snew(tmp_u2,2*grid_spacing);
+ snew(tmp_yy,2*grid_spacing);
+ snew(tmp_y1,2*grid_spacing);
+ snew(tmp_t2,2*grid_spacing*2*grid_spacing);
+ snew(tmp_grid,2*grid_spacing*2*grid_spacing);
+
+ dx = 360.0/grid_spacing;
+ xmin = -180.0-dx*grid_spacing/2;
+
+ for(kk=0;kk<nc;kk++)
+ {
+ /* Compute an offset depending on which cmap we are using
+ * Offset will be the map number multiplied with the
+ * grid_spacing * grid_spacing * 2
+ */
+ offset = kk * grid_spacing * grid_spacing * 2;
+
+ for(i=0;i<2*grid_spacing;i++)
+ {
+ ii=(i+grid_spacing-grid_spacing/2)%grid_spacing;
+
+ for(j=0;j<2*grid_spacing;j++)
+ {
+ jj=(j+grid_spacing-grid_spacing/2)%grid_spacing;
+ tmp_grid[i*grid_spacing*2+j] = grid[offset+ii*grid_spacing+jj];
+ }
+ }
+
+ for(i=0;i<2*grid_spacing;i++)
+ {
+ spline1d(dx,&(tmp_grid[2*grid_spacing*i]),2*grid_spacing,tmp_u,&(tmp_t2[2*grid_spacing*i]));
+ }
+
+ for(i=grid_spacing/2;i<grid_spacing+grid_spacing/2;i++)
+ {
+ ii = i-grid_spacing/2;
+ phi = ii*dx-180.0;
+
+ for(j=grid_spacing/2;j<grid_spacing+grid_spacing/2;j++)
+ {
+ jj = j-grid_spacing/2;
+ psi = jj*dx-180.0;
+
+ for(k=0;k<2*grid_spacing;k++)
+ {
+ interpolate1d(xmin,dx,&(tmp_grid[2*grid_spacing*k]),
+ &(tmp_t2[2*grid_spacing*k]),psi,&tmp_yy[k],&tmp_y1[k]);
+ }
+
+ spline1d(dx,tmp_yy,2*grid_spacing,tmp_u,tmp_u2);
+ interpolate1d(xmin,dx,tmp_yy,tmp_u2,phi,&v,&v1);
+ spline1d(dx,tmp_y1,2*grid_spacing,tmp_u,tmp_u2);
+ interpolate1d(xmin,dx,tmp_y1,tmp_u2,phi,&v2,&v12);
+
+ idx = ii*grid_spacing+jj;
+ cmap_grid->cmapdata[kk].cmap[idx*4] = grid[offset+ii*grid_spacing+jj];
+ cmap_grid->cmapdata[kk].cmap[idx*4+1] = v1;
+ cmap_grid->cmapdata[kk].cmap[idx*4+2] = v2;
+ cmap_grid->cmapdata[kk].cmap[idx*4+3] = v12;
+ }
+ }
+ }
+}
+
+void init_cmap_grid(gmx_cmap_t *cmap_grid, int ngrid, int grid_spacing)
+{
+ int i,k,nelem;
+
+ cmap_grid->ngrid = ngrid;
+ cmap_grid->grid_spacing = grid_spacing;
+ nelem = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
+
+ snew(cmap_grid->cmapdata,ngrid);
+
+ for(i=0;i<cmap_grid->ngrid;i++)
+ {
+ snew(cmap_grid->cmapdata[i].cmap,4*nelem);
+ }
+}
+
+
+static int count_constraints(gmx_mtop_t *mtop,t_molinfo *mi,warninp_t wi)
+{
+ int count,count_mol,i,mb;
+ gmx_molblock_t *molb;
+ t_params *plist;
+ char buf[STRLEN];
+
+ count = 0;
+ for(mb=0; mb<mtop->nmolblock; mb++) {
+ count_mol = 0;
+ molb = &mtop->molblock[mb];
+ plist = mi[molb->type].plist;
+
+ for(i=0; i<F_NRE; i++) {
+ if (i == F_SETTLE)
+ count_mol += 3*plist[i].nr;
+ else if (interaction_function[i].flags & IF_CONSTRAINT)
+ count_mol += plist[i].nr;
+ }
+
+ if (count_mol > nrdf_internal(&mi[molb->type].atoms)) {
+ sprintf(buf,
+ "Molecule type '%s' has %d constraints.\n"
+ "For stability and efficiency there should not be more constraints than internal number of degrees of freedom: %d.\n",
+ *mi[molb->type].name,count_mol,
+ nrdf_internal(&mi[molb->type].atoms));
+ warning(wi,buf);
+ }
+ count += molb->nmol*count_mol;
+ }
+
+ return count;
+}
+
+static void check_gbsa_params_charged(gmx_mtop_t *sys, gpp_atomtype_t atype)
+{
+ int i,nmiss,natoms,mt;
+ real q;
+ const t_atoms *atoms;
+
+ nmiss = 0;
+ for(mt=0;mt<sys->nmoltype;mt++)
+ {
+ atoms = &sys->moltype[mt].atoms;
+ natoms = atoms->nr;
+
+ for(i=0;i<natoms;i++)
+ {
+ q = atoms->atom[i].q;
+ if ((get_atomtype_radius(atoms->atom[i].type,atype) == 0 ||
+ get_atomtype_vol(atoms->atom[i].type,atype) == 0 ||
+ get_atomtype_surftens(atoms->atom[i].type,atype) == 0 ||
+ get_atomtype_gb_radius(atoms->atom[i].type,atype) == 0 ||
+ get_atomtype_S_hct(atoms->atom[i].type,atype) == 0) &&
+ q != 0)
+ {
+ fprintf(stderr,"\nGB parameter(s) zero for atom type '%s' while charge is %g\n",
+ get_atomtype_name(atoms->atom[i].type,atype),q);
+ nmiss++;
+ }
+ }
+ }
+
+ if (nmiss > 0)
+ {
+ gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield has parameters with value zero for %d atomtypes that occur as charged atoms.",nmiss);
+ }
+}
+
+
+static void check_gbsa_params(t_inputrec *ir,gpp_atomtype_t atype)
+{
+ int nmiss,i;
+
+ /* If we are doing GBSA, check that we got the parameters we need
+ * This checking is to see if there are GBSA paratmeters for all
+ * atoms in the force field. To go around this for testing purposes
+ * comment out the nerror++ counter temporarily
+ */
+ nmiss = 0;
+ for(i=0;i<get_atomtype_ntypes(atype);i++)
+ {
+ if (get_atomtype_radius(i,atype) < 0 ||
+ get_atomtype_vol(i,atype) < 0 ||
+ get_atomtype_surftens(i,atype) < 0 ||
+ get_atomtype_gb_radius(i,atype) < 0 ||
+ get_atomtype_S_hct(i,atype) < 0)
+ {
+ fprintf(stderr,"\nGB parameter(s) missing or negative for atom type '%s'\n",
+ get_atomtype_name(i,atype));
+ nmiss++;
+ }
+ }
+
+ if (nmiss > 0)
+ {
+ gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield is missing parameters for %d atomtypes or they might be negative.",nmiss);
+ }
+
+}
+
- int i,j,cgj1,nra;
-
- nra = interaction_function[F_SETTLE].nratoms;
- for(i=0; (i<sys->nmoltype); i++)
++static void set_verlet_buffer(const gmx_mtop_t *mtop,
++ t_inputrec *ir,
++ matrix box,
++ real verletbuf_drift,
++ warninp_t wi)
+{
- for(j=0; (j<sys->moltype[i].ilist[F_SETTLE].nr); j+=nra+1)
++ real ref_T;
++ int i;
++ verletbuf_list_setup_t ls;
++ real rlist_1x1;
++ int n_nonlin_vsite;
++ char warn_buf[STRLEN];
++
++ ref_T = 0;
++ for(i=0; i<ir->opts.ngtc; i++)
++ {
++ if (ir->opts.ref_t[i] < 0)
++ {
++ warning(wi,"Some atom groups do not use temperature coupling. This cannot be accounted for in the energy drift estimation for the Verlet buffer size. The energy drift and the Verlet buffer might be underestimated.");
++ }
++ else
++ {
++ ref_T = max(ref_T,ir->opts.ref_t[i]);
++ }
++ }
++
++ printf("Determining Verlet buffer for an energy drift of %g kJ/mol/ps at %g K\n",verletbuf_drift,ref_T);
++
++ for(i=0; i<ir->opts.ngtc; i++)
+ {
- cgj1 = sys->moltype[i].cgs.index[j+1];
- if (j+2 >= cgj1)
- gmx_fatal(FARGS,"For SETTLE you need to have all atoms involved in one charge group. Please fix your topology.");
++ if (ir->opts.ref_t[i] >= 0 && ir->opts.ref_t[i] != ref_T)
+ {
- /* check for charge groups in settles */
- check_settle(sys);
-
++ sprintf(warn_buf,"ref_T for group of %.1f DOFs is %g K, which is smaller than the maximum of %g K used for the buffer size calculation. The buffer size might be on the conservative (large) side.",
++ ir->opts.nrdf[i],ir->opts.ref_t[i],ref_T);
++ warning_note(wi,warn_buf);
+ }
+ }
++
++ /* Calculate the buffer size for simple atom vs atoms list */
++ ls.cluster_size_i = 1;
++ ls.cluster_size_j = 1;
++ calc_verlet_buffer_size(mtop,det(box),ir,verletbuf_drift,
++ &ls,&n_nonlin_vsite,&rlist_1x1);
++
++ /* Set the pair-list buffer size in ir */
++ verletbuf_get_list_setup(FALSE,&ls);
++ calc_verlet_buffer_size(mtop,det(box),ir,verletbuf_drift,
++ &ls,&n_nonlin_vsite,&ir->rlist);
++
++ if (n_nonlin_vsite > 0)
++ {
++ sprintf(warn_buf,"There are %d non-linear virtual site constructions. Their contribution to the energy drift is approximated. In most cases this does not affect the energy drift significantly.",n_nonlin_vsite);
++ warning_note(wi,warn_buf);
++ }
++
++ printf("Calculated rlist for %dx%d atom pair-list as %.3f nm, buffer size %.3f nm\n",
++ 1,1,rlist_1x1,rlist_1x1-max(ir->rvdw,ir->rcoulomb));
++
++ ir->rlistlong = ir->rlist;
++ printf("Set rlist, assuming %dx%d atom pair-list, to %.3f nm, buffer size %.3f nm\n",
++ ls.cluster_size_i,ls.cluster_size_j,
++ ir->rlist,ir->rlist-max(ir->rvdw,ir->rcoulomb));
++
++ if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC,box))
++ {
++ gmx_fatal(FARGS,"The pair-list cut-off (%g nm) is longer than half the shortest box vector or longer than the smallest box diagonal element (%g nm). Increase the box size or decrease nstlist or increase verlet-buffer-drift.",ir->rlistlong,sqrt(max_cutoff2(ir->ePBC,box)));
++ }
+}
+
+int main (int argc, char *argv[])
+{
+ static const char *desc[] = {
+ "The gromacs preprocessor",
+ "reads a molecular topology file, checks the validity of the",
+ "file, expands the topology from a molecular description to an atomic",
+ "description. The topology file contains information about",
+ "molecule types and the number of molecules, the preprocessor",
+ "copies each molecule as needed. ",
+ "There is no limitation on the number of molecule types. ",
+ "Bonds and bond-angles can be converted into constraints, separately",
+ "for hydrogens and heavy atoms.",
+ "Then a coordinate file is read and velocities can be generated",
+ "from a Maxwellian distribution if requested.",
+ "[TT]grompp[tt] also reads parameters for the [TT]mdrun[tt] ",
+ "(eg. number of MD steps, time step, cut-off), and others such as",
+ "NEMD parameters, which are corrected so that the net acceleration",
+ "is zero.",
+ "Eventually a binary file is produced that can serve as the sole input",
+ "file for the MD program.[PAR]",
+
+ "[TT]grompp[tt] uses the atom names from the topology file. The atom names",
+ "in the coordinate file (option [TT]-c[tt]) are only read to generate",
+ "warnings when they do not match the atom names in the topology.",
+ "Note that the atom names are irrelevant for the simulation as",
+ "only the atom types are used for generating interaction parameters.[PAR]",
+
+ "[TT]grompp[tt] uses a built-in preprocessor to resolve includes, macros, ",
+ "etc. The preprocessor supports the following keywords:[PAR]",
+ "#ifdef VARIABLE[BR]",
+ "#ifndef VARIABLE[BR]",
+ "#else[BR]",
+ "#endif[BR]",
+ "#define VARIABLE[BR]",
+ "#undef VARIABLE[BR]"
+ "#include \"filename\"[BR]",
+ "#include <filename>[PAR]",
+ "The functioning of these statements in your topology may be modulated by",
+ "using the following two flags in your [TT].mdp[tt] file:[PAR]",
+ "[TT]define = -DVARIABLE1 -DVARIABLE2[BR]",
+ "include = -I/home/john/doe[tt][BR]",
+ "For further information a C-programming textbook may help you out.",
+ "Specifying the [TT]-pp[tt] flag will get the pre-processed",
+ "topology file written out so that you can verify its contents.[PAR]",
+
+ /* cpp has been unnecessary for some time, hasn't it?
+ "If your system does not have a C-preprocessor, you can still",
+ "use [TT]grompp[tt], but you do not have access to the features ",
+ "from the cpp. Command line options to the C-preprocessor can be given",
+ "in the [TT].mdp[tt] file. See your local manual (man cpp).[PAR]",
+ */
+
+ "When using position restraints a file with restraint coordinates",
+ "can be supplied with [TT]-r[tt], otherwise restraining will be done",
+ "with respect to the conformation from the [TT]-c[tt] option.",
+ "For free energy calculation the the coordinates for the B topology",
+ "can be supplied with [TT]-rb[tt], otherwise they will be equal to",
+ "those of the A topology.[PAR]",
+
+ "Starting coordinates can be read from trajectory with [TT]-t[tt].",
+ "The last frame with coordinates and velocities will be read,",
+ "unless the [TT]-time[tt] option is used. Only if this information",
+ "is absent will the coordinates in the [TT]-c[tt] file be used.",
+ "Note that these velocities will not be used when [TT]gen_vel = yes[tt]",
+ "in your [TT].mdp[tt] file. An energy file can be supplied with",
+ "[TT]-e[tt] to read Nose-Hoover and/or Parrinello-Rahman coupling",
+ "variables.[PAR]",
+
+ "[TT]grompp[tt] can be used to restart simulations (preserving",
+ "continuity) by supplying just a checkpoint file with [TT]-t[tt].",
+ "However, for simply changing the number of run steps to extend",
+ "a run, using [TT]tpbconv[tt] is more convenient than [TT]grompp[tt].",
+ "You then supply the old checkpoint file directly to [TT]mdrun[tt]",
+ "with [TT]-cpi[tt]. If you wish to change the ensemble or things",
+ "like output frequency, then supplying the checkpoint file to",
+ "[TT]grompp[tt] with [TT]-t[tt] along with a new [TT].mdp[tt] file",
+ "with [TT]-f[tt] is the recommended procedure.[PAR]",
+
+ "By default, all bonded interactions which have constant energy due to",
+ "virtual site constructions will be removed. If this constant energy is",
+ "not zero, this will result in a shift in the total energy. All bonded",
+ "interactions can be kept by turning off [TT]-rmvsbds[tt]. Additionally,",
+ "all constraints for distances which will be constant anyway because",
+ "of virtual site constructions will be removed. If any constraints remain",
+ "which involve virtual sites, a fatal error will result.[PAR]"
+
+ "To verify your run input file, please take note of all warnings",
+ "on the screen, and correct where necessary. Do also look at the contents",
+ "of the [TT]mdout.mdp[tt] file; this contains comment lines, as well as",
+ "the input that [TT]grompp[tt] has read. If in doubt, you can start [TT]grompp[tt]",
+ "with the [TT]-debug[tt] option which will give you more information",
+ "in a file called [TT]grompp.log[tt] (along with real debug info). You",
+ "can see the contents of the run input file with the [TT]gmxdump[tt]",
+ "program. [TT]gmxcheck[tt] can be used to compare the contents of two",
+ "run input files.[PAR]"
+
+ "The [TT]-maxwarn[tt] option can be used to override warnings printed",
+ "by [TT]grompp[tt] that otherwise halt output. In some cases, warnings are",
+ "harmless, but usually they are not. The user is advised to carefully",
+ "interpret the output messages before attempting to bypass them with",
+ "this option."
+ };
+ t_gromppopts *opts;
+ gmx_mtop_t *sys;
+ int nmi;
+ t_molinfo *mi;
+ gpp_atomtype_t atype;
+ t_inputrec *ir;
+ int natoms,nvsite,comb,mt;
+ t_params *plist;
+ t_state state;
+ matrix box;
+ real max_spacing,fudgeQQ;
+ double reppow;
+ char fn[STRLEN],fnB[STRLEN];
+ const char *mdparin;
+ int ntype;
+ gmx_bool bNeedVel,bGenVel;
+ gmx_bool have_atomnumber;
+ int n12,n13,n14;
+ t_params *gb_plist = NULL;
+ gmx_genborn_t *born = NULL;
+ output_env_t oenv;
+ gmx_bool bVerbose = FALSE;
+ warninp_t wi;
+ char warn_buf[STRLEN];
+
+ t_filenm fnm[] = {
+ { efMDP, NULL, NULL, ffREAD },
+ { efMDP, "-po", "mdout", ffWRITE },
+ { efSTX, "-c", NULL, ffREAD },
+ { efSTX, "-r", NULL, ffOPTRD },
+ { efSTX, "-rb", NULL, ffOPTRD },
+ { efNDX, NULL, NULL, ffOPTRD },
+ { efTOP, NULL, NULL, ffREAD },
+ { efTOP, "-pp", "processed", ffOPTWR },
+ { efTPX, "-o", NULL, ffWRITE },
+ { efTRN, "-t", NULL, ffOPTRD },
+ { efEDR, "-e", NULL, ffOPTRD },
+ { efTRN, "-ref","rotref", ffOPTRW }
+ };
+#define NFILE asize(fnm)
+
+ /* Command line options */
+ static gmx_bool bRenum=TRUE;
+ static gmx_bool bRmVSBds=TRUE,bZero=FALSE;
+ static int i,maxwarn=0;
+ static real fr_time=-1;
+ t_pargs pa[] = {
+ { "-v", FALSE, etBOOL,{&bVerbose},
+ "Be loud and noisy" },
+ { "-time", FALSE, etREAL, {&fr_time},
+ "Take frame at or first after this time." },
+ { "-rmvsbds",FALSE, etBOOL, {&bRmVSBds},
+ "Remove constant bonded interactions with virtual sites" },
+ { "-maxwarn", FALSE, etINT, {&maxwarn},
+ "Number of allowed warnings during input processing. Not for normal use and may generate unstable systems" },
+ { "-zero", FALSE, etBOOL, {&bZero},
+ "Set parameters for bonded interactions without defaults to zero instead of generating an error" },
+ { "-renum", FALSE, etBOOL, {&bRenum},
+ "Renumber atomtypes and minimize number of atomtypes" }
+ };
+
+ CopyRight(stderr,argv[0]);
+
+ /* Initiate some variables */
+ snew(ir,1);
+ snew(opts,1);
+ init_ir(ir,opts);
+
+ /* Parse the command line */
+ parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,
+ asize(desc),desc,0,NULL,&oenv);
+
+ wi = init_warning(TRUE,maxwarn);
+
+ /* PARAMETER file processing */
+ mdparin = opt2fn("-f",NFILE,fnm);
+ set_warning_line(wi,mdparin,-1);
+ get_ir(mdparin,opt2fn("-po",NFILE,fnm),ir,opts,wi);
+
+ if (bVerbose)
+ fprintf(stderr,"checking input for internal consistency...\n");
+ check_ir(mdparin,ir,opts,wi);
+
+ if (ir->ld_seed == -1) {
+ ir->ld_seed = make_seed();
+ fprintf(stderr,"Setting the LD random seed to %d\n",ir->ld_seed);
+ }
+
+ if (ir->expandedvals->lmc_seed == -1) {
+ ir->expandedvals->lmc_seed = make_seed();
+ fprintf(stderr,"Setting the lambda MC random seed to %d\n",ir->expandedvals->lmc_seed);
+ }
+
+ bNeedVel = EI_STATE_VELOCITY(ir->eI);
+ bGenVel = (bNeedVel && opts->bGenVel);
+
+ snew(plist,F_NRE);
+ init_plist(plist);
+ snew(sys,1);
+ atype = init_atomtype();
+ if (debug)
+ pr_symtab(debug,0,"Just opened",&sys->symtab);
+
+ strcpy(fn,ftp2fn(efTOP,NFILE,fnm));
+ if (!gmx_fexist(fn))
+ gmx_fatal(FARGS,"%s does not exist",fn);
+ new_status(fn,opt2fn_null("-pp",NFILE,fnm),opt2fn("-c",NFILE,fnm),
+ opts,ir,bZero,bGenVel,bVerbose,&state,
+ atype,sys,&nmi,&mi,plist,&comb,&reppow,&fudgeQQ,
+ opts->bMorse,
+ wi);
+
+ if (debug)
+ pr_symtab(debug,0,"After new_status",&sys->symtab);
++
++ if (ir->cutoff_scheme == ecutsVERLET)
++ {
++ fprintf(stderr,"Removing all charge groups because cutoff-scheme=%s\n",
++ ecutscheme_names[ir->cutoff_scheme]);
++
++ /* Remove all charge groups */
++ gmx_mtop_remove_chargegroups(sys);
++ }
+
+ if (count_constraints(sys,mi,wi) && (ir->eConstrAlg == econtSHAKE)) {
+ if (ir->eI == eiCG || ir->eI == eiLBFGS) {
+ sprintf(warn_buf,"Can not do %s with %s, use %s",
+ EI(ir->eI),econstr_names[econtSHAKE],econstr_names[econtLINCS]);
+ warning_error(wi,warn_buf);
+ }
+ if (ir->bPeriodicMols) {
+ sprintf(warn_buf,"Can not do periodic molecules with %s, use %s",
+ econstr_names[econtSHAKE],econstr_names[econtLINCS]);
+ warning_error(wi,warn_buf);
+ }
+ }
+
+ if ( EI_SD (ir->eI) && ir->etc != etcNO ) {
+ warning_note(wi,"Temperature coupling is ignored with SD integrators.");
+ }
+
+ /* If we are doing QM/MM, check that we got the atom numbers */
+ have_atomnumber = TRUE;
+ for (i=0; i<get_atomtype_ntypes(atype); i++) {
+ have_atomnumber = have_atomnumber && (get_atomtype_atomnumber(i,atype) >= 0);
+ }
+ if (!have_atomnumber && ir->bQMMM)
+ {
+ warning_error(wi,
+ "\n"
+ "It appears as if you are trying to run a QM/MM calculation, but the force\n"
+ "field you are using does not contain atom numbers fields. This is an\n"
+ "optional field (introduced in Gromacs 3.3) for general runs, but mandatory\n"
+ "for QM/MM. The good news is that it is easy to add - put the atom number as\n"
+ "an integer just before the mass column in ffXXXnb.itp.\n"
+ "NB: United atoms have the same atom numbers as normal ones.\n\n");
+ }
+
+ if (ir->bAdress) {
+ if ((ir->adress->const_wf>1) || (ir->adress->const_wf<0)) {
+ warning_error(wi,"AdResS contant weighting function should be between 0 and 1\n\n");
+ }
+ /** \TODO check size of ex+hy width against box size */
+ }
+
+ /* Check for errors in the input now, since they might cause problems
+ * during processing further down.
+ */
+ check_warning_error(wi,FARGS);
+
+ if (opt2bSet("-r",NFILE,fnm))
+ sprintf(fn,"%s",opt2fn("-r",NFILE,fnm));
+ else
+ sprintf(fn,"%s",opt2fn("-c",NFILE,fnm));
+ if (opt2bSet("-rb",NFILE,fnm))
+ sprintf(fnB,"%s",opt2fn("-rb",NFILE,fnm));
+ else
+ strcpy(fnB,fn);
+
+ if (nint_ftype(sys,mi,F_POSRES) > 0 || nint_ftype(sys,mi,F_FBPOSRES) > 0)
+ {
+ if (bVerbose)
+ {
+ fprintf(stderr,"Reading position restraint coords from %s",fn);
+ if (strcmp(fn,fnB) == 0)
+ {
+ fprintf(stderr,"\n");
+ }
+ else
+ {
+ fprintf(stderr," and %s\n",fnB);
+ }
+ }
+ gen_posres(sys,mi,fn,fnB,
+ ir->refcoord_scaling,ir->ePBC,
+ ir->posres_com,ir->posres_comB,
+ wi);
+ }
+
+ nvsite = 0;
+ /* set parameters for virtual site construction (not for vsiten) */
+ for(mt=0; mt<sys->nmoltype; mt++) {
+ nvsite +=
+ set_vsites(bVerbose, &sys->moltype[mt].atoms, atype, mi[mt].plist);
+ }
+ /* now throw away all obsolete bonds, angles and dihedrals: */
+ /* note: constraints are ALWAYS removed */
+ if (nvsite) {
+ for(mt=0; mt<sys->nmoltype; mt++) {
+ clean_vsite_bondeds(mi[mt].plist,sys->moltype[mt].atoms.nr,bRmVSBds);
+ }
+ }
+
+ /* If we are using CMAP, setup the pre-interpolation grid */
+ if(plist->ncmap>0)
+ {
+ init_cmap_grid(&sys->ffparams.cmap_grid, plist->nc, plist->grid_spacing);
+ setup_cmap(plist->grid_spacing, plist->nc, plist->cmap,&sys->ffparams.cmap_grid);
+ }
+
+ set_wall_atomtype(atype,opts,ir,wi);
+ if (bRenum) {
+ renum_atype(plist, sys, ir->wall_atomtype, atype, bVerbose);
+ ntype = get_atomtype_ntypes(atype);
+ }
+
+ if (ir->implicit_solvent != eisNO)
+ {
+ /* Now we have renumbered the atom types, we can check the GBSA params */
+ check_gbsa_params(ir,atype);
+
+ /* Check that all atoms that have charge and/or LJ-parameters also have
+ * sensible GB-parameters
+ */
+ check_gbsa_params_charged(sys,atype);
+ }
+
+ /* PELA: Copy the atomtype data to the topology atomtype list */
+ copy_atomtype_atomtypes(atype,&(sys->atomtypes));
+
+ if (debug)
+ pr_symtab(debug,0,"After renum_atype",&sys->symtab);
+
+ if (bVerbose)
+ fprintf(stderr,"converting bonded parameters...\n");
+
+ ntype = get_atomtype_ntypes(atype);
+ convert_params(ntype, plist, mi, comb, reppow, fudgeQQ, sys);
+
+ if (debug)
+ pr_symtab(debug,0,"After convert_params",&sys->symtab);
+
+ /* set ptype to VSite for virtual sites */
+ for(mt=0; mt<sys->nmoltype; mt++) {
+ set_vsites_ptype(FALSE,&sys->moltype[mt]);
+ }
+ if (debug) {
+ pr_symtab(debug,0,"After virtual sites",&sys->symtab);
+ }
+ /* Check velocity for virtual sites and shells */
+ if (bGenVel) {
+ check_vel(sys,state.v);
+ }
+
- if (ir->rlist > 0)
+ /* check masses */
+ check_mol(sys,wi);
+
+ for(i=0; i<sys->nmoltype; i++) {
+ check_cg_sizes(ftp2fn(efTOP,NFILE,fnm),&sys->moltype[i].cgs,wi);
+ }
+
+ if (EI_DYNAMICS(ir->eI) && ir->eI != eiBD)
+ {
+ check_bonds_timestep(sys,ir->delta_t,wi);
+ }
+
+ if (EI_ENERGY_MINIMIZATION(ir->eI) && 0 == ir->nsteps)
+ {
+ warning_note(wi,"Zero-step energy minimization will alter the coordinates before calculating the energy. If you just want the energy of a single point, try zero-step MD (with unconstrained_start = yes). To do multiple single-point energy evaluations of different configurations of the same topology, use mdrun -rerun.");
+ }
+
+ check_warning_error(wi,FARGS);
+
+ if (bVerbose)
+ fprintf(stderr,"initialising group options...\n");
+ do_index(mdparin,ftp2fn_null(efNDX,NFILE,fnm),
+ sys,bVerbose,ir,
+ bGenVel ? state.v : NULL,
+ wi);
+
++ if (ir->cutoff_scheme == ecutsVERLET && ir->verletbuf_drift > 0 &&
++ ir->nstlist > 1)
++ {
++ if (EI_DYNAMICS(ir->eI) &&
++ !(EI_MD(ir->eI) && ir->etc==etcNO) &&
++ inputrec2nboundeddim(ir) == 3)
++ {
++ set_verlet_buffer(sys,ir,state.box,ir->verletbuf_drift,wi);
++ }
++ }
++
+ /* Init the temperature coupling state */
+ init_gtc_state(&state,ir->opts.ngtc,0,ir->opts.nhchainlength); /* need to add nnhpres here? */
+
+ if (bVerbose)
+ fprintf(stderr,"Checking consistency between energy and charge groups...\n");
+ check_eg_vs_cg(sys);
+
+ if (debug)
+ pr_symtab(debug,0,"After index",&sys->symtab);
+ triple_check(mdparin,ir,sys,wi);
+ close_symtab(&sys->symtab);
+ if (debug)
+ pr_symtab(debug,0,"After close",&sys->symtab);
+
+ /* make exclusions between QM atoms */
+ if (ir->bQMMM) {
+ if (ir->QMMMscheme==eQMMMschemenormal && ir->ns_type == ensSIMPLE ){
+ gmx_fatal(FARGS,"electrostatic embedding only works with grid neighboursearching, use ns-type=grid instead\n");
+ }
+ else {
+ generate_qmexcl(sys,ir,wi);
+ }
+ }
+
+ if (ftp2bSet(efTRN,NFILE,fnm)) {
+ if (bVerbose)
+ fprintf(stderr,"getting data from old trajectory ...\n");
+ cont_status(ftp2fn(efTRN,NFILE,fnm),ftp2fn_null(efEDR,NFILE,fnm),
+ bNeedVel,bGenVel,fr_time,ir,&state,sys,oenv);
+ }
+
+ if (ir->ePBC==epbcXY && ir->nwall!=2)
+ {
+ clear_rvec(state.box[ZZ]);
+ }
+
- max_spacing = calc_grid(stdout,box,opts->fourierspacing,
++ if (ir->cutoff_scheme != ecutsVERLET && ir->rlist > 0)
+ {
+ set_warning_line(wi,mdparin,-1);
+ check_chargegroup_radii(sys,ir,state.x,wi);
+ }
+
+ if (EEL_FULL(ir->coulombtype)) {
+ /* Calculate the optimal grid dimensions */
+ copy_mat(state.box,box);
+ if (ir->ePBC==epbcXY && ir->nwall==2)
+ svmul(ir->wall_ewald_zfac,box[ZZ],box[ZZ]);
++ if (ir->nkx > 0 && ir->nky > 0 && ir->nkz > 0)
++ {
++ /* Mark fourier_spacing as not used */
++ ir->fourier_spacing = 0;
++ }
++ else if (ir->nkx != 0 && ir->nky != 0 && ir->nkz != 0)
++ {
++ set_warning_line(wi,mdparin,-1);
++ warning_error(wi,"Some of the Fourier grid sizes are set, but all of them need to be set.");
++ }
++ max_spacing = calc_grid(stdout,box,ir->fourier_spacing,
+ &(ir->nkx),&(ir->nky),&(ir->nkz));
+ }
+
+ if (ir->ePull != epullNO)
+ set_pull_init(ir,sys,state.x,state.box,oenv,opts->pull_start);
+
+ if (ir->bRot)
+ {
+ set_reference_positions(ir->rot,sys,state.x,state.box,
+ opt2fn("-ref",NFILE,fnm),opt2bSet("-ref",NFILE,fnm),
+ wi);
+ }
+
+ /* reset_multinr(sys); */
+
+ if (EEL_PME(ir->coulombtype)) {
+ float ratio = pme_load_estimate(sys,ir,state.box);
+ fprintf(stderr,"Estimate for the relative computational load of the PME mesh part: %.2f\n",ratio);
+ /* With free energy we might need to do PME both for the A and B state
+ * charges. This will double the cost, but the optimal performance will
+ * then probably be at a slightly larger cut-off and grid spacing.
+ */
+ if ((ir->efep == efepNO && ratio > 1.0/2.0) ||
+ (ir->efep != efepNO && ratio > 2.0/3.0)) {
+ warning_note(wi,
+ "The optimal PME mesh load for parallel simulations is below 0.5\n"
+ "and for highly parallel simulations between 0.25 and 0.33,\n"
+ "for higher performance, increase the cut-off and the PME grid spacing.\n");
+ if (ir->efep != efepNO) {
+ warning_note(wi,
+ "For free energy simulations, the optimal load limit increases from 0.5 to 0.667\n");
+ }
+ }
+ }
+
+ {
+ char warn_buf[STRLEN];
+ double cio = compute_io(ir,sys->natoms,&sys->groups,F_NRE,1);
+ sprintf(warn_buf,"This run will generate roughly %.0f Mb of data",cio);
+ if (cio > 2000) {
+ set_warning_line(wi,mdparin,-1);
+ warning_note(wi,warn_buf);
+ } else {
+ printf("%s\n",warn_buf);
+ }
+ }
+
+ /* MRS: eventually figure out better logic for initializing the fep
+ values that makes declaring the lambda and declaring the state not
+ potentially conflict if not handled correctly. */
+ if (ir->efep != efepNO)
+ {
+ state.fep_state = ir->fepvals->init_fep_state;
+ for (i=0;i<efptNR;i++)
+ {
+ /* init_lambda trumps state definitions*/
+ if (ir->fepvals->init_lambda >= 0)
+ {
+ state.lambda[i] = ir->fepvals->init_lambda;
+ }
+ else
+ {
+ if (ir->fepvals->all_lambda[i] == NULL)
+ {
+ gmx_fatal(FARGS,"Values of lambda not set for a free energy calculation!");
+ }
+ else
+ {
+ state.lambda[i] = ir->fepvals->all_lambda[i][state.fep_state];
+ }
+ }
+ }
+ }
+
+ if (bVerbose)
+ fprintf(stderr,"writing run input file...\n");
+
+ done_warning(wi,FARGS);
+
+ write_tpx_state(ftp2fn(efTPX,NFILE,fnm),ir,&state,sys);
+
+ thanx(stderr);
+
+ return 0;
+}
--- /dev/null
- repl_ex.c runner.c xutils.c)
+include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
+
+set(MDRUN_SOURCES
+ do_gct.c gctio.c genalg.c ionize.c
+ md.c md_openmm.c mdrun.c membed.c
- add_subdirectory(gmx_gpu_utils)
++ pme_switch.c repl_ex.c runner.c xutils.c)
+
+if(GMX_OPENMM)
+ include_directories(./gmx_gpu_utils ${OpenMM_INCLUDE_DIR})
+ link_directories(${OpenMM_LIBRARY_DIR})
+ # with this define no evn.var. is needed with OPENMM_PLUGIN_DIR
+ # if the same OpenMM installation is used for running and building
+ add_definitions( -DOPENMM_PLUGIN_DIR="${OpenMM_PLUGIN_DIR}" )
+ file(TO_CMAKE_PATH ${OpenMM_PLUGIN_DIR} _path)
+ add_library(openmm_api_wrapper STATIC openmm_wrapper.cpp)
+ target_link_libraries(openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})
+ set(GMX_OPENMM_LIBRARIES openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})
+endif(GMX_OPENMM)
+
+if(GMX_FAHCORE)
+ add_library(fahcore ${MDRUN_SOURCES})
+else(GMX_FAHCORE)
+ add_executable(mdrun ${MDRUN_SOURCES})
+ gmx_add_man_page(mdrun)
+ target_link_libraries(mdrun ${GMX_EXTRA_LIBRARIES} libgromacs ${GMX_OPENMM_LIBRARIES}
+ ${OpenMP_LINKER_FLAGS})
+ set_target_properties(mdrun PROPERTIES OUTPUT_NAME "mdrun${GMX_BINARY_SUFFIX}"
+ COMPILE_FLAGS "${OpenMP_C_FLAGS}")
+ install(TARGETS mdrun DESTINATION ${BIN_INSTALL_DIR} COMPONENT mdrun)
+
+ if(GMX_OPENMM AND MSVC)
+ set_target_properties(mdrun PROPERTIES LINK_FLAGS "/NODEFAULTLIB:LIBCMT")
+ endif()
+
+ # Create the custom install-mdrun target
+ if (BUILD_SHARED_LIBS)
+ # If shared libraries are used, we need to install the libraries in
+ # addition to the mdrun binary.
+ add_custom_target(install-mdrun
+ COMMAND ${CMAKE_COMMAND} -DCOMPONENT=libraries
+ -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
+ COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
+ -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
+ COMMENT "Installing mdrun")
+ else (BUILD_SHARED_LIBS)
+ add_custom_target(install-mdrun
+ COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
+ -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
+ COMMENT "Installing mdrun")
+ endif (BUILD_SHARED_LIBS)
+ add_dependencies(install-mdrun mdrun)
+endif(GMX_FAHCORE)
--- /dev/null
- gmx_bool bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres,bEnergyHere;
- gmx_bool bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "typedefs.h"
+#include "smalloc.h"
+#include "sysstuff.h"
+#include "vec.h"
+#include "statutil.h"
+#include "vcm.h"
+#include "mdebin.h"
+#include "nrnb.h"
+#include "calcmu.h"
+#include "index.h"
+#include "vsite.h"
+#include "update.h"
+#include "ns.h"
+#include "trnio.h"
+#include "xtcio.h"
+#include "mdrun.h"
++#include "md_support.h"
+#include "confio.h"
+#include "network.h"
+#include "pull.h"
+#include "xvgr.h"
+#include "physics.h"
+#include "names.h"
+#include "xmdrun.h"
+#include "ionize.h"
+#include "disre.h"
+#include "orires.h"
+#include "pme.h"
+#include "mdatoms.h"
+#include "repl_ex.h"
+#include "qmmm.h"
+#include "domdec.h"
++#include "domdec_network.h"
+#include "partdec.h"
+#include "topsort.h"
+#include "coulomb.h"
+#include "constr.h"
+#include "shellfc.h"
+#include "compute_io.h"
+#include "mvdata.h"
+#include "checkpoint.h"
+#include "mtop_util.h"
+#include "sighandler.h"
+#include "txtdump.h"
+#include "string2.h"
++#include "pme_switch.h"
++#include "bondf.h"
+#include "membed.h"
++#include "types/nlistheuristics.h"
++#include "types/iteratedconstraints.h"
++#include "nbnxn_cuda_data_mgmt.h"
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#ifdef GMX_FAHCORE
+#include "corewrap.h"
+#endif
+
++static void reset_all_counters(FILE *fplog,t_commrec *cr,
++ gmx_large_int_t step,
++ gmx_large_int_t *step_rel,t_inputrec *ir,
++ gmx_wallcycle_t wcycle,t_nrnb *nrnb,
++ gmx_runtime_t *runtime,
++ nbnxn_cuda_ptr_t cu_nbv)
++{
++ char sbuf[STEPSTRSIZE];
++
++ /* Reset all the counters related to performance over the run */
++ md_print_warn(cr,fplog,"step %s: resetting all time and cycle counters\n",
++ gmx_step_str(step,sbuf));
++
++ if (cu_nbv)
++ {
++ nbnxn_cuda_reset_timings(cu_nbv);
++ }
++
++ wallcycle_stop(wcycle,ewcRUN);
++ wallcycle_reset_all(wcycle);
++ if (DOMAINDECOMP(cr))
++ {
++ reset_dd_statistics_counters(cr->dd);
++ }
++ init_nrnb(nrnb);
++ ir->init_step += *step_rel;
++ ir->nsteps -= *step_rel;
++ *step_rel = 0;
++ wallcycle_start(wcycle,ewcRUN);
++ runtime_start(runtime);
++ print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
++}
++
+double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
+ const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int stepout,t_inputrec *ir,
+ gmx_mtop_t *top_global,
+ t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,t_forcerec *fr,
+ int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
+ real cpt_period,real max_hours,
+ const char *deviceOptions,
+ unsigned long Flags,
+ gmx_runtime_t *runtime)
+{
+ gmx_mdoutf_t *outf;
+ gmx_large_int_t step,step_rel;
+ double run_time;
+ double t,t0,lam0[efptNR];
- gmx_bool do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
++ gmx_bool bGStatEveryStep,bGStat,bCalcVir,bCalcEner;
++ gmx_bool bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
+ bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
+ bBornRadii,bStartingFromCpt;
+ gmx_bool bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
- gmx_bool bMasterState;
++ gmx_bool do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
+ bForceUpdate=FALSE,bCPT;
+ int mdof_flags;
-
++ gmx_bool bMasterState;
+ int force_flags,cglo_flags;
+ tensor force_vir,shake_vir,total_vir,tmp_vir,pres;
+ int i,m;
+ t_trxstatus *status;
+ rvec mu_tot;
+ t_vcm *vcm;
+ t_state *bufstate=NULL;
+ matrix *scale_tot,pcoupl_mu,M,ebox;
+ gmx_nlheur_t nlh;
+ t_trxframe rerun_fr;
+ gmx_repl_ex_t repl_ex=NULL;
+ int nchkpt=1;
+ gmx_localtop_t *top;
+ t_mdebin *mdebin=NULL;
+ df_history_t df_history;
+ t_state *state=NULL;
+ rvec *f_global=NULL;
+ int n_xtc=-1;
+ rvec *x_xtc=NULL;
+ gmx_enerdata_t *enerd;
+ rvec *f=NULL;
+ gmx_global_stat_t gstat;
+ gmx_update_t upd=NULL;
+ t_graph *graph=NULL;
+ globsig_t gs;
+ gmx_rng_t mcrng=NULL;
+ gmx_bool bFFscan;
+ gmx_groups_t *groups;
+ gmx_ekindata_t *ekind, *ekind_save;
+ gmx_shellfc_t shellfc;
+ int count,nconverged=0;
+ real timestep=0;
+ double tcount=0;
+ gmx_bool bIonize=FALSE;
+ gmx_bool bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
+ gmx_bool bAppend;
+ gmx_bool bResetCountersHalfMaxH=FALSE;
+ gmx_bool bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
+ real mu_aver=0,dvdl;
+ int a0,a1,gnx=0,ii;
+ atom_id *grpindex=NULL;
+ char *grpname;
+ t_coupl_rec *tcr=NULL;
+ rvec *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
+ matrix boxcopy={{0}},lastbox;
+ tensor tmpvir;
+ real fom,oldfom,veta_save,pcurr,scalevir,tracevir;
+ real vetanew = 0;
+ int lamnew=0;
+ /* for FEP */
+ int fep_state=0;
+ int nstfep;
+ real rate;
+ double cycles;
+ real saved_conserved_quantity = 0;
+ real last_ekin = 0;
+ int iter_i;
+ t_extmass MassQ;
+ int **trotter_seq;
+ char sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
+ int handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
+ gmx_iterate_t iterate;
+ gmx_large_int_t multisim_nsteps=-1; /* number of steps to do before first multisim
+ simulation stops. If equal to zero, don't
+ communicate any more between multisims.*/
++ /* PME load balancing data for GPU kernels */
++ pme_switch_t pme_switch=NULL;
++ double cycles_pmes;
++ gmx_bool bPMETuneTry=FALSE,bPMETuneRunning=FALSE;
+
+ if(MASTER(cr))
+ {
+ fprintf(stderr,
+ "\n* WARNING * WARNING * WARNING * WARNING * WARNING * WARNING *\n"
+ "We have just committed the new CPU detection code in this branch,\n"
+ "and will commit new SSE/AVX kernels in a few days. However, this\n"
+ "means that currently only the NxN kernels are accelerated!\n"
+ "In the mean time, you might want to avoid production runs in 4.6.\n\n");
+ }
+
+#ifdef GMX_FAHCORE
+ /* Temporary addition for FAHCORE checkpointing */
+ int chkpt_ret;
+#endif
- if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
++
+ /* Check for special mdrun options */
+ bRerunMD = (Flags & MD_RERUN);
+ bIonize = (Flags & MD_IONIZE);
+ bFFscan = (Flags & MD_FFSCAN);
+ bAppend = (Flags & MD_APPENDFILES);
+ if (Flags & MD_RESETCOUNTERSHALFWAY)
+ {
+ if (ir->nsteps > 0)
+ {
+ /* Signal to reset the counters half the simulation steps. */
+ wcycle_set_reset_counters(wcycle,ir->nsteps/2);
+ }
+ /* Signal to reset the counters halfway the simulation time. */
+ bResetCountersHalfMaxH = (max_hours > 0);
+ }
+
+ /* md-vv uses averaged full step velocities for T-control
+ md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
+ md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
+ bVV = EI_VV(ir->eI);
+ if (bVV) /* to store the initial velocities while computing virial */
+ {
+ snew(cbuf,top_global->natoms);
+ }
+ /* all the iteratative cases - only if there are constraints */
+ bIterations = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
+ bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
+
+ if (bRerunMD)
+ {
+ /* Since we don't know if the frames read are related in any way,
+ * rebuild the neighborlist at every step.
+ */
+ ir->nstlist = 1;
+ ir->nstcalcenergy = 1;
+ nstglobalcomm = 1;
+ }
+
+ check_ir_old_tpx_versions(cr,fplog,ir,top_global);
+
+ nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
+ bGStatEveryStep = (nstglobalcomm == 1);
+
+ if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
+ {
+ fprintf(fplog,
+ "To reduce the energy communication with nstlist = -1\n"
+ "the neighbor list validity should not be checked at every step,\n"
+ "this means that exact integration is not guaranteed.\n"
+ "The neighbor list validity is checked after:\n"
+ " <n.list life time> - 2*std.dev.(n.list life time) steps.\n"
+ "In most cases this will result in exact integration.\n"
+ "This reduces the energy communication by a factor of 2 to 3.\n"
+ "If you want less energy communication, set nstlist > 3.\n\n");
+ }
+
+ if (bRerunMD || bFFscan)
+ {
+ ir->nstxtcout = 0;
+ }
+ groups = &top_global->groups;
+
+ /* Initial values */
+ init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
+ &(state_global->fep_state),lam0,
+ nrnb,top_global,&upd,
+ nfile,fnm,&outf,&mdebin,
+ force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
+
+ clear_mat(total_vir);
+ clear_mat(pres);
+ /* Energy terms and groups */
+ snew(enerd,1);
+ init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
+ enerd);
+ if (DOMAINDECOMP(cr))
+ {
+ f = NULL;
+ }
+ else
+ {
+ snew(f,top_global->natoms);
+ }
+
+ /* lambda Monte carlo random number generator */
+ if (ir->bExpanded)
+ {
+ mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
+ }
+ /* copy the state into df_history */
+ copy_df_history(&df_history,&state_global->dfhist);
+
+ /* Kinetic energy data */
+ snew(ekind,1);
+ init_ekindata(fplog,top_global,&(ir->opts),ekind);
+ /* needed for iteration of constraints */
+ snew(ekind_save,1);
+ init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
+ /* Copy the cos acceleration to the groups struct */
+ ekind->cosacc.cos_accel = ir->cos_accel;
+
+ gstat = global_stat_init(ir);
+ debug_gmx();
+
+ /* Check for polarizable models and flexible constraints */
+ shellfc = init_shell_flexcon(fplog,
+ top_global,n_flexible_constraints(constr),
+ (ir->bContinuation ||
+ (DOMAINDECOMP(cr) && !MASTER(cr))) ?
+ NULL : state_global->x);
+
+ if (DEFORM(*ir))
+ {
+#ifdef GMX_THREAD_MPI
+ tMPI_Thread_mutex_lock(&deform_init_box_mutex);
+#endif
+ set_deform_reference_box(upd,
+ deform_init_init_step_tpx,
+ deform_init_box_tpx);
+#ifdef GMX_THREAD_MPI
+ tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
+#endif
+ }
+
+ {
+ double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
+ if ((io > 2000) && MASTER(cr))
+ fprintf(stderr,
+ "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
+ io);
+ }
+
+ if (DOMAINDECOMP(cr)) {
+ top = dd_init_local_top(top_global);
+
+ snew(state,1);
+ dd_init_local_state(cr->dd,state_global,state);
+
+ if (DDMASTER(cr->dd) && ir->nstfout) {
+ snew(f_global,state_global->natoms);
+ }
+ } else {
+ if (PAR(cr)) {
+ /* Initialize the particle decomposition and split the topology */
+ top = split_system(fplog,top_global,ir,cr);
+
+ pd_cg_range(cr,&fr->cg0,&fr->hcg);
+ pd_at_range(cr,&a0,&a1);
+ } else {
+ top = gmx_mtop_generate_local_top(top_global,ir);
+
+ a0 = 0;
+ a1 = top_global->natoms;
+ }
+
++ forcerec_set_excl_load(fr,top,cr);
++
+ state = partdec_init_local_state(cr,state_global);
+ f_global = f;
+
+ atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
+
+ if (vsite) {
+ set_vsite_top(vsite,top,mdatoms,cr);
+ }
+
- repl_ex_nst,repl_ex_nex,repl_ex_seed);
++ if (ir->ePBC != epbcNONE && !fr->bMolPBC) {
+ graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
+ }
+
+ if (shellfc) {
+ make_local_shells(cr,mdatoms,shellfc);
+ }
+
++ init_bonded_thread_force_reduction(fr,&top->idef);
++
+ if (ir->pull && PAR(cr)) {
+ dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
+ }
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ /* Distribute the charge groups over the nodes from the master node */
+ dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
+ state_global,top_global,ir,
+ state,&f,mdatoms,top,fr,
+ vsite,shellfc,constr,
+ nrnb,wcycle,FALSE);
++
+ }
+
+ update_mdatoms(mdatoms,state->lambda[efptMASS]);
+
+ if (opt2bSet("-cpi",nfile,fnm))
+ {
+ bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
+ }
+ else
+ {
+ bStateFromCP = FALSE;
+ }
+
+ if (MASTER(cr))
+ {
+ if (bStateFromCP)
+ {
+ /* Update mdebin with energy history if appending to output files */
+ if ( Flags & MD_APPENDFILES )
+ {
+ restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ }
+ else
+ {
+ /* We might have read an energy history from checkpoint,
+ * free the allocated memory and reset the counts.
+ */
+ done_energyhistory(&state_global->enerhist);
+ init_energyhistory(&state_global->enerhist);
+ }
+ }
+ /* Set the initial energy history in state by updating once */
+ update_energyhistory(&state_global->enerhist,mdebin);
+ }
+
+ if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG))
+ {
+ /* Set the random state if we read a checkpoint file */
+ set_stochd_state(upd,state);
+ }
+
+ if (state->flags & (1<<estMC_RNG))
+ {
+ set_mc_state(mcrng,state);
+ }
+
+ /* Initialize constraints */
+ if (constr) {
+ if (!DOMAINDECOMP(cr))
+ set_constraints(constr,top,ir,mdatoms,cr);
+ }
+
+ /* Check whether we have to GCT stuff */
+ bTCR = ftp2bSet(efGCT,nfile,fnm);
+ if (bTCR) {
+ if (MASTER(cr)) {
+ fprintf(stderr,"Will do General Coupling Theory!\n");
+ }
+ gnx = top_global->mols.nr;
+ snew(grpindex,gnx);
+ for(i=0; (i<gnx); i++) {
+ grpindex[i] = i;
+ }
+ }
+
+ if (repl_ex_nst > 0)
+ {
+ /* We need to be sure replica exchange can only occur
+ * when the energies are current */
+ check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
+ "repl_ex_nst",&repl_ex_nst);
+ /* This check needs to happen before inter-simulation
+ * signals are initialized, too */
+ }
+ if (repl_ex_nst > 0 && MASTER(cr))
+ {
+ repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
-
++ repl_ex_nst,repl_ex_nex,repl_ex_seed);
++ }
++
++ /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
++ if ((Flags & MD_TUNEPME) &&
++ EEL_PME(fr->eeltype) &&
++ fr->cutoff_scheme == ecutsVERLET &&
++ (fr->nbv->bUseGPU || !(cr->duty & DUTY_PME)) &&
++ !bRerunMD)
++ {
++ switch_pme_init(&pme_switch,ir,state->box,fr->ic,fr->pmedata);
++ cycles_pmes = 0;
++ if (cr->duty & DUTY_PME)
++ {
++ /* Start tuning right away, as we can't measure the load */
++ bPMETuneRunning = TRUE;
++ }
++ else
++ {
++ /* Separate PME nodes, we can measure the PP/PME load balance */
++ bPMETuneTry = TRUE;
++ }
+ }
++
+ if (!ir->bContinuation && !bRerunMD)
+ {
+ if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
+ {
+ /* Set the velocities of frozen particles to zero */
+ for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
+ {
+ for(m=0; m<DIM; m++)
+ {
+ if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
+ {
+ state->v[i][m] = 0;
+ }
+ }
+ }
+ }
+
+ if (constr)
+ {
+ /* Constrain the initial coordinates and velocities */
+ do_constrain_first(fplog,constr,ir,mdatoms,state,f,
+ graph,cr,nrnb,fr,top,shake_vir);
+ }
+ if (vsite)
+ {
+ /* Construct the virtual sites for the initial configuration */
+ construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
+ top->idef.iparams,top->idef.il,
+ fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ }
+ }
+
+ debug_gmx();
- {
++
+ /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/
+ nstfep = ir->fepvals->nstdhdl;
+ if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
+ {
+ nstfep = ir->expandedvals->nstexpanded;
+ }
+ if (repl_ex_nst > 0 && repl_ex_nst > nstfep)
+ {
+ nstfep = repl_ex_nst;
+ }
+
+ /* I'm assuming we need global communication the first time! MRS */
+ cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
+ | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
+ | (bVV ? CGLO_PRESSURE:0)
+ | (bVV ? CGLO_CONSTRAINT:0)
+ | (bRerunMD ? CGLO_RERUNMD:0)
+ | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
+
+ bSumEkinhOld = FALSE;
+ compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ constr,NULL,FALSE,state->box,
+ top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
+ if (ir->eI == eiVVAK) {
+ /* a second call to get the half step temperature initialized as well */
+ /* we do the same call as above, but turn the pressure off -- internally to
+ compute_globals, this is recognized as a velocity verlet half-step
+ kinetic energy calculation. This minimized excess variables, but
+ perhaps loses some logic?*/
+
+ compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ constr,NULL,FALSE,state->box,
+ top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
+ }
+
+ /* Calculate the initial half step temperature, and save the ekinh_old */
+ if (!(Flags & MD_STARTFROMCPT))
+ {
+ for(i=0; (i<ir->opts.ngtc); i++)
+ {
+ copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
+ }
+ }
+ if (ir->eI != eiVV)
+ {
+ enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
+ and there is no previous step */
+ }
+
+ /* if using an iterative algorithm, we need to create a working directory for the state. */
+ if (bIterations)
+ {
+ bufstate = init_bufstate(state);
+ }
+ if (bFFscan)
+ {
+ snew(xcopy,state->natoms);
+ snew(vcopy,state->natoms);
+ copy_rvecn(state->x,xcopy,0,state->natoms);
+ copy_rvecn(state->v,vcopy,0,state->natoms);
+ copy_mat(state->box,boxcopy);
+ }
+
+ /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
+ temperature control */
+ trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
+
+ if (MASTER(cr))
+ {
+ if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
+ {
+ fprintf(fplog,
+ "RMS relative constraint deviation after constraining: %.2e\n",
+ constr_rmsd(constr,FALSE));
+ }
+ if (EI_STATE_VELOCITY(ir->eI))
+ {
+ fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
+ }
+ if (bRerunMD)
+ {
+ fprintf(stderr,"starting md rerun '%s', reading coordinates from"
+ " input trajectory '%s'\n\n",
+ *(top_global->name),opt2fn("-rerun",nfile,fnm));
+ if (bVerbose)
+ {
+ fprintf(stderr,"Calculated time to finish depends on nsteps from "
+ "run input file,\nwhich may not correspond to the time "
+ "needed to process input trajectory.\n\n");
+ }
+ }
+ else
+ {
+ char tbuf[20];
+ fprintf(stderr,"starting mdrun '%s'\n",
+ *(top_global->name));
+ if (ir->nsteps >= 0)
+ {
+ sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
+ }
+ else
+ {
+ sprintf(tbuf,"%s","infinite");
+ }
+ if (ir->init_step > 0)
+ {
+ fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
+ gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
+ gmx_step_str(ir->init_step,sbuf2),
+ ir->init_step*ir->delta_t);
+ }
+ else
+ {
+ fprintf(stderr,"%s steps, %s ps.\n",
+ gmx_step_str(ir->nsteps,sbuf),tbuf);
+ }
+ }
+ fprintf(fplog,"\n");
+ }
+
+ /* Set and write start time */
+ runtime_start(runtime);
+ print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
+ wallcycle_start(wcycle,ewcRUN);
+ if (fplog)
+ {
+ fprintf(fplog,"\n");
+ }
+
+ /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */
+#ifdef GMX_FAHCORE
+ chkpt_ret=fcCheckPointParallel( cr->nodeid,
+ NULL,0);
+ if ( chkpt_ret == 0 )
+ gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
+#endif
+
+ debug_gmx();
+ /***********************************************************
+ *
+ * Loop over MD steps
+ *
+ ************************************************************/
+
+ /* if rerunMD then read coordinates and velocities from input trajectory */
+ if (bRerunMD)
+ {
+ if (getenv("GMX_FORCE_UPDATE"))
+ {
+ bForceUpdate = TRUE;
+ }
+
+ rerun_fr.natoms = 0;
+ if (MASTER(cr))
+ {
+ bNotLastFrame = read_first_frame(oenv,&status,
+ opt2fn("-rerun",nfile,fnm),
+ &rerun_fr,TRX_NEED_X | TRX_READ_V);
+ if (rerun_fr.natoms != top_global->natoms)
+ {
+ gmx_fatal(FARGS,
+ "Number of atoms in trajectory (%d) does not match the "
+ "run input file (%d)\n",
+ rerun_fr.natoms,top_global->natoms);
+ }
+ if (ir->ePBC != epbcNONE)
+ {
+ if (!rerun_fr.bBox)
+ {
+ gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
+ }
+ if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
+ {
+ gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
+ }
+ }
+ }
+
+ if (PAR(cr))
+ {
+ rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
+ }
+
+ if (ir->ePBC != epbcNONE)
+ {
+ /* Set the shift vectors.
+ * Necessary here when have a static box different from the tpr box.
+ */
+ calc_shifts(rerun_fr.box,fr->shift_vec);
+ }
+ }
+
+ /* loop over MD steps or if rerunMD to end of input trajectory */
+ bFirstStep = TRUE;
+ /* Skip the first Nose-Hoover integration when we get the state from tpx */
+ bStateFromTPX = !bStateFromCP;
+ bInitStep = bFirstStep && (bStateFromTPX || bVV);
+ bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
+ bLastStep = FALSE;
+ bSumEkinhOld = FALSE;
+ bExchanged = FALSE;
+
+ init_global_signals(&gs,cr,ir,repl_ex_nst);
+
+ step = ir->init_step;
+ step_rel = 0;
+
+ if (ir->nstlist == -1)
+ {
+ init_nlistheuristics(&nlh,bGStatEveryStep,step);
+ }
+
+ if (MULTISIM(cr) && (repl_ex_nst <=0 ))
+ {
+ /* check how many steps are left in other sims */
+ multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
+ }
+
+
+ /* and stop now if we should */
+ bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
+ ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
+ while (!bLastStep || (bRerunMD && bNotLastFrame)) {
+
+ wallcycle_start(wcycle,ewcSTEP);
+
+ if (bRerunMD) {
+ if (rerun_fr.bStep) {
+ step = rerun_fr.step;
+ step_rel = step - ir->init_step;
+ }
+ if (rerun_fr.bTime) {
+ t = rerun_fr.time;
+ }
+ else
+ {
+ t = step;
+ }
+ }
+ else
+ {
+ bLastStep = (step_rel == ir->nsteps);
+ t = t0 + step*ir->delta_t;
+ }
+
+ if (ir->efep != efepNO || ir->bSimTemp)
- nrnb,wcycle,do_verbose);
++ {
+ /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value,
+ requiring different logic. */
+
+ set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
+ bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
+ bDoFEP = (do_per_step(step,nstfep) && (ir->efep != efepNO));
+ bDoExpanded = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
+ }
+
+ if (bSimAnn)
+ {
+ update_annealing_target_temp(&(ir->opts),t);
+ }
+
+ if (bRerunMD)
+ {
+ if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
+ {
+ for(i=0; i<state_global->natoms; i++)
+ {
+ copy_rvec(rerun_fr.x[i],state_global->x[i]);
+ }
+ if (rerun_fr.bV)
+ {
+ for(i=0; i<state_global->natoms; i++)
+ {
+ copy_rvec(rerun_fr.v[i],state_global->v[i]);
+ }
+ }
+ else
+ {
+ for(i=0; i<state_global->natoms; i++)
+ {
+ clear_rvec(state_global->v[i]);
+ }
+ if (bRerunWarnNoV)
+ {
+ fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
+ " Ekin, temperature and pressure are incorrect,\n"
+ " the virial will be incorrect when constraints are present.\n"
+ "\n");
+ bRerunWarnNoV = FALSE;
+ }
+ }
+ }
+ copy_mat(rerun_fr.box,state_global->box);
+ copy_mat(state_global->box,state->box);
+
+ if (vsite && (Flags & MD_RERUN_VSITE))
+ {
+ if (DOMAINDECOMP(cr))
+ {
+ gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
+ }
+ if (graph)
+ {
+ /* Following is necessary because the graph may get out of sync
+ * with the coordinates if we only have every N'th coordinate set
+ */
+ mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
+ shift_self(graph,state->box,state->x);
+ }
+ construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
+ top->idef.iparams,top->idef.il,
+ fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ if (graph)
+ {
+ unshift_self(graph,state->box,state->x);
+ }
+ }
+ }
+
+ /* Stop Center of Mass motion */
+ bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
+
+ /* Copy back starting coordinates in case we're doing a forcefield scan */
+ if (bFFscan)
+ {
+ for(ii=0; (ii<state->natoms); ii++)
+ {
+ copy_rvec(xcopy[ii],state->x[ii]);
+ copy_rvec(vcopy[ii],state->v[ii]);
+ }
+ copy_mat(boxcopy,state->box);
+ }
+
+ if (bRerunMD)
+ {
+ /* for rerun MD always do Neighbour Searching */
+ bNS = (bFirstStep || ir->nstlist != 0);
+ bNStList = bNS;
+ }
+ else
+ {
+ /* Determine whether or not to do Neighbour Searching and LR */
+ bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
+
+ bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
+ (ir->nstlist == -1 && nlh.nabnsb > 0));
+
+ if (bNS && ir->nstlist == -1)
+ {
+ set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
+ }
+ }
+
+ /* check whether we should stop because another simulation has
+ stopped. */
+ if (MULTISIM(cr))
+ {
+ if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) &&
+ (multisim_nsteps != ir->nsteps) )
+ {
+ if (bNS)
+ {
+ if (MASTER(cr))
+ {
+ fprintf(stderr,
+ "Stopping simulation %d because another one has finished\n",
+ cr->ms->sim);
+ }
+ bLastStep=TRUE;
+ gs.sig[eglsCHKPT] = 1;
+ }
+ }
+ }
+
+ /* < 0 means stop at next step, > 0 means stop at next NS step */
+ if ( (gs.set[eglsSTOPCOND] < 0 ) ||
+ ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
+ {
+ bLastStep = TRUE;
+ }
+
+ /* Determine whether or not to update the Born radii if doing GB */
+ bBornRadii=bFirstStep;
+ if (ir->implicit_solvent && (step % ir->nstgbradii==0))
+ {
+ bBornRadii=TRUE;
+ }
+
+ do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
+ do_verbose = bVerbose &&
+ (step % stepout == 0 || bFirstStep || bLastStep);
+
+ if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
+ {
+ if (bRerunMD)
+ {
+ bMasterState = TRUE;
+ }
+ else
+ {
+ bMasterState = FALSE;
+ /* Correct the new box if it is too skewed */
+ if (DYNAMIC_BOX(*ir))
+ {
+ if (correct_box(fplog,step,state->box,graph))
+ {
+ bMasterState = TRUE;
+ }
+ }
+ if (DOMAINDECOMP(cr) && bMasterState)
+ {
+ dd_collect_state(cr->dd,state,state_global);
+ }
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ /* Repartition the domain decomposition */
+ wallcycle_start(wcycle,ewcDOMDEC);
+ dd_partition_system(fplog,step,cr,
+ bMasterState,nstglobalcomm,
+ state_global,top_global,ir,
+ state,&f,mdatoms,top,fr,
+ vsite,shellfc,constr,
- if (bRerunMD && rerun_fr.bV)
++ nrnb,wcycle,
++ do_verbose && !bPMETuneRunning);
+ wallcycle_stop(wcycle,ewcDOMDEC);
+ /* If using an iterative integrator, reallocate space to match the decomposition */
+ }
+ }
+
+ if (MASTER(cr) && do_log && !bFFscan)
+ {
+ print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
+ }
+
+ if (ir->efep != efepNO)
+ {
+ update_mdatoms(mdatoms,state->lambda[efptMASS]);
+ }
+
-
- if (EI_VV(ir->eI) && (!bInitStep)) { /* for vv, the first half actually corresponds to the last step */
- bNstEner = do_per_step(step-1,ir->nstcalcenergy);
- } else {
- bNstEner = do_per_step(step,ir->nstcalcenergy);
++ if ((bRerunMD && rerun_fr.bV) || bExchanged)
+ {
+
+ /* We need the kinetic energy at minus the half step for determining
+ * the full step kinetic energy and possibly for T-coupling.*/
+ /* This may not be quite working correctly yet . . . . */
+ compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
+ constr,NULL,FALSE,state->box,
+ top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
+ }
+ clear_mat(force_vir);
+
+ /* Ionize the atoms if necessary */
+ if (bIonize)
+ {
+ ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
+ mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
+ }
+
+ /* Update force field in ffscan program */
+ if (bFFscan)
+ {
+ if (update_forcefield(fplog,
+ nfile,fnm,fr,
+ mdatoms->nr,state->x,state->box))
+ {
+ gmx_finalize_par();
+
+ exit(0);
+ }
+ }
+
+ /* We write a checkpoint at this MD step when:
+ * either at an NS step when we signalled through gs,
+ * or at the last step (but not when we do not want confout),
+ * but never at the first step or with rerun.
+ */
+ bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
+ (bLastStep && (Flags & MD_CONFOUT))) &&
+ step > ir->init_step && !bRerunMD);
+ if (bCPT)
+ {
+ gs.set[eglsCHKPT] = 0;
+ }
+
+ /* Determine the energy and pressure:
+ * at nstcalcenergy steps and at energy output steps (set below).
+ */
- bCalcEnerPres =
- (bNstEner ||
- (ir->epc > epcNO && do_per_step(step,ir->nstpcouple)));
++ if (EI_VV(ir->eI) && (!bInitStep))
++ {
++ /* for vv, the first half actually corresponds to the last step */
++ bCalcEner = do_per_step(step-1,ir->nstcalcenergy);
+ }
- bGStat = (bCalcEnerPres || bStopCM ||
++ else
++ {
++ bCalcEner = do_per_step(step,ir->nstcalcenergy);
++ }
++ bCalcVir = bCalcEner ||
++ (ir->epc != epcNO && do_per_step(step,ir->nstpcouple));
+
+ /* Do we need global communication ? */
- bCalcEnerPres = TRUE;
- bGStat = TRUE;
++ bGStat = (bCalcVir || bCalcEner || bStopCM ||
+ do_per_step(step,nstglobalcomm) ||
+ (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
+
+ do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
+
+ if (do_ene || do_log)
+ {
- (bCalcEnerPres ? GMX_FORCE_VIRIAL : 0) |
++ bCalcVir = TRUE;
++ bCalcEner = TRUE;
++ bGStat = TRUE;
+ }
+
+ /* these CGLO_ options remain the same throughout the iteration */
+ cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
+ (bGStat ? CGLO_GSTAT : 0)
+ );
+
+ force_flags = (GMX_FORCE_STATECHANGED |
+ ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
+ GMX_FORCE_ALLFORCES |
+ (bNStList ? GMX_FORCE_DOLR : 0) |
+ GMX_FORCE_SEPLRF |
- do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
++ (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
++ (bCalcEner ? GMX_FORCE_ENERGY : 0) |
+ (bDoFEP ? GMX_FORCE_DHDL : 0)
+ );
+
+ if (shellfc)
+ {
+ /* Now is the time to relax the shells */
+ count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
+ ir,bNS,force_flags,
+ bStopCM,top,top_global,
+ constr,enerd,fcd,
+ state,f,force_vir,mdatoms,
+ nrnb,wcycle,graph,groups,
+ shellfc,fr,bBornRadii,t,mu_tot,
+ state->natoms,&bConverged,vsite,
+ outf->fp_field);
+ tcount+=count;
+
+ if (bConverged)
+ {
+ nconverged++;
+ }
+ }
+ else
+ {
+ /* The coordinates (x) are shifted (to get whole molecules)
+ * in do_force.
+ * This is parallellized as well, and does communication too.
+ * Check comments in sim_util.c
+ */
- update_coords(fplog,step,ir,mdatoms,state,
++ do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
+ state->box,state->x,&state->hist,
+ f,force_vir,mdatoms,enerd,fcd,
+ state->lambda,graph,
+ fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
+ (bNS ? GMX_FORCE_NS : 0) | force_flags);
+ }
+
+ if (bTCR)
+ {
+ mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
+ mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
+ }
+
+ if (bTCR && bFirstStep)
+ {
+ tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
+ fprintf(fplog,"Done init_coupling\n");
+ fflush(fplog);
+ }
+
+ if (bVV && !bStartingFromCpt && !bRerunMD)
+ /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
+ {
+ if (ir->eI==eiVV && bInitStep)
+ {
+ /* if using velocity verlet with full time step Ekin,
+ * take the first half step only to compute the
+ * virial for the first step. From there,
+ * revert back to the initial coordinates
+ * so that the input is actually the initial step.
+ */
+ copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
+ } else {
+ /* this is for NHC in the Ekin(t+dt/2) version of vv */
+ trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);
+ }
+
- update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
++ update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
+ f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
+ cr,nrnb,constr,&top->idef);
+
+ if (bIterations)
+ {
+ gmx_iterate_init(&iterate,bIterations && !bInitStep);
+ }
+ /* for iterations, we save these vectors, as we will be self-consistently iterating
+ the calculations */
+
+ /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
+
+ /* save the state */
+ if (bIterations && iterate.bIterate) {
+ copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+ }
+
+ bFirstIterate = TRUE;
+ while (bFirstIterate || (bIterations && iterate.bIterate))
+ {
+ if (bIterations && iterate.bIterate)
+ {
+ copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
+ if (bFirstIterate && bTrotter)
+ {
+ /* The first time through, we need a decent first estimate
+ of veta(t+dt) to compute the constraints. Do
+ this by computing the box volume part of the
+ trotter integration at this time. Nothing else
+ should be changed by this routine here. If
+ !(first time), we start with the previous value
+ of veta. */
+
+ veta_save = state->veta;
+ trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
+ vetanew = state->veta;
+ state->veta = veta_save;
+ }
+ }
+
+ bOK = TRUE;
+ if ( !bRerunMD || rerun_fr.bV || bForceUpdate) { /* Why is rerun_fr.bV here? Unclear. */
+ dvdl = 0;
+
- bInitStep,TRUE,bCalcEnerPres,vetanew);
++ update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
++ state,fr->bMolPBC,graph,f,
+ &top->idef,shake_vir,NULL,
+ cr,nrnb,wcycle,upd,constr,
-
++ bInitStep,TRUE,bCalcVir,vetanew);
+
+ if (!bOK && !bFFscan)
+ {
+ gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+ }
+
+ }
+ else if (graph)
+ { /* Need to unshift here if a do_force has been
+ called in the previous step */
+ unshift_self(graph,state->box,state->x);
+ }
- /* bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
- /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
++
+
+ /* if VV, compute the pressure and constraints */
+ /* For VV2, we strictly only need this if using pressure
+ * control, but we really would like to have accurate pressures
+ * printed out.
+ * Think about ways around this in the future?
+ * For now, keep this choice in comments.
+ */
- if (bNstEner && ir->eI==eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/
++ /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
++ /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
+ bPres = TRUE;
+ bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
-
++ if (bCalcEner && ir->eI==eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/
+ {
+ bSumEkinhOld = TRUE;
+ }
+ compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ constr,NULL,FALSE,state->box,
+ top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ cglo_flags
+ | CGLO_ENERGY
+ | (bStopCM ? CGLO_STOPCM : 0)
+ | (bTemp ? CGLO_TEMPERATURE:0)
+ | (bPres ? CGLO_PRESSURE : 0)
+ | (bPres ? CGLO_CONSTRAINT : 0)
+ | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)
+ | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
+ | CGLO_SCALEEKIN
+ );
+ /* explanation of above:
+ a) We compute Ekin at the full time step
+ if 1) we are using the AveVel Ekin, and it's not the
+ initial step, or 2) if we are using AveEkin, but need the full
+ time step kinetic energy for the pressure (always true now, since we want accurate statistics).
+ b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
+ EkinAveVel because it's needed for the pressure */
+
+ /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
+ if (!bInitStep)
+ {
+ if (bTrotter)
+ {
+ trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
+ }
+ else
+ {
++ if (bExchanged)
++ {
++
++ /* We need the kinetic energy at minus the half step for determining
++ * the full step kinetic energy and possibly for T-coupling.*/
++ /* This may not be quite working correctly yet . . . . */
++ compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
++ wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
++ constr,NULL,FALSE,state->box,
++ top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
++ CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
++ }
++
++
+ update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
+ }
+ }
+
+ if (bIterations &&
+ done_iterating(cr,fplog,step,&iterate,bFirstIterate,
+ state->veta,&vetanew))
+ {
+ break;
+ }
+ bFirstIterate = FALSE;
+ }
+
+ if (bTrotter && !bInitStep) {
+ enerd->term[F_DVDL_BONDED] += dvdl; /* only add after iterations */
+ copy_mat(shake_vir,state->svir_prev);
+ copy_mat(force_vir,state->fvir_prev);
+ if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
+ /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
+ enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
+ enerd->term[F_EKIN] = trace(ekind->ekin);
+ }
+ }
+ /* if it's the initial step, we performed this first step just to get the constraint virial */
+ if (bInitStep && ir->eI==eiVV) {
+ copy_rvecn(cbuf,state->v,0,state->natoms);
+ }
+
+ if (fr->bSepDVDL && fplog && do_log)
+ {
+ fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
+ }
+ enerd->term[F_DVDL_BONDED] += dvdl;
+ }
-
++
+ /* MRS -- now done iterating -- compute the conserved quantity */
+ if (bVV) {
+ saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
+ if (ir->eI==eiVV)
+ {
+ last_ekin = enerd->term[F_EKIN];
+ }
+ if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
+ {
+ saved_conserved_quantity -= enerd->term[F_DISPCORR];
+ }
+ /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */
+ sum_dhdl(enerd,state->lambda,ir->fepvals);
+ }
+
+ /* ######## END FIRST UPDATE STEP ############## */
+ /* ######## If doing VV, we now have v(dt) ###### */
+ if (bDoExpanded) {
+ /* perform extended ensemble sampling in lambda - we don't
+ actually move to the new state before outputting
+ statistics, but if performing simulated tempering, we
+ do update the velocities and the tau_t. */
- if (ir->ePBC != epbcNONE && !ir->bPeriodicMols &&
- DOMAINDECOMP(cr))
++
+ lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
+ }
+ /* ################## START TRAJECTORY OUTPUT ################# */
+
+ /* Now we have the energies and forces corresponding to the
+ * coordinates at time t. We must output all of this before
+ * the update.
+ * for RerunMD t is read from input trajectory
+ */
+ mdof_flags = 0;
+ if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
+ if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
+ if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
+ if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
+ if (bCPT) { mdof_flags |= MDOF_CPT; };
+
+#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
+ if (bLastStep)
+ {
+ /* Enforce writing positions and velocities at end of run */
+ mdof_flags |= (MDOF_X | MDOF_V);
+ }
+#endif
+#ifdef GMX_FAHCORE
+ if (MASTER(cr))
+ fcReportProgress( ir->nsteps, step );
+
+ /* sync bCPT and fc record-keeping */
+ if (bCPT && MASTER(cr))
+ fcRequestCheckPoint();
+#endif
+
+ if (mdof_flags != 0)
+ {
+ wallcycle_start(wcycle,ewcTRAJ);
+ if (bCPT)
+ {
+ if (state->flags & (1<<estLD_RNG))
+ {
+ get_stochd_state(upd,state);
+ }
+ if (state->flags & (1<<estMC_RNG))
+ {
+ get_mc_state(mcrng,state);
+ }
+ if (MASTER(cr))
+ {
+ if (bSumEkinhOld)
+ {
+ state_global->ekinstate.bUpToDate = FALSE;
+ }
+ else
+ {
+ update_ekinstate(&state_global->ekinstate,ekind);
+ state_global->ekinstate.bUpToDate = TRUE;
+ }
+ update_energyhistory(&state_global->enerhist,mdebin);
+ if (ir->efep!=efepNO || ir->bSimTemp)
+ {
+ state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
+ structured so this isn't necessary.
+ Note this reassignment is only necessary
+ for single threads.*/
+ copy_df_history(&state_global->dfhist,&df_history);
+ }
+ }
+ }
+ write_traj(fplog,cr,outf,mdof_flags,top_global,
+ step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
+ if (bCPT)
+ {
+ nchkpt++;
+ bCPT = FALSE;
+ }
+ debug_gmx();
+ if (bLastStep && step_rel == ir->nsteps &&
+ (Flags & MD_CONFOUT) && MASTER(cr) &&
+ !bRerunMD && !bFFscan)
+ {
+ /* x and v have been collected in write_traj,
+ * because a checkpoint file will always be written
+ * at the last step.
+ */
+ fprintf(stderr,"\nWriting final coordinates.\n");
- /* Determine the pressure:
- * always when we want exact averages in the energy file,
- * at ns steps when we have pressure coupling,
- * otherwise only at energy output steps (set below).
- */
-
-
- bNstEner = (bGStatEveryStep || do_per_step(step,ir->nstcalcenergy));
- bCalcEnerPres = bNstEner;
-
- /* Do we need global communication ? */
- bGStat = (bGStatEveryStep || bStopCM || bNS ||
- (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
-
- do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
-
- if (do_ene || do_log)
- {
- bCalcEnerPres = TRUE;
- bGStat = TRUE;
- }
-
++ if (fr->bMolPBC)
+ {
+ /* Make molecules whole only for confout writing */
+ do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
+ }
+ write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
+ *top_global->name,top_global,
+ state_global->x,state_global->v,
+ ir->ePBC,state->box);
+ debug_gmx();
+ }
+ wallcycle_stop(wcycle,ewcTRAJ);
+ }
+
+ /* kludge -- virial is lost with restart for NPT control. Must restart */
+ if (bStartingFromCpt && bVV)
+ {
+ copy_mat(state->svir_prev,shake_vir);
+ copy_mat(state->fvir_prev,force_vir);
+ }
+ /* ################## END TRAJECTORY OUTPUT ################ */
+
-
+ /* Determine the wallclock run time up till now */
+ run_time = gmx_gettime() - (double)runtime->real;
++
+ /* Check whether everything is still all right */
+ if (((int)gmx_get_stop_condition() > handled_stop_condition)
+#ifdef GMX_THREAD_MPI
+ && MASTER(cr)
+#endif
+ )
+ {
+ /* this is just to make gs.sig compatible with the hack
+ of sending signals around by MPI_Reduce together with
+ other floats */
+ if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
+ gs.sig[eglsSTOPCOND]=1;
+ if ( gmx_get_stop_condition() == gmx_stop_cond_next )
+ gs.sig[eglsSTOPCOND]=-1;
+ /* < 0 means stop at next step, > 0 means stop at next NS step */
+ if (fplog)
+ {
+ fprintf(fplog,
+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ gmx_get_signal_name(),
+ gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ fflush(fplog);
+ }
+ fprintf(stderr,
+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ gmx_get_signal_name(),
+ gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ fflush(stderr);
+ handled_stop_condition=(int)gmx_get_stop_condition();
+ }
+ else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
+ (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
+ gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
+ {
+ /* Signal to terminate the run */
+ gs.sig[eglsSTOPCOND] = 1;
+ if (fplog)
+ {
+ fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ }
+ fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ }
+
+ if (bResetCountersHalfMaxH && MASTER(cr) &&
+ run_time > max_hours*60.0*60.0*0.495)
+ {
+ gs.sig[eglsRESETCOUNTERS] = 1;
+ }
+
+ if (ir->nstlist == -1 && !bRerunMD)
+ {
+ /* When bGStatEveryStep=FALSE, global_stat is only called
+ * when we check the atom displacements, not at NS steps.
+ * This means that also the bonded interaction count check is not
+ * performed immediately after NS. Therefore a few MD steps could
+ * be performed with missing interactions.
+ * But wrong energies are never written to file,
+ * since energies are only written after global_stat
+ * has been called.
+ */
+ if (step >= nlh.step_nscheck)
+ {
+ nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
+ nlh.scale_tot,state->x);
+ }
+ else
+ {
+ /* This is not necessarily true,
+ * but step_nscheck is determined quite conservatively.
+ */
+ nlh.nabnsb = 0;
+ }
+ }
+
+ /* In parallel we only have to check for checkpointing in steps
+ * where we do global communication,
+ * otherwise the other nodes don't know.
+ */
+ if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
+ cpt_period >= 0 &&
+ (cpt_period == 0 ||
+ run_time >= nchkpt*cpt_period*60.0)) &&
+ gs.set[eglsCHKPT] == 0)
+ {
+ gs.sig[eglsCHKPT] = 1;
+ }
- update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
++
+
+ /* at the start of step, randomize the velocities */
+ if (ETC_ANDERSEN(ir->etc) && EI_VV(ir->eI))
+ {
+ gmx_bool bDoAndersenConstr;
+ bDoAndersenConstr = (constr && update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr));
+ /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
+ if (bDoAndersenConstr)
+ {
- bInitStep,TRUE,FALSE,vetanew);
++ update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
++ state,fr->bMolPBC,graph,f,
+ &top->idef,tmp_vir,NULL,
+ cr,nrnb,wcycle,upd,constr,
- update_coords(fplog,step,ir,mdatoms,state,f,
++ bInitStep,TRUE,bCalcVir,vetanew);
+ }
+ }
+
+ if (bIterations)
+ {
+ gmx_iterate_init(&iterate,bIterations);
+ }
+
+ /* for iterations, we save these vectors, as we will be redoing the calculations */
+ if (bIterations && iterate.bIterate)
+ {
+ copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+ }
+ bFirstIterate = TRUE;
+ while (bFirstIterate || (bIterations && iterate.bIterate))
+ {
+ /* We now restore these vectors to redo the calculation with improved extended variables */
+ if (bIterations)
+ {
+ copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
+ }
+
+ /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
+ so scroll down for that logic */
+
+ /* ######### START SECOND UPDATE STEP ################# */
+ /* Box is changed in update() when we do pressure coupling,
+ * but we should still use the old box for energy corrections and when
+ * writing it to the energy file, so it matches the trajectory files for
+ * the same timestep above. Make a copy in a separate array.
+ */
+ copy_mat(state->box,lastbox);
+
+ bOK = TRUE;
+ if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
+ {
+ wallcycle_start(wcycle,ewcUPDATE);
+ dvdl = 0;
+ /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
+ if (bTrotter)
+ {
+ if (bIterations && iterate.bIterate)
+ {
+ if (bFirstIterate)
+ {
+ scalevir = 1;
+ }
+ else
+ {
+ /* we use a new value of scalevir to converge the iterations faster */
+ scalevir = tracevir/trace(shake_vir);
+ }
+ msmul(shake_vir,scalevir,shake_vir);
+ m_add(force_vir,shake_vir,total_vir);
+ clear_mat(shake_vir);
+ }
+ trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
+ /* We can only do Berendsen coupling after we have summed
+ * the kinetic energy or virial. Since this happens
+ * in global_state after update, we should only do it at
+ * step % nstlist = 1 with bGStatEveryStep=FALSE.
+ */
+ }
+ else
+ {
+ update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
+ update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
+ upd,bInitStep);
+ }
+
+ if (bVV)
+ {
+ /* velocity half-step update */
- update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
++ update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
+ fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
+ cr,nrnb,constr,&top->idef);
+ }
+
+ /* Above, initialize just copies ekinh into ekin,
+ * it doesn't copy position (for VV),
+ * and entire integrator for MD.
+ */
+
+ if (ir->eI==eiVVAK)
+ {
+ copy_rvecn(state->x,cbuf,0,state->natoms);
+ }
+
- update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
++ update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
++ fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
+ wallcycle_stop(wcycle,ewcUPDATE);
+
- bInitStep,FALSE,bCalcEnerPres,state->veta);
++ update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,
++ fr->bMolPBC,graph,f,
+ &top->idef,shake_vir,force_vir,
+ cr,nrnb,wcycle,upd,constr,
- if (ir->eI==eiVVAK)
++ bInitStep,FALSE,bCalcVir,state->veta);
+
- cglo_flags | CGLO_TEMPERATURE
++ if (ir->eI==eiVVAK)
+ {
+ /* erase F_EKIN and F_TEMP here? */
+ /* just compute the kinetic energy at the half step to perform a trotter step */
+ compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ constr,NULL,FALSE,lastbox,
+ top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
- update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
++ cglo_flags | CGLO_TEMPERATURE
+ );
+ wallcycle_start(wcycle,ewcUPDATE);
+ trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);
+ /* now we know the scaling, we can compute the positions again */
+ copy_rvecn(cbuf,state->x,0,state->natoms);
+
- update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
++ update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
++ fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
+ wallcycle_stop(wcycle,ewcUPDATE);
+
+ /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
+ /* are the small terms in the shake_vir here due
+ * to numerical errors, or are they important
+ * physically? I'm thinking they are just errors, but not completely sure.
+ * For now, will call without actually constraining, constr=NULL*/
- bInitStep,FALSE,bCalcEnerPres,
++ update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,
++ state,fr->bMolPBC,graph,f,
+ &top->idef,tmp_vir,force_vir,
+ cr,nrnb,wcycle,upd,NULL,
- if (ir->nstlist == -1 && bFirstIterate)
++ bInitStep,FALSE,bCalcVir,
+ state->veta);
+ }
+ if (!bOK && !bFFscan)
+ {
+ gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+ }
+
+ if (fr->bSepDVDL && fplog && do_log)
+ {
+ fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
+ }
+ enerd->term[F_DVDL_BONDED] += dvdl;
+ }
+ else if (graph)
+ {
+ /* Need to unshift here */
+ unshift_self(graph,state->box,state->x);
+ }
+
+ if (vsite != NULL)
+ {
+ wallcycle_start(wcycle,ewcVSITECONSTR);
+ if (graph != NULL)
+ {
+ shift_self(graph,state->box,state->x);
+ }
+ construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
+ top->idef.iparams,top->idef.il,
+ fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+
+ if (graph != NULL)
+ {
+ unshift_self(graph,state->box,state->x);
+ }
+ wallcycle_stop(wcycle,ewcVSITECONSTR);
+ }
+
+ /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
- gs.sig[eglsNABNSB] = nlh.nabnsb;
- }
- bEnergyHere = (!EI_VV(ir->eI) || (EI_VV(ir->eI) && bRerunMD)); /* this is not quite working for vv and rerun! fails for running rerun on multiple threads. This is caught in runner.c. */
- compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
- wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
- constr,
- bFirstIterate ? &gs : NULL,
- (step_rel % gs.nstms == 0) &&
++ /* With Leap-Frog we can skip compute_globals at
++ * non-communication steps, but we need to calculate
++ * the kinetic energy one step before communication.
++ */
++ if (bGStat || do_per_step(step+1,nstglobalcomm) ||
++ EI_VV(ir->eI))
+ {
- lastbox,
- top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
- cglo_flags
- | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0)
- | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
- | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
- | (bEnergyHere || bRerunMD ? CGLO_PRESSURE : 0)
- | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0)
- | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
- | CGLO_CONSTRAINT
- );
- if (ir->nstlist == -1 && bFirstIterate)
- {
- nlh.nabnsb = gs.set[eglsNABNSB];
- gs.set[eglsNABNSB] = 0;
++ if (ir->nstlist == -1 && bFirstIterate)
++ {
++ gs.sig[eglsNABNSB] = nlh.nabnsb;
++ }
++ compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
++ wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
++ constr,
++ bFirstIterate ? &gs : NULL,
++ (step_rel % gs.nstms == 0) &&
+ (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
-
++ lastbox,
++ top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
++ cglo_flags
++ | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0)
++ | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
++ | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
++ | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
++ | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0)
++ | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
++ | CGLO_CONSTRAINT
++ );
++ if (ir->nstlist == -1 && bFirstIterate)
++ {
++ nlh.nabnsb = gs.set[eglsNABNSB];
++ gs.set[eglsNABNSB] = 0;
++ }
+ }
+ /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
+ /* ############# END CALC EKIN AND PRESSURE ################# */
+
+ /* Note: this is OK, but there are some numerical precision issues with using the convergence of
+ the virial that should probably be addressed eventually. state->veta has better properties,
+ but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
+ generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
+
+ if (bIterations &&
+ done_iterating(cr,fplog,step,&iterate,bFirstIterate,
+ trace(shake_vir),&tracevir))
+ {
+ break;
+ }
+ bFirstIterate = FALSE;
+ }
+
+ /* only add constraint dvdl after constraints */
+ enerd->term[F_DVDL_BONDED] += dvdl;
+ if (!bVV)
+ {
+ /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
+ sum_dhdl(enerd,state->lambda,ir->fepvals);
+ }
+ update_box(fplog,step,ir,mdatoms,state,graph,f,
+ ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
+
+ /* ################# END UPDATE STEP 2 ################# */
+ /* #### We now have r(t+dt) and v(t+dt/2) ############# */
+
+ /* The coordinates (x) were unshifted in update */
+ if (bFFscan && (shellfc==NULL || bConverged))
+ {
+ if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
+ f,NULL,xcopy,
+ &(top_global->mols),mdatoms->massT,pres))
+ {
+ gmx_finalize_par();
+
+ fprintf(stderr,"\n");
+ exit(0);
+ }
+ }
+ if (!bGStat)
+ {
+ /* We will not sum ekinh_old,
+ * so signal that we still have to do it.
+ */
+ bSumEkinhOld = TRUE;
+ }
+
+ if (bTCR)
+ {
+ /* Only do GCT when the relaxation of shells (minimization) has converged,
+ * otherwise we might be coupling to bogus energies.
+ * In parallel we must always do this, because the other sims might
+ * update the FF.
+ */
+
+ /* Since this is called with the new coordinates state->x, I assume
+ * we want the new box state->box too. / EL 20040121
+ */
+ do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
+ ir,MASTER(cr),
+ mdatoms,&(top->idef),mu_aver,
+ top_global->mols.nr,cr,
+ state->box,total_vir,pres,
+ mu_tot,state->x,f,bConverged);
+ debug_gmx();
+ }
+
+ /* ######### BEGIN PREPARING EDR OUTPUT ########### */
- if (bNstEner)
++
+ /* use the directly determined last velocity, not actually the averaged half steps */
+ if (bTrotter && ir->eI==eiVV)
+ {
+ enerd->term[F_EKIN] = last_ekin;
+ }
+ enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
+
+ if (bVV)
+ {
+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
+ }
+ else
+ {
+ enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
+ }
+ /* Check for excessively large energies */
+ if (bIonize)
+ {
+#ifdef GMX_DOUBLE
+ real etot_max = 1e200;
+#else
+ real etot_max = 1e30;
+#endif
+ if (fabs(enerd->term[F_ETOT]) > etot_max)
+ {
+ fprintf(stderr,"Energy too large (%g), giving up\n",
+ enerd->term[F_ETOT]);
+ }
+ }
+ /* ######### END PREPARING EDR OUTPUT ########### */
+
+ /* Time for performance */
+ if (((step % stepout) == 0) || bLastStep)
+ {
+ runtime_upd_proc(runtime);
+ }
+
+ /* Output stuff */
+ if (MASTER(cr))
+ {
+ gmx_bool do_dr,do_or;
+
+ if (fplog && do_log && bDoExpanded)
+ {
+ /* only needed if doing expanded ensemble */
+ PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
+ &df_history,state->fep_state,ir->nstlog,step);
+ }
+ if (!(bStartingFromCpt && (EI_VV(ir->eI))))
+ {
- upd_mdebin(mdebin,bDoDHDL,TRUE,
++ if (bCalcEner)
+ {
- if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
++ upd_mdebin(mdebin,bDoDHDL, TRUE,
+ t,mdatoms->tmass,enerd,state,
+ ir->fepvals,ir->expandedvals,lastbox,
+ shake_vir,force_vir,total_vir,pres,
+ ekind,mu_tot,constr);
+ }
+ else
+ {
+ upd_mdebin_step(mdebin);
+ }
+
+ do_dr = do_per_step(step,ir->nstdisreout);
+ do_or = do_per_step(step,ir->nstorireout);
+
+ print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
+ step,t,
+ eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
+ }
+ if (ir->ePull != epullNO)
+ {
+ pull_print_output(ir->pull,step,t);
+ }
+
+ if (do_per_step(step,ir->nstlog))
+ {
+ if(fflush(fplog) != 0)
+ {
+ gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
+ }
+ }
+ }
+ if (bDoExpanded)
+ {
+ /* Have to do this part after outputting the logfile and the edr file */
+ state->fep_state = lamnew;
+ for (i=0;i<efptNR;i++)
+ {
+ state->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
+ }
+ }
+ /* Remaining runtime */
- reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime);
++ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
+ {
+ if (shellfc)
+ {
+ fprintf(stderr,"\n");
+ }
+ print_time(stderr,runtime,step,ir,cr);
+ }
+
+ /* Replica exchange */
+ bExchanged = FALSE;
+ if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
+ do_per_step(step,repl_ex_nst))
+ {
+ bExchanged = replica_exchange(fplog,cr,repl_ex,
+ state_global,enerd,
+ state,step,t);
+
+ if (bExchanged && DOMAINDECOMP(cr))
+ {
+ dd_partition_system(fplog,step,cr,TRUE,1,
+ state_global,top_global,ir,
+ state,&f,mdatoms,top,fr,
+ vsite,shellfc,constr,
+ nrnb,wcycle,FALSE);
+ }
+ }
+
+ bFirstStep = FALSE;
+ bInitStep = FALSE;
+ bStartingFromCpt = FALSE;
+
+ /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
+ /* With all integrators, except VV, we need to retain the pressure
+ * at the current step for coupling at the next step.
+ */
+ if ((state->flags & (1<<estPRES_PREV)) &&
+ (bGStatEveryStep ||
+ (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
+ {
+ /* Store the pressure in t_state for pressure coupling
+ * at the next MD step.
+ */
+ copy_mat(pres,state->pres_prev);
+ }
+
+ /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
+
+ if ( (membed!=NULL) && (!bLastStep) )
+ {
+ rescale_membed(step_rel,membed,state_global->x);
+ }
+
+ if (bRerunMD)
+ {
+ if (MASTER(cr))
+ {
+ /* read next frame from input trajectory */
+ bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
+ }
+
+ if (PAR(cr))
+ {
+ rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
+ }
+ }
+
+ if (!bRerunMD || !rerun_fr.bStep)
+ {
+ /* increase the MD step number */
+ step++;
+ step_rel++;
+ }
+
+ cycles = wallcycle_stop(wcycle,ewcSTEP);
+ if (DOMAINDECOMP(cr) && wcycle)
+ {
+ dd_cycles_add(cr->dd,cycles,ddCyclStep);
+ }
++
++ if (bPMETuneRunning || bPMETuneTry)
++ {
++ /* PME grid + cut-off optimization with GPUs or PME nodes */
++
++ /* Count the total cycles over the last steps */
++ cycles_pmes += cycles;
++
++ /* We can only switch cut-off at NS steps */
++ if (step % ir->nstlist == 0)
++ {
++ /* PME grid + cut-off optimization with GPUs or PME nodes */
++ if (bPMETuneTry)
++ {
++ if (DDMASTER(cr->dd))
++ {
++ /* PME node load is too high, start tuning */
++ bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
++ }
++ dd_bcast(cr->dd,sizeof(gmx_bool),&bPMETuneRunning);
++
++ if (bPMETuneRunning || step_rel > ir->nstlist*50)
++ {
++ bPMETuneTry = FALSE;
++ }
++ }
++ if (bPMETuneRunning)
++ {
++ /* init_step might not be a multiple of nstlist,
++ * but the first cycle is always skipped anyhow.
++ */
++ bPMETuneRunning =
++ switch_pme(pme_switch,cr,
++ (bVerbose && MASTER(cr)) ? stderr : NULL,
++ fplog,
++ ir,state,cycles_pmes,
++ fr->ic,fr->nbv,&fr->pmedata,
++ step);
++
++ fr->ewaldcoeff = fr->ic->ewaldcoeff;
++ }
++
++ cycles_pmes = 0;
++ }
++ }
+
+ if (step_rel == wcycle_get_reset_counters(wcycle) ||
+ gs.set[eglsRESETCOUNTERS] != 0)
+ {
+ /* Reset all the counters related to performance over the run */
- gmx_pme_finish(cr);
++ reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime,
++ fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
+ wcycle_set_reset_counters(wcycle,-1);
+ /* Correct max_hours for the elapsed time */
+ max_hours -= run_time/(60.0*60.0);
+ bResetCountersHalfMaxH = FALSE;
+ gs.set[eglsRESETCOUNTERS] = 0;
+ }
+
+ }
+ /* End of main MD loop */
+ debug_gmx();
+
+ /* Stop the time */
+ runtime_end(runtime);
+
+ if (bRerunMD && MASTER(cr))
+ {
+ close_trj(status);
+ }
+
+ if (!(cr->duty & DUTY_PME))
+ {
+ /* Tell the PME only node to finish */
-
- return 0;
++ gmx_pme_send_finish(cr);
+ }
+
+ if (MASTER(cr))
+ {
+ if (ir->nstcalcenergy > 0 && !bRerunMD)
+ {
+ print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
+ eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
+ }
+ }
+
+ done_mdoutf(outf);
+
+ debug_gmx();
+
+ if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
+ {
+ fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
+ fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
+ }
+
+ if (shellfc && fplog)
+ {
+ fprintf(fplog,"Fraction of iterations that converged: %.2f %%\n",
+ (nconverged*100.0)/step_rel);
+ fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
+ tcount/step_rel);
+ }
+
+ if (repl_ex_nst > 0 && MASTER(cr))
+ {
+ print_replica_exchange_statistics(fplog,repl_ex);
+ }
+
+ runtime->nsteps_done = step_rel;
++
++ return 0;
+}
--- /dev/null
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2010, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <signal.h>
+#include <stdlib.h>
+
+#include "typedefs.h"
+#include "smalloc.h"
+#include "sysstuff.h"
+#include "vec.h"
+#include "statutil.h"
+#include "vcm.h"
+#include "mdebin.h"
+#include "nrnb.h"
+#include "calcmu.h"
+#include "index.h"
+#include "vsite.h"
+#include "update.h"
+#include "ns.h"
+#include "trnio.h"
+#include "xtcio.h"
+#include "mdrun.h"
++#include "md_support.h"
+#include "confio.h"
+#include "network.h"
+#include "pull.h"
+#include "xvgr.h"
+#include "physics.h"
+#include "names.h"
+#include "xmdrun.h"
+#include "ionize.h"
+#include "disre.h"
+#include "orires.h"
+#include "pme.h"
+#include "mdatoms.h"
+#include "qmmm.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "topsort.h"
+#include "coulomb.h"
+#include "constr.h"
+#include "compute_io.h"
+#include "mvdata.h"
+#include "checkpoint.h"
+#include "mtop_util.h"
+#include "sighandler.h"
+#include "genborn.h"
+#include "string2.h"
+#include "copyrite.h"
+#include "membed.h"
+
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+/* include even when OpenMM not used to force compilation of do_md_openmm */
+#include "openmm_wrapper.h"
+
+double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
+ const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int stepout,t_inputrec *ir,
+ gmx_mtop_t *top_global,
+ t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ gmx_membed_t membed,
+ real cpt_period,real max_hours,
+ const char *deviceOptions,
+ unsigned long Flags,
+ gmx_runtime_t *runtime)
+{
+ gmx_mdoutf_t *outf;
+ gmx_large_int_t step,step_rel;
+ double run_time;
+ double t,t0,lam0;
+ gmx_bool bSimAnn,
+ bFirstStep,bStateFromTPX,bLastStep,bStartingFromCpt;
+ gmx_bool bInitStep=TRUE;
+ gmx_bool do_ene,do_log, do_verbose,
+ bX,bV,bF,bCPT;
+ tensor force_vir,shake_vir,total_vir,pres;
+ int i,m;
+ int mdof_flags;
+ rvec mu_tot;
+ t_vcm *vcm;
+ int nchkpt=1;
+ gmx_localtop_t *top;
+ t_mdebin *mdebin;
+ t_state *state=NULL;
+ rvec *f_global=NULL;
+ int n_xtc=-1;
+ rvec *x_xtc=NULL;
+ gmx_enerdata_t *enerd;
+ rvec *f=NULL;
+ gmx_global_stat_t gstat;
+ gmx_update_t upd=NULL;
+ t_graph *graph=NULL;
+ globsig_t gs;
+
+ gmx_groups_t *groups;
+ gmx_ekindata_t *ekind, *ekind_save;
+ gmx_bool bAppend;
+ int a0,a1;
+ matrix lastbox;
+ real reset_counters=0,reset_counters_now=0;
+ char sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
+ int handled_stop_condition=gmx_stop_cond_none;
+
+ const char *ommOptions = NULL;
+ void *openmmData;
+
+#ifdef GMX_DOUBLE
+ /* Checks in cmake should prevent the compilation in double precision
+ * with OpenMM, but just to be sure we check here.
+ */
+ gmx_fatal(FARGS,"Compilation was performed in double precision, but OpenMM only supports single precision. If you want to use to OpenMM, compile in single precision.");
+#endif
+
+ bAppend = (Flags & MD_APPENDFILES);
+ check_ir_old_tpx_versions(cr,fplog,ir,top_global);
+
+ groups = &top_global->groups;
+
+ /* Initial values */
+ init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
+ &(state_global->fep_state),&lam0,
+ nrnb,top_global,&upd,
+ nfile,fnm,&outf,&mdebin,
+ force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
+
+ clear_mat(total_vir);
+ clear_mat(pres);
+ /* Energy terms and groups */
+ snew(enerd,1);
+ init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
+ enerd);
+ snew(f,top_global->natoms);
+
+ /* Kinetic energy data */
+ snew(ekind,1);
+ init_ekindata(fplog,top_global,&(ir->opts),ekind);
+ /* needed for iteration of constraints */
+ snew(ekind_save,1);
+ init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
+ /* Copy the cos acceleration to the groups struct */
+ ekind->cosacc.cos_accel = ir->cos_accel;
+
+ gstat = global_stat_init(ir);
+ debug_gmx();
+
+ {
+ double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
+ if ((io > 2000) && MASTER(cr))
+ fprintf(stderr,
+ "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
+ io);
+ }
+
+ top = gmx_mtop_generate_local_top(top_global,ir);
+
+ a0 = 0;
+ a1 = top_global->natoms;
+
+ state = partdec_init_local_state(cr,state_global);
+ f_global = f;
+
+ atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
+
+ if (vsite)
+ {
+ set_vsite_top(vsite,top,mdatoms,cr);
+ }
+
+ if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
+ {
+ graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
+ }
+
+ update_mdatoms(mdatoms,state->lambda[efptMASS]);
+
+ if (deviceOptions[0]=='\0')
+ {
+ /* empty options, which should default to OpenMM in this build */
+ ommOptions=deviceOptions;
+ }
+ else
+ {
+ if (gmx_strncasecmp(deviceOptions,"OpenMM",6)!=0)
+ {
+ gmx_fatal(FARGS, "This Gromacs version currently only works with OpenMM. Use -device \"OpenMM:<options>\"");
+ }
+ else
+ {
+ ommOptions=strchr(deviceOptions,':');
+ if (NULL!=ommOptions)
+ {
+ /* Increase the pointer to skip the colon */
+ ommOptions++;
+ }
+ }
+ }
+
+ openmmData = openmm_init(fplog, ommOptions, ir, top_global, top, mdatoms, fr, state);
+ please_cite(fplog,"Friedrichs2009");
+
+ if (MASTER(cr))
+ {
+ /* Update mdebin with energy history if appending to output files */
+ if ( Flags & MD_APPENDFILES )
+ {
+ restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ }
+ /* Set the initial energy history in state to zero by updating once */
+ update_energyhistory(&state_global->enerhist,mdebin);
+ }
+
+ if (constr)
+ {
+ set_constraints(constr,top,ir,mdatoms,cr);
+ }
+
+ if (!ir->bContinuation)
+ {
+ if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
+ {
+ /* Set the velocities of frozen particles to zero */
+ for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
+ {
+ for (m=0; m<DIM; m++)
+ {
+ if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
+ {
+ state->v[i][m] = 0;
+ }
+ }
+ }
+ }
+
+ if (constr)
+ {
+ /* Constrain the initial coordinates and velocities */
+ do_constrain_first(fplog,constr,ir,mdatoms,state,f,
+ graph,cr,nrnb,fr,top,shake_vir);
+ }
+ if (vsite)
+ {
+ /* Construct the virtual sites for the initial configuration */
+ construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
+ top->idef.iparams,top->idef.il,
+ fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ }
+ }
+
+ debug_gmx();
+
+ if (MASTER(cr))
+ {
+ char tbuf[20];
+ fprintf(stderr,"starting mdrun '%s'\n",
+ *(top_global->name));
+ if (ir->nsteps >= 0)
+ {
+ sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
+ }
+ else
+ {
+ sprintf(tbuf,"%s","infinite");
+ }
+ if (ir->init_step > 0)
+ {
+ fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
+ gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
+ gmx_step_str(ir->init_step,sbuf2),
+ ir->init_step*ir->delta_t);
+ }
+ else
+ {
+ fprintf(stderr,"%s steps, %s ps.\n",
+ gmx_step_str(ir->nsteps,sbuf),tbuf);
+ }
+ }
+
+ fprintf(fplog,"\n");
+
+ /* Set and write start time */
+ runtime_start(runtime);
+ print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
+ wallcycle_start(wcycle,ewcRUN);
+ if (fplog)
+ fprintf(fplog,"\n");
+
+ /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */
+
+ debug_gmx();
+ /***********************************************************
+ *
+ * Loop over MD steps
+ *
+ ************************************************************/
+
+ /* loop over MD steps or if rerunMD to end of input trajectory */
+ bFirstStep = TRUE;
+ /* Skip the first Nose-Hoover integration when we get the state from tpx */
+ bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
+ bInitStep = bFirstStep && bStateFromTPX;
+ bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
+ bLastStep = FALSE;
+
+ init_global_signals(&gs,cr,ir,repl_ex_nst);
+
+ step = ir->init_step;
+ step_rel = 0;
+
+ while (!bLastStep)
+ {
+ wallcycle_start(wcycle,ewcSTEP);
+
+ bLastStep = (step_rel == ir->nsteps);
+ t = t0 + step*ir->delta_t;
+
+ if (gs.set[eglsSTOPCOND] != 0)
+ {
+ bLastStep = TRUE;
+ }
+
+ do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
+ do_verbose = bVerbose &&
+ (step % stepout == 0 || bFirstStep || bLastStep);
+
+ if (MASTER(cr) && do_log)
+ {
+ print_ebin_header(fplog,step,t,state->lambda[efptFEP]);
+ }
+
+ clear_mat(force_vir);
+
+ /* We write a checkpoint at this MD step when:
+ * either when we signalled through gs (in OpenMM NS works different),
+ * or at the last step (but not when we do not want confout),
+ * but never at the first step.
+ */
+ bCPT = ((gs.set[eglsCHKPT] ||
+ (bLastStep && (Flags & MD_CONFOUT))) &&
+ step > ir->init_step );
+ if (bCPT)
+ {
+ gs.set[eglsCHKPT] = 0;
+ }
+
+ /* Now we have the energies and forces corresponding to the
+ * coordinates at time t. We must output all of this before
+ * the update.
+ * for RerunMD t is read from input trajectory
+ */
+ mdof_flags = 0;
+ if (do_per_step(step,ir->nstxout))
+ {
+ mdof_flags |= MDOF_X;
+ }
+ if (do_per_step(step,ir->nstvout))
+ {
+ mdof_flags |= MDOF_V;
+ }
+ if (do_per_step(step,ir->nstfout))
+ {
+ mdof_flags |= MDOF_F;
+ }
+ if (do_per_step(step,ir->nstxtcout))
+ {
+ mdof_flags |= MDOF_XTC;
+ }
+ if (bCPT)
+ {
+ mdof_flags |= MDOF_CPT;
+ };
+ do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
+
+ if (mdof_flags != 0 || do_ene || do_log)
+ {
+ wallcycle_start(wcycle,ewcTRAJ);
+ bF = (mdof_flags & MDOF_F);
+ bX = (mdof_flags & (MDOF_X | MDOF_XTC | MDOF_CPT));
+ bV = (mdof_flags & (MDOF_V | MDOF_CPT));
+
+ openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene);
+
+ upd_mdebin(mdebin,FALSE,TRUE,
+ t,mdatoms->tmass,enerd,state,ir->fepvals,ir->expandedvals,lastbox,
+ shake_vir,force_vir,total_vir,pres,
+ ekind,mu_tot,constr);
+ print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,do_log?fplog:NULL,
+ step,t,
+ eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
+ write_traj(fplog,cr,outf,mdof_flags,top_global,
+ step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
+ if (bCPT)
+ {
+ nchkpt++;
+ bCPT = FALSE;
+ }
+ debug_gmx();
+ if (bLastStep && step_rel == ir->nsteps &&
+ (Flags & MD_CONFOUT) && MASTER(cr))
+ {
+ /* x and v have been collected in write_traj,
+ * because a checkpoint file will always be written
+ * at the last step.
+ */
+ fprintf(stderr,"\nWriting final coordinates.\n");
+ if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
+ {
+ /* Make molecules whole only for confout writing */
+ do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
+ }
+ write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
+ *top_global->name,top_global,
+ state_global->x,state_global->v,
+ ir->ePBC,state->box);
+ debug_gmx();
+ }
+ wallcycle_stop(wcycle,ewcTRAJ);
+ }
+
+ /* Determine the wallclock run time up till now */
+ run_time = gmx_gettime() - (double)runtime->real;
+
+ /* Check whether everything is still allright */
+ if (((int)gmx_get_stop_condition() > handled_stop_condition)
+#ifdef GMX_THREAD_MPI
+ && MASTER(cr)
+#endif
+ )
+ {
+ /* this is just make gs.sig compatible with the hack
+ of sending signals around by MPI_Reduce with together with
+ other floats */
+ /* NOTE: this only works for serial code. For code that allows
+ MPI nodes to propagate their condition, see kernel/md.c*/
+ if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
+ gs.set[eglsSTOPCOND]=1;
+ if ( gmx_get_stop_condition() == gmx_stop_cond_next )
+ gs.set[eglsSTOPCOND]=1;
+ /* < 0 means stop at next step, > 0 means stop at next NS step */
+ if (fplog)
+ {
+ fprintf(fplog,
+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ gmx_get_signal_name(),
+ gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ fflush(fplog);
+ }
+ fprintf(stderr,
+ "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ gmx_get_signal_name(),
+ gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ fflush(stderr);
+ handled_stop_condition=(int)gmx_get_stop_condition();
+ }
+ else if (MASTER(cr) &&
+ (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
+ gs.set[eglsSTOPCOND] == 0)
+ {
+ /* Signal to terminate the run */
+ gs.set[eglsSTOPCOND] = 1;
+ if (fplog)
+ {
+ fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ }
+ fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ }
+
+ /* checkpoints */
+ if (MASTER(cr) && (cpt_period >= 0 &&
+ (cpt_period == 0 ||
+ run_time >= nchkpt*cpt_period*60.0)) &&
+ gs.set[eglsCHKPT] == 0)
+ {
+ gs.set[eglsCHKPT] = 1;
+ }
+
+ /* Time for performance */
+ if (((step % stepout) == 0) || bLastStep)
+ {
+ runtime_upd_proc(runtime);
+ }
+
+ if (do_per_step(step,ir->nstlog))
+ {
+ if (fflush(fplog) != 0)
+ {
+ gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
+ }
+ }
+
+ /* Remaining runtime */
+ if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
+ {
+ print_time(stderr,runtime,step,ir,cr);
+ }
+
+ bFirstStep = FALSE;
+ bInitStep = FALSE;
+ bStartingFromCpt = FALSE;
+ step++;
+ step_rel++;
+
+ openmm_take_one_step(openmmData);
+ }
+ /* End of main MD loop */
+ debug_gmx();
+
+ /* Stop the time */
+ runtime_end(runtime);
+
+ if (MASTER(cr))
+ {
+ if (ir->nstcalcenergy > 0)
+ {
+ print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
+ eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
+ }
+ }
+
+ openmm_cleanup(fplog, openmmData);
+
+ done_mdoutf(outf);
+
+ debug_gmx();
+
+ runtime->nsteps_done = step_rel;
+
+ return 0;
+}
--- /dev/null
- #include "gmx_gpu_utils.h"
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2010, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+
+/*
+ * Note, that parts of this source code originate from the Simtk release
+ * of OpenMM accelerated Gromacs, for more details see:
+ * https://simtk.org/project/xml/downloads.xml?group_id=161#package_id600
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <types/simple.h>
+#include <cmath>
+#include <set>
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <map>
+#include <vector>
+#include <cctype>
+#include <algorithm>
+
+using namespace std;
+
+#include "OpenMM.h"
+
+#include "gmx_fatal.h"
+#include "typedefs.h"
+#include "mdrun.h"
+#include "physics.h"
+#include "string2.h"
- if (!is_supported_cuda_gpu(-1, gpuname))
++#include "gpu_utils.h"
+#include "mtop_util.h"
+
+#include "openmm_wrapper.h"
+
+using namespace OpenMM;
+
+/*! \cond */
+#define MEM_ERR_MSG(str) \
+ "The %s-simulation GPU memory test detected errors. As memory errors would cause incorrect " \
+ "simulation results, gromacs has aborted execution.\n Make sure that your GPU's memory is not " \
+ "overclocked and that the device is properly cooled.\n", (str)
+/*! \endcond */
+
+#define COMBRULE_CHK_TOL 1e-6
+#define COMBRULE_SIGMA(sig1, sig2) (((sig1) + (sig2))/2)
+#define COMBRULE_EPS(eps1, eps2) (sqrt((eps1) * (eps2)))
+
+/*!
+ * \brief Convert string to integer type.
+ * \param[in] s String to convert from.
+ * \param[in] f Basefield format flag that takes any of the following I/O
+ * manipulators: dec, hex, oct.
+ * \param[out] t Destination variable to convert to.
+ */
+template <class T>
+static gmx_bool from_string(T& t, const string& s, ios_base& (*f)(ios_base&))
+{
+ istringstream iss(s);
+ return !(iss >> f >> t).fail();
+}
+
+/*!
+ * \brief Split string around a given delimiter.
+ * \param[in] s String to split.
+ * \param[in] delim Delimiter character.
+ * \returns Vector of strings found in \p s.
+ */
+static vector<string> split(const string &s, char delim)
+{
+ vector<string> elems;
+ stringstream ss(s);
+ string item;
+ while (getline(ss, item, delim))
+ {
+ if (item.length() != 0)
+ elems.push_back(item);
+ }
+ return elems;
+}
+
+/*!
+ * \brief Split a string of the form "option=value" into "option" and "value" strings.
+ * This string corresponds to one option and the associated value from the option list
+ * in the mdrun -device argument.
+ *
+ * \param[in] s A string containing an "option=value" pair that needs to be split up.
+ * \param[out] opt The name of the option.
+ * \param[out] val Value of the option.
+ */
+static void splitOptionValue(const string &s, string &opt, string &val)
+{
+ size_t eqPos = s.find('=');
+ if (eqPos != string::npos)
+ {
+ opt = s.substr(0, eqPos);
+ if (eqPos != s.length()) val = s.substr(eqPos+1);
+ }
+}
+
+/*!
+ * \brief Compare two strings ignoring case.
+ * This function is in fact a wrapper around the gromacs function gmx_strncasecmp().
+ * \param[in] s1 String.
+ * \param[in] s2 String.
+ * \returns Similarly to the C function strncasecmp(), the return value is an
+ integer less than, equal to, or greater than 0 if \p s1 less than,
+ identical to, or greater than \p s2.
+ */
+static gmx_bool isStringEqNCase(const string& s1, const string& s2)
+{
+ return (gmx_strncasecmp(s1.c_str(), s2.c_str(), max(s1.length(), s2.length())) == 0);
+}
+
+/*!
+ * \brief Convert string to upper case.
+ *
+ * \param[in] s String to convert to uppercase.
+ * \returns The given string converted to uppercase.
+ */
+static string toUpper(const string &s)
+{
+ string stmp(s);
+ std::transform(stmp.begin(), stmp.end(), stmp.begin(), static_cast < int(*)(int) > (toupper));
+ return stmp;
+}
+
+/*!
+ \name Sizes of constant device option arrays GmxOpenMMPlatformOptions#platforms,
+ GmxOpenMMPlatformOptions#memtests, GmxOpenMMPlatformOptions#deviceid,
+ GmxOpenMMPlatformOptions#force_dev. */
+/* {@ */
+#define SIZEOF_PLATFORMS 2 // 2
+#define SIZEOF_MEMTESTS 3
+#define SIZEOF_DEVICEIDS 1
+#define SIZEOF_FORCE_DEV 2
+
+#define SIZEOF_CHECK_COMBRULE 2
+/* @} */
+
+/*! Possible platform options in the mdrun -device option. */
+static const char *devOptStrings[] = { "platform", "deviceid", "memtest", "force-device", "check-combrule" };
+
+/*! Enumerated platform options in the mdrun -device option. */
+enum devOpt
+{
+ PLATFORM = 0,
+ DEVICEID = 1,
+ MEMTEST = 2,
+ FORCE_DEVICE = 3
+};
+
+/*!
+ * \brief Class to extract and manage the platform options in the mdrun -device option.
+ *
+ */
+class GmxOpenMMPlatformOptions
+{
+public:
+ GmxOpenMMPlatformOptions(const char *opt);
+ ~GmxOpenMMPlatformOptions() { options.clear(); }
+ string getOptionValue(const string &opt);
+ void remOption(const string &opt);
+ void print();
+private:
+ void setOption(const string &opt, const string &val);
+
+ map<string, string> options; /*!< Data structure to store the option (name, value) pairs. */
+
+ static const char * const platforms[SIZEOF_PLATFORMS]; /*!< Available OpenMM platforms; size #SIZEOF_PLATFORMS */
+ static const char * const memtests[SIZEOF_MEMTESTS]; /*!< Available types of memory tests, also valid
+ any positive integer >=15; size #SIZEOF_MEMTESTS */
+ static const char * const deviceid[SIZEOF_DEVICEIDS]; /*!< Possible values for deviceid option;
+ also valid any positive integer; size #SIZEOF_DEVICEIDS */
+ static const char * const force_dev[SIZEOF_FORCE_DEV]; /*!< Possible values for for force-device option;
+ size #SIZEOF_FORCE_DEV */
+ static const char * const check_combrule[SIZEOF_CHECK_COMBRULE]; /* XXX temporary debug feature to
+ turn off combination rule check */
+};
+
+const char * const GmxOpenMMPlatformOptions::platforms[SIZEOF_PLATFORMS]
+ = {"CUDA", "Reference"};
+ //= { "Reference", "CUDA" /*,"OpenCL"*/ };
+const char * const GmxOpenMMPlatformOptions::memtests[SIZEOF_MEMTESTS]
+ = { "15", "full", "off" };
+const char * const GmxOpenMMPlatformOptions::deviceid[SIZEOF_DEVICEIDS]
+ = { "0" };
+const char * const GmxOpenMMPlatformOptions::force_dev[SIZEOF_FORCE_DEV]
+ = { "no", "yes" };
+const char * const GmxOpenMMPlatformOptions::check_combrule[SIZEOF_CHECK_COMBRULE]
+ = { "yes", "no" };
+
+/*!
+ * \brief Contructor.
+ * Takes the option list, parses it, checks the options and their values for validity.
+ * When certain options are not provided by the user, as default value the first item
+ * of the respective constant array is taken (GmxOpenMMPlatformOptions#platforms,
+ * GmxOpenMMPlatformOptions#memtests, GmxOpenMMPlatformOptions#deviceid,
+ * GmxOpenMMPlatformOptions#force_dev).
+ * \param[in] optionString Option list part of the mdrun -device parameter.
+ */
+GmxOpenMMPlatformOptions::GmxOpenMMPlatformOptions(const char *optionString)
+{
+ // set default values
+ setOption("platform", platforms[0]);
+ setOption("memtest", memtests[0]);
+ setOption("deviceid", deviceid[0]);
+ setOption("force-device", force_dev[0]);
+ setOption("check-combrule", check_combrule[0]);
+
+ string opt(optionString);
+
+ // remove all whitespaces
+ opt.erase(remove_if(opt.begin(), opt.end(), ::isspace), opt.end());
+ // tokenize around ","-s
+ vector<string> tokens = split(opt, ',');
+
+ for (vector<string>::iterator it = tokens.begin(); it != tokens.end(); ++it)
+ {
+ string opt = "", val = "";
+ splitOptionValue(*it, opt, val);
+
+ if (isStringEqNCase(opt, "platform"))
+ {
+ /* no check, this will fail if platform does not exist when we try to set it */
+ setOption(opt, val);
+ continue;
+ }
+
+ if (isStringEqNCase(opt, "memtest"))
+ {
+ /* the value has to be an integer >15(s) or "full" OR "off" */
+ if (!isStringEqNCase(val, "full") && !isStringEqNCase(val, "off"))
+ {
+ int secs;
+ if (!from_string<int>(secs, val, std::dec))
+ {
+ gmx_fatal(FARGS, "Invalid value for option memtest option: \"%s\"!", val.c_str());
+ }
+ if (secs < 15)
+ {
+ gmx_fatal(FARGS, "Incorrect value for memtest option (%d). "
+ "Memtest needs to run for at least 15s!", secs);
+ }
+ }
+ setOption(opt, val);
+ continue;
+ }
+
+ if (isStringEqNCase(opt, "deviceid"))
+ {
+ int id;
+ if (!from_string<int>(id, val, std::dec) )
+ {
+ gmx_fatal(FARGS, "Invalid device id: \"%s\"!", val.c_str());
+ }
+ setOption(opt, val);
+ continue;
+ }
+
+ if (isStringEqNCase(opt, "force-device"))
+ {
+ /* */
+ if (!isStringEqNCase(val, "yes") && !isStringEqNCase(val, "no"))
+ {
+ gmx_fatal(FARGS, "Invalid OpenMM force option: \"%s\"!", val.c_str());
+ }
+ setOption(opt, val);
+ continue;
+ }
+
+ if (isStringEqNCase(opt, "check-combrule"))
+ {
+ /* */
+ if (!isStringEqNCase(val, "yes") && !isStringEqNCase(val, "no"))
+ {
+ gmx_fatal(FARGS, "Invalid OpenMM force option: \"%s\"!", val.c_str());
+ }
+ setOption(opt, val);
+ continue;
+ }
+
+
+ // if we got till here something went wrong
+ gmx_fatal(FARGS, "Invalid OpenMM platform option: \"%s\"!", (*it).c_str());
+ }
+}
+
+
+/*!
+ * \brief Getter function.
+ * \param[in] opt Name of the option.
+ * \returns Returns the value associated to an option.
+ */
+string GmxOpenMMPlatformOptions::getOptionValue(const string &opt)
+{
+ map<string, string> :: const_iterator it = options.find(toUpper(opt));
+ if (it != options.end())
+ {
+ return it->second;
+ }
+ else
+ {
+ return NULL;
+ }
+}
+
+/*!
+ * \brief Setter function - private, only used from contructor.
+ * \param[in] opt Name of the option.
+ * \param[in] val Value for the option.
+ */
+void GmxOpenMMPlatformOptions::setOption(const string &opt, const string &val)
+{
+ options[toUpper(opt)] = val;
+}
+
+/*!
+ * \brief Removes an option with its value from the map structure. If the option
+ * does not exist, returns without any action.
+ * \param[in] opt Name of the option.
+ */
+void GmxOpenMMPlatformOptions::remOption(const string &opt)
+{
+ options.erase(toUpper(opt));
+}
+
+/*!
+ * \brief Print option-value pairs to a file (debugging function).
+ */
+void GmxOpenMMPlatformOptions::print()
+{
+ cout << ">> Platform options: " << endl
+ << ">> platform = " << getOptionValue("platform") << endl
+ << ">> deviceID = " << getOptionValue("deviceid") << endl
+ << ">> memtest = " << getOptionValue("memtest") << endl
+ << ">> force-device = " << getOptionValue("force-device") << endl;
+}
+
+/*!
+ * \brief Container for OpenMM related data structures that represent the bridge
+ * between the Gromacs data-structures and the OpenMM library and is but it's
+ * only passed through the API functions as void to disable direct access.
+ */
+class OpenMMData
+{
+public:
+ System* system; //!< The system to simulate.
+ Context* context; //!< The OpenMM context in which the simulation is carried out.
+ Integrator* integrator; //!< The integrator used in the simulation.
+ gmx_bool removeCM; //!< If true, remove center of mass motion, false otherwise.
+ GmxOpenMMPlatformOptions *platformOpt; //!< Platform options.
+};
+
+/*!
+ * \brief Runs memtest on the GPU that has alreaby been initialized by OpenMM.
+ * \param[in] fplog Pointer to gromacs log file.
+ * \param[in] devId Device id of the GPU to run the test on.
+ Note: as OpenMM previously creates the context,for now this is always -1.
+ * \param[in] pre_post Contains either "Pre" or "Post" just to be able to differentiate in
+ * stdout messages/log between memtest carried out before and after simulation.
+ * \param[in] opt Pointer to platform options object.
+ */
+static void runMemtest(FILE* fplog, int devId, const char* pre_post, GmxOpenMMPlatformOptions *opt)
+{
+ char strout_buf[STRLEN];
+ int which_test;
+ int res = 0;
+ string s = opt->getOptionValue("memtest");
+ const char *test_type = s.c_str();
+
+ if (!gmx_strcasecmp(test_type, "off"))
+ {
+ which_test = 0;
+ }
+ else
+ {
+ if (!gmx_strcasecmp(test_type, "full"))
+ {
+ which_test = 2;
+ }
+ else
+ {
+ from_string<int>(which_test, test_type, std::dec);
+ }
+ }
+
+ if (which_test < 0)
+ {
+ gmx_fatal(FARGS, "Amount of seconds for memetest is negative (%d). ", which_test);
+ }
+
+ switch (which_test)
+ {
+ case 0: /* no memtest */
+ sprintf(strout_buf, "%s-simulation GPU memtest skipped. Note, that faulty memory can cause "
+ "incorrect results!", pre_post);
+ fprintf(fplog, "%s\n", strout_buf);
+ gmx_warning(strout_buf);
+ break; /* case 0 */
+
+ case 1: /* quick memtest */
+ fprintf(fplog, "%s-simulation %s GPU memtest in progress...\n", pre_post, test_type);
+ fprintf(stdout, "\n%s-simulation %s GPU memtest in progress...", pre_post, test_type);
+ fflush(fplog);
+ fflush(stdout);
+ res = do_quick_memtest(devId);
+ break; /* case 1 */
+
+ case 2: /* full memtest */
+ fprintf(fplog, "%s-simulation %s memtest in progress...\n", pre_post, test_type);
+ fprintf(stdout, "\n%s-simulation %s memtest in progress...", pre_post, test_type);
+ fflush(fplog);
+ fflush(stdout);
+ res = do_full_memtest(devId);
+ break; /* case 2 */
+
+ default: /* timed memtest */
+ fprintf(fplog, "%s-simulation ~%ds memtest in progress...\n", pre_post, which_test);
+ fprintf(stdout, "\n%s-simulation ~%ds memtest in progress...", pre_post, which_test);
+ fflush(fplog);
+ fflush(stdout);
+ res = do_timed_memtest(devId, which_test);
+ }
+
+ if (which_test != 0)
+ {
+ if (res != 0)
+ {
+ gmx_fatal(FARGS, MEM_ERR_MSG(pre_post));
+ }
+ else
+ {
+ fprintf(fplog, "Memory test completed without errors.\n");
+ fflush(fplog);
+ fprintf(stdout, "done, no errors detected\n");
+ fflush(stdout);
+ }
+ }
+}
+
+/*!
+ * \brief Convert Lennard-Jones parameters c12 and c6 to sigma and epsilon.
+ *
+ * Inverse of the standard LJ parameterization: epsilon = c6^2/(4*c12) and
+ * sigma = (c12/c6)^(1/6). The all-zero pair (c6 = c12 = 0) is mapped to a
+ * non-interacting particle (epsilon = 0) with a dummy sigma of 1.
+ *
+ * \param[in] c12 LJ repulsive coefficient; must be > 0, or 0 together with c6.
+ * \param[in] c6 LJ dispersion coefficient; must be > 0, or 0 together with c12.
+ * \param[out] sigma Resulting LJ sigma.
+ * \param[out] epsilon Resulting LJ epsilon (well depth).
+ */
+static void convert_c_12_6(double c12, double c6, double *sigma, double *epsilon)
+{
+ if (c12 == 0 && c6 == 0)
+ {
+ /* Non-interacting particle: zero well depth, dummy sigma. */
+ *epsilon = 0.0;
+ *sigma = 1.0;
+ }
+ else if (c12 > 0 && c6 > 0)
+ {
+ *epsilon = (c6*c6)/(4.0*c12);
+ *sigma = pow(c12/c6, 1.0/6.0);
+ }
+ else
+ {
+ /* Mixed-sign or partially-zero pairs have no sigma/epsilon representation. */
+ gmx_fatal(FARGS,"OpenMM only supports c6 > 0 and c12 > 0 or c6 = c12 = 0.");
+ }
+}
+
+/*!
+ * \brief Does gromacs option checking.
+ *
+ * Checks the gromacs mdp options for features unsupported in OpenMM, in which
+ * case it interrupts the execution. It also warns the user about peculiarities
+ * of the OpenMM implementation. Optionally (when the debug option
+ * "check-combrule" is set to "yes") it also verifies that the force-field
+ * nonbonded parameters obey the combination rules hardcoded in OpenMM.
+ *
+ * \param[in] fplog Gromacs log file pointer.
+ * \param[in] opt OpenMM platform options, \see GmxOpenMMPlatformOptions
+ * \param[in] ir Gromacs input parameters, see ::t_inputrec
+ * \param[in] top Gromacs node local topology, \see gmx_localtop_t
+ * \param[in] fr Force record, \see ::t_forcerec
+ * \param[in] state Gromacs systems state, \see ::t_state
+ */
+static void checkGmxOptions(FILE* fplog, GmxOpenMMPlatformOptions *opt,
+ t_inputrec *ir, gmx_localtop_t *top,
+ t_forcerec *fr, t_state *state)
+{
+ int i, j, natoms;
+ double c6, c12;
+ double sigma_ij=0, sigma_ji=0, sigma_ii=0, sigma_jj=0, sigma_comb;
+ double eps_ij=0, eps_ji=0, eps_ii=0, eps_jj=0, eps_comb;
+
+ /* Abort if unsupported critical options are present */
+
+ /* Integrator */
+ if (ir->eI == eiMD)
+ {
+ gmx_warning( "OpenMM does not support leap-frog, will use velocity-verlet integrator.");
+ }
+
+ if ( (ir->eI != eiMD) &&
+ (ir->eI != eiVV) &&
+ (ir->eI != eiVVAK) &&
+ (ir->eI != eiSD1) &&
+ (ir->eI != eiSD2) &&
+ (ir->eI != eiBD) )
+ {
+ gmx_fatal(FARGS, "OpenMM supports only the following integrators: md/md-vv/md-vv-avek, sd/sd1, and bd.");
+ }
+
+ /* Electrostatics */
+ if ( !(ir->coulombtype == eelPME ||
+ EEL_RF(ir->coulombtype) ||
+ ir->coulombtype == eelRF ||
+ ir->coulombtype == eelEWALD ||
+ // no-cutoff
+ (ir->coulombtype == eelCUT && ir->rcoulomb == 0 && ir->rvdw == 0) ||
+ // we could have cut-off combined with GBSA (openmm will use RF)
+ ir->implicit_solvent == eisGBSA) )
+ {
+ gmx_fatal(FARGS,"OpenMM supports only the following methods for electrostatics: "
+ "NoCutoff (i.e. rcoulomb = rvdw = 0 ),Reaction-Field, Ewald or PME.");
+ }
+
+ if (EEL_RF(ir->coulombtype) && ir->epsilon_rf != 0)
+ {
+ // openmm has epsilon_rf=inf hard-coded
+ gmx_warning("OpenMM will use a Reaction-Field epsilon of infinity instead of %g.",ir->epsilon_rf);
+ }
+
+ if (ir->etc != etcNO &&
+ ir->eI != eiSD1 &&
+ ir->eI != eiSD2 &&
+ ir->eI != eiBD )
+ {
+ gmx_warning("OpenMM supports only Andersen thermostat with the md/md-vv/md-vv-avek integrators.");
+ }
+
+ if (ir->implicit_solvent == eisGBSA &&
+ ir->gb_algorithm != egbOBC )
+ {
+ gmx_warning("OpenMM does not support the specified algorithm for Generalized Born, will use OBC instead.");
+ }
+
+ if (ir->opts.ngtc > 1)
+ gmx_fatal(FARGS,"OpenMM does not support multiple temperature coupling groups.");
+
+ if (ir->epc != epcNO)
+ gmx_warning("OpenMM supports only Monte Carlo barostat for pressure coupling.");
+
+ if (ir->opts.annealing[0])
+ gmx_fatal(FARGS,"OpenMM does not support simulated annealing.");
+
+ /* NOTE(review): "contraints" below is a typo in a user-visible message
+ ("constraints"); it cannot be fixed here without changing the runtime string. */
+ if (top->idef.il[F_CONSTR].nr > 0 && ir->eConstrAlg != econtSHAKE)
+ gmx_warning("OpenMM provides contraints as a combination "
+ "of SHAKE, SETTLE and CCMA. Accuracy is based on the SHAKE tolerance set "
+ "by the \"shake_tol\" option.");
+
+ if (ir->nwall != 0)
+ gmx_fatal(FARGS,"OpenMM does not support walls.");
+
+ if (ir->ePull != epullNO)
+ gmx_fatal(FARGS,"OpenMM does not support pulling.");
+
+ /* check for interaction types */
+ for (i = 0; i < F_EPOT; i++)
+ {
+ if (!(i == F_CONSTR ||
+ i == F_SETTLE ||
+ i == F_BONDS ||
+ i == F_HARMONIC ||
+ i == F_UREY_BRADLEY ||
+ i == F_ANGLES ||
+ i == F_PDIHS ||
+ i == F_RBDIHS ||
+ i == F_PIDIHS ||
+ i == F_IDIHS ||
+ i == F_LJ14 ||
+ i == F_GB12 || /* The GB parameters are hardcoded both in */
+ i == F_GB13 || /* Gromacs and OpenMM */
+ i == F_GB14 ) &&
+ top->idef.il[i].nr > 0)
+ {
+ gmx_fatal(FARGS, "OpenMM does not support (some) of the provided interaction "
+ "type(s) (%s) ", interaction_function[i].longname);
+ }
+ }
+
+ if (ir->efep != efepNO)
+ gmx_fatal(FARGS,"OpenMM does not support free energy calculations.");
+
+ if (ir->opts.ngacc > 1)
+ gmx_fatal(FARGS,"OpenMM does not support non-equilibrium MD (accelerated groups).");
+
+ if (IR_ELEC_FIELD(*ir))
+ gmx_fatal(FARGS,"OpenMM does not support electric fields.");
+
+ if (ir->bQMMM)
+ gmx_fatal(FARGS,"OpenMM does not support QMMM calculations.");
+
+ if (ir->rcoulomb != ir->rvdw)
+ gmx_fatal(FARGS,"OpenMM uses a single cutoff for both Coulomb "
+ "and VdW interactions. Please set rcoulomb equal to rvdw.");
+
+ if (EEL_FULL(ir->coulombtype))
+ {
+ if (ir->ewald_geometry == eewg3DC)
+ gmx_fatal(FARGS,"OpenMM supports only Ewald 3D geometry.");
+ if (ir->epsilon_surface != 0)
+ gmx_fatal(FARGS,"OpenMM does not support dipole correction in Ewald summation.");
+ }
+
+ if (TRICLINIC(state->box))
+ {
+ gmx_fatal(FARGS,"OpenMM does not support triclinic unit cells.");
+ }
+
+ /* XXX this is just debugging code to disable the combination rule check */
+ if ( isStringEqNCase(opt->getOptionValue("check-combrule"), "yes") )
+ {
+ /* As OpenMM by default uses hardcoded combination rules
+ sigma_ij = (sigma_i + sigma_j)/2, eps_ij = sqrt(eps_i * eps_j)
+ we need to check whether the force field params obey this
+ and if not, we can't use this force field so we exit
+ grace-fatal-fully. */
+ real *nbfp = fr->nbfp;
+ natoms = fr->ntype;
+ if (debug)
+ {
+ fprintf(debug, ">> Atom parameters: <<\n%10s%5s %5s %5s %5s COMB\n",
+ "", "i-j", "j-i", "i-i", "j-j");
+ }
+ /* loop over all i-j atom pairs and verify if
+ sigma_ij = sigma_ji = sigma_comb and eps_ij = eps_ji = eps_comb */
+ for (i = 0; i < natoms; i++)
+ {
+ /* i-i */
+ c12 = C12(nbfp, natoms, i, i);
+ c6 = C6(nbfp, natoms, i, i);
+ convert_c_12_6(c12, c6, &sigma_ii, &eps_ii);
+
+ for (j = 0; j < i; j++)
+ {
+ /* i-j */
+ c12 = C12(nbfp, natoms, i, j);
+ c6 = C6(nbfp, natoms, i, j);
+ convert_c_12_6(c12, c6, &sigma_ij, &eps_ij);
+ /* j-i */
+ c12 = C12(nbfp, natoms, j, i);
+ c6 = C6(nbfp, natoms, j, i);
+ convert_c_12_6(c12, c6, &sigma_ji, &eps_ji);
+ /* j-j */
+ c12 = C12(nbfp, natoms, j, j);
+ c6 = C6(nbfp, natoms, j, j);
+ convert_c_12_6(c12, c6, &sigma_jj, &eps_jj);
+ /* OpenMM hardcoded combination rules */
+ sigma_comb = COMBRULE_SIGMA(sigma_ii, sigma_jj);
+ eps_comb = COMBRULE_EPS(eps_ii, eps_jj);
+
+ if (debug)
+ {
+ fprintf(debug, "i=%-3d j=%-3d", i, j);
+ fprintf(debug, "%-11s", "sigma");
+ fprintf(debug, "%5.3f %5.3f %5.3f %5.3f %5.3f\n",
+ sigma_ij, sigma_ji, sigma_ii, sigma_jj, sigma_comb);
+ fprintf(debug, "%11s%-11s", "", "epsilon");
+ fprintf(debug, "%5.3f %5.3f %5.3f %5.3f %5.3f\n",
+ eps_ij, eps_ji, eps_ii, eps_jj, eps_comb);
+ }
+
+ /* check the values against the rule used by omm;
+ the eps tolerance guard skips pairs with (near-)zero epsilons */
+ if((fabs(eps_ij) > COMBRULE_CHK_TOL &&
+ fabs(eps_ji) > COMBRULE_CHK_TOL) &&
+ (fabs(sigma_comb - sigma_ij) > COMBRULE_CHK_TOL ||
+ fabs(sigma_comb - sigma_ji) > COMBRULE_CHK_TOL ||
+ fabs(eps_comb - eps_ij) > COMBRULE_CHK_TOL ||
+ fabs(eps_comb - eps_ji) > COMBRULE_CHK_TOL ))
+ {
+ gmx_fatal(FARGS,
+ "The combination rules of the used force-field do not "
+ "match the one supported by OpenMM: "
+ "sigma_ij = (sigma_i + sigma_j)/2, eps_ij = sqrt(eps_i * eps_j). "
+ "Switch to a force-field that uses these rules in order to "
+ "simulate this system using OpenMM.\n");
+ }
+ }
+ }
+ if (debug) { fprintf(debug, ">><<\n\n"); }
+
+ /* if we got here, log that everything is fine */
+ if (debug)
+ {
+ fprintf(debug, ">> The combination rule of the used force matches the one used by OpenMM.\n");
+ }
+ fprintf(fplog, "The combination rule of the used force field matches the one used by OpenMM.\n");
+
+ } /* if (are we checking the combination rules) ... */
+}
+
+
+/*!
+ * \brief Initialize OpenMM, run sanity/consistency checks, and return a pointer to
+ * the OpenMMData.
+ *
+ * Various gromacs data structures are passed that contain the parameters, state and
+ * other properties of the system to simulate. These serve as input for initializing
+ * OpenMM. Besides, a set of misc actions are taken:
+ * - OpenMM plugins are loaded;
+ * - platform options in \p platformOptStr are parsed and checked;
+ * - Gromacs parameters are checked for OpenMM support and consistency;
+ * - after the OpenMM is initialized memtest executed in the same GPU context.
+ *
+ * \param[in] fplog Gromacs log file handler.
+ * \param[in] platformOptStr Platform option string.
+ * \param[in] ir The Gromacs input parameters, see ::t_inputrec
+ * \param[in] top_global Gromacs system topology, \see ::gmx_mtop_t
+ * \param[in] top Gromacs node local topology, \see gmx_localtop_t
+ * \param[in] mdatoms Gromacs atom parameters, \see ::t_mdatoms
+ * \param[in] fr \see ::t_forcerec
+ * \param[in] state Gromacs systems state, \see ::t_state
+ * \returns Pointer to a newly created OpenMMData structure (as void*) holding
+ * the System, Integrator, Context and platform options; release it
+ * with openmm_cleanup().
+ */
+void* openmm_init(FILE *fplog, const char *platformOptStr,
+ t_inputrec *ir,
+ gmx_mtop_t *top_global, gmx_localtop_t *top,
+ t_mdatoms *mdatoms, t_forcerec *fr, t_state *state)
+{
+
+ char warn_buf[STRLEN];
+ static gmx_bool hasLoadedPlugins = false;
+ string usedPluginDir;
+ int devId;
+
+ try
+ {
+ /* Plugin loading is done only once per process (static flag above). */
+ if (!hasLoadedPlugins)
+ {
+ vector<string> loadedPlugins;
+ /* Look for OpenMM plugins at various locations (listed in order of priority):
+ - on the path in OPENMM_PLUGIN_DIR environment variable if this is specified
+ - on the path in the OPENMM_PLUGIN_DIR macro that is set by the build script
+ - at the default location assumed by OpenMM
+ */
+ /* env var */
+ char *pluginDir = getenv("OPENMM_PLUGIN_DIR");
+ trim(pluginDir);
+ /* no env var or empty */
+ if (pluginDir != NULL && *pluginDir != '\0')
+ {
+ loadedPlugins = Platform::loadPluginsFromDirectory(pluginDir);
+ if (!loadedPlugins.empty())
+ {
+ hasLoadedPlugins = true;
+ usedPluginDir = pluginDir;
+ }
+ else
+ {
+ gmx_fatal(FARGS, "The directory provided in the OPENMM_PLUGIN_DIR environment variable "
+ "(%s) does not contain valid OpenMM plugins. Check your OpenMM installation!",
+ pluginDir);
+ }
+ }
+
+ /* macro set at build time */
+#ifdef OPENMM_PLUGIN_DIR
+ if (!hasLoadedPlugins)
+ {
+ loadedPlugins = Platform::loadPluginsFromDirectory(OPENMM_PLUGIN_DIR);
+ if (!loadedPlugins.empty())
+ {
+ hasLoadedPlugins = true;
+ usedPluginDir = OPENMM_PLUGIN_DIR;
+ }
+ }
+#endif
+ /* default location */
+ if (!hasLoadedPlugins)
+ {
+ loadedPlugins = Platform::loadPluginsFromDirectory(Platform::getDefaultPluginsDirectory());
+ if (!loadedPlugins.empty())
+ {
+ hasLoadedPlugins = true;
+ usedPluginDir = Platform::getDefaultPluginsDirectory();
+ }
+ }
+
+ /* if there are still no plugins loaded there won't be any */
+ if (!hasLoadedPlugins)
+ {
+ /* NOTE(review): 'pluginDir' is passed as a vararg below, but the
+ format string contains no matching %s conversion — the extra
+ argument is ignored (or worse, UB on some printf backends);
+ it should be removed or a %s added. */
+ gmx_fatal(FARGS, "No OpenMM plugins were found! You can provide the"
+ " plugin directory in the OPENMM_PLUGIN_DIR environment variable.", pluginDir);
+ }
+
+ fprintf(fplog, "\nOpenMM plugins loaded from directory %s:\t", usedPluginDir.c_str());
+ for (int i = 0; i < (int)loadedPlugins.size(); i++)
+ {
+ fprintf(fplog, "%s, ", loadedPlugins[i].c_str());
+ }
+ fprintf(fplog, "\n");
+ }
+
+ /* parse option string */
+ GmxOpenMMPlatformOptions *opt = new GmxOpenMMPlatformOptions(platformOptStr);
+ devId = atoi(opt->getOptionValue("deviceid").c_str());
+
+ if (debug)
+ {
+ opt->print();
+ }
+
+ /* check whether the Gromacs options are compatible with OpenMM */
+ checkGmxOptions(fplog, opt, ir, top, fr, state);
+
+ /* Create the system. */
+ /* Per-interaction entry counts: each ilist entry is 1 type index +
+ the interaction's atom indices, hence the divisors below. */
+ const t_idef& idef = top->idef;
+ const int numAtoms = top_global->natoms;
+ const int numConstraints = idef.il[F_CONSTR].nr/3;
+ const int numSettle = idef.il[F_SETTLE].nr/2;
+ const int numBonds = idef.il[F_BONDS].nr/3;
+ const int numHarmonic = idef.il[F_HARMONIC].nr/3;
+ const int numUB = idef.il[F_UREY_BRADLEY].nr/4;
+ const int numAngles = idef.il[F_ANGLES].nr/4;
+ const int numPeriodic = idef.il[F_PDIHS].nr/5;
+ const int numPeriodicImproper = idef.il[F_PIDIHS].nr/5;
+ const int numRB = idef.il[F_RBDIHS].nr/5;
+ const int numImproperDih = idef.il[F_IDIHS].nr/5;
+ const int num14 = idef.il[F_LJ14].nr/3;
+ System* sys = new System();
+ if (ir->nstcomm > 0)
+ sys->addForce(new CMMotionRemover(ir->nstcomm));
+
+ /* Set bonded force field terms. */
+
+ /*
+ * CUDA platform currently doesn't support more than one
+ * instance of a force object, so we pack all forces that
+ * use the same form into one.
+ */
+
+ const int* bondAtoms = (int*) idef.il[F_BONDS].iatoms;
+ HarmonicBondForce* bondForce = new HarmonicBondForce();
+ sys->addForce(bondForce);
+ int offset = 0;
+ for (int i = 0; i < numBonds; ++i)
+ {
+ int type = bondAtoms[offset++];
+ int atom1 = bondAtoms[offset++];
+ int atom2 = bondAtoms[offset++];
+ bondForce->addBond(atom1, atom2,
+ idef.iparams[type].harmonic.rA, idef.iparams[type].harmonic.krA);
+ }
+
+ /* Harmonic potentials share the same functional form, so they go
+ into the same HarmonicBondForce as the bonds (see note above). */
+ const int* harmonicAtoms = (int*) idef.il[F_HARMONIC].iatoms;
+ offset = 0;
+ for (int i = 0; i < numHarmonic; ++i)
+ {
+ int type = harmonicAtoms[offset++];
+ int atom1 = harmonicAtoms[offset++];
+ int atom2 = harmonicAtoms[offset++];
+ bondForce->addBond(atom1, atom2,
+ idef.iparams[type].harmonic.rA, idef.iparams[type].harmonic.krA);
+ }
+
+ /* Set the angle force field terms */
+ const int* angleAtoms = (int*) idef.il[F_ANGLES].iatoms;
+ HarmonicAngleForce* angleForce = new HarmonicAngleForce();
+ sys->addForce(angleForce);
+ offset = 0;
+ for (int i = 0; i < numAngles; ++i)
+ {
+ int type = angleAtoms[offset++];
+ int atom1 = angleAtoms[offset++];
+ int atom2 = angleAtoms[offset++];
+ int atom3 = angleAtoms[offset++];
+ /* Gromacs stores angles in degrees, OpenMM expects radians. */
+ angleForce->addAngle(atom1, atom2, atom3,
+ idef.iparams[type].harmonic.rA*M_PI/180.0, idef.iparams[type].harmonic.krA);
+ }
+
+ /* Urey-Bradley includes both the angle and bond potential for 1-3 interactions */
+ const int* ubAtoms = (int*) idef.il[F_UREY_BRADLEY].iatoms;
+ /* HarmonicBondForce* ubBondForce = new HarmonicBondForce(); */
+ /* HarmonicAngleForce* ubAngleForce = new HarmonicAngleForce(); */
+ /* sys->addForce(ubBondForce); */
+ /* sys->addForce(ubAngleForce); */
+ offset = 0;
+ for (int i = 0; i < numUB; ++i)
+ {
+ int type = ubAtoms[offset++];
+ int atom1 = ubAtoms[offset++];
+ int atom2 = ubAtoms[offset++];
+ int atom3 = ubAtoms[offset++];
+ /* ubBondForce->addBond(atom1, atom3, */
+ bondForce->addBond(atom1, atom3,
+ idef.iparams[type].u_b.r13A, idef.iparams[type].u_b.kUBA);
+ /* ubAngleForce->addAngle(atom1, atom2, atom3, */
+ angleForce->addAngle(atom1, atom2, atom3,
+ idef.iparams[type].u_b.thetaA*M_PI/180.0, idef.iparams[type].u_b.kthetaA);
+ }
+
+ /* Set proper dihedral terms */
+ const int* periodicAtoms = (int*) idef.il[F_PDIHS].iatoms;
+ PeriodicTorsionForce* periodicForce = new PeriodicTorsionForce();
+ sys->addForce(periodicForce);
+ offset = 0;
+ for (int i = 0; i < numPeriodic; ++i)
+ {
+ int type = periodicAtoms[offset++];
+ int atom1 = periodicAtoms[offset++];
+ int atom2 = periodicAtoms[offset++];
+ int atom3 = periodicAtoms[offset++];
+ int atom4 = periodicAtoms[offset++];
+ periodicForce->addTorsion(atom1, atom2, atom3, atom4,
+ idef.iparams[type].pdihs.mult,
+ idef.iparams[type].pdihs.phiA*M_PI/180.0,
+ idef.iparams[type].pdihs.cpA);
+ }
+
+ /* Set improper dihedral terms that are represented by a periodic function (as in AMBER FF) */
+ const int* periodicImproperAtoms = (int*) idef.il[F_PIDIHS].iatoms;
+ /* PeriodicTorsionForce* periodicImproperForce = new PeriodicTorsionForce(); */
+ /* sys->addForce(periodicImproperForce); */
+ offset = 0;
+ for (int i = 0; i < numPeriodicImproper; ++i)
+ {
+ int type = periodicImproperAtoms[offset++];
+ int atom1 = periodicImproperAtoms[offset++];
+ int atom2 = periodicImproperAtoms[offset++];
+ int atom3 = periodicImproperAtoms[offset++];
+ int atom4 = periodicImproperAtoms[offset++];
+ /* periodicImproperForce->addTorsion(atom1, atom2, atom3, atom4, */
+ periodicForce->addTorsion(atom1, atom2, atom3, atom4,
+ idef.iparams[type].pdihs.mult,
+ idef.iparams[type].pdihs.phiA*M_PI/180.0,
+ idef.iparams[type].pdihs.cpA);
+ }
+
+ /* Ryckaert-Bellemans dihedrals */
+ const int* rbAtoms = (int*) idef.il[F_RBDIHS].iatoms;
+ RBTorsionForce* rbForce = new RBTorsionForce();
+ sys->addForce(rbForce);
+ offset = 0;
+ for (int i = 0; i < numRB; ++i)
+ {
+ int type = rbAtoms[offset++];
+ int atom1 = rbAtoms[offset++];
+ int atom2 = rbAtoms[offset++];
+ int atom3 = rbAtoms[offset++];
+ int atom4 = rbAtoms[offset++];
+ rbForce->addTorsion(atom1, atom2, atom3, atom4,
+ idef.iparams[type].rbdihs.rbcA[0], idef.iparams[type].rbdihs.rbcA[1],
+ idef.iparams[type].rbdihs.rbcA[2], idef.iparams[type].rbdihs.rbcA[3],
+ idef.iparams[type].rbdihs.rbcA[4], idef.iparams[type].rbdihs.rbcA[5]);
+ }
+
+ /* Set improper dihedral terms (as in CHARMM FF) */
+ const int* improperDihAtoms = (int*) idef.il[F_IDIHS].iatoms;
+ CustomTorsionForce* improperDihForce = new CustomTorsionForce("2.0*k*asin(sin((theta-theta0)/2))^2");
+ sys->addForce(improperDihForce);
+ improperDihForce->addPerTorsionParameter("k");
+ improperDihForce->addPerTorsionParameter("theta0");
+ vector<double> improperDihParameters(2);
+ offset = 0;
+ for (int i = 0; i < numImproperDih; ++i)
+ {
+ int type = improperDihAtoms[offset++];
+ int atom1 = improperDihAtoms[offset++];
+ int atom2 = improperDihAtoms[offset++];
+ int atom3 = improperDihAtoms[offset++];
+ int atom4 = improperDihAtoms[offset++];
+ improperDihParameters[0] = idef.iparams[type].harmonic.krA;
+ improperDihParameters[1] = idef.iparams[type].harmonic.rA*M_PI/180.0;
+ improperDihForce->addTorsion(atom1, atom2, atom3, atom4,
+ improperDihParameters);
+ }
+
+ /* Set nonbonded parameters and masses. */
+ int ntypes = fr->ntype;
+ int* types = mdatoms->typeA;
+ real* nbfp = fr->nbfp;
+ real* charges = mdatoms->chargeA;
+ real* masses = mdatoms->massT;
+ NonbondedForce* nonbondedForce = new NonbondedForce();
+ sys->addForce(nonbondedForce);
+
+ switch (ir->ePBC)
+ {
+ case epbcNONE:
+ if (ir->rcoulomb == 0)
+ {
+ nonbondedForce->setNonbondedMethod(NonbondedForce::NoCutoff);
+ }
+ else
+ {
+ nonbondedForce->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
+ }
+ break;
+ case epbcXYZ:
+ switch (ir->coulombtype)
+ {
+ case eelCUT:
+ case eelRF:
+ case eelGRF:
+ case eelRF_NEC:
+ case eelRF_ZERO:
+ nonbondedForce->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
+ break;
+
+ case eelEWALD:
+ nonbondedForce->setNonbondedMethod(NonbondedForce::Ewald);
+ break;
+
+ case eelPME:
+ nonbondedForce->setNonbondedMethod(NonbondedForce::PME);
+ break;
+
+ default:
+ /* NOTE(review): the two string pieces below concatenate without a
+ space ("...that the""electrosatics...") and "electrosatics" is a
+ typo — runtime strings, not fixable in a comment-only change. */
+ gmx_fatal(FARGS,"Internal error: you should not see this message, it means that the"
+ "electrosatics option check failed. Please report this error!");
+ }
+ sys->setDefaultPeriodicBoxVectors(Vec3(state->box[0][0], 0, 0),
+ Vec3(0, state->box[1][1], 0), Vec3(0, 0, state->box[2][2]));
+ nonbondedForce->setCutoffDistance(ir->rcoulomb);
+
+ break;
+ default:
+ gmx_fatal(FARGS,"OpenMM supports only full periodic boundary conditions "
+ "(pbc = xyz), or none (pbc = no).");
+ }
+
+
+ /* Fix for PME and Ewald error tolerance
+ *
+ * OpenMM uses approximate formulas to calculate the Ewald parameter:
+ * alpha = (1.0/cutoff)*sqrt(-log(2.0*tolerance));
+ * and the grid spacing for PME:
+ * gridX = ceil(2*alpha*box[0][0]/3*(pow(tol, 0.2)))
+ * gridY = ceil(2*alpha*box[1][1]/3*(pow(tol, 0.2)));
+ * gridZ = ceil(2*alpha*box[2][2]/3*(pow(tol, 0.2)));
+ *
+ *
+ * If the default ewald_rtol=1e-5 is used we silently adjust the value to the
+ * OpenMM default of 5e-4 otherwise a warning is issued about the action taken.
+ *
+ */
+ double corr_ewald_rtol = 50.0 * ir->ewald_rtol;
+ if ((ir->ePBC == epbcXYZ) &&
+ (ir->coulombtype == eelEWALD || ir->coulombtype == eelPME))
+ {
+ if (debug)
+ {
+ fprintf(debug, ">> ewald_rtol = %e (corrected = %e) \n",
+ ir->ewald_rtol, corr_ewald_rtol);
+ }
+
+ if (fabs(ir->ewald_rtol - 1e-5) > 1e-10)
+ {
+ gmx_warning("OpenMM uses the ewald_rtol parameter with approximate formulas "
+ "to calculate the alpha and grid spacing parameters of the Ewald "
+ "and PME methods. This tolerance need to be corrected in order to get "
+ "settings close to the ones used in GROMACS. Although the internal correction "
+ "should work for any reasonable value of ewald_rtol, using values other than "
+ "the default 1e-5 might cause incorrect behavior.");
+
+ if (corr_ewald_rtol > 1)
+ {
+ gmx_fatal(FARGS, "The ewald_rtol accuracy term is >1 after the "
+ "adjustment for OpenMM (%e)", corr_ewald_rtol);
+ }
+ }
+ nonbondedForce->setEwaldErrorTolerance(corr_ewald_rtol);
+ }
+
+ /* Per-atom LJ parameters: nbfp stores (c6, c12) pairs per type pair;
+ the diagonal entry [type_i][type_i] is the atom's own parameters. */
+ for (int i = 0; i < numAtoms; ++i)
+ {
+ double c12 = nbfp[types[i]*2*ntypes+types[i]*2+1];
+ double c6 = nbfp[types[i]*2*ntypes+types[i]*2];
+ double sigma=0.0, epsilon=0.0;
+ convert_c_12_6(c12, c6, &sigma, &epsilon);
+ nonbondedForce->addParticle(charges[i], sigma, epsilon);
+ sys->addParticle(masses[i]);
+ }
+
+ // Build a table of all exclusions.
+ vector<set<int> > exclusions(numAtoms);
+ for (int i = 0; i < numAtoms; i++)
+ {
+ int start = top->excls.index[i];
+ int end = top->excls.index[i+1];
+ for (int j = start; j < end; j++)
+ exclusions[i].insert(top->excls.a[j]);
+ }
+
+ // Record the 1-4 interactions, and remove them from the list of exclusions.
+ const int* nb14Atoms = (int*) idef.il[F_LJ14].iatoms;
+ offset = 0;
+ for (int i = 0; i < num14; ++i)
+ {
+ int type = nb14Atoms[offset++];
+ int atom1 = nb14Atoms[offset++];
+ int atom2 = nb14Atoms[offset++];
+ double sigma=0, epsilon=0;
+ convert_c_12_6(idef.iparams[type].lj14.c12A,
+ idef.iparams[type].lj14.c6A,
+ &sigma, &epsilon);
+ nonbondedForce->addException(atom1, atom2,
+ fr->fudgeQQ*charges[atom1]*charges[atom2], sigma, epsilon);
+ exclusions[atom1].erase(atom2);
+ exclusions[atom2].erase(atom1);
+ }
+
+ // Record exclusions.
+ for (int i = 0; i < numAtoms; i++)
+ {
+ for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter)
+ {
+ if (i < *iter)
+ {
+ nonbondedForce->addException(i, *iter, 0.0, 1.0, 0.0);
+ }
+ }
+ }
+
+ // Add GBSA if needed.
+ if (ir->implicit_solvent == eisGBSA)
+ {
+ gmx_warning("The OBC scale factors alpha, beta and gamma are hardcoded in OpenMM with the default Gromacs values.");
+ t_atoms atoms = gmx_mtop_global_atoms(top_global);
+ GBSAOBCForce* gbsa = new GBSAOBCForce();
+
+ sys->addForce(gbsa);
+ gbsa->setSoluteDielectric(ir->epsilon_r);
+ gbsa->setSolventDielectric(ir->gb_epsilon_solvent);
+ gbsa->setCutoffDistance(nonbondedForce->getCutoffDistance());
+ if (nonbondedForce->getNonbondedMethod() == NonbondedForce::NoCutoff)
+ gbsa->setNonbondedMethod(GBSAOBCForce::NoCutoff);
+ else if (nonbondedForce->getNonbondedMethod() == NonbondedForce::CutoffNonPeriodic)
+ gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
+ else if (nonbondedForce->getNonbondedMethod() == NonbondedForce::CutoffPeriodic)
+ gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
+ else
+ gmx_fatal(FARGS,"OpenMM supports only Reaction-Field electrostatics with OBC/GBSA.");
+
+ for (int i = 0; i < numAtoms; ++i)
+ {
+ gbsa->addParticle(charges[i],
+ top_global->atomtypes.gb_radius[atoms.atom[i].type],
+ top_global->atomtypes.S_hct[atoms.atom[i].type]);
+ }
+ free_t_atoms(&atoms, FALSE);
+ }
+
+ // Set constraints.
+ const int* constraintAtoms = (int*) idef.il[F_CONSTR].iatoms;
+ offset = 0;
+ for (int i = 0; i < numConstraints; ++i)
+ {
+ int type = constraintAtoms[offset++];
+ int atom1 = constraintAtoms[offset++];
+ int atom2 = constraintAtoms[offset++];
+ sys->addConstraint(atom1, atom2, idef.iparams[type].constr.dA);
+ }
+ /* SETTLE waters become three distance constraints (O-H1, O-H2, H1-H2);
+ hydrogens are assumed to immediately follow their oxygen. */
+ const int* settleAtoms = (int*) idef.il[F_SETTLE].iatoms;
+ offset = 0;
+ for (int i = 0; i < numSettle; ++i)
+ {
+ int type = settleAtoms[offset++];
+ int oxygen = settleAtoms[offset++];
+ sys->addConstraint(oxygen, oxygen+1, idef.iparams[type].settle.doh);
+ sys->addConstraint(oxygen, oxygen+2, idef.iparams[type].settle.doh);
+ sys->addConstraint(oxygen+1, oxygen+2, idef.iparams[type].settle.dhh);
+ }
+
+ // Create an integrator for simulating the system.
+ double friction = (ir->opts.tau_t[0] == 0.0 ? 0.0 : 1.0/ir->opts.tau_t[0]);
+ Integrator* integ;
+ if (ir->eI == eiBD)
+ {
+ integ = new BrownianIntegrator(ir->opts.ref_t[0], friction, ir->delta_t);
+ static_cast<BrownianIntegrator*>(integ)->setRandomNumberSeed(ir->ld_seed);
+ }
+ else if (EI_SD(ir->eI))
+ {
+ integ = new LangevinIntegrator(ir->opts.ref_t[0], friction, ir->delta_t);
+ static_cast<LangevinIntegrator*>(integ)->setRandomNumberSeed(ir->ld_seed);
+ }
+ else
+ {
+ integ = new VerletIntegrator(ir->delta_t);
+ if ( ir->etc != etcNO)
+ {
+ AndersenThermostat* thermostat = new AndersenThermostat(ir->opts.ref_t[0], friction);
+ sys->addForce(thermostat);
+ }
+ }
+
+ // Add pressure coupling
+ if (ir->epc != epcNO)
+ {
+ // convert gromacs pressure tensor to a scalar
+ double pressure = (ir->ref_p[0][0] + ir->ref_p[1][1] + ir->ref_p[2][2]) / 3.0;
+ int frequency = int(ir->tau_p / ir->delta_t); // update frequency in time steps
+ if (frequency < 1) frequency = 1;
+ double temperature = ir->opts.ref_t[0]; // in kelvin
+ sys->addForce(new MonteCarloBarostat(pressure, temperature, frequency));
+ }
+
+ integ->setConstraintTolerance(ir->shake_tol);
+
+ // Create a context and initialize it.
+ Context* context = NULL;
+
+ /*
+ OpenMM could automatically select the "best" GPU, however we're not
+ going to let it do that for now, as the current algorithm is very rudimentary
+ and we anyway support only CUDA.
+ if (platformOptStr == NULL || platformOptStr == "")
+ {
+ context = new Context(*sys, *integ);
+ }
+ else
+ */
+ {
+ /* which platform should we use */
+ for (int i = 0; i < (int)Platform::getNumPlatforms() && context == NULL; i++)
+ {
+ if (isStringEqNCase(opt->getOptionValue("platform"), Platform::getPlatform(i).getName()))
+ {
+ Platform& platform = Platform::getPlatform(i);
+ // set standard properties
+ platform.setPropertyDefaultValue("CudaDevice", opt->getOptionValue("deviceid"));
+ // TODO add extra properties
+ context = new Context(*sys, *integ, platform);
+ }
+ }
+ if (context == NULL)
+ {
+ gmx_fatal(FARGS, "The requested platform \"%s\" could not be found.",
+ opt->getOptionValue("platform").c_str());
+ }
+ }
+
+ Platform& platform = context->getPlatform();
+ fprintf(fplog, "Gromacs will use the OpenMM platform: %s\n", platform.getName().c_str());
+
+ const vector<string>& properties = platform.getPropertyNames();
+ if (debug)
+ {
+ for (int i = 0; i < (int)properties.size(); i++)
+ {
+ fprintf(debug, ">> %s: %s\n", properties[i].c_str(),
+ platform.getPropertyValue(*context, properties[i]).c_str());
+ }
+ }
+
+ /* only for CUDA */
+ if (isStringEqNCase(opt->getOptionValue("platform"), "CUDA"))
+ {
+ int tmp;
+ if (!from_string<int>(tmp, platform.getPropertyValue(*context, "CudaDevice"), std::dec))
+ {
+ gmx_fatal(FARGS, "Internal error: couldn't determine the device selected by OpenMM");
+
+ }
+
+ /* For now this is just to double-check if OpenMM selected the GPU we wanted,
+ but when we'll let OpenMM select the GPU automatically, it will query the deviceId.
+ */
+ /* NOTE(review): the two string pieces in the gmx_fatal below concatenate
+ without a separator, producing "device #%dwhile initialized..." —
+ a runtime string, not fixable in a comment-only change. */
+ if (tmp != devId)
+ {
+ gmx_fatal(FARGS, "Internal error: OpenMM is using device #%d"
+ "while initialized for device #%d", tmp, devId);
+ }
+
+ /* check GPU compatibility */
+ char gpuname[STRLEN];
+ /* NOTE(review): redundant — devId was already set from the same option
+ near the top of this function. The -1 below presumably means
+ "query the current device"; confirm against is_gmx_openmm_supported_gpu. */
+ devId = atoi(opt->getOptionValue("deviceid").c_str());
++ if (!is_gmx_openmm_supported_gpu(-1, gpuname))
+ {
+ if (!gmx_strcasecmp(opt->getOptionValue("force-device").c_str(), "yes"))
+ {
+ sprintf(warn_buf, "Non-supported GPU selected (#%d, %s), forced continuing."
+ "Note, that the simulation can be slow or it migth even crash.",
+ devId, gpuname);
+ fprintf(fplog, "%s\n", warn_buf);
+ gmx_warning(warn_buf);
+ }
+ else
+ {
+ gmx_fatal(FARGS, "The selected GPU (#%d, %s) is not supported by Gromacs! "
+ "Most probably you have a low-end GPU which would not perform well, "
+ "or new hardware that has not been tested with the current release. "
+ "If you still want to try using the device, use the force-device=yes option.",
+ devId, gpuname);
+ }
+ }
+ else
+ {
+ fprintf(fplog, "Gromacs will run on the GPU #%d (%s).\n", devId, gpuname);
+ }
+ }
+
+ /* only for CUDA */
+ if (isStringEqNCase(opt->getOptionValue("platform"), "CUDA"))
+ {
+ /* pre-simulation memtest */
+ runMemtest(fplog, -1, "Pre", opt);
+ }
+
+ /* Upload the starting coordinates and velocities to the context. */
+ vector<Vec3> pos(numAtoms);
+ vector<Vec3> vel(numAtoms);
+ for (int i = 0; i < numAtoms; ++i)
+ {
+ pos[i] = Vec3(state->x[i][0], state->x[i][1], state->x[i][2]);
+ vel[i] = Vec3(state->v[i][0], state->v[i][1], state->v[i][2]);
+ }
+ context->setPositions(pos);
+ context->setVelocities(vel);
+
+ // Return a structure containing the system, integrator, and context.
+ OpenMMData* data = new OpenMMData();
+ data->system = sys;
+ data->integrator = integ;
+ data->context = context;
+ data->removeCM = (ir->nstcomm > 0);
+ data->platformOpt = opt;
+ return data;
+ }
+ catch (std::exception& e)
+ {
+ gmx_fatal(FARGS, "OpenMM exception caught while initializating: %s", e.what());
+ }
+ return NULL; /* just to avoid warnings */
+}
+
+/*!
+ * \brief Integrate one step.
+ *
+ * Any OpenMM exception is converted into a fatal Gromacs error.
+ *
+ * \param[in] data OpenMMData object created by openmm_init().
+ */
+void openmm_take_one_step(void* data)
+{
+ // static int step = 0; printf("----> taking step #%d\n", step++);
+ try
+ {
+ static_cast<OpenMMData*>(data)->integrator->step(1);
+ }
+ catch (std::exception& e)
+ {
+ gmx_fatal(FARGS, "OpenMM exception caught while taking a step: %s", e.what());
+ }
+}
+
+/*!
+ * \brief Integrate n steps.
+ *
+ * Any OpenMM exception is converted into a fatal Gromacs error.
+ *
+ * \param[in] data OpenMMData object created by openmm_init().
+ * \param[in] nstep Number of integration steps to take.
+ */
+void openmm_take_steps(void* data, int nstep)
+{
+ try
+ {
+ static_cast<OpenMMData*>(data)->integrator->step(nstep);
+ }
+ catch (std::exception& e)
+ {
+ gmx_fatal(FARGS, "OpenMM exception caught while taking a step: %s", e.what());
+ }
+}
+
+/*!
+ * \brief Clean up the data structures created for OpenMM.
+ *
+ * On the CUDA platform a post-simulation GPU memory test is run before
+ * the OpenMM objects are destroyed.
+ *
+ * \param[in] fplog Log file pointer.
+ * \param[in] data OpenMMData object created by openmm_init(); freed here.
+ */
+void openmm_cleanup(FILE* fplog, void* data)
+{
+ OpenMMData* d = static_cast<OpenMMData*>(data);
+ /* only for CUDA */
+ if (isStringEqNCase(d->platformOpt->getOptionValue("platform"), "CUDA"))
+ {
+ /* post-simulation memtest */
+ runMemtest(fplog, -1, "Post", d->platformOpt);
+ }
+ /* Context must still be valid while runMemtest executes, so the
+ teardown happens last. */
+ delete d->system;
+ delete d->integrator;
+ delete d->context;
+ delete d->platformOpt;
+ delete d;
+}
+
+/*!
+ * \brief Copy the current state information from OpenMM into the Gromacs data structures.
+ *
+ * This function results in the requested properties being copied from the
+ * GPU to host. As this represents a bottleneck, the frequency of pulling data
+ * should be minimized.
+ *
+ * \param[in] data OpenMMData object created by openmm_init().
+ * \param[out] state State of the system: coordinates and velocities.
+ * \param[out] time Simulation time for which the state was created.
+ * \param[out] f Forces.
+ * \param[out] enerd Energies.
+ * \param[in] includePos True if coordinates are requested.
+ * \param[in] includeVel True if velocities are requested.
+ * \param[in] includeForce True if forces are requested.
+ * \param[in] includeEnergy True if energies are requested.
+ */
+void openmm_copy_state(void *data,
+ t_state *state, double *time,
+ rvec f[], gmx_enerdata_t *enerd,
+ gmx_bool includePos, gmx_bool includeVel, gmx_bool includeForce, gmx_bool includeEnergy)
+{
+ /* Build the bitmask of State data types to fetch in a single transfer. */
+ int types = 0;
+ if (includePos)
+ types += State::Positions;
+ if (includeVel)
+ types += State::Velocities;
+ if (includeForce)
+ types += State::Forces;
+ if (includeEnergy)
+ types += State::Energy;
+ if (types == 0)
+ return;
+ try
+ {
+ State currentState = static_cast<OpenMMData*>(data)->context->getState(types);
+ int numAtoms = static_cast<OpenMMData*>(data)->system->getNumParticles();
+ if (includePos)
+ {
+ for (int i = 0; i < numAtoms; i++)
+ {
+ Vec3 x = currentState.getPositions()[i];
+ state->x[i][0] = x[0];
+ state->x[i][1] = x[1];
+ state->x[i][2] = x[2];
+ }
+ }
+ if (includeVel)
+ {
+ for (int i = 0; i < numAtoms; i++)
+ {
+ Vec3 v = currentState.getVelocities()[i];
+ state->v[i][0] = v[0];
+ state->v[i][1] = v[1];
+ state->v[i][2] = v[2];
+ }
+ }
+ if (includeForce)
+ {
+ for (int i = 0; i < numAtoms; i++)
+ {
+ Vec3 force = currentState.getForces()[i];
+ f[i][0] = force[0];
+ f[i][1] = force[1];
+ f[i][2] = force[2];
+ }
+ }
+ if (includeEnergy)
+ {
+ /* degrees of freedom: 3N minus constraints, minus 3 more if
+ center-of-mass motion is removed */
+ int numConstraints = static_cast<OpenMMData*>(data)->system->getNumConstraints();
+ int dof = 3*numAtoms-numConstraints;
+ if (static_cast<OpenMMData*>(data)->removeCM)
+ dof -= 3;
+ enerd->term[F_EPOT] = currentState.getPotentialEnergy();
+ enerd->term[F_EKIN] = currentState.getKineticEnergy();
+ enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
+ /* instantaneous temperature from equipartition: T = 2*Ekin/(dof*kB) */
+ enerd->term[F_TEMP] = 2.0*enerd->term[F_EKIN]/dof/BOLTZ;
+ }
+ *time = currentState.getTime();
+ }
+ catch (std::exception& e)
+ {
+ gmx_fatal(FARGS, "OpenMM exception caught while retrieving state information: %s", e.what());
+ }
+}
--- /dev/null
--- /dev/null
++/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
++ *
++ *
++ * This source code is part of
++ *
++ * G R O M A C S
++ *
++ * GROningen MAchine for Chemical Simulations
++ *
++ * VERSION 4.6.0
++ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
++ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
++ * Copyright (c) 2001-2011, The GROMACS development team,
++ * check out http://www.gromacs.org for more information.
++
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version 2
++ * of the License, or (at your option) any later version.
++ *
++ * If you want to redistribute modifications, please consider that
++ * scientific software is very special. Version control is crucial -
++ * bugs must be traceable. We will be happy to consider code for
++ * inclusion in the official distribution, but derived work must not
++ * be called official GROMACS. Details are found in the README & COPYING
++ * files - if they are missing, get the official version at www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the papers on the package - you can find them in the top README file.
++ *
++ * For more info, check our website at http://www.gromacs.org
++ *
++ * And Hey:
++ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
++ */
++#ifdef HAVE_CONFIG_H
++#include <config.h>
++#endif
++
++#include "smalloc.h"
++#include "network.h"
++#include "calcgrid.h"
++#include "pme.h"
++#include "vec.h"
++#include "domdec.h"
++#include "nbnxn_cuda_data_mgmt.h"
++#include "force.h"
++#include "macros.h"
++#include "pme_switch.h"
++
++typedef struct {
++ real rcut;
++ real rlist;
++ real spacing;
++ ivec grid;
++ real grid_eff;
++ real coeff;
++ gmx_pme_t pmedata;
++
++ int count;
++ double cycles;
++} pme_setup_t;
++
++/* In the initial scan, step by grids that are at least a factor 0.8 coarser */
++#define PMES_GRID_SCALE_FAC 0.8
++/* In the initial scan, try to skip grids with uneven x/y/z spacing,
++ * checking if the "efficiency" is more than 5% worse than the previous grid.
++ */
++#define PMES_GRID_EFF_FAC 1.05
++/* Rerun up till 12% slower setups than the fastest up till now */
++#define PMES_SLOW_FAC 1.12
++/* If setups get more than 2% faster, do another round to avoid
++ * choosing a slower setup due to acceleration or fluctuations.
++ */
++#define PMES_ACCEL_TOL 1.02
++
++typedef struct pme_switch {
++ int nstage; /* the current maximum number of stages */
++
++ real cut_spacing; /* the minimum cutoff / PME grid spacing ratio */
++ real rbuf; /* the pairlist buffer size */
++ matrix box_start; /* the initial simulation box */
++ int n; /* the count of setup as well as the allocation size */
++ pme_setup_t *setup; /* the PME+cutoff setups */
++ int cur; /* the current setup */
++ int fastest; /* fastest setup up till now */
++ int start; /* start of setup range to consider in stage>0 */
++ int end; /* end of setup range to consider in stage>0 */
++
++ int stage; /* the current stage */
++} t_pme_switch;
++
++void switch_pme_init(pme_switch_t *pmes_p,
++ const t_inputrec *ir,matrix box,
++ const interaction_const_t *ic,
++ gmx_pme_t pmedata)
++{
++ pme_switch_t pmes;
++ real spm,sp;
++ int d;
++
++ snew(pmes,1);
++
++ /* Any number of stages >= 2 is supported */
++ pmes->nstage = 2;
++
++ pmes->rbuf = ic->rlist - ic->rcoulomb;
++
++ copy_mat(box,pmes->box_start);
++ if (ir->ePBC==epbcXY && ir->nwall==2)
++ {
++ svmul(ir->wall_ewald_zfac,pmes->box_start[ZZ],pmes->box_start[ZZ]);
++ }
++
++ pmes->n = 1;
++ snew(pmes->setup,pmes->n);
++
++ pmes->cur = 0;
++ pmes->setup[0].rcut = ic->rcoulomb;
++ pmes->setup[0].rlist = ic->rlist;
++ pmes->setup[0].grid[XX] = ir->nkx;
++ pmes->setup[0].grid[YY] = ir->nky;
++ pmes->setup[0].grid[ZZ] = ir->nkz;
++ pmes->setup[0].coeff = ic->ewaldcoeff;
++
++ pmes->setup[0].pmedata = pmedata;
++
++ spm = 0;
++ for(d=0; d<DIM; d++)
++ {
++ sp = norm(pmes->box_start[d])/pmes->setup[0].grid[d];
++ if (sp > spm)
++ {
++ spm = sp;
++ }
++ }
++ pmes->setup[0].spacing = spm;
++
++ if (ir->fourier_spacing > 0)
++ {
++ pmes->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
++ }
++ else
++ {
++ pmes->cut_spacing = ir->rcoulomb/pmes->setup[0].spacing;
++ }
++
++ pmes->stage = 0;
++
++ pmes->fastest = 0;
++ pmes->start = 0;
++
++ *pmes_p = pmes;
++}
++
++static gmx_bool switch_pme_increase_cutoff(pme_switch_t pmes,int pme_order)
++{
++ pme_setup_t *set;
++ real fac,sp;
++ int d;
++
++ /* Try to add a new setup with next larger cut-off to the list */
++ pmes->n++;
++ srenew(pmes->setup,pmes->n);
++ set = &pmes->setup[pmes->n-1];
++ set->pmedata = NULL;
++
++ fac = 1;
++ do
++ {
++ fac *= 1.01;
++ clear_ivec(set->grid);
++ sp = calc_grid(NULL,pmes->box_start,
++ fac*pmes->setup[pmes->cur].spacing,
++ &set->grid[XX],
++ &set->grid[YY],
++ &set->grid[ZZ]);
++
++ /* In parallel we can't have grids smaller than 2*pme_order,
++ * and we would anyhow not gain much speed at these grid sizes.
++ */
++ for(d=0; d<DIM; d++)
++ {
++ if (set->grid[d] <= 2*pme_order)
++ {
++ pmes->n--;
++
++ return FALSE;
++ }
++ }
++ }
++ while (sp <= 1.001*pmes->setup[pmes->cur].spacing);
++
++ set->rcut = pmes->cut_spacing*sp;
++ set->rlist = set->rcut + pmes->rbuf;
++ set->spacing = sp;
++ /* The grid efficiency is the size wrt a grid with uniform x/y/z spacing */
++ set->grid_eff = 1;
++ for(d=0; d<DIM; d++)
++ {
++ set->grid_eff *= (set->grid[d]*sp)/norm(pmes->box_start[d]);
++ }
++ /* The Ewald coefficient is inversly proportional to the cut-off */
++ set->coeff = pmes->setup[0].coeff*pmes->setup[0].rcut/set->rcut;
++
++ set->count = 0;
++ set->cycles = 0;
++
++ if (debug)
++ {
++ fprintf(debug,"PME switch grid %d %d %d, cutoff %f\n",
++ set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut);
++ }
++
++ return TRUE;
++}
++
++static void print_grid(FILE *fp_err,FILE *fp_log,
++ const char *pre,
++ const char *desc,
++ const pme_setup_t *set,
++ double cycles)
++{
++ char buf[STRLEN],buft[STRLEN];
++
++ if (cycles >= 0)
++ {
++ sprintf(buft,": %.1f M-cycles",cycles*1e-6);
++ }
++ else
++ {
++ buft[0] = '\0';
++ }
++ sprintf(buf,"%-11s%10s pme grid %d %d %d, cutoff %.3f%s",
++ pre,
++ desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut,
++ buft);
++ if (fp_err != NULL)
++ {
++ fprintf(fp_err,"%s\n",buf);
++ }
++ if (fp_log != NULL)
++ {
++ fprintf(fp_log,"%s\n",buf);
++ }
++}
++
++static void switch_to_stage1(pme_switch_t pmes)
++{
++ pmes->start = 0;
++ while (pmes->start+1 < pmes->n &&
++ (pmes->setup[pmes->start].count == 0 ||
++ pmes->setup[pmes->start].cycles >
++ pmes->setup[pmes->fastest].cycles*PMES_SLOW_FAC))
++ {
++ pmes->start++;
++ }
++ while (pmes->start > 0 && pmes->setup[pmes->start-1].cycles == 0)
++ {
++ pmes->start--;
++ }
++
++ pmes->end = pmes->n;
++ if (pmes->setup[pmes->end-1].count > 0 &&
++ pmes->setup[pmes->end-1].cycles >
++ pmes->setup[pmes->fastest].cycles*PMES_SLOW_FAC)
++ {
++ pmes->end--;
++ }
++
++ pmes->stage = 1;
++
++ /* Start add start, 1 will be added immediately after returning */
++ pmes->cur = pmes->start - 1;
++}
++
++gmx_bool switch_pme(pme_switch_t pmes,
++ t_commrec *cr,
++ FILE *fp_err,
++ FILE *fp_log,
++ t_inputrec *ir,
++ t_state *state,
++ double cycles,
++ interaction_const_t *ic,
++ nonbonded_verlet_t *nbv,
++ gmx_pme_t *pmedata,
++ int step)
++{
++ gmx_bool OK;
++ pme_setup_t *set;
++ double cycles_fast;
++ char buf[STRLEN];
++
++ if (pmes->stage == pmes->nstage)
++ {
++ return FALSE;
++ }
++
++ if (PAR(cr))
++ {
++ gmx_sumd(1,&cycles,cr);
++ cycles /= cr->nnodes;
++ }
++
++ set = &pmes->setup[pmes->cur];
++
++ set->count++;
++ if (set->count % 2 == 1)
++ {
++ /* Skip the first cycle, because the first step after a switch
++ * is much slower due to allocation and/or caching effects.
++ */
++ return TRUE;
++ }
++
++ sprintf(buf, "step %4d: ", step);
++ print_grid(fp_err,fp_log,buf,"timed with",set,cycles);
++
++ if (set->count <= 2)
++ {
++ set->cycles = cycles;
++ }
++ else
++ {
++ if (cycles*PMES_ACCEL_TOL < set->cycles &&
++ pmes->stage == pmes->nstage - 1)
++ {
++ /* The performance went up a lot (due to e.g. DD load balancing).
++ * Add a stage, keep the minima, but rescan all setups.
++ */
++ pmes->nstage++;
++
++ if (debug)
++ {
++ fprintf(debug,"The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
++ "Increased the number stages to %d"
++ " and ignoring the previous performance\n",
++ set->grid[XX],set->grid[YY],set->grid[ZZ],
++ cycles*1e-6,set->cycles*1e-6,PMES_ACCEL_TOL,
++ pmes->nstage);
++ }
++ }
++ set->cycles = min(set->cycles,cycles);
++ }
++
++ if (set->cycles < pmes->setup[pmes->fastest].cycles)
++ {
++ pmes->fastest = pmes->cur;
++ }
++ cycles_fast = pmes->setup[pmes->fastest].cycles;
++
++ /* Check in stage 0 if we should stop scanning grids.
++ * Stop when the time is more than SLOW_FAC longer than the fastest.
++ */
++ if (pmes->stage == 0 && pmes->cur > 0 &&
++ cycles > pmes->setup[pmes->fastest].cycles*PMES_SLOW_FAC)
++ {
++ pmes->n = pmes->cur + 1;
++ /* Done with scanning, go to stage 1 */
++ switch_to_stage1(pmes);
++ }
++
++ if (pmes->stage == 0)
++ {
++ int gridsize_start;
++
++ gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];
++
++ do
++ {
++ if (pmes->cur+1 < pmes->n)
++ {
++ /* We had already generated the next setup */
++ OK = TRUE;
++ }
++ else
++ {
++ /* Find the next setup */
++ OK = switch_pme_increase_cutoff(pmes,ir->pme_order);
++ }
++
++ if (OK && ir->ePBC != epbcNONE)
++ {
++ OK = (sqr(pmes->setup[pmes->cur+1].rlist)
++ <= max_cutoff2(ir->ePBC,state->box));
++ }
++
++ if (OK)
++ {
++ pmes->cur++;
++
++ if (DOMAINDECOMP(cr))
++ {
++ OK = change_dd_cutoff(cr,state,ir,
++ pmes->setup[pmes->cur].rlist);
++ if (!OK)
++ {
++ /* Failed: do not use this setup */
++ pmes->cur--;
++ }
++ }
++ }
++ if (!OK)
++ {
++ /* We hit the upper limit for the cut-off,
++ * the setup should not go further than cur.
++ */
++ pmes->n = pmes->cur + 1;
++ /* Switch to the next stage */
++ switch_to_stage1(pmes);
++ }
++ }
++ while (OK &&
++ !(pmes->setup[pmes->cur].grid[XX]*
++ pmes->setup[pmes->cur].grid[YY]*
++ pmes->setup[pmes->cur].grid[ZZ] <
++ gridsize_start*PMES_GRID_SCALE_FAC
++ &&
++ pmes->setup[pmes->cur].grid_eff <
++ pmes->setup[pmes->cur-1].grid_eff*PMES_GRID_EFF_FAC));
++ }
++
++ if (pmes->stage > 0 && pmes->end == 1)
++ {
++ pmes->cur = 0;
++ pmes->stage = pmes->nstage;
++ }
++ else if (pmes->stage > 0 && pmes->end > 1)
++ {
++ /* If stage = nstage-1:
++ * scan over all setups, rerunning only those setups
++ * which are not much slower than the fastest
++ * else:
++ * use the next setup
++ */
++ do
++ {
++ pmes->cur++;
++ if (pmes->cur == pmes->end)
++ {
++ pmes->stage++;
++ pmes->cur = pmes->start;
++ }
++ }
++ while (pmes->stage == pmes->nstage - 1 &&
++ pmes->setup[pmes->cur].count > 0 &&
++ pmes->setup[pmes->cur].cycles > cycles_fast*PMES_SLOW_FAC);
++
++ if (pmes->stage == pmes->nstage)
++ {
++ /* We are done optiming, use the fastest setup we found */
++ pmes->cur = pmes->fastest;
++ }
++ }
++
++ if (DOMAINDECOMP(cr) && pmes->stage > 0)
++ {
++ OK = change_dd_cutoff(cr,state,ir,pmes->setup[pmes->cur].rlist);
++ if (!OK)
++ {
++ /* Failsafe solution */
++ if (pmes->cur > 1 && pmes->stage == pmes->nstage)
++ {
++ pmes->stage--;
++ }
++ pmes->fastest = 0;
++ pmes->start = 0;
++ pmes->end = pmes->cur;
++ pmes->cur = pmes->start;
++ }
++ }
++
++ /* Change the Coulomb cut-off and the PME grid */
++
++ set = &pmes->setup[pmes->cur];
++
++ ic->rcoulomb = set->rcut;
++ ic->rlist = set->rlist;
++ ic->ewaldcoeff = set->coeff;
++
++ if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
++ {
++ nbnxn_cuda_pmetune_update_param(nbv->cu_nbv,ic);
++ }
++ else
++ {
++ init_interaction_const_tables(NULL,ic,nbv->grp[0].kernel_type);
++ }
++
++ if (nbv->ngrp > 1)
++ {
++ init_interaction_const_tables(NULL,ic,nbv->grp[1].kernel_type);
++ }
++
++ if (cr->duty & DUTY_PME)
++ {
++ if (pmes->setup[pmes->cur].pmedata == NULL)
++ {
++ /* Generate a new PME data structure,
++ * copying part of the old pointers.
++ */
++ gmx_pme_reinit(&set->pmedata,
++ cr,pmes->setup[0].pmedata,ir,
++ set->grid);
++ }
++ *pmedata = set->pmedata;
++ }
++ else
++ {
++ /* Tell our PME-only node to switch grid */
++ gmx_pme_send_switch(cr, set->grid, set->coeff);
++ }
++
++ if (debug)
++ {
++ print_grid(NULL,debug,"","switched to",set,-1);
++ }
++
++ if (pmes->stage == pmes->nstage)
++ {
++ print_grid(fp_err,fp_log,"","optimal",set,-1);
++ }
++
++ return TRUE;
++}
++
++void restart_switch_pme(pme_switch_t pmes, int n)
++{
++ pmes->nstage += n;
++}
--- /dev/null
-
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#ifdef __linux
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/syscall.h>
+#endif
+#include <signal.h>
+#include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
++#include <string.h>
++#include <assert.h>
+
+#include "typedefs.h"
+#include "smalloc.h"
+#include "sysstuff.h"
+#include "statutil.h"
+#include "mdrun.h"
++#include "md_logging.h"
++#include "md_support.h"
+#include "network.h"
+#include "pull.h"
+#include "pull_rotation.h"
+#include "names.h"
+#include "disre.h"
+#include "orires.h"
+#include "pme.h"
+#include "mdatoms.h"
+#include "repl_ex.h"
+#include "qmmm.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "coulomb.h"
+#include "constr.h"
+#include "mvdata.h"
+#include "checkpoint.h"
+#include "mtop_util.h"
+#include "sighandler.h"
+#include "tpxio.h"
+#include "txtdump.h"
++#include "gmx_detect_hardware.h"
++#include "gmx_omp_nthreads.h"
+#include "pull_rotation.h"
++#include "calc_verletbuf.h"
++#include "nbnxn_search.h"
++#include "../mdlib/nbnxn_consts.h"
++#include "gmx_fatal_collective.h"
+#include "membed.h"
+#include "macros.h"
- mda->ret=mdrunner(cr->nnodes, fplog, cr, mc.nfile, fnm, mc.oenv,
+#include "gmx_omp.h"
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#ifdef GMX_FAHCORE
+#include "corewrap.h"
+#endif
+
+#ifdef GMX_OPENMM
+#include "md_openmm.h"
+#endif
+
++#include "gpu_utils.h"
++#include "nbnxn_cuda_data_mgmt.h"
+
+typedef struct {
+ gmx_integrator_t *func;
+} gmx_intp_t;
+
+/* The array should match the eI array in include/types/enums.h */
+#ifdef GMX_OPENMM /* FIXME do_md_openmm needs fixing */
+const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm},{do_md_openmm}};
+#else
+const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
+#endif
+
+gmx_large_int_t deform_init_init_step_tpx;
+matrix deform_init_box_tpx;
+#ifdef GMX_THREAD_MPI
+tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
+#endif
+
+
+#ifdef GMX_THREAD_MPI
+struct mdrunner_arglist
+{
++ gmx_hw_opt_t *hw_opt;
+ FILE *fplog;
+ t_commrec *cr;
+ int nfile;
+ const t_filenm *fnm;
+ output_env_t oenv;
+ gmx_bool bVerbose;
+ gmx_bool bCompact;
+ int nstglobalcomm;
+ ivec ddxyz;
+ int dd_node_order;
+ real rdd;
+ real rconstr;
+ const char *dddlb_opt;
+ real dlb_scale;
+ const char *ddcsx;
+ const char *ddcsy;
+ const char *ddcsz;
++ const char *nbpu_opt;
++ int nsteps_cmdline;
+ int nstepout;
+ int resetstep;
+ int nmultisim;
+ int repl_ex_nst;
+ int repl_ex_nex;
+ int repl_ex_seed;
+ real pforce;
+ real cpt_period;
+ real max_hours;
+ const char *deviceOptions;
+ unsigned long Flags;
+ int ret; /* return value */
+};
+
+
+/* The function used for spawning threads. Extracts the mdrunner()
+ arguments from its one argument and calls mdrunner(), after making
+ a commrec. */
+static void mdrunner_start_fn(void *arg)
+{
+ struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
+ struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure
+ that it's thread-local. This doesn't
+ copy pointed-to items, of course,
+ but those are all const. */
+ t_commrec *cr; /* we need a local version of this */
+ FILE *fplog=NULL;
+ t_filenm *fnm;
+
+ fnm = dup_tfn(mc.nfile, mc.fnm);
+
+ cr = init_par_threads(mc.cr);
+
+ if (MASTER(cr))
+ {
+ fplog=mc.fplog;
+ }
+
- mc.ddcsx, mc.ddcsy, mc.ddcsz, mc.nstepout, mc.resetstep,
- mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
++ mda->ret=mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
+ mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
+ mc.ddxyz, mc.dd_node_order, mc.rdd,
+ mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
- static t_commrec *mdrunner_start_threads(int nthreads,
++ mc.ddcsx, mc.ddcsy, mc.ddcsz,
++ mc.nbpu_opt,
++ mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
++ mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
+ mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
+}
+
+/* called by mdrunner() to start a specific number of threads (including
+ the main thread) for thread-parallel runs. This in turn calls mdrunner()
+ for each thread.
+ All options besides nthreads are the same as for mdrunner(). */
- int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
- int repl_ex_nex, int repl_ex_seed, real pforce,real cpt_period, real max_hours,
++static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
+ FILE *fplog,t_commrec *cr,int nfile,
+ const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ gmx_bool bCompact, int nstglobalcomm,
+ ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ const char *dddlb_opt,real dlb_scale,
+ const char *ddcsx,const char *ddcsy,const char *ddcsz,
- if (nthreads<2)
++ const char *nbpu_opt,
++ int nsteps_cmdline, int nstepout,int resetstep,
++ int nmultisim,int repl_ex_nst,int repl_ex_nex, int repl_ex_seed,
++ real pforce,real cpt_period, real max_hours,
+ const char *deviceOptions, unsigned long Flags)
+{
+ int ret;
+ struct mdrunner_arglist *mda;
+ t_commrec *crn; /* the new commrec */
+ t_filenm *fnmn;
+
+ /* first check whether we even need to start tMPI */
- fprintf(stderr, "Starting %d threads\n",nthreads);
++ if (hw_opt->nthreads_tmpi < 2)
++ {
+ return cr;
++ }
+
+ /* a few small, one-time, almost unavoidable memory leaks: */
+ snew(mda,1);
+ fnmn=dup_tfn(nfile, fnm);
+
+ /* fill the data structure to pass as void pointer to thread start fn */
++ mda->hw_opt=hw_opt;
+ mda->fplog=fplog;
+ mda->cr=cr;
+ mda->nfile=nfile;
+ mda->fnm=fnmn;
+ mda->oenv=oenv;
+ mda->bVerbose=bVerbose;
+ mda->bCompact=bCompact;
+ mda->nstglobalcomm=nstglobalcomm;
+ mda->ddxyz[XX]=ddxyz[XX];
+ mda->ddxyz[YY]=ddxyz[YY];
+ mda->ddxyz[ZZ]=ddxyz[ZZ];
+ mda->dd_node_order=dd_node_order;
+ mda->rdd=rdd;
+ mda->rconstr=rconstr;
+ mda->dddlb_opt=dddlb_opt;
+ mda->dlb_scale=dlb_scale;
+ mda->ddcsx=ddcsx;
+ mda->ddcsy=ddcsy;
+ mda->ddcsz=ddcsz;
++ mda->nbpu_opt=nbpu_opt;
++ mda->nsteps_cmdline=nsteps_cmdline;
+ mda->nstepout=nstepout;
+ mda->resetstep=resetstep;
+ mda->nmultisim=nmultisim;
+ mda->repl_ex_nst=repl_ex_nst;
+ mda->repl_ex_nex=repl_ex_nex;
+ mda->repl_ex_seed=repl_ex_seed;
+ mda->pforce=pforce;
+ mda->cpt_period=cpt_period;
+ mda->max_hours=max_hours;
+ mda->deviceOptions=deviceOptions;
+ mda->Flags=Flags;
+
- ret=tMPI_Init_fn(TRUE, nthreads, mdrunner_start_fn, (void*)(mda) );
++ fprintf(stderr, "Starting %d tMPI threads\n",hw_opt->nthreads_tmpi);
+ fflush(stderr);
+ /* now spawn new threads that start mdrunner_start_fn(), while
+ the main thread returns */
- static int get_nthreads_mpi(int nthreads_requested, t_inputrec *inputrec,
- gmx_mtop_t *mtop)
++ ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi,
++ mdrunner_start_fn, (void*)(mda) );
+ if (ret!=TMPI_SUCCESS)
+ return NULL;
+
+ /* make a new comm_rec to reflect the new situation */
+ crn=init_par_threads(cr);
+ return crn;
+}
+
+
++static int get_tmpi_omp_thread_distribution(const gmx_hw_opt_t *hw_opt,
++ int nthreads_tot,
++ int ngpu)
++{
++ int nthreads_tmpi;
++
++ /* There are no separate PME nodes here, as we ensured in
++ * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
++ * and a conditional ensures we would not have ended up here.
++ * Note that separate PME nodes might be switched on later.
++ */
++ if (ngpu > 0)
++ {
++ nthreads_tmpi = ngpu;
++ if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
++ {
++ nthreads_tmpi = nthreads_tot;
++ }
++ }
++ else if (hw_opt->nthreads_omp > 0)
++ {
++ if (hw_opt->nthreads_omp > nthreads_tot)
++ {
++ gmx_fatal(FARGS,"More OpenMP threads requested (%d) than the total number of threads requested (%d)",hw_opt->nthreads_omp,nthreads_tot);
++ }
++ nthreads_tmpi = nthreads_tot/hw_opt->nthreads_omp;
++ }
++ else
++ {
++ /* TODO choose nthreads_omp based on hardware topology
++ when we have a hardware topology detection library */
++ /* Don't use OpenMP parallelization */
++ nthreads_tmpi = nthreads_tot;
++ }
++
++ return nthreads_tmpi;
++}
++
++
+/* Get the number of threads to use for thread-MPI based on how many
+ * were requested, which algorithms we're using,
+ * and how many particles there are.
++ * At the point we have already called check_and_update_hw_opt.
++ * Thus all options should be internally consistent and consistent
++ * with the hardware, except that ntmpi could be larger than #GPU.
+ */
- int nthreads,nthreads_new;
- int min_atoms_per_thread;
++static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
++ gmx_hw_opt_t *hw_opt,
++ t_inputrec *inputrec, gmx_mtop_t *mtop,
++ const t_commrec *cr,
++ FILE *fplog)
+{
- nthreads = nthreads_requested;
++ int nthreads_tot_max,nthreads_tmpi,nthreads_new,ngpu;
++ int min_atoms_per_mpi_thread;
+ char *env;
++ char sbuf[STRLEN];
++ gmx_bool bCanUseGPU;
+
- /* determine # of hardware threads. */
- if (nthreads_requested < 1)
++ if (hw_opt->nthreads_tmpi > 0)
++ {
++ /* Trivial, return right away */
++ return hw_opt->nthreads_tmpi;
++ }
+
- if ((env = getenv("GMX_MAX_THREADS")) != NULL)
- {
- nthreads = 0;
- sscanf(env,"%d",&nthreads);
- if (nthreads < 1)
- {
- gmx_fatal(FARGS,"GMX_MAX_THREADS (%d) should be larger than 0",
- nthreads);
- }
- }
- else
- {
- nthreads = tMPI_Thread_get_hw_number();
- }
++ /* How many total (#tMPI*#OpenMP) threads can we start? */
++ if (hw_opt->nthreads_tot > 0)
+ {
- min_atoms_per_thread = 0;
++ nthreads_tot_max = hw_opt->nthreads_tot;
++ }
++ else
++ {
++ nthreads_tot_max = tMPI_Thread_get_hw_number();
++ }
++
++ bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
++ if (bCanUseGPU)
++ {
++ ngpu = hwinfo->gpu_info.ncuda_dev_use;
++ }
++ else
++ {
++ ngpu = 0;
+ }
+
++ nthreads_tmpi =
++ get_tmpi_omp_thread_distribution(hw_opt,nthreads_tot_max,ngpu);
++
+ if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
+ {
+ /* Steps are divided over the nodes iso splitting the atoms */
- min_atoms_per_thread = MIN_ATOMS_PER_THREAD;
++ min_atoms_per_mpi_thread = 0;
+ }
+ else
+ {
- if (nthreads != 1 &&
++ if (bCanUseGPU)
++ {
++ min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
++ }
++ else
++ {
++ min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
++ }
+ }
+
+ /* Check if an algorithm does not support parallel simulation. */
- fprintf(stderr,"\nThe integration or electrostatics algorithm doesn't support parallel runs. Not starting any threads.\n");
- nthreads = 1;
++ if (nthreads_tmpi != 1 &&
+ ( inputrec->eI == eiLBFGS ||
+ inputrec->coulombtype == eelEWALD ) )
+ {
- else if (nthreads_requested < 1 &&
- mtop->natoms/nthreads < min_atoms_per_thread)
++ nthreads_tmpi = 1;
++
++ md_print_warn(cr,fplog,"The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
++ if (hw_opt->nthreads_tmpi > nthreads_tmpi)
++ {
++ gmx_fatal(FARGS,"You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
++ }
+ }
- nthreads_new = max(1,mtop->natoms/min_atoms_per_thread);
++ else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
+ {
+ /* the thread number was chosen automatically, but there are too many
+ threads (too few atoms per thread) */
- if (nthreads_new > 8 || (nthreads == 8 && nthreads_new > 4))
++ nthreads_new = max(1,mtop->natoms/min_atoms_per_mpi_thread);
+
- /* Use only multiples of 4 above 8 threads
++ if (nthreads_new > 8 || (nthreads_tmpi == 8 && nthreads_new > 4))
+ {
- nthreads = nthreads_new;
++ /* TODO replace this once we have proper HT detection
++ * Use only multiples of 4 above 8 threads
+ * or with an 8-core processor
+ * (to avoid 6 threads on 8 core processors with 4 real cores).
+ */
+ nthreads_new = (nthreads_new/4)*4;
+ }
+ else if (nthreads_new > 4)
+ {
+ /* Avoid 5 or 7 threads */
+ nthreads_new = (nthreads_new/2)*2;
+ }
+
- fprintf(stderr," only starting %d threads.\n",nthreads);
- fprintf(stderr," You can use the -nt option to optimize the number of threads.\n\n");
++ nthreads_tmpi = nthreads_new;
+
+ fprintf(stderr,"\n");
+ fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
- return nthreads;
++ fprintf(stderr," only starting %d thread-MPI threads.\n",nthreads_tmpi);
++ fprintf(stderr," You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
++ }
++
++ return nthreads_tmpi;
++}
++#endif /* GMX_THREAD_MPI */
++
++
++/* Environment variable for setting nstlist */
++static const char* NSTLIST_ENVVAR = "GMX_NSTLIST";
++/* Try to increase nstlist when using a GPU with nstlist less than this */
++static const int NSTLIST_GPU_ENOUGH = 20;
++/* Increase nstlist until the non-bonded cost increases more than this factor */
++static const float NBNXN_GPU_LIST_OK_FAC = 1.25;
++/* Don't increase nstlist beyond a non-bonded cost increases of this factor */
++static const float NBNXN_GPU_LIST_MAX_FAC = 1.40;
++
++/* Try to increase nstlist when running on a GPU */
++static void increase_nstlist(FILE *fp,t_commrec *cr,
++ t_inputrec *ir,const gmx_mtop_t *mtop,matrix box)
++{
++ char *env;
++ int nstlist_orig,nstlist_prev;
++ verletbuf_list_setup_t ls;
++ real rlist_inc,rlist_ok,rlist_max,rlist_new,rlist_prev;
++ int i;
++ t_state state_tmp;
++ gmx_bool bBox,bDD,bCont;
++ const char *nstl_fmt="\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
++ const char *vbd_err="Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used";
++ const char *box_err="Can not increase nstlist for GPU run because the box is too small";
++ const char *dd_err ="Can not increase nstlist for GPU run because of domain decomposition limitations";
++ char buf[STRLEN];
++
++ /* Number of + nstlist alternative values to try when switching */
++ const int nstl[]={ 20, 25, 40, 50 };
++#define NNSTL sizeof(nstl)/sizeof(nstl[0])
++
++ env = getenv(NSTLIST_ENVVAR);
++ if (env == NULL)
++ {
++ if (fp != NULL)
++ {
++ fprintf(fp,nstl_fmt,ir->nstlist);
++ }
++ }
++
++ if (ir->verletbuf_drift == 0)
++ {
++ gmx_fatal(FARGS,"You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
++ }
++
++ if (ir->verletbuf_drift < 0)
++ {
++ if (MASTER(cr))
++ {
++ fprintf(stderr,"%s\n",vbd_err);
++ }
++ if (fp != NULL)
++ {
++ fprintf(fp,"%s\n",vbd_err);
++ }
++
++ return;
++ }
++
++ nstlist_orig = ir->nstlist;
++ if (env != NULL)
++ {
++ sprintf(buf,"Getting nstlist from environment variable GMX_NSTLIST=%s",env);
++ if (MASTER(cr))
++ {
++ fprintf(stderr,"%s\n",buf);
++ }
++ if (fp != NULL)
++ {
++ fprintf(fp,"%s\n",buf);
++ }
++ sscanf(env,"%d",&ir->nstlist);
++ }
++
++ verletbuf_get_list_setup(TRUE,&ls);
++
++ /* Allow rlist to make the list double the size of the cut-off sphere */
++ rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE,mtop->natoms/det(box));
++ rlist_ok = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC,1.0/3.0) - rlist_inc;
++ rlist_max = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC,1.0/3.0) - rlist_inc;
++ if (debug)
++ {
++ fprintf(debug,"GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n",
++ rlist_inc,rlist_max);
++ }
++
++ i = 0;
++ nstlist_prev = nstlist_orig;
++ rlist_prev = ir->rlist;
++ do
++ {
++ if (env == NULL)
++ {
++ ir->nstlist = nstl[i];
++ }
++
++ /* Set the pair-list buffer size in ir */
++ calc_verlet_buffer_size(mtop,det(box),ir,ir->verletbuf_drift,&ls,
++ NULL,&rlist_new);
++
++ /* Does rlist fit in the box? */
++ bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC,box));
++ bDD = TRUE;
++ if (bBox && DOMAINDECOMP(cr))
++ {
++ /* Check if rlist fits in the domain decomposition */
++ if (inputrec2nboundeddim(ir) < DIM)
++ {
++ gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
++ }
++ copy_mat(box,state_tmp.box);
++ bDD = change_dd_cutoff(cr,&state_tmp,ir,rlist_new);
++ }
++
++ bCont = FALSE;
++
++ if (env == NULL)
++ {
++ if (bBox && bDD && rlist_new <= rlist_max)
++ {
++ /* Increase nstlist */
++ nstlist_prev = ir->nstlist;
++ rlist_prev = rlist_new;
++ bCont = (i+1 < NNSTL && rlist_new < rlist_ok);
++ }
++ else
++ {
++ /* Stick with the previous nstlist */
++ ir->nstlist = nstlist_prev;
++ rlist_new = rlist_prev;
++ bBox = TRUE;
++ bDD = TRUE;
++ }
++ }
++
++ i++;
++ }
++ while (bCont);
++
++ if (!bBox || !bDD)
++ {
++ gmx_warning(!bBox ? box_err : dd_err);
++ if (fp != NULL)
++ {
++ fprintf(fp,"\n%s\n",bBox ? box_err : dd_err);
++ }
++ ir->nstlist = nstlist_orig;
++ }
++ else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
++ {
++ sprintf(buf,"Changing nstlist from %d to %d, rlist from %g to %g",
++ nstlist_orig,ir->nstlist,
++ ir->rlist,rlist_new);
++ if (MASTER(cr))
++ {
++ fprintf(stderr,"%s\n\n",buf);
++ }
++ if (fp != NULL)
++ {
++ fprintf(fp,"%s\n\n",buf);
++ }
++ ir->rlist = rlist_new;
++ ir->rlistlong = rlist_new;
++ }
++}
++
++static void prepare_verlet_scheme(FILE *fplog,
++ gmx_hw_info_t *hwinfo,
++ t_commrec *cr,
++ gmx_hw_opt_t *hw_opt,
++ const char *nbpu_opt,
++ t_inputrec *ir,
++ const gmx_mtop_t *mtop,
++ matrix box,
++ gmx_bool *bUseGPU)
++{
++ /* Here we only check for GPU usage on the MPI master process,
++ * as here we don't know how many GPUs we will use yet.
++ * We check for a GPU on all processes later.
++ */
++ *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);
++
++ if (ir->verletbuf_drift > 0)
++ {
++ /* Update the Verlet buffer size for the current run setup */
++ verletbuf_list_setup_t ls;
++ real rlist_new;
++
++ /* Here we assume CPU acceleration is on. But as currently
++ * calc_verlet_buffer_size gives the same results for 4x8 and 4x4,
++ * and 4x2 gives a larger buffer than 4x4, this is ok.
++ */
++ verletbuf_get_list_setup(*bUseGPU,&ls);
++
++ calc_verlet_buffer_size(mtop,det(box),ir,
++ ir->verletbuf_drift,&ls,
++ NULL,&rlist_new);
++ if (rlist_new != ir->rlist)
++ {
++ if (fplog != NULL)
++ {
++ fprintf(fplog,"\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
++ ir->rlist,rlist_new,
++ ls.cluster_size_i,ls.cluster_size_j);
++ }
++ ir->rlist = rlist_new;
++ ir->rlistlong = rlist_new;
++ }
++ }
++
++ /* With a GPU or GPU emulation, check whether a larger nstlist helps performance */
++ if ((EI_DYNAMICS(ir->eI) &&
++ *bUseGPU &&
++ ir->nstlist < NSTLIST_GPU_ENOUGH) ||
++ getenv(NSTLIST_ENVVAR) != NULL)
++ {
++ /* Choose a better nstlist */
++ increase_nstlist(fplog,cr,ir,mtop,box);
++ }
++}
++
++static void convert_to_verlet_scheme(FILE *fplog,
++ t_inputrec *ir,
++ gmx_mtop_t *mtop,real box_vol)
++{
++ char *conv_mesg="Converting input file with group cut-off scheme to the Verlet cut-off scheme";
++
++ md_print_warn(NULL,fplog,"%s\n",conv_mesg);
++
++ ir->cutoff_scheme = ecutsVERLET;
++ ir->verletbuf_drift = 0.005;
++
++ if (ir->rcoulomb != ir->rvdw)
++ {
++ gmx_fatal(FARGS,"The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
++ }
++
++ if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
++ {
++ gmx_fatal(FARGS,"User non-bonded potentials are not (yet) supported with the Verlet scheme");
++ }
++ else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
++ {
++ md_print_warn(NULL,fplog,"Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials");
++
++ if (EVDW_SWITCHED(ir->vdwtype))
++ {
++ ir->vdwtype = evdwCUT;
++ }
++ if (EEL_SWITCHED(ir->coulombtype))
++ {
++ if (EEL_FULL(ir->coulombtype))
++ {
++ /* With full electrostatics only PME can be switched */
++ ir->coulombtype = eelPME;
++ }
++ else
++ {
++ md_print_warn(NULL,fplog,"NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n",eel_names[ir->coulombtype]);
++ ir->coulombtype = eelRF;
++ ir->epsilon_rf = 0.0;
++ }
++ }
++
++ /* We set the target energy drift to a small number.
++ * Note that this is only for testing. For production the user
++ * should think about this and set the mdp options.
++ */
++ ir->verletbuf_drift = 1e-4;
++ }
++
++ if (inputrec2nboundeddim(ir) != 3)
++ {
++ gmx_fatal(FARGS,"Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
+ }
- int mdrunner(int nthreads_requested, FILE *fplog,t_commrec *cr,int nfile,
++
++ if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
++ {
++ gmx_fatal(FARGS,"Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
++ }
++
++ if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
++ {
++ verletbuf_list_setup_t ls;
++
++ verletbuf_get_list_setup(FALSE,&ls);
++ calc_verlet_buffer_size(mtop,box_vol,ir,ir->verletbuf_drift,&ls,
++ NULL,&ir->rlist);
++ }
++ else
++ {
++ ir->verletbuf_drift = -1;
++ ir->rlist = 1.05*max(ir->rvdw,ir->rcoulomb);
++ }
++
++ gmx_mtop_remove_chargegroups(mtop);
+}
++
++
++/* Set CPU affinity. Can be important for performance.
++ On some systems (e.g. Cray) CPU Affinity is set by default.
++ But default assigning doesn't work (well) with only some ranks
++ having threads. This causes very low performance.
++ External tools have cumbersome syntax for setting affinity
++ in the case that only some ranks have threads.
++ Thus it is important that GROMACS sets the affinity internally
++ if only PME is using threads.
++*/
++static void set_cpu_affinity(FILE *fplog,
++ const t_commrec *cr,
++ const gmx_hw_opt_t *hw_opt,
++ int nthreads_pme,
++ const gmx_hw_info_t *hwinfo,
++ const t_inputrec *inputrec)
++{
++#ifdef GMX_OPENMP /* TODO: actually we could do this even without OpenMP?! */
++#ifdef __linux /* TODO: only linux? why not everywhere if sched_setaffinity is available */
++ if (hw_opt->bThreadPinning)
++ {
++ int thread, nthread_local, nthread_node, nthread_hw_max, nphyscore;
++ int offset;
++ char *env;
++
++ /* number of threads on this MPI process or tMPI thread */
++ if (cr->duty & DUTY_PP)
++ {
++ nthread_local = gmx_omp_nthreads_get(emntNonbonded);
++ }
++ else
++ {
++ nthread_local = gmx_omp_nthreads_get(emntPME);
++ }
++
++ /* map the current process to cores */
++ thread = 0;
++ nthread_node = nthread_local;
++#ifdef GMX_MPI
++ if (PAR(cr) || MULTISIM(cr))
++ {
++ /* We need to determine a scan of the thread counts in this
++ * compute node.
++ */
++ MPI_Comm comm_intra;
++
++ MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->nodeid_intra,
++ &comm_intra);
++ MPI_Scan(&nthread_local,&thread,1,MPI_INT,MPI_SUM,comm_intra);
++ /* MPI_Scan is inclusive, but here we need exclusive */
++ thread -= nthread_local;
++ /* Get the total number of threads on this physical node */
++ MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
++ MPI_Comm_free(&comm_intra);
++ }
+#endif
+
++ offset = 0;
++ if (hw_opt->core_pinning_offset > 0)
++ {
++ offset = hw_opt->core_pinning_offset;
++ if (SIMMASTER(cr))
++ {
++ fprintf(stderr, "Applying core pinning offset %d\n", offset);
++ }
++ if (fplog)
++ {
++ fprintf(fplog, "Applying core pinning offset %d\n", offset);
++ }
++ }
++
++ /* With Intel Hyper-Threading enabled, we want to pin consecutive
++ * threads to physical cores when using more threads than physical
++ * cores or when the user requests so.
++ */
++ nthread_hw_max = hwinfo->nthreads_hw_avail;
++ nphyscore = -1;
++ if (hw_opt->bPinHyperthreading ||
++ (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
++ nthread_node > nthread_hw_max/2 && getenv("GMX_DISABLE_PINHT") == NULL))
++ {
++ if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) != GMX_CPUID_X86_SMT_ENABLED)
++ {
++ /* We print to stderr on all processes, as we might have
++ * different settings on different physical nodes.
++ */
++ if (gmx_cpuid_vendor(hwinfo->cpuid_info) != GMX_CPUID_VENDOR_INTEL)
++ {
++ md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
++ "but non-Intel CPU detected (vendor: %s)\n",
++ gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
++ }
++ else
++ {
++ md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
++ "but the CPU detected does not have Intel Hyper-Threading support "
++ "(or it is turned off)\n");
++ }
++ }
++ nphyscore = nthread_hw_max/2;
+
- int nstepout,int resetstep,int nmultisim, int repl_ex_nst, int repl_ex_nex,
++ if (SIMMASTER(cr))
++ {
++ fprintf(stderr, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
++ nphyscore);
++ }
++ if (fplog)
++ {
++ fprintf(fplog, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
++ nphyscore);
++ }
++ }
++
++ /* set the per-thread affinity */
++#pragma omp parallel firstprivate(thread) num_threads(nthread_local)
++ {
++ cpu_set_t mask;
++ int core;
++
++ CPU_ZERO(&mask);
++ thread += gmx_omp_get_thread_num();
++ if (nphyscore <= 0)
++ {
++ core = offset + thread;
++ }
++ else
++ {
++ /* Lock pairs of threads to the same hyperthreaded core */
++ core = offset + thread/2 + (thread % 2)*nphyscore;
++ }
++ CPU_SET(core, &mask);
++ sched_setaffinity((pid_t) syscall (SYS_gettid), sizeof(cpu_set_t), &mask);
++ }
++ }
++#endif /* __linux */
++#endif /* GMX_OPENMP */
++}
++
++
++static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
++ int cutoff_scheme)
++{
++ gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp);
++
++#ifndef GMX_THREAD_MPI
++ if (hw_opt->nthreads_tot > 0)
++ {
++ gmx_fatal(FARGS,"Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
++ }
++ if (hw_opt->nthreads_tmpi > 0)
++ {
++ gmx_fatal(FARGS,"Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
++ }
++#endif
++
++ if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
++ {
++ /* We have the same number of OpenMP threads for PP and PME processes,
++ * thus we can perform several consistency checks.
++ */
++ if (hw_opt->nthreads_tmpi > 0 &&
++ hw_opt->nthreads_omp > 0 &&
++ hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
++ {
++ gmx_fatal(FARGS,"The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
++ hw_opt->nthreads_tot,hw_opt->nthreads_tmpi,hw_opt->nthreads_omp);
++ }
++
++ if (hw_opt->nthreads_tmpi > 0 &&
++ hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
++ {
++ gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
++ hw_opt->nthreads_tot,hw_opt->nthreads_tmpi);
++ }
++
++ if (hw_opt->nthreads_omp > 0 &&
++ hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
++ {
++ gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
++ hw_opt->nthreads_tot,hw_opt->nthreads_omp);
++ }
++
++ if (hw_opt->nthreads_tmpi > 0 &&
++ hw_opt->nthreads_omp <= 0)
++ {
++ hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
++ }
++ }
++
++#ifndef GMX_OPENMP
++ if (hw_opt->nthreads_omp > 1)
++ {
++ gmx_fatal(FARGS,"OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
++ }
++#endif
++
++ if (cutoff_scheme == ecutsGROUP)
++ {
++ /* We only have OpenMP support for PME only nodes */
++ if (hw_opt->nthreads_omp > 1)
++ {
++ gmx_fatal(FARGS,"OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
++ ecutscheme_names[cutoff_scheme],
++ ecutscheme_names[ecutsVERLET]);
++ }
++ hw_opt->nthreads_omp = 1;
++ }
++
++ if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
++ {
++ gmx_fatal(FARGS,"You need to specify -ntomp in addition to -ntomp_pme");
++ }
++
++ if (hw_opt->nthreads_tot == 1)
++ {
++ hw_opt->nthreads_tmpi = 1;
++
++ if (hw_opt->nthreads_omp > 1)
++ {
++ gmx_fatal(FARGS,"You requested %d OpenMP threads with %d total threads",
++ hw_opt->nthreads_omp,hw_opt->nthreads_tot); /* report the OpenMP count, not tMPI */
++ }
++ hw_opt->nthreads_omp = 1;
++ }
++
++ if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
++ {
++ hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
++ }
++
++ if (debug)
++ {
++ fprintf(debug,"hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
++ hw_opt->nthreads_tot,
++ hw_opt->nthreads_tmpi,
++ hw_opt->nthreads_omp,
++ hw_opt->nthreads_omp_pme,
++ hw_opt->gpu_id!=NULL ? hw_opt->gpu_id : "");
++
++ }
++}
++
++
++/* Override ir->nsteps with the -nsteps value passed on the command line (if any). */
++static void override_nsteps_cmdline(FILE *fplog,
++ int nsteps_cmdline,
++ t_inputrec *ir,
++ const t_commrec *cr)
++{
++ assert(ir);
++ assert(cr);
++
++ /* Any value other than the default of -2 means an override was requested */
++ if (nsteps_cmdline > -2)
++ {
++ char stmp[STRLEN];
++
++ ir->nsteps = nsteps_cmdline;
++ if (EI_DYNAMICS(ir->eI))
++ {
++ sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps, %.3f ps",
++ nsteps_cmdline, nsteps_cmdline*ir->delta_t);
++ }
++ else
++ {
++ sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps",
++ nsteps_cmdline);
++ }
++
++ md_print_warn(cr, fplog, "%s\n", stmp);
++ }
++}
++
++/* Data structure set by SIMMASTER which needs to be passed to all nodes
++ * before the other nodes have read the tpx file and called gmx_detect_hardware.
++ */
++typedef struct {
++ int cutoff_scheme; /* The cutoff scheme from the t_inputrec */
++ gmx_bool bUseGPU; /* Use GPU or GPU emulation */
++} master_inf_t;
++
++int mdrunner(gmx_hw_opt_t *hw_opt,
++ FILE *fplog,t_commrec *cr,int nfile,
+ const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ gmx_bool bCompact, int nstglobalcomm,
+ ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ const char *dddlb_opt,real dlb_scale,
+ const char *ddcsx,const char *ddcsy,const char *ddcsz,
- int nthreads_mpi=1;
++ const char *nbpu_opt,
++ int nsteps_cmdline, int nstepout,int resetstep,
++ int nmultisim,int repl_ex_nst,int repl_ex_nex,
+ int repl_ex_seed, real pforce,real cpt_period,real max_hours,
+ const char *deviceOptions, unsigned long Flags)
+{
++ gmx_bool bForceUseGPU,bTryUseGPU;
+ double nodetime=0,realtime;
+ t_inputrec *inputrec;
+ t_state *state=NULL;
+ matrix box;
+ gmx_ddbox_t ddbox={0};
+ int npme_major,npme_minor;
+ real tmpr1,tmpr2;
+ t_nrnb *nrnb;
+ gmx_mtop_t *mtop=NULL;
+ t_mdatoms *mdatoms=NULL;
+ t_forcerec *fr=NULL;
+ t_fcdata *fcd=NULL;
+ real ewaldcoeff=0;
+ gmx_pme_t *pmedata=NULL;
+ gmx_vsite_t *vsite=NULL;
+ gmx_constr_t constr;
+ int i,m,nChargePerturbed=-1,status,nalloc;
+ char *gro;
+ gmx_wallcycle_t wcycle;
+ gmx_bool bReadRNG,bReadEkin;
+ int list;
+ gmx_runtime_t runtime;
+ int rc;
+ gmx_large_int_t reset_counters;
+ gmx_edsam_t ed=NULL;
+ t_commrec *cr_old=cr;
-
- if (bVerbose && SIMMASTER(cr))
- {
- fprintf(stderr,"Getting Loaded...\n");
- }
+ int nthreads_pme=1;
++ int nthreads_pp=1;
+ gmx_membed_t membed=NULL;
++ gmx_hw_info_t *hwinfo=NULL;
++ master_inf_t minf={-1,FALSE};
+
+ /* CAUTION: threads may be started later on in this function, so
+ cr doesn't reflect the final parallel state right now */
+ snew(inputrec,1);
+ snew(mtop,1);
- if (MASTER(cr))
+
+ if (Flags & MD_APPENDFILES)
+ {
+ fplog = NULL;
+ }
+
++ bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
++ bTryUseGPU = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
++
+ snew(state,1);
- /* NOW the threads will be started: */
++ if (SIMMASTER(cr))
+ {
+ /* Read (nearly) all data required for the simulation */
+ read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
+
- nthreads_mpi = get_nthreads_mpi(nthreads_requested, inputrec, mtop);
++ if (inputrec->cutoff_scheme != ecutsVERLET &&
++ ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
++ {
++ convert_to_verlet_scheme(fplog,inputrec,mtop,det(state->box));
++ }
++
++ /* Detect hardware, gather information. With tMPI only thread 0 does it
++ * and after threads are started broadcasts hwinfo around. */
++ snew(hwinfo, 1);
++ gmx_detect_hardware(fplog, hwinfo, cr,
++ bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
++
++ minf.cutoff_scheme = inputrec->cutoff_scheme;
++ minf.bUseGPU = FALSE;
++
++ if (inputrec->cutoff_scheme == ecutsVERLET)
++ {
++ prepare_verlet_scheme(fplog,hwinfo,cr,hw_opt,nbpu_opt,
++ inputrec,mtop,state->box,
++ &minf.bUseGPU);
++ }
++ else if (hwinfo->bCanUseGPU)
++ {
++ md_print_warn(cr,fplog,
++ "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
++ " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
++ " (for quick performance testing you can use the -testverlet option)\n");
++
++ if (bForceUseGPU)
++ {
++ gmx_fatal(FARGS,"GPU requested, but can't be used without cutoff-scheme=Verlet");
++ }
++ }
++ }
++#ifndef GMX_THREAD_MPI
++ if (PAR(cr))
++ {
++ gmx_bcast_sim(sizeof(minf),&minf,cr);
++ }
++#endif
++ if (minf.bUseGPU && cr->npmenodes == -1)
++ {
++ /* Don't automatically use PME-only nodes with GPUs */
++ cr->npmenodes = 0;
++ }
++
+#ifdef GMX_THREAD_MPI
- if (nthreads_mpi > 1)
++ /* With thread-MPI inputrec is only set here on the master thread */
++ if (SIMMASTER(cr))
++#endif
++ {
++ check_and_update_hw_opt(hw_opt,minf.cutoff_scheme);
+
- cr=mdrunner_start_threads(nthreads_mpi, fplog, cr_old, nfile, fnm,
++#ifdef GMX_THREAD_MPI
++ if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
++ {
++ gmx_fatal(FARGS,"You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
++ }
++#endif
++
++ if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
++ cr->npmenodes <= 0)
++ {
++ gmx_fatal(FARGS,"You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
++ }
++ }
++
++#ifdef GMX_THREAD_MPI
++ if (SIMMASTER(cr))
++ {
++ /* NOW the threads will be started: */
++ hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
++ hw_opt,
++ inputrec, mtop,
++ cr, fplog);
++ if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
++ {
++ hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
++ }
++
++ if (hw_opt->nthreads_tmpi > 1)
+ {
+ /* now start the threads. */
- nstepout, resetstep, nmultisim,
++ cr=mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
+ oenv, bVerbose, bCompact, nstglobalcomm,
+ ddxyz, dd_node_order, rdd, rconstr,
+ dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
- #endif
++ nbpu_opt,
++ nsteps_cmdline, nstepout, resetstep, nmultisim,
+ repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
+ cpt_period, max_hours, deviceOptions,
+ Flags);
+ /* the main thread continues here with a new cr. We don't deallocate
+ the old cr because other threads may still be reading it. */
+ if (cr == NULL)
+ {
+ gmx_comm("Failed to spawn threads");
+ }
+ }
- "but mdrun was compiled without threads or MPI enabled"
+ }
++#endif
+ /* END OF CAUTION: cr is now reliable */
+
+ /* g_membed initialisation *
+ * Because we change the mtop, init_membed is called before the init_parallel *
+ * (in case we ever want to make it run in parallel) */
+ if (opt2bSet("-membed",nfile,fnm))
+ {
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"Initializing membed");
+ }
+ membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
+ }
+
+ if (PAR(cr))
+ {
+ /* now broadcast everything to the non-master nodes/threads: */
+ init_parallel(fplog, cr, inputrec, mtop);
++
++ /* This check needs to happen after get_nthreads_mpi() */
++ if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
++ {
++ gmx_fatal_collective(FARGS,cr,NULL,
++ "The Verlet cut-off scheme is not supported with particle decomposition.\n"
++ "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
++ }
+ }
+ if (fplog != NULL)
+ {
+ pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
+ }
+
++#if defined GMX_THREAD_MPI
++ /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer
++ * to the other threads -- slightly uncool, but works fine, just need to
++ * make sure that the data doesn't get freed twice. */
++ if (cr->nnodes > 1)
++ {
++ if (!SIMMASTER(cr))
++ {
++ snew(hwinfo, 1);
++ }
++ gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
++ }
++#else
++ if (PAR(cr) && !SIMMASTER(cr))
++ {
++ /* now we have inputrec on all nodes, can run the detection */
++ /* TODO: perhaps it's better to propagate within a node instead? */
++ snew(hwinfo, 1);
++ gmx_detect_hardware(fplog, hwinfo, cr,
++ bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
++ }
++#endif
++
+ /* now make sure the state is initialized and propagated */
+ set_state_entries(state,inputrec,cr->nnodes);
+
+ /* remove when vv and rerun works correctly! */
+ if (PAR(cr) && EI_VV(inputrec->eI) && ((Flags & MD_RERUN) || (Flags & MD_RERUN_VSITE)))
+ {
+ gmx_fatal(FARGS,
+ "Currently can't do velocity verlet with rerun in parallel.");
+ }
+
+ /* A parallel command line option consistency check that we can
+ only do after any threads have started. */
+ if (!PAR(cr) &&
+ (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
+ {
+ gmx_fatal(FARGS,
+ "The -dd or -npme option request a parallel simulation, "
+#ifndef GMX_MPI
- "but mdrun was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
++ "but %s was compiled without threads or MPI enabled"
+#else
+#ifdef GMX_THREAD_MPI
+ "but the number of threads (option -nt) is 1"
+#else
- if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog))
++ "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
+#endif
+#endif
++ , ShortProgram()
+ );
+ }
+
+ if ((Flags & MD_RERUN) &&
+ (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
+ {
+ gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
+ }
+
- if (bVerbose && SIMMASTER(cr))
- {
- fprintf(stderr,"Loaded with Money\n\n");
- }
-
++ if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog) && PAR(cr))
+ {
+ /* All-vs-all loops do not work with domain decomposition */
+ Flags |= MD_PARTDEC;
+ }
+
+ if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
+ {
+ if (cr->npmenodes > 0)
+ {
+ if (!EEL_PME(inputrec->coulombtype))
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "PME nodes are requested, but the system does not use PME electrostatics");
+ }
+ if (Flags & MD_PARTDEC)
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "PME nodes are requested, but particle decomposition does not support separate PME nodes");
+ }
+ }
+
+ cr->npmenodes = 0;
+ }
+
+#ifdef GMX_FAHCORE
+ fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
+#endif
+
+ /* NMR restraints must be initialized before load_checkpoint,
+ * since with time averaging the history is added to t_state.
+ * For proper consistency check we therefore need to extend
+ * t_state here.
+ * So the PME-only nodes (if present) will also initialize
+ * the distance restraints.
+ */
+ snew(fcd,1);
+
+ /* This needs to be called before read_checkpoint to extend the state */
+ init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
+
+ if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
+ {
+ if (PAR(cr) && !(Flags & MD_PARTDEC))
+ {
+ gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
+ }
+ /* Orientation restraints */
+ if (MASTER(cr))
+ {
+ init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
+ state);
+ }
+ }
+
+ if (DEFORM(*inputrec))
+ {
+ /* Store the deform reference box before reading the checkpoint */
+ if (SIMMASTER(cr))
+ {
+ copy_mat(state->box,box);
+ }
+ if (PAR(cr))
+ {
+ gmx_bcast(sizeof(box),box,cr);
+ }
+ /* Because we do not have the update struct available yet
+ * in which the reference values should be stored,
+ * we store them temporarily in static variables.
+ * This should be thread safe, since they are only written once
+ * and with identical values.
+ */
+#ifdef GMX_THREAD_MPI
+ tMPI_Thread_mutex_lock(&deform_init_box_mutex);
+#endif
+ deform_init_init_step_tpx = inputrec->init_step;
+ copy_mat(box,deform_init_box_tpx);
+#ifdef GMX_THREAD_MPI
+ tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
+#endif
+ }
+
+ if (opt2bSet("-cpi",nfile,fnm))
+ {
+ /* Check if checkpoint file exists before doing continuation.
+ * This way we can use identical input options for the first and subsequent runs...
+ */
+ if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
+ {
+ load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
+ cr,Flags & MD_PARTDEC,ddxyz,
+ inputrec,state,&bReadRNG,&bReadEkin,
+ (Flags & MD_APPENDFILES),
+ (Flags & MD_APPENDFILESSET));
+
+ if (bReadRNG)
+ {
+ Flags |= MD_READ_RNG;
+ }
+ if (bReadEkin)
+ {
+ Flags |= MD_READ_EKIN;
+ }
+ }
+ }
+
+ if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
+#ifdef GMX_THREAD_MPI
+ /* With thread MPI only the master node/thread exists in mdrun.c,
+ * therefore non-master nodes need to open the "seppot" log file here.
+ */
+ || (!MASTER(cr) && (Flags & MD_SEPPOT))
+#endif
+ )
+ {
+ gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
+ Flags,&fplog);
+ }
+
++ /* override nsteps with value from cmdline */
++ override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
++
+ if (SIMMASTER(cr))
+ {
+ copy_mat(state->box,box);
+ }
+
+ if (PAR(cr))
+ {
+ gmx_bcast(sizeof(box),box,cr);
+ }
+
+ /* Essential dynamics */
+ if (opt2bSet("-ei",nfile,fnm))
+ {
+ /* Open input and output files, allocate space for ED data structure */
+ ed = ed_open(nfile,fnm,Flags,cr);
+ }
+
- /* get number of OpenMP/PME threads
- * env variable should be read only on one node to make sure it is identical everywhere */
- #ifdef GMX_OPENMP
- if (EEL_PME(inputrec->coulombtype))
- {
- if (MASTER(cr))
- {
- char *ptr;
- if ((ptr=getenv("GMX_PME_NTHREADS")) != NULL)
- {
- sscanf(ptr,"%d",&nthreads_pme);
- }
- if (fplog != NULL && nthreads_pme > 1)
- {
- fprintf(fplog,"Using %d threads for PME\n",nthreads_pme);
- }
- }
- if (PAR(cr))
- {
- gmx_bcast_sim(sizeof(nthreads_pme),&nthreads_pme,cr);
- }
- }
+ if (PAR(cr) && !((Flags & MD_PARTDEC) ||
+ EI_TPI(inputrec->eI) ||
+ inputrec->eI == eiNM))
+ {
+ cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
+ dddlb_opt,dlb_scale,
+ ddcsx,ddcsy,ddcsz,
+ mtop,inputrec,
+ box,state->x,
+ &ddbox,&npme_major,&npme_minor);
+
+ make_dd_communicators(fplog,cr,dd_node_order);
+
+ /* Set overallocation to avoid frequent reallocation of arrays */
+ set_over_alloc_dd(TRUE);
+ }
+ else
+ {
+ /* PME, if used, is done on all nodes with 1D decomposition */
+ cr->npmenodes = 0;
+ cr->duty = (DUTY_PP | DUTY_PME);
+ npme_major = 1;
+ npme_minor = 1;
+ if (!EI_TPI(inputrec->eI))
+ {
+ npme_major = cr->nnodes;
+ }
+
+ if (inputrec->ePBC == epbcSCREW)
+ {
+ gmx_fatal(FARGS,
+ "pbc=%s is only implemented with domain decomposition",
+ epbc_names[inputrec->ePBC]);
+ }
+ }
+
+ if (PAR(cr))
+ {
+ /* After possible communicator splitting in make_dd_communicators.
+ * we can set up the intra/inter node communication.
+ */
+ gmx_setup_nodecomm(fplog,cr);
+ }
+
- wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pme);
++ /* Initialize per-node process ID and counters. */
++ gmx_init_intra_counters(cr);
++
++#ifdef GMX_MPI
++ md_print_info(cr,fplog,"Using %d MPI %s\n",
++ cr->nnodes,
++#ifdef GMX_THREAD_MPI
++ cr->nnodes==1 ? "thread" : "threads"
++#else
++ cr->nnodes==1 ? "process" : "processes"
++#endif
++ );
+#endif
+
-
++ gmx_omp_nthreads_init(fplog, cr,
++ hwinfo->nthreads_hw_avail,
++ hw_opt->nthreads_omp,
++ hw_opt->nthreads_omp_pme,
++ (cr->duty & DUTY_PP) == 0,
++ inputrec->cutoff_scheme == ecutsVERLET);
++
++ gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);
++
++ /* getting number of PP/PME threads
++ PME: env variable should be read only on one node to make sure it is
++ identical everywhere;
++ */
++ /* TODO nthreads_pp is only used for pinning threads.
++ * This is a temporary solution until we have a hw topology library.
++ */
++ nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);
++ nthreads_pme = gmx_omp_nthreads_get(emntPME);
++
++ wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pp,nthreads_pme);
++
+ if (PAR(cr))
+ {
+ /* Master synchronizes its value of reset_counters with all nodes
+ * including PME only nodes */
+ reset_counters = wcycle_get_reset_counters(wcycle);
+ gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
+ wcycle_set_reset_counters(wcycle, reset_counters);
+ }
+
- opt2fn("-tableb",nfile,fnm),FALSE,pforce);
+ snew(nrnb,1);
+ if (cr->duty & DUTY_PP)
+ {
+ /* For domain decomposition we allocate dynamically
+ * in dd_partition_system.
+ */
+ if (DOMAINDECOMP(cr))
+ {
+ bcast_state_setup(cr,state);
+ }
+ else
+ {
+ if (PAR(cr))
+ {
+ bcast_state(cr,state,TRUE);
+ }
+ }
+
+ /* Initiate forcerecord */
+ fr = mk_forcerec();
++ fr->hwinfo = hwinfo;
+ init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ opt2fn("-table",nfile,fnm),
+ opt2fn("-tabletf",nfile,fnm),
+ opt2fn("-tablep",nfile,fnm),
-
- /* Set CPU affinity. Can be important for performance.
- On some systems (e.g. Cray) CPU Affinity is set by default.
- But default assigning doesn't work (well) with only some ranks
- having threads. This causes very low performance.
- External tools have cumbersome syntax for setting affinity
- in the case that only some ranks have threads.
- Thus it is important that GROMACS sets the affinity internally at
- if only PME is using threads.
- */
-
- #ifdef GMX_OPENMP
- #ifdef __linux
- #ifdef GMX_LIB_MPI
- {
- int core;
- MPI_Comm comm_intra; /* intra communicator (but different to nc.comm_intra includes PME nodes) */
- MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),gmx_node_rank(),&comm_intra);
- int local_omp_nthreads = (cr->duty & DUTY_PME) ? nthreads_pme : 1; /* threads on this node */
- MPI_Scan(&local_omp_nthreads,&core, 1, MPI_INT, MPI_SUM, comm_intra);
- core-=local_omp_nthreads; /* make exclusive scan */
- #pragma omp parallel firstprivate(core) num_threads(local_omp_nthreads)
- {
- cpu_set_t mask;
- CPU_ZERO(&mask);
- core+=gmx_omp_get_thread_num();
- CPU_SET(core,&mask);
- sched_setaffinity((pid_t) syscall (SYS_gettid),sizeof(cpu_set_t),&mask);
- }
- }
- #endif /*GMX_MPI*/
- #endif /*__linux*/
- #endif /*GMX_OPENMP*/
-
++ opt2fn("-tableb",nfile,fnm),
++ nbpu_opt,
++ FALSE,pforce);
+
+ /* version for PCA_NOT_READ_NODE (see md.c) */
+ /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ "nofile","nofile","nofile","nofile",FALSE,pforce);
+ */
+ fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
+
+ /* Initialize QM-MM */
+ if(fr->bQMMM)
+ {
+ init_QMMMrec(cr,box,mtop,inputrec,fr);
+ }
+
+ /* Initialize the mdatoms structure.
+ * mdatoms is not filled with atom data,
+ * as this can not be done now with domain decomposition.
+ */
+ mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
+
+ /* Initialize the virtual site communication */
+ vsite = init_vsite(mtop,cr);
+
+ calc_shifts(box,fr->shift_vec);
+
+ /* With periodic molecules the charge groups should be whole at start up
+ * and the virtual sites should not be far from their proper positions.
+ */
+ if (!inputrec->bContinuation && MASTER(cr) &&
+ !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
+ {
+ /* Make molecules whole at start of run */
+ if (fr->ePBC != epbcNONE)
+ {
+ do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
+ }
+ if (vsite)
+ {
+ /* Correct initial vsite positions are required
+ * for the initial distribution in the domain decomposition
+ * and for the initial shell prediction.
+ */
+ construct_vsites_mtop(fplog,vsite,mtop,state->x);
+ }
+ }
+
+ if (EEL_PME(fr->eeltype))
+ {
+ ewaldcoeff = fr->ewaldcoeff;
+ pmedata = &fr->pmedata;
+ }
+ else
+ {
+ pmedata = NULL;
+ }
+ }
+ else
+ {
+ /* This is a PME only node */
+
+ /* We don't need the state */
+ done_state(state);
+
+ ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
+ snew(pmedata,1);
+ }
+
++#if defined GMX_THREAD_MPI
++ /* With the number of TMPI threads equal to the number of cores
++ * we already pinned in thread-MPI, so don't pin again here.
++ */
++ if (hw_opt->nthreads_tmpi != tMPI_Thread_get_hw_number())
++#endif
++ {
++ /* Set the CPU affinity */
++ set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
++ }
++
+ /* Initiate PME if necessary,
+ * either on all nodes or on dedicated PME nodes only. */
+ if (EEL_PME(inputrec->coulombtype))
+ {
+ if (mdatoms)
+ {
+ nChargePerturbed = mdatoms->nChargePerturbed;
+ }
+ if (cr->npmenodes > 0)
+ {
+ /* The PME only nodes need to know nChargePerturbed */
+ gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
+ }
+
+ if (cr->duty & DUTY_PME)
+ {
+ status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
+ mtop ? mtop->natoms : 0,nChargePerturbed,
+ (Flags & MD_REPRODUCIBLE),nthreads_pme);
+ if (status != 0)
+ {
+ gmx_fatal(FARGS,"Error %d initializing PME",status);
+ }
+ }
+ }
+
+
+ if (integrator[inputrec->eI].func == do_md
+#ifdef GMX_OPENMM
+ ||
+ integrator[inputrec->eI].func == do_md_openmm
+#endif
+ )
+ {
+ /* Turn on signal handling on all nodes */
+ /*
+         * A user signal from the PME nodes (if any)
+ * is communicated to the PP nodes.
+ */
+ signal_handler_install();
+ }
+
+ if (cr->duty & DUTY_PP)
+ {
+ if (inputrec->ePull != epullNO)
+ {
+ /* Initialize pull code */
+ init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
+ EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
+ }
+
+ if (inputrec->bRot)
+ {
+ /* Initialize enforced rotation code */
+ init_rot(fplog,inputrec,nfile,fnm,cr,state->x,box,mtop,oenv,
+ bVerbose,Flags);
+ }
+
+ constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
+
+ if (DOMAINDECOMP(cr))
+ {
+ dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
+ Flags & MD_DDBONDCHECK,fr->cginfo_mb);
+
+ set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
+
+ setup_dd_grid(fplog,cr->dd);
+ }
+
+ /* Now do whatever the user wants us to do (how flexible...) */
+ integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
+ oenv,bVerbose,bCompact,
+ nstglobalcomm,
+ vsite,constr,
+ nstepout,inputrec,mtop,
+ fcd,state,
+ mdatoms,nrnb,wcycle,ed,fr,
+ repl_ex_nst,repl_ex_nex,repl_ex_seed,
+ membed,
+ cpt_period,max_hours,
+ deviceOptions,
+ Flags,
+ &runtime);
+
+ if (inputrec->ePull != epullNO)
+ {
+ finish_pull(fplog,inputrec->pull);
+ }
+
+ if (inputrec->bRot)
+ {
+ finish_rot(fplog,inputrec->rot);
+ }
+
+ }
+ else
+ {
+ /* do PME only */
+ gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
+ }
+
+ if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
+ {
+ /* Some timing stats */
+ if (SIMMASTER(cr))
+ {
+ if (runtime.proc == 0)
+ {
+ runtime.proc = runtime.real;
+ }
+ }
+ else
+ {
+ runtime.real = 0;
+ }
+ }
+
+ wallcycle_stop(wcycle,ewcRUN);
+
+ /* Finish up, write some stuff
+ * if rerunMD, don't write last frame again
+ */
+ finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
+ inputrec,nrnb,wcycle,&runtime,
++ fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
++ nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
++ nthreads_pp,
+ EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
+
++ if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU)
++ {
++ char gpu_err_str[STRLEN];
++
++ /* free GPU memory and uninitialize GPU (by destroying the context) */
++ nbnxn_cuda_free(fplog, fr->nbv->cu_nbv);
++
++ if (!free_gpu(gpu_err_str))
++ {
++ gmx_warning("On node %d failed to free GPU #%d: %s",
++ cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
++ }
++ }
++
+ if (opt2bSet("-membed",nfile,fnm))
+ {
+ sfree(membed);
+ }
+
++#ifdef GMX_THREAD_MPI
++ if (PAR(cr) && SIMMASTER(cr))
++#endif
++ {
++ gmx_hardware_info_free(hwinfo);
++ }
++
+ /* Does what it says */
+ print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
+
+ /* Close logfile already here if we were appending to it */
+ if (MASTER(cr) && (Flags & MD_APPENDFILES))
+ {
+ gmx_log_close(fplog);
+ }
+
+ rc=(int)gmx_get_stop_condition();
+
+#ifdef GMX_THREAD_MPI
+ /* we need to join all threads. The sub-threads join when they
+ exit this function, but the master thread needs to be told to
+ wait for that. */
+ if (PAR(cr) && MASTER(cr))
+ {
+ tMPI_Finalize();
+ }
+#endif
+
+ return rc;
+}